Sql-server – Alternative query to avoid count(distinct)

countdistinctsql-server-2008-r2

i have a performance issue with my query and i need to rewrite it to have my result in better time. If you can help me with an idea?

My query is the following:

select 
    dc.Country,
    COUNT(DISTINCT fvcd.IdAppInstance) AS Visitors
FROM 
    dbo.Fact_VisitorsCountryDetails fvcd
    INNER JOIN dbo.Dim_Country dc ON dc.Id = fvcd.IdCountry
WHERE
    fvcd.IdDate BETWEEN @IdMinDate AND @IdMaxDate 
    AND fvcd.IdPartnerid = @IdPartnerId
GROUP BY
    dc.Country
ORDER BY
    Visitors DESC;

My DDL is:

CREATE TABLE [dbo].[Dim_Country](
    [Id] [bigint] NOT NULL,
    [Country] [varchar](24) NULL,
 CONSTRAINT [PK_Dim_Country_Id] PRIMARY KEY CLUSTERED 
(
    [Id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]

CREATE TABLE [dbo].[Fact_VisitorsCountryDetails](
    [IdDate] [bigint] NULL,
    [IdPartnerId] [bigint] NULL,
    [IdCountry] [bigint] NULL,
    [IdAppinstance] [bigint] NULL
) ON [PRIMARY]

GO

ALTER TABLE [dbo].[Fact_VisitorsCountryDetails]  WITH NOCHECK ADD  CONSTRAINT [FK_Fact_VisitorsCountryDetails_Dim_Country_Id] FOREIGN KEY([IdCountry])
REFERENCES [dbo].[Dim_Country] ([Id])
GO

ALTER TABLE [dbo].[Fact_VisitorsCountryDetails] CHECK CONSTRAINT [FK_Fact_VisitorsCountryDetails_Dim_Country_Id]
GO

ALTER TABLE [dbo].[Fact_VisitorsCountryDetails]  WITH NOCHECK ADD  CONSTRAINT [FK_Fact_VisitorsCountryDetails_Dim_Date_Id] FOREIGN KEY([IdDate])
REFERENCES [dbo].[Dim_Date] ([Id])
GO

ALTER TABLE [dbo].[Fact_VisitorsCountryDetails] CHECK CONSTRAINT [FK_Fact_VisitorsCountryDetails_Dim_Date_Id]
GO

ALTER TABLE [dbo].[Fact_VisitorsCountryDetails]  WITH NOCHECK ADD  CONSTRAINT [FK_Fact_VisitorsCountryDetails_Dim_PartnerId_Id] FOREIGN KEY([IdPartnerId])
REFERENCES [dbo].[Dim_PartnerId] ([Id])
GO

ALTER TABLE [dbo].[Fact_VisitorsCountryDetails] CHECK CONSTRAINT [FK_Fact_VisitorsCountryDetails_Dim_PartnerId_Id]
GO

Best Answer

DISTINCT is always a huge overhead. You might try to move the aggregation into a Derived Table (of course this might only help if the optimizer is not smart enough to do that automatically)

select dc.Country,
  fvcd.Visitors
from 
 (
   select 
       fvcd.IdCountry,
       COUNT(DISTINCT fvcd.IdAppInstance) AS Visitors
   FROM 
       dbo.Fact_VisitorsCountryDetails fvcd
   WHERE
       fvcd.IdDate BETWEEN @IdMinDate AND @IdMaxDate 
       AND fvcd.IdPartnerid = @IdPartnerId
   GROUP BY
       fvcd.IdCountry
 ) fvcd
INNER JOIN dbo.Dim_Country dc ON dc.Id = fvcd.IdCountry   
ORDER BY
    Visitors DESC;