Sql-server – Performance issues summing a count column over a date range

aggregatesql serversum

In our application, we have a query which on an API endpoint to return users who have 'activity' to a named client or matter (one or many – specified by ID). This query takes about 15 seconds to return when the activity table has 30 million rows (note there is also ~600k rows in the 'Asset' table and 2700 'users').

The rough schema for the tables can be found at the bottom of my question. Below is our query to find 'users who have activity over a threshold'. For brevity I have placed the view definition below also.

As this endpoint returns a page of data, a second, similar, query is also ran to get the total count of elements to populate paging response – essentially then the performance of the endpoint is 2x the query performance.

My question is essentially, what techniques should I be applying to get improve the performance of this query? A 'benchmark' we try to stick to is 'subsecond' on our endpoint responses.

Query plan can be found here.

SELECT DISTINCT t.type, t.sid, t.name, t.emailAddress, t.jobTitle
FROM sec.Trustee t
    INNER JOIN (
        SELECT data.sid, SUM(data.hoursBilled) as hoursBilled, SUM(data.docsAccessed) as docsAccessed,     data.asset_type as asset_type, data.displayId as displayId, data.displayName as displayName
        FROM (
            SELECT billing.trustee_sid as sid, 0 as hoursBilled, billing.recordedValue as docsAccessed, a.type as asset_type, a.displayId, a.displayName
            FROM sec._DocumentsBilling billing
            INNER JOIN sec.SessionSid s
            ON s.sid = billing.client_sid
            AND s.setID = @P0
            INNER JOIN sec.Asset a
            ON a.sid = billing.client_sid
            AND billing.recordedDate > @P1
            AND billing.client_sid IN (@P2)
        ) data
        GROUP BY data.sid, data.asset_type, data.displayId, data.displayName 
        HAVING SUM(data.docsAccessed) > @P4
) trusteeData ON trusteeData.sid = t.sid
ORDER BY sid
OFFSET @P6 ROWS
FETCH NEXT @P7 ROWS ONLY

The biggest performance cost is joining the 'Asset' table onto the result of this activity, product requirements require us to match by the displayId, rather than the sid (which is the PK of the Asset table.

  • Clients and Matters are collectively referred to as 'Assets', with the discriminator table being the type column.
  • If the matterId in the billing table is set to an empty string, we count that as 'client' billing.
  • It is not likely that all 'user matchers' are going to be required (e.g. email/'foreignid'/'domainUser'/'unqualifiedDomainUser') – and we have the ability to drop this to only choose that that are required for a given tenant (customer).
CREATE VIEW sec._DocumentsBilling AS
    SELECT data.billingType as billingType, data.trustee_sid as trustee_sid, data.recordedValue, client.sid as client_sid, client.clientGroup_sid as clientGroup_sid, client.securityType as clientSecurityType, matter.sid as matter_sid, matter.securityType as matterSecurityType, matter.matterGroup_sid as matterGroup_sid, data.recordedDate
    FROM (
        SELECT N'DOCUMENTS' as billingType, t.sid as trustee_sid, recordedValue, recordedDate, clientId, matterId
        FROM sec.userActivityDocuments d
        INNER JOIN sec.Trustee t ON t.emailAddress = d.userString AND d.userType = N'EMAIL'
        UNION ALL
        SELECT N'DOCUMENTS' as billingType, t.sid as trustee_sid, recordedValue, recordedDate, clientId, matterId
        FROM sec.userActivityDocuments d
        INNER JOIN sec.Trustee t ON t.foreignId = d.userString AND d.userType = N'FOREIGNID'
        UNION ALL
        SELECT N'DOCUMENTS' as billingType, t.sid as trustee_sid, recordedValue, recordedDate, clientId, matterId
        FROM sec.userActivityDocuments d
        INNER JOIN sec.Trustee t ON t.domainUser = d.userString AND d.userType = N'DOMAIN'
        UNION ALL
        SELECT N'DOCUMENTS' as billingType, t.sid as trustee_sid, recordedValue, recordedDate, clientId, matterId
        FROM sec.userActivityDocuments d
        INNER JOIN sec.Trustee t ON t.unqualifiedDomainUser = d.userString AND d.userType = N'UNQUALIFIED_DOMAIN'
    ) data
    INNER JOIN sec.Asset client ON client.displayId = data.clientId
    LEFT JOIN sec.Asset matter ON matter.client_sid = client.sid AND matter.displayId = data.matterId
    WHERE (matter.sid IS NULL AND data.matterId = '') OR matter.sid IS NOT NULL
GO

CREATE TABLE [tenant].[Asset](
    [type] [nvarchar](100) NOT NULL,
    [sid] [bigint] IDENTITY(1,1) NOT NULL,
    [displayId] [nvarchar](32) NOT NULL,
    [tenant] [bigint] NOT NULL,
    [client_sid] [bigint] NULL,
    [clientGroup_sid] [bigint] NULL,
    [matterGroup_sid] [bigint] NULL,
) 

CREATE TABLE [tenant].[Trustee](
    [type] [nvarchar](32) NOT NULL,
    [sid] [bigint] IDENTITY(1,1) NOT NULL,
    [emailAddress] [nvarchar](255) NULL,
    [name] [nvarchar](255) NULL,
    [jobTitle] [nvarchar](255) NULL,
    [foreignId] [nvarchar](255) NULL,
    [tenant] [bigint] NOT NULL,
    [domainUser] [nvarchar](255) NULL,
    [unqualifiedDomainUser] [nvarchar](255) NULL,
)

CREATE TABLE [tenant].[userActivityDocuments](
    [id] [binary](16) NOT NULL,
    [tenant] [bigint] NOT NULL,
    [userType] [nvarchar](64) NOT NULL,
    [systemName] [nvarchar](200) NOT NULL,
    [clientId] [nvarchar](32) NOT NULL,
    [matterId] [nvarchar](32) NOT NULL,
    [recordedValue] [int] NOT NULL,
    [recordedDate] [date] NOT NULL,
    [recordedDateTime] [datetime2](7) NOT NULL,
    [userString] [nvarchar](255) NOT NULL,
    [collectionTime] [bigint] NOT NULL
)

ALTER TABLE [tenant].[Asset] ADD  CONSTRAINT [PK_Asset] PRIMARY KEY CLUSTERED 
(
    [tenant] ASC,
    [sid] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80) ON [PRIMARY]
GO

ALTER TABLE [tenant].[Asset] ADD  CONSTRAINT [UNQ_ASSET_NAME_1] UNIQUE NONCLUSTERED 
(
    [tenant] ASC,
    [displayId] ASC,
    [type] ASC,
    [client_sid] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80) ON [PRIMARY]
GO

ALTER TABLE [tenant].[Trustee] ADD  CONSTRAINT [PK_Trustee] PRIMARY KEY CLUSTERED 
(
    [tenant] ASC,
    [sid] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80) ON [PRIMARY]
GO

CREATE UNIQUE NONCLUSTERED INDEX [FIDX_Trustee_1] ON [tenant].[Trustee]
(
    [tenant] ASC,
    [domainUser] ASC
)
INCLUDE([isDeleting]) 
WHERE ([domainUser] IS NOT NULL)
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80) ON [PRIMARY]
GO

CREATE UNIQUE NONCLUSTERED INDEX [FIDX_Trustee_2] ON [tenant].[Trustee]
(
    [tenant] ASC,
    [foreignId] ASC
)
INCLUDE([isDeleting]) 
WHERE ([foreignId] IS NOT NULL)
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO

CREATE NONCLUSTERED INDEX [FIDX_Trustee_3] ON [tenant].[Trustee]
(
    [tenant] ASC,
    [emailAddress] ASC
)
INCLUDE([isDeleting]) 
WHERE ([emailAddress] IS NOT NULL)
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80) ON [PRIMARY]
GO

CREATE NONCLUSTERED INDEX [FIDX_Trustee_5] ON [tenant].[Trustee]
(
    [tenant] ASC,
    [unqualifiedDomainUser] ASC
)
INCLUDE([isDeleting]) 
WHERE ([unqualifiedDomainUser] IS NOT NULL)
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80) ON [PRIMARY]
GO

ALTER TABLE [tenant].[userActivityDocuments] ADD  CONSTRAINT [PK_userActivityDocuments] PRIMARY KEY CLUSTERED 
(
    [id] ASC,
    [tenant] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO

CREATE NONCLUSTERED INDEX [IDX_UserActivityDocuments_1] ON [tenant].[userActivityDocuments]
(
    [tenant] ASC,
    [clientId] ASC,
    [matterId] ASC,
    [userType] ASC,
    [recordedDate] ASC
)
INCLUDE([userString],[recordedValue]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 100) ON [PRIMARY]
GO

Best Answer

I can't speak for the query accessing the view but the samples you provided give a quick insight into the performance of the view.

You are getting a filter operator at the end of all of your joins (INNER & LEFT OUTER) to be able to satisfy the where clause:

WHERE (matter.sid IS NULL AND data.matterId = '') OR matter.sid IS NOT NULL

enter image description here

Splitting the view up further with the use of UNION puts the filter operator a bit earlier in the execution plan, but not (n)early enough. (heh)

enter image description here

Splitting the OR was a good beginning for improving the performance of the view , but there are additional parts to improve.

We could change the first part of the filter:

 WHERE (matter.sid IS NULL AND data.matterId = '')

By removing AND data.matterId = '')

and adding it to each part of the UNION subquery:

SELECT N'DOCUMENTS' as billingType, t.sid as trustee_sid, recordedValue, recordedDate, clientId, matterId
FROM [tenant].userActivityDocuments d
INNER JOIN [tenant].Trustee t ON t.emailAddress = d.userString AND d.userType = N'EMAIL'
WHERE  matterId = ''
UNION ALL
SELECT N'DOCUMENTS' as billingType, t.sid as trustee_sid, recordedValue, recordedDate, clientId, matterId
FROM [tenant].userActivityDocuments d
INNER JOIN [tenant].Trustee t ON t.foreignId = d.userString AND d.userType = N'FOREIGNID'
WHERE  matterId = ''
UNION ALL
SELECT N'DOCUMENTS' as billingType, t.sid as trustee_sid, recordedValue, recordedDate, clientId, matterId
FROM [tenant].userActivityDocuments d
INNER JOIN [tenant].Trustee t ON t.domainUser = d.userString AND d.userType = N'DOMAIN'
WHERE  matterId = ''
UNION ALL
SELECT N'DOCUMENTS' as billingType, t.sid as trustee_sid, recordedValue, recordedDate, clientId, matterId
FROM [tenant].userActivityDocuments d
INNER JOIN [tenant].Trustee t ON t.unqualifiedDomainUser = d.userString AND d.userType = N'UNQUALIFIED_DOMAIN'
WHERE  matterId = ''

However, this still leaves us with a filter operator due to the LEFT JOIN + IS NULL filtering:

enter image description here

 LEFT JOIN [tenant].Asset matter ON matter.client_sid = client.sid AND
 matter.displayId = data.matterId
     WHERE (matter.sid IS NULL)

We could improve this type of filtering by changing it to a NOT EXISTS statement.

This leaves us with:

CREATE VIEW [tenant]._DocumentsBilling  AS
      SELECT data.billingType as billingType, data.trustee_sid as trustee_sid, data.recordedValue, client.sid as client_sid, client.clientGroup_sid as clientGroup_sid,NULL  as matter_sid, NULL as matterGroup_sid, data.recordedDate
    FROM (
        SELECT N'DOCUMENTS' as billingType, t.sid as trustee_sid, recordedValue, recordedDate, clientId, matterId
        FROM [tenant].userActivityDocuments d
        INNER JOIN [tenant].Trustee t ON t.emailAddress = d.userString AND d.userType = N'EMAIL'
        WHERE  matterId = ''
        UNION ALL
        SELECT N'DOCUMENTS' as billingType, t.sid as trustee_sid, recordedValue, recordedDate, clientId, matterId
        FROM [tenant].userActivityDocuments d
        INNER JOIN [tenant].Trustee t ON t.foreignId = d.userString AND d.userType = N'FOREIGNID'
        WHERE  matterId = ''
        UNION ALL
        SELECT N'DOCUMENTS' as billingType, t.sid as trustee_sid, recordedValue, recordedDate, clientId, matterId
        FROM [tenant].userActivityDocuments d
        INNER JOIN [tenant].Trustee t ON t.domainUser = d.userString AND d.userType = N'DOMAIN'
        WHERE  matterId = ''
        UNION ALL
        SELECT N'DOCUMENTS' as billingType, t.sid as trustee_sid, recordedValue, recordedDate, clientId, matterId
        FROM [tenant].userActivityDocuments d
        INNER JOIN [tenant].Trustee t ON t.unqualifiedDomainUser = d.userString AND d.userType = N'UNQUALIFIED_DOMAIN'
        WHERE  matterId = ''
    ) data
    INNER JOIN [tenant].Asset client ON client.displayId = data.clientId

    WHERE NOT EXISTS (SELECT * FROM  [tenant].Asset matter WHERE matter.client_sid = client.sid AND matter.displayId = data.matterId)
    
    UNION
            SELECT data.billingType as billingType, data.trustee_sid as trustee_sid, data.recordedValue, client.sid as client_sid, client.clientGroup_sid as clientGroup_sid, matter.sid as matter_sid, matter.matterGroup_sid as matterGroup_sid, data.recordedDate
    FROM (
            SELECT N'DOCUMENTS' as billingType, t.sid as trustee_sid, recordedValue, recordedDate, clientId, matterId
        FROM [tenant].userActivityDocuments d
        INNER JOIN [tenant].Trustee t ON t.emailAddress = d.userString AND d.userType = N'EMAIL'
        UNION ALL
        SELECT N'DOCUMENTS' as billingType, t.sid as trustee_sid, recordedValue, recordedDate, clientId, matterId
        FROM [tenant].userActivityDocuments d
        INNER JOIN [tenant].Trustee t ON t.foreignId = d.userString AND d.userType = N'FOREIGNID'
        UNION ALL
        SELECT N'DOCUMENTS' as billingType, t.sid as trustee_sid, recordedValue, recordedDate, clientId, matterId
        FROM [tenant].userActivityDocuments d
        INNER JOIN [tenant].Trustee t ON t.domainUser = d.userString AND d.userType = N'DOMAIN'
        UNION ALL
        SELECT N'DOCUMENTS' as billingType, t.sid as trustee_sid, recordedValue, recordedDate, clientId, matterId
        FROM [tenant].userActivityDocuments d
        INNER JOIN [tenant].Trustee t ON t.unqualifiedDomainUser = d.userString AND d.userType = N'UNQUALIFIED_DOMAIN'
    ) data
    INNER JOIN [tenant].Asset client ON client.displayId = data.clientId
    LEFT JOIN [tenant].Asset matter ON matter.client_sid = client.sid AND matter.displayId = data.matterId
    WHERE  matter.sid IS NOT NULL;

Which should give you earlier filtering and better performance.

Final execution plan here.