Sql-server – Poor Performance Running dm_exec_query_stats

dmvperformancequery-performancesql server

I have a job that executes every 5 minutes to gather info from DMVs that has not changed in over 2 years, normally running under a minute.

All of a sudden starting last week, with no changes deployed, this job started running over 12 minutes on only 1 server out of 15. Looking at the execution plan nothing has changed, but the reads, writes, CPU time, and rowcounts all went up by an order of magnitude. The number of rows inserted hasn't changed by much either.

Could this be bad statistics in the underlying system tables? If so, how can it be solved, because I can't run UPDATE STATISTICS on any of the DMVs which just give a message about the object not existing.

Here is the code in question:

SELECT
    ISNULL(DB_NAME(st.dbid), 'NULL') AS [DatabaseName]  
    ,ISNULL(OBJECT_SCHEMA_NAME(st.objectid, st.dbid), 'NULL') AS [SchemaName] 
    ,ISNULL(OBJECT_NAME(st.objectid, st.dbid), 'NULL') AS [ObjectName]   
    ,cp.objtype AS [ObjectType]
    ,qs.statement_start_offset AS [StatementStartOffset]
    ,qs.statement_end_offset AS [StatementEndOffset]
    ,qs.query_hash AS [QueryHash]
    ,qs.query_plan_hash AS [QueryPlanHash]
    ,qs.sql_handle AS [SQLHandle]
    ,qs.plan_handle AS [PlanHandle]
    ,qs.plan_generation_num AS [PlanGenerationNumber]
    ,cp.usecounts AS [UseCounts]
    ,cp.refcounts AS [RefCounts]
    -- find the offset of the actual statement being executed
    ,SUBSTRING(st.text, 
            CASE
                WHEN qs.statement_start_offset = 0 OR qs.statement_start_offset IS NULL THEN 1  
                ELSE qs.statement_start_offset/2 + 1
            END, 
            CASE 
                WHEN qs.statement_end_offset = 0 OR qs.statement_end_offset = -1 OR qs.statement_end_offset IS NULL THEN LEN(st.text)  
                ELSE qs.statement_end_offset/2 
            END - 
            CASE
                WHEN qs.statement_start_offset = 0 OR qs.statement_start_offset IS NULL THEN 1  
                ELSE qs.statement_start_offset/2
            END + 1 
        ) AS [Statement]  
    ,qs.last_execution_time AS [LastExecutionTime]
    ,qs.creation_time AS [CreationTime]
    ,qs.execution_count AS [ExecutionCount]
    ,qs.total_logical_reads AS [TotalLogicalReads]
    ,qs.last_logical_reads AS [LastLogicalReads]
    ,qs.min_logical_reads AS [MinLogicalReads]
    ,qs.max_logical_reads AS [MaxLogicalReads]
    ,qs.total_logical_writes AS [TotalLogicalWrites]
    ,qs.last_logical_writes AS [LastLogicalWrites]
    ,qs.min_logical_writes AS [MinLogicalWrites]
    ,qs.max_logical_writes AS [MaxLogicalWrites]
    ,qs.total_physical_reads AS [TotalPhysicalReads]
    ,qs.last_physical_reads AS [LastPhysicalReads]
    ,qs.min_physical_reads AS [MinPhysicalReads]
    ,qs.max_physical_reads AS [MaxPhysicalReads]
    ,qs.total_worker_time AS [TotalWorkerTime]
    ,qs.last_worker_time AS [LastWorkerTime]
    ,qs.min_worker_time AS [MinWorkerTime]
    ,qs.max_worker_time AS [MaxWorkerTime]
    ,qs.total_clr_time AS [TotalCLRTime]
    ,qs.last_clr_time AS [LastCLRTime]
    ,qs.min_clr_time AS [MinCLRTime]
    ,qs.max_clr_time AS [MaxCLRTime]
    ,qs.total_elapsed_time AS [TotalElapsedTime]
    ,qs.last_elapsed_time AS [LastElapsedTime]
    ,qs.min_elapsed_time AS [MinElapsedTime]
    ,qs.max_elapsed_time AS [MaxElapsedTime]
    ,qs.total_rows AS [TotalRows]
    ,qs.last_rows AS [LastRows]
    ,qs.min_rows AS [MinRows]
    ,qs.max_rows AS [MaxRows]
INTO #QueryUsageStats
FROM sys.dm_exec_query_stats qs   
INNER JOIN sys.dm_exec_cached_plans cp 
ON qs.plan_handle = cp.plan_handle 
CROSS APPLY sys.dm_exec_sql_text(qs.plan_handle) st
WHERE 1=2;

CREATE UNIQUE CLUSTERED INDEX #PK_#QueryUsageStats ON #QueryUsageStats
(
    [DatabaseName] ASC,
    [SchemaName] ASC,
    [ObjectName] ASC,
    [ObjectType] ASC,
    [StatementStartOffset] ASC,
    [StatementEndOffset] ASC,
    [QueryHash] ASC,
    [QueryPlanHash] ASC,
    [PlanHandle] ASC
);

INSERT INTO #QueryUsageStats
SELECT
    ISNULL(DB_NAME(st.dbid), 'NULL') AS [DatabaseName]  
    ,ISNULL(OBJECT_SCHEMA_NAME(st.objectid, st.dbid), 'NULL') AS [SchemaName] 
    ,ISNULL(OBJECT_NAME(st.objectid, st.dbid), 'NULL') AS [ObjectName]   
    ,cp.objtype AS [ObjectType]
    ,qs.statement_start_offset AS [StatementStartOffset]
    ,qs.statement_end_offset AS [StatementEndOffset]
    ,qs.query_hash AS [QueryHash]
    ,qs.query_plan_hash AS [QueryPlanHash]
    ,qs.sql_handle AS [SQLHandle]
    ,qs.plan_handle AS [PlanHandle]
    ,qs.plan_generation_num AS [PlanGenerationNumber]
    ,cp.usecounts AS [UseCounts]
    ,cp.refcounts AS [RefCounts]
    -- find the offset of the actual statement being executed
    ,SUBSTRING(st.text, 
            CASE
                WHEN qs.statement_start_offset = 0 OR qs.statement_start_offset IS NULL THEN 1  
                ELSE qs.statement_start_offset/2 + 1
            END, 
            CASE 
                WHEN qs.statement_end_offset = 0 OR qs.statement_end_offset = -1 OR qs.statement_end_offset IS NULL THEN LEN(st.text)  
                ELSE qs.statement_end_offset/2 
            END - 
            CASE
                WHEN qs.statement_start_offset = 0 OR qs.statement_start_offset IS NULL THEN 1  
                ELSE qs.statement_start_offset/2
            END + 1 
        ) AS [Statement]  
    ,qs.last_execution_time AS [LastExecutionTime]
    ,qs.creation_time AS [CreationTime]
    ,qs.execution_count AS [ExecutionCount]
    ,qs.total_logical_reads AS [TotalLogicalReads]
    ,qs.last_logical_reads AS [LastLogicalReads]
    ,qs.min_logical_reads AS [MinLogicalReads]
    ,qs.max_logical_reads AS [MaxLogicalReads]
    ,qs.total_logical_writes AS [TotalLogicalWrites]
    ,qs.last_logical_writes AS [LastLogicalWrites]
    ,qs.min_logical_writes AS [MinLogicalWrites]
    ,qs.max_logical_writes AS [MaxLogicalWrites]
    ,qs.total_physical_reads AS [TotalPhysicalReads]
    ,qs.last_physical_reads AS [LastPhysicalReads]
    ,qs.min_physical_reads AS [MinPhysicalReads]
    ,qs.max_physical_reads AS [MaxPhysicalReads]
    ,qs.total_worker_time AS [TotalWorkerTime]
    ,qs.last_worker_time AS [LastWorkerTime]
    ,qs.min_worker_time AS [MinWorkerTime]
    ,qs.max_worker_time AS [MaxWorkerTime]
    ,qs.total_clr_time AS [TotalCLRTime]
    ,qs.last_clr_time AS [LastCLRTime]
    ,qs.min_clr_time AS [MinCLRTime]
    ,qs.max_clr_time AS [MaxCLRTime]
    ,qs.total_elapsed_time AS [TotalElapsedTime]
    ,qs.last_elapsed_time AS [LastElapsedTime]
    ,qs.min_elapsed_time AS [MinElapsedTime]
    ,qs.max_elapsed_time AS [MaxElapsedTime]
    ,qs.total_rows AS [TotalRows]
    ,qs.last_rows AS [LastRows]
    ,qs.min_rows AS [MinRows]
    ,qs.max_rows AS [MaxRows]
FROM sys.dm_exec_query_stats qs   
INNER JOIN sys.dm_exec_cached_plans cp 
ON qs.plan_handle = cp.plan_handle 
CROSS APPLY sys.dm_exec_sql_text(qs.plan_handle) st;

Here is the execution plan produced:

enter image description here

NOTE that I have also removed the clustered index and the also run the query just as a select discarding the results of the query, and it still takes more than 3 or more minutes. In fact just selecting the data from sys.dm_exec_query_stats seems to take over 3 minutes even without joining to the other tables now and even without the order by clause.

Best Answer

Nick Craver ran into performance issues with this too in Opserver. Here's the query he ended up with after working directly with MS folks on the query plan - the joins look kinda contorted, but as I recall, that was the only way he could get consistently good performance and avoid the timeout issues:

SELECT AvgCPU, AvgDuration, AvgReads, AvgCPUPerMinute,
       TotalCPU, TotalDuration, TotalReads,
       PercentCPU, PercentDuration, PercentReads, PercentExecutions,
       ExecutionCount,
       ExecutionsPerMinute,
       PlanCreationTime, LastExecutionTime,
       SUBSTRING(st.text,
                 (StatementStartOffset / 2) + 1,
                 ((CASE StatementEndOffset
                   WHEN -1 THEN DATALENGTH(st.text)
                   ELSE StatementEndOffset
                   END - StatementStartOffset) / 2) + 1) AS QueryText,
        st.Text FullText,
        query_plan AS QueryPlan,
        PlanHandle,
        StatementStartOffset,
        StatementEndOffset,
        MinReturnedRows,
        MaxReturnedRows,
        AvgReturnedRows,
        TotalReturnedRows,
        LastReturnedRows,
        DB_NAME(DatabaseId) AS CompiledOnDatabase
FROM (SELECT TOP (@MaxResultCount) 
             total_worker_time / execution_count AS AvgCPU,
             total_elapsed_time / execution_count AS AvgDuration,
             total_logical_reads / execution_count AS AvgReads,
             Cast(total_worker_time / age_minutes As BigInt) AS AvgCPUPerMinute,
             execution_count / age_minutes AS ExecutionsPerMinute,
             Cast(total_worker_time / age_minutes_lifetime As BigInt) AS AvgCPUPerMinuteLifetime,
             execution_count / age_minutes_lifetime AS ExecutionsPerMinuteLifetime,
             total_worker_time AS TotalCPU,
             total_elapsed_time AS TotalDuration,
             total_logical_reads AS TotalReads,
             execution_count ExecutionCount,
             CAST(ROUND(100.00 * total_worker_time / t.TotalWorker, 2) AS MONEY) AS PercentCPU,
             CAST(ROUND(100.00 * total_elapsed_time / t.TotalElapsed, 2) AS MONEY) AS PercentDuration,
             CAST(ROUND(100.00 * total_logical_reads / t.TotalReads, 2) AS MONEY) AS PercentReads,
             CAST(ROUND(100.00 * execution_count / t.TotalExecs, 2) AS MONEY) AS PercentExecutions,
             qs.creation_time AS PlanCreationTime,
             qs.last_execution_time AS LastExecutionTime,
             qs.plan_handle AS PlanHandle,
             qs.statement_start_offset AS StatementStartOffset,
             qs.statement_end_offset AS StatementEndOffset,
             qs.min_rows AS MinReturnedRows,
             qs.max_rows AS MaxReturnedRows,
             CAST(qs.total_rows as MONEY) / execution_count AS AvgReturnedRows,
             qs.total_rows AS TotalReturnedRows,
             qs.last_rows AS LastReturnedRows,
             qs.sql_handle AS SqlHandle,
             Cast(pa.value as Int) DatabaseId
        FROM (SELECT *, 
                     CAST((CASE WHEN DATEDIFF(second, creation_time, GETDATE()) > 0 And execution_count > 1
                                THEN DATEDIFF(second, creation_time, GETDATE()) / 60.0
                                ELSE Null END) as MONEY) as age_minutes, 
                     CAST((CASE WHEN DATEDIFF(second, creation_time, last_execution_time) > 0 And execution_count > 1
                                THEN DATEDIFF(second, creation_time, last_execution_time) / 60.0
                                ELSE Null END) as MONEY) as age_minutes_lifetime
                FROM sys.dm_exec_query_stats) AS qs
             CROSS JOIN(SELECT SUM(execution_count) TotalExecs,
                               SUM(total_elapsed_time) TotalElapsed,
                               SUM(total_worker_time) TotalWorker,
                               SUM(total_logical_reads) TotalReads
                          FROM sys.dm_exec_query_stats) AS t
             CROSS APPLY sys.dm_exec_plan_attributes(qs.plan_handle) AS pa
     WHERE pa.attribute = 'dbid'
       {0}) sq
    CROSS APPLY sys.dm_exec_sql_text(SqlHandle) AS st
    CROSS APPLY sys.dm_exec_query_plan(PlanHandle) AS qp