Sql-server – composite clustered index with partitioning

partitioningsql server

I have a table with hundreds of millions of rows. Data is regularly retrieved via various queries and inserted.
There is a clustered index on

  • LookupID (int)
  • TypeID (int)
  • Created (datetime)

(These 3 ensure uniqueness for each row.)

There is also a unique ID (int) which could be used as a clustered index.

I tried moving the clustered index to the unique ID and created a non clustered index on the existing composite; my thinking was it would be a more efficient index as the numbers are sequential. It didn't seem to make a huge difference to query performance, but there are so many use cases further testing would be required to definitively answer this.

My question is 2 fold – if I introduce partitioning based on Created (for the export of data for archiving), would it be better to have the composite as a non clustered index? I have just started reading about partitioning in detail and haven't quite got to terms with 'partition alignment'.
The second question is, am I wrong in my assumption that changing these indexes would be a worthwhile endeavour?

Apologies if this is too broad or should be in stackoverflow.

Best Answer

I do highly recommend the link above then. I actually gave a presentation on this to my SQL User Group a few months ago. Here is a possible table structure for you including a partition aligned non-clustered index, as well as the procedure I used to rotate the partitions around. It should help you get started.

Link to article: http://kejser.org/table-pattern-rotating-log-ring-buffer/

CREATE PARTITION FUNCTION [PF_100](int) AS RANGE RIGHT FOR VALUES (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100)
GO

CREATE PARTITION SCHEME [PS_100_dbo] AS PARTITION [PF_100] ALL TO (DBO)
GO

CREATE TABLE dbo.MyLog
    (
    UniqueID UNIQUEIDENTIFIER NOT NULL CONSTRAINT DF_MyLog_UniqueID DEFAULT (NEWSEQUENTIALID())
    , LookupID INT NOT NULL
    , TypeID INT NOT NULL
    , Created DATETIME2(7) NOT NULL
    , PartitionOffset AS (CONVERT(INT, DATEDIFF(DAY,CONVERT(DATETIME, 0),Created)%(100))) PERSISTED NOT NULL
    , CONSTRAINT PK_MyLog_UniqueID PRIMARY KEY CLUSTERED (UniqueID, PartitionOffset)
    ) ON PS_100_dbo (PartitionOffset)

CREATE NONCLUSTERED INDEX IDX_MyLog_LookupID_TypeID_Created
    ON dbo.MyLog (LookupID, TypeID, Created, PartitionOffset)
    ON PS_100_dbo (PartitionOffset)

CREATE TABLE dbo.MyLog_SwitchOffset
    (
    UniqueID UNIQUEIDENTIFIER NOT NULL CONSTRAINT DF_MyLog_SwitchOffset_UniqueID DEFAULT (NEWSEQUENTIALID())
    , LookupID INT NOT NULL
    , TypeID INT NOT NULL
    , Created DATETIME2(7) NOT NULL
    , PartitionOffset AS (CONVERT(INT, DATEDIFF(DAY,CONVERT(DATETIME, 0),Created)%(100))) PERSISTED NOT NULL
    , CONSTRAINT PK_MyLog_SwitchOffset_UniqueID PRIMARY KEY CLUSTERED (UniqueID, PartitionOffset)
    ) ON PS_100_dbo (PartitionOffset)

GO

CREATE OR ALTER PROCEDURE dbo.usp_SwichPartitions
    @SourceTableName NVARCHAR(128)
    , @TargetTableName NVARCHAR(100)
    , @PartitionsToKeep INT
    , @IsDebug BIT = 0
AS
BEGIN

    /** Procedure dbo.usp_SwichPartitions

        Based on http://kejser.org/table-pattern-rotating-log-ring-buffer/

        This procedure:
        * does a few sanity checks.
        * creates a small table with all possible dates, assuming that today is the most recent partition and that the offset modulo is as described.
        * Determines how many should be kept (most recent X partitions)
        * for any partitions that we want to discard, AND has data, switch to the @TargetTableName
        * truncate @TargetTableName

        INPUTS:
            @SourceTableName NVARCHAR(100)  The base table (include schema!) 
            @TargetTableName NVARCHAR(100)  The table to switch to (include schema!)
            @PartitionsToKeep INT = 5       The number of partitions to keep (keep most recent X days)
            @IsDebug BIT = 0                Debug mode?

        @SourceTableName and @TargetTableName MUST exist (they must also have the exact same definition [indexes optional]

        @PartitionsToKeep must be greater than 1.

        @IsDebug (default 0), if set to 1, shows you a "what if" scenario, scripting out all of the required commands.
            NO CHANGES WILL BE MADE

    */

    /** For Testing values...
        DECLARE @SourceTableName nvarchar(100) = 'dbo.LogTable'
        DECLARE @TargetTableName nvarchar(100) = 'dbo.LogTable_SwitchTarget'
        DECLARE @PartitionsToKeep int = 5
        DECLARE @IsDebug bit = 0
        */


    SET NOCOUNT ON

    /** INPUT VALIDATION **/
    DECLARE @ThrowMessage NVARCHAR(1000);
    IF OBJECT_ID(@SourceTableName) IS NULL
    BEGIN
        SET @ThrowMessage = 'Specified @SourceTableName: ' + COALESCE(@SourceTableName, 'NULL') + ' does not exist (include schema).';
        ;THROW 50000, @ThrowMessage, 1;
    END

    IF OBJECT_ID(@TargetTableName) IS NULL
    BEGIN
        SET @ThrowMessage = 'Specified @TargetTableName: ' + COALESCE(@TargetTableName, 'NULL') + ' does not exist (include schema).';
        ;THROW 50000, @ThrowMessage, 1;
    END

    IF COALESCE(@PartitionsToKeep, 0) <= 1
    BEGIN
        SET @ThrowMessage = 'You must keep at least one (1) partition, you specified @PartitionsToKeep: ' + COALESCE(CAST(@PartitionsToKeep AS VARCHAR(10)), 'NULL') + '.';
        ;THROW 50000, @ThrowMessage, 1;
    END

    /** VARIABLE DECLARATION **/
    DECLARE @SQLCommand NVARCHAR(4000);
    DECLARE @AllPartitionCount INT;
    DECLARE @PartitionToSwitch INT;
    DECLARE @PartitionDate DATE;
    DECLARE @PartitionRowCount BIGINT;
    DECLARE @TimeToUse_UTC DATETIME2(7);
    DECLARE @RotateLogID INT;

    SET @TimeToUse_UTC = SYSUTCDATETIME();
    SET @AllPartitionCount = (  SELECT COUNT(*) 
                                FROM sys.partitions AS P 
                                WHERE P.object_id = OBJECT_ID(@SourceTableName) 
                                    AND P.index_id IN (0, 1)
                                );

    DECLARE @PartitionsTable TABLE
        (
        PartitionTableID INT NOT NULL PRIMARY KEY
        , PartitionDate DATE NOT NULL UNIQUE
        , PartitionOffset INT NULL
        , PartitionNumber INT NULL
        , IsKeepPartition BIT NOT NULL DEFAULT (0)
        , PartitionRowCount BIGINT NOT NULL DEFAULT (0)
        );

    IF @AllPartitionCount IS NULL 
        OR @PartitionsToKeep > @AllPartitionCount
    BEGIN
        ;THROW 50000, 'Cannot keep more partitions than the current buffer size', 1;
    END

    /** Build up a table of dates (starting from today) and going back for as long as many days as there are partitions.
        - PartitionOffset should match the computed column in the log table (# of Days since epoch % Total Number of partitions)  [-1 because of 0]
        **/
    ;WITH CTE_Number AS
        (
        SELECT 0 AS N
        UNION ALL
        SELECT N + 1
        FROM CTE_Number
        WHERE N <= @AllPartitionCount
        )
    INSERT INTO @PartitionsTable
    (PartitionTableID, PartitionDate, IsKeepPartition)
    SELECT TOP (@AllPartitionCount)
        N
        , DATEADD(DAY, (N *-1), @TimeToUse_UTC)
        , CASE WHEN N < @PartitionsToKeep  THEN 1 ELSE 0 END
    FROM CTE_Number
    ORDER BY N
    OPTION (MAXRECURSION 1000);

    UPDATE @PartitionsTable
    SET PartitionOffset = CONVERT(INT, (DATEDIFF(DAY, CAST(0 AS DATETIME), PartitionDate) % @AllPartitionCount)) 

    UPDATE @PartitionsTable
    SET PartitionNumber = PartitionOffset + 1

    --Get Row Count...  
    UPDATE @PartitionsTable
    SET PartitionRowCount = P.row_count
    FROM @PartitionsTable AS T
        INNER JOIN sys.dm_db_partition_stats AS P ON P.object_id = OBJECT_ID(@SourceTableName) AND P.index_id IN (0, 1) AND (P.partition_number) = T.PartitionNumber;

    IF @IsDebug = 1
    BEGIN
        SELECT PartitionTableID
            , PartitionDate
            , PartitionOffset
            , PartitionNumber
            , IsKeepPartition 
            , PartitionRowCount
        FROM @PartitionsTable
        ORDER BY PartitionDate DESC;
    END

    IF @IsDebug = 1
    BEGIN
        PRINT '/** Variables ';
        PRINT ' @TimeToUse_UTC = ' + CONVERT(VARCHAR(20), @TimeToUse_UTC, 121);
        PRINT ' @PartitionsToKeep = ' + CAST(@PartitionsToKeep AS VARCHAR(10));
        PRINT ' @SourceTableName = ' + COALESCE(@SourceTableName, 'NULL');
        PRINT ' @TargetTableName = ' + COALESCE(@TargetTableName, 'NULL');
        PRINT ' @AllPartitionCount = ' + CAST(@AllPartitionCount AS VARCHAR(10));
        PRINT ' **/';
        PRINT ' ';
    END

    /** Loop through every partitions that needs to be deleted that also 
        has a PartitionRowCount.  Order by PartitionDate, removing the oldest first.
        */
    DECLARE curPartLoop CURSOR LOCAL STATIC FORWARD_ONLY READ_ONLY
    FOR SELECT PartitionNumber
            , PartitionDate
            , PartitionRowCount
        FROM @PartitionsTable
        WHERE IsKeepPartition = 0
            AND PartitionRowCount > 0
        ORDER BY PartitionDate;

    OPEN curPartLoop;

    FETCH NEXT FROM curPartLoop 
    INTO @PartitionToSwitch, @PartitionDate, @PartitionRowCount;

    IF @IsDebug = 1 
        AND NOT(@@FETCH_STATUS = 0)
    BEGIN
        PRINT '/** NO PARTITIONS DETECTED AS NEEDING TO BE REMOVED **/';
    END

    WHILE @@FETCH_STATUS = 0
    BEGIN

        SET @SQLCommand = CONCAT('ALTER TABLE ', @SourceTableName ,' SWITCH PARTITION ', CAST(@PartitionToSwitch AS NVARCHAR(10)), ' TO ', @TargetTableName, ' PARTITION ', CAST(@PartitionToSwitch AS NVARCHAR(10)));

        IF @IsDebug = 1
        BEGIN
            PRINT '/** @PartitionToSwitch = ' + CAST(@PartitionToSwitch AS VARCHAR(10));
            PRINT ' @PartitionDate = ' + CONVERT(VARCHAR(20), @PartitionDate, 121);
            PRINT ' @PartitionRowCount = ' + CAST(@PartitionRowCount AS VARCHAR(10));
            PRINT ' */';
            PRINT @SQLCommand;
            PRINT ' ';
        END

        IF @IsDebug = 0
        BEGIN

            EXEC sp_executesql @SQLCommand;

        END

        FETCH NEXT FROM curPartLoop 
        INTO @PartitionToSwitch, @PartitionDate, @PartitionRowCount;

    END --WHILE @@FETCH_STATUS = 0

    --Cleanup.
    CLOSE curPartLoop;
    DEALLOCATE curPartLoop;

    --Truncate the switch table.
    SET @SQLCommand = CONCAT('TRUNCATE TABLE ', @TargetTableName);

    IF @IsDebug = 1
    BEGIN   
        PRINT '/* TRUNCATE SWITCH TABLE */';
        PRINT @SQLCommand;
    END

    IF @IsDebug = 0
    BEGIN

        EXEC sp_executesql @SQLCommand;

    END

END