SQL Server – Check and Add Partition Values in Partition Function

azure-sql-databasepartitioningsql server

I have an azure sql db with a partitioned table with a partition function on varchar datatype.

In an Etl process I will get a list of partiton values as a parameter that i will the need to:

  • check partition function if the partition values exist, if not iterate through list of value and add to partition function.

  • for the list of partition values that is supplied as a parameter, iterate through the $partition function to get the partitionids into a new variable as a comma seperated list

  • truncate the partitioned table with the command: truncate parttable with partitions (@newvariable)

What I got so far in code is:

-- Table, Partition Function, Partition Scheme

CREATE PARTITION FUNCTION PF_TEXT(varchar(20)) AS RANGE RIGHT FOR VALUES ('A', 'B', 'C', 'D', 'E')

CREATE PARTITION SCHEME PS_TEXT AS PARTITION PF_TEXT ALL TO ([PRIMARY])

CREATE TABLE [dbo].[fact_partitioned](
    [Col1] [int], NOT NULL
    [Col2] [int], NOT NULL
    [TEXT] [varchar(20)] NOT NULL
) ON PS_TEXT ([TEXT])



---------------------------

DECLARE @cmd nvarchar(max);
DECLARE @partitions nvarchar(max);
DECLARE @partitionids nvarchar(max);

SET @partitions = 'A','B','C'



-- Code to check Partition Function for @partitions values exist?
-- For @partitions values that don't exist, iterate @partitions values
and add to partition function.

--code to iterate @partitions values over $partition.PF_TEXT(@partition)
add result to @partitionids


SET @cmd = N'TRUNCATE TABLE dbo.pt WITH (PARTITIONS (' + @partitionids + N'));'; 

PRINT @cmd;

--EXEC sys.sp_executesql @cmd;

Best Answer

SQL Server table partitioning is natively range partitioning. Although you can implement list partitioning (i.e. partitioning columns exactly match boundaries), take care to ensure exact boundary matches to avoid excessive data movement and logging during SPLIT operations. Consider creating a CHECK constraint on the fact table matching the boundary values (e.g. TEXT IN('A','B','C', …).

Below is an example script based on the DDL and sample data in your question, which assumes (at least) a clustered index exists on the fact table. I added some additional boundaries to illustrate new boundaries between existing ones can be introduced during the ETL process that without requiring data movement, assuming partitioning column values match boundaries exactly. See inline comments for details of each step.

The move from staging to the fact table took a couple of seconds in my test. Be sure to update fact table stats afterward.

CREATE PARTITION FUNCTION PF_TEXT(varchar(20))
    AS RANGE RIGHT 
    FOR VALUES ('A', 'B', 'C', 'D', 'E', 'M', 'S');
CREATE PARTITION SCHEME PS_TEXT
    AS PARTITION PF_TEXT
    ALL TO ([PRIMARY]);

CREATE TABLE dbo.fact_partitioned(
    Col1 int NOT NULL,
    Col2 int NOT NULL,
    TEXT varchar(20) NOT NULL
        INDEX cdx CLUSTERED ON PS_TEXT (TEXT)
) ON PS_TEXT (TEXT);

--load some example data into fact table
WITH 
     t10 AS (SELECT n FROM (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) t(n))
    ,t1k AS (SELECT 0 AS n FROM t10 AS a CROSS JOIN t10 AS b CROSS JOIN t10 AS c)
    ,t1g AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS num FROM t1k AS a CROSS JOIN t1k AS b CROSS JOIN t1k AS c)
INSERT INTO dbo.fact_partitioned WITH(TABLOCKX) (Col1, Col2, TEXT)
SELECT 1,1,'A'
FROM t1g
WHERE num <= 1000000
UNION ALL
SELECT 1,1,'B'
FROM t1g
WHERE num <= 1000000
UNION ALL
SELECT 1,1,'C'
FROM t1g
WHERE num <= 1000000
UNION ALL
SELECT 1,1,'D'
FROM t1g
WHERE num <= 1000000
UNION ALL
SELECT 1,1,'E'
FROM t1g
WHERE num <= 1000000
UNION ALL
SELECT 1,1,'M'
FROM t1g
WHERE num <= 1000000
UNION ALL
SELECT 1,1,'S'
FROM t1g
WHERE num <= 1000000;
GO

--example ETL objects and data
DROP TABLE IF EXISTS dbo.fact_partitioned_staging;

--staging table must have same indexes as target (aligned)
CREATE TABLE dbo.fact_partitioned_staging(
    Col1 int NOT NULL,
    Col2 int NOT NULL,
    TEXT varchar(20) NOT NULL
        INDEX cdx CLUSTERED ON PS_TEXT (TEXT)
) ON PS_TEXT (TEXT);

WITH 
     t10 AS (SELECT n FROM (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) t(n))
    ,t1k AS (SELECT 0 AS n FROM t10 AS a CROSS JOIN t10 AS b CROSS JOIN t10 AS c)
    ,t1g AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS num FROM t1k AS a CROSS JOIN t1k AS b CROSS JOIN t1k AS c)
INSERT INTO dbo.fact_partitioned_staging WITH(TABLOCKX) (Col1, Col2, TEXT)
--replaces existing partition
SELECT 2,2,'C'
FROM t1g
WHERE num <= 1000000
UNION ALL
--new partion BETWEEN 'E' and 'M'
SELECT 2,2,'G'
FROM t1g
WHERE num <= 1000000
--new partion AFTER 'S'
UNION ALL
SELECT 2,2,'X'
FROM t1g
WHERE num <= 1000000;
GO

--dynamic list partitioning load
SET XACT_ABORT ON; --best practice with explict transactions
DECLARE @SQL nvarchar(MAX);
BEGIN TRY

    BEGIN TRAN;

    --get distinct incremental load TEXT values from staging
    DECLARE @incremental_load_boundaries TABLE(
          incremental_load_boundary varchar(20) NOT NULL PRIMARY KEY
        );
    INSERT INTO @incremental_load_boundaries(incremental_load_boundary)
        SELECT DISTINCT TEXT
        FROM dbo.fact_partitioned_staging;

    --recreate new partition function/scheme with existing partition boundaries plus new distinct TEXT values
    IF EXISTS(SELECT 1 FROM sys.partition_schemes WHERE name = N'PS_TEXT_STAGING')
        DROP PARTITION SCHEME PS_TEXT_STAGING;
    IF EXISTS(SELECT 1 FROM sys.partition_functions WHERE name = N'PF_TEXT_STAGING')
        DROP PARTITION FUNCTION PF_TEXT_STAGING;
    SELECT @SQL =
        N'CREATE PARTITION FUNCTION PF_TEXT_STAGING(varchar(20)) AS RANGE RIGHT FOR VALUES ('
        + STRING_AGG(QUOTENAME(boundary,''''),',')
        + N');'
    FROM (
        --existing target table boundaries
        SELECT CAST(value AS varchar(20)) AS boundary
        FROM sys.partition_range_values
        WHERE function_id = (SELECT function_id FROM sys.partition_functions WHERE name = N'PF_TEXT')
        UNION
        --staging table boundaries
        SELECT incremental_load_boundary
        FROM @incremental_load_boundaries
        ) AS boundaries;
    PRINT @SQL;
    EXECUTE sp_executesql @sql;
    CREATE PARTITION SCHEME PS_TEXT_STAGING AS PARTITION PF_TEXT_STAGING ALL TO ([PRIMARY]);

    --partition staging table using new partition scheme with end state boundaries
    CREATE CLUSTERED INDEX cdx ON dbo.fact_partitioned_staging(TEXT)
        WITH(DROP_EXISTING=ON)
        ON PS_TEXT_STAGING(TEXT);

    --SPLIT existing partition function to add new boundaries
    --no data movement needed when existing partition column values exactly match existing boundaries
    SELECT @SQL = STRING_AGG(
        'ALTER PARTITION SCHEME PS_TEXT NEXT USED [PRIMARY];ALTER PARTITION FUNCTION PF_TEXT() SPLIT RANGE('
        + QUOTENAME(new_boundaries.boundary, '''')
        + N')'
        , ';')
    FROM (
        --existing target table boundaries
        SELECT CAST(value AS varchar(20)) AS boundary
        FROM sys.partition_range_values
        WHERE function_id = (SELECT function_id FROM sys.partition_functions WHERE name = N'PF_TEXT_STAGING')
        EXCEPT
        --all boundaries
        SELECT CAST(value AS varchar(20)) AS boundary
        FROM sys.partition_range_values
        WHERE function_id = (SELECT function_id FROM sys.partition_functions WHERE name = N'PF_TEXT')
        ) AS new_boundaries
    PRINT @SQL;
    EXECUTE sp_executesql @sql;

    --truncate partitions to be reloaded
    SELECT @SQL =
        N'TRUNCATE TABLE dbo.fact_partitioned WITH(PARTITIONS(' 
        + STRING_AGG(CAST(partition_number AS varchar(10)),',')
        + N'));'
    FROM (
        SELECT DISTINCT $PARTITION.PF_TEXT(incremental_load_boundary) AS partition_number
        FROM @incremental_load_boundaries
        ) AS existing_boundaries;
    PRINT @SQL
    EXECUTE sp_executesql @sql;

    --switch in incremental load partitions
    SELECT @SQL = STRING_AGG(
        N'ALTER TABLE dbo.fact_partitioned_staging SWITCH PARTITION ' 
        + CAST(partition_number AS varchar(10))
        + N' TO dbo.fact_partitioned PARTITION '
        + CAST(partition_number AS varchar(10))
        , ';')
        + N';'
    FROM (
        SELECT DISTINCT $PARTITION.PF_TEXT(incremental_load_boundary) AS partition_number
        FROM @incremental_load_boundaries
        ) AS existing_boundaries;
    PRINT @SQL
    EXECUTE sp_executesql @sql;

    --assert staging table is now empty
    IF EXISTS(SELECT 1 FROM dbo.fact_partitioned_staging)
        RAISERROR('Assertion failed: staging table is not empty',16,1);

    COMMIT;

END TRY
BEGIN CATCH
    IF @@ROWCOUNT > 0 ROLLBACK;
    THROW;
END CATCH;
GO