Sql-server – Most efficient way to call same Table-Valued Function on multiple columns in a Query

functions, optimization, performance, query-performance, sql server, sql-server-2016

I am trying to tune a query where the same table-valued function (TVF) is called on 20 columns.

The first thing I did was convert the scalar function into an inline table-valued function.

Is using CROSS APPLY the best performing way to execute the same function on multiple columns in a query?

A simplistic example:

SELECT   Col1 = A.val
        ,Col2 = B.val
        ,Col3 = C.val
        --do the same for other 17 columns
        ,Col21
        ,Col22
        ,Col23
FROM t
CROSS APPLY
    dbo.function1(Col1) A
CROSS APPLY
    dbo.function1(Col2) B
CROSS APPLY
    dbo.function1(Col3) C
--do the same for other 17 columns

Are there better alternatives?

The same function can be called in multiple queries against X number of columns.

Here's the function:

CREATE FUNCTION dbo.ConvertAmountVerified_TVF
(
    @amt VARCHAR(60)
)
RETURNS TABLE
WITH SCHEMABINDING
AS
RETURN
(
    WITH cteLastChar
    AS(
        SELECT LastChar = RIGHT(RTRIM(@amt), 1)
    )
    SELECT
        AmountVerified  = CAST(RET.Y AS NUMERIC(18,2))
    FROM (SELECT 1 t) t
    OUTER APPLY (
        SELECT N =
                CAST(
                    CASE 
                        WHEN CHARINDEX(L.LastChar  COLLATE Latin1_General_CS_AS, '{ABCDEFGHI}', 0) >0
                            THEN CHARINDEX(L.LastChar  COLLATE Latin1_General_CS_AS, '{ABCDEFGHI}', 0)-1
                        WHEN CHARINDEX(L.LastChar  COLLATE Latin1_General_CS_AS, 'JKLMNOPQR', 0) >0
                            THEN CHARINDEX(L.LastChar  COLLATE Latin1_General_CS_AS, 'JKLMNOPQR', 0)-1
                        WHEN CHARINDEX(L.LastChar  COLLATE Latin1_General_CS_AS, 'pqrstuvwxy', 0) >0
                            THEN CHARINDEX(L.LastChar  COLLATE Latin1_General_CS_AS, 'pqrstuvwxy', 0)-1
                        ELSE 
                            NULL
                    END
                AS VARCHAR(1))
        FROM
            cteLastChar L
    ) NUM
    OUTER APPLY (
        SELECT N =
            CASE 
                WHEN CHARINDEX(L.LastChar  COLLATE Latin1_General_CS_AS, '{ABCDEFGHI}', 0) >0
                    THEN 0
                WHEN CHARINDEX(L.LastChar  COLLATE Latin1_General_CS_AS, 'JKLMNOPQRpqrstuvwxy', 0) >0
                    THEN 1
                ELSE 0
            END
        FROM cteLastChar L
    ) NEG
    OUTER APPLY(
        SELECT Amt= CASE
                        WHEN NUM.N IS NULL
                            THEN @amt 
                        ELSE
                            SUBSTRING(RTRIM(@amt),1, LEN(@amt) - 1) + Num.N
                    END
    ) TP
    OUTER APPLY(
        SELECT Y =  CASE
                        WHEN NEG.N = 0
                            THEN (CAST(TP.Amt AS NUMERIC) / 100)
                        WHEN NEG.N = 1
                            THEN (CAST (TP.Amt AS NUMERIC) /100) * -1
                    END
    ) RET
) ;

GO

Here's the scalar function version that I inherited, if anyone is interested:

CREATE   FUNCTION dbo.ConvertAmountVerified 
(
    @amt VARCHAR(50)
)
RETURNS NUMERIC (18,3)  
AS
BEGIN   
    -- Declare the return variable here
    DECLARE @Amount NUMERIC(18, 3);
    DECLARE @TempAmount VARCHAR (50);
    DECLARE @Num VARCHAR(1);
    DECLARE @LastChar VARCHAR(1);
    DECLARE @Negative BIT ;
    -- Get Last Character
    SELECT @LastChar = RIGHT(RTRIM(@amt), 1) ;
    SELECT @Num = CASE @LastChar  collate latin1_general_cs_as
                        WHEN '{'  THEN '0'                                  
                        WHEN 'A' THEN '1'                       
                        WHEN 'B' THEN '2'                       
                        WHEN 'C' THEN '3'                       
                        WHEN 'D' THEN '4'                       
                        WHEN 'E' THEN '5'                       
                        WHEN 'F' THEN '6'                       
                        WHEN 'G' THEN '7'                       
                        WHEN 'H' THEN '8'                       
                        WHEN 'I' THEN '9'                       
                        WHEN '}' THEN '0'   
                        WHEN 'J' THEN '1'
                        WHEN 'K' THEN '2'                       
                        WHEN 'L' THEN '3'                       
                        WHEN 'M' THEN '4'                       
                        WHEN 'N' THEN '5'                       
                        WHEN 'O' THEN '6'                       
                        WHEN 'P' THEN '7'                       
                        WHEN 'Q' THEN '8'                       
                        WHEN 'R' THEN '9'

                        ---ASCII
                        WHEN 'p' Then '0'
                        WHEN 'q' Then '1'
                        WHEN 'r' Then '2'
                        WHEN 's' Then '3'
                        WHEN 't' Then '4'
                        WHEN 'u' Then '5'
                        WHEN 'v' Then '6'
                        WHEN 'w' Then '7'
                        WHEN 'x' Then '8'
                        WHEN 'y' Then '9'

                        ELSE ''

                END 
    SELECT @Negative = CASE @LastChar collate latin1_general_cs_as
                        WHEN '{' THEN 0         

                        WHEN 'A' THEN 0                 
                        WHEN 'B' THEN 0                     
                        WHEN 'C' THEN 0                     
                        WHEN 'D' THEN 0                     
                        WHEN 'E' THEN 0                     
                        WHEN 'F' THEN 0                     
                        WHEN 'G' THEN 0                     
                        WHEN 'H' THEN 0                     
                        WHEN 'I' THEN 0                     
                        WHEN '}' THEN 1 

                        WHEN 'J' THEN 1                     
                        WHEN 'K' THEN 1                     
                        WHEN 'L' THEN 1                     
                        WHEN 'M' THEN 1                 
                        WHEN 'N' THEN 1                     
                        WHEN 'O' THEN 1                     
                        WHEN 'P' THEN 1                     
                        WHEN 'Q' THEN 1                     
                        WHEN 'R' THEN 1

                        ---ASCII
                        WHEN 'p' Then '1'
                        WHEN 'q' Then '1'
                        WHEN 'r' Then '1'
                        WHEN 's' Then '1'
                        WHEN 't' Then '1'
                        WHEN 'u' Then '1'
                        WHEN 'v' Then '1'
                        WHEN 'w' Then '1'
                        WHEN 'x' Then '1'
                        WHEN 'y' Then '1'
                        ELSE 0
                END 
    -- Add the T-SQL statements to compute the return value here
    if (@Num ='')
    begin
    SELECT @TempAmount=@amt;
    end 
    else
    begin
    SELECT @TempAmount = SUBSTRING(RTRIM(@amt),1, LEN(@amt) - 1) + @Num;

    end
    SELECT @Amount = CASE @Negative
                     WHEN 0 THEN (CAST(@TempAmount AS NUMERIC) / 100)
                     WHEN 1 THEN (CAST (@TempAmount AS NUMERIC) /100) * -1
                     END ;
    -- Return the result of the function
    RETURN @Amount

END

Sample test data:

SELECT dbo.ConvertAmountVerified('00064170')    --  641.700
SELECT * FROM dbo.ConvertAmountVerified_TVF('00064170') --  641.700

SELECT dbo.ConvertAmountVerified('00057600A')   --  5760.010
SELECT * FROM dbo.ConvertAmountVerified_TVF('00057600A')    --  5760.010

SELECT dbo.ConvertAmountVerified('00059224y')   --  -5922.490
SELECT * FROM dbo.ConvertAmountVerified_TVF('00059224y')    --  -5922.490

Best Answer

FIRST: it should be mentioned that the absolutely fastest method of getting the desired results is to do the following (a T-SQL sketch of the new-column migration follows this list):

  1. Migrate the data into either new columns or even a new table:
    1. New column approach:
      1. Add new columns as {name}_new to the table with the DECIMAL(18, 3) datatype
      2. Do a one-time migration of the data from the old VARCHAR columns to the new DECIMAL columns
      3. Rename the old columns to {name}_old
      4. Rename the new columns to be just {name}
    2. New table approach:
      1. Create a new table as {table_name}_new using the DECIMAL(18, 3) datatype
      2. Do a one-time migration of the data from the current table to the new DECIMAL-based table
      3. Rename the old table to {table_name}_old
      4. Remove the _new suffix from the new table's name
  2. Update the app, etc. to never insert data encoded in this manner
  3. After one release cycle, if there are no problems, drop the old columns or table
  4. Drop the TVFs and the UDF
  5. Never speak of this again!
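
For step 1.1, a minimal sketch of the new-column migration could look like the following. The table dbo.Payments and its column [Amount] are hypothetical placeholders for the real objects; the TVF is the one from the question (a corrected version would be used the same way):

ALTER TABLE dbo.Payments ADD [Amount_new] DECIMAL(18, 3) NULL;
GO

-- one-time conversion of the encoded VARCHAR values into the DECIMAL column
UPDATE pay
SET    pay.[Amount_new] = cnvrt.[AmountVerified]
FROM   dbo.Payments pay
CROSS APPLY dbo.ConvertAmountVerified_TVF(pay.[Amount]) cnvrt;
GO

-- swap the names so existing queries pick up the DECIMAL column
EXEC sp_rename N'dbo.Payments.Amount',     N'Amount_old', N'COLUMN';
EXEC sp_rename N'dbo.Payments.Amount_new', N'Amount',     N'COLUMN';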

THAT BEING SAID: you can get rid of a lot of that code, as it is largely unnecessary duplication. Also, there are at least two bugs that cause the output to sometimes be incorrect, or sometimes to throw an error. And those bugs were copied into Joe's code, since it produces the same results (including the error) as the O.P.'s code. For example (a quick comparison query follows these lists):

  • These values produce a correct result:

    00062929x
    00021577E
    00000509H
    
  • These values produce an incorrect result:

    00002020Q
    00016723L
    00009431O
    00017221R
    
  • This value produces an error:

    00062145}
    anything ending with "}"
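
One way to see the mismatches for yourself is to run the inherited scalar UDF and the O.P.'s TVF side by side over those sample values (a sketch; the value ending in } is omitted here since it raises the error in the TVF):

SELECT smpl.[val],
       dbo.ConvertAmountVerified(smpl.[val]) AS [ScalarResult],
       tvf.[AmountVerified] AS [TVF_Result]
FROM  (VALUES ('00062929x'), ('00021577E'), ('00000509H'),              -- these agree
              ('00002020Q'), ('00016723L'), ('00009431O'), ('00017221R') -- these do not
      ) smpl([val])
CROSS APPLY dbo.ConvertAmountVerified_TVF(smpl.[val]) tvf;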
    

Comparing all 3 versions against 448,740 rows using SET STATISTICS TIME ON, they all ran in just over 5000 ms of elapsed time. For CPU time, however, the results were:

  • O.P.'s TVF: 7031 ms
  • Joe's TVF: 3734 ms
  • Solomon's TVF: 1407 ms

SETUP: DATA

The following creates a table and populates it. It should create the same data set on any system running SQL Server 2017, since the rows in spt_values will be the same. This provides a common basis of comparison for other people testing on their own systems, whereas randomly generated data would factor into timing differences across systems, or even between tests on the same system if the sample data is regenerated. I started with the same 3-column table as Joe did, but used the sample values from the question as a template to come up with a variety of numeric values appended with each of the possible trailing-character options (including no trailing character). This is also why I forced the Collation on the columns: I didn't want the fact that I am using a binary-Collation Instance to unfairly negate the effect of using the COLLATE keyword to force a different Collation in the TVF.

The only difference from one system to the next will be the ordering of the rows in the table (due to the ORDER BY NEWID()).

USE [tempdb];
SET NOCOUNT ON;

CREATE TABLE dbo.TestVals
(
  [TestValsID] INT IDENTITY(1, 1) NOT NULL PRIMARY KEY,
  [Col1] VARCHAR(50) COLLATE Latin1_General_100_CI_AS NOT NULL,
  [Col2] VARCHAR(50) COLLATE Latin1_General_100_CI_AS NOT NULL,
  [Col3] VARCHAR(50) COLLATE Latin1_General_100_CI_AS NOT NULL
);

;WITH cte AS
(
  SELECT (val.[number] + tmp.[blah]) AS [num]
  FROM [master].[dbo].[spt_values] val
  CROSS JOIN (VALUES (1), (7845), (0), (237), (61063), (999)) tmp(blah)
  WHERE val.[number] BETWEEN 0 AND 1000000
)
INSERT INTO dbo.TestVals ([Col1], [Col2], [Col3])
  SELECT FORMATMESSAGE('%08d%s', cte.[num], tab.[col]) AS [Col1],
       FORMATMESSAGE('%08d%s', ((cte.[num] + 2) * 2), tab.[col]) AS [Col2],
       FORMATMESSAGE('%08d%s', ((cte.[num] + 1) * 3), tab.[col]) AS [Col3]
  FROM    cte
  CROSS JOIN (VALUES (''), ('{'), ('A'), ('B'), ('C'), ('D'), ('E'), ('F'),
              ('G'), ('H'), ('I'), ('}'), ('J'), ('K'), ('L'), ('M'), ('N'),
              ('O'), ('P'), ('Q'), ('R'), ('p'), ('q'), ('r'), ('s'), ('t'),
              ('u'), ('v'), ('w'), ('x'), ('y')) tab(col)
  ORDER BY NEWID();
-- 463698 rows

SETUP: TVF

GO
CREATE OR ALTER FUNCTION dbo.ConvertAmountVerified_Solomon
(
    @amt VARCHAR(50)
)
RETURNS TABLE
WITH SCHEMABINDING
AS
RETURN

    WITH ctePosition AS
    (
        SELECT CHARINDEX(RIGHT(RTRIM(@amt), 1) COLLATE Latin1_General_100_BIN2,
                             '{ABCDEFGHI}JKLMNOPQRpqrstuvwxy') AS [Value]
    ),
    cteAppend AS
    (
        SELECT pos.[Value] AS [Position],
               IIF(pos.[Value] > 0,
                      CHAR(48 + ((pos.[Value] - 1) % 10)),
                      '') AS [Value]
        FROM   ctePosition pos
    )
    SELECT (CONVERT(DECIMAL(18, 3),
                    IIF(app.[Position] > 0,
                           SUBSTRING(RTRIM(@amt), 1, LEN(@amt) - 1) + app.[Value],
                           @amt))
                        / 100. )
                    * IIF(app.[Position] > 10, -1., 1.) AS [AmountVerified]
    FROM   cteAppend app;
GO

Please note:

  1. I used a binary (i.e. _BIN2) Collation which is faster than a case-sensitive Collation as it does not need to account for any linguistic rules.
  2. The only thing that really matters is the location (i.e. the "index") of the right-most character within the list of alpha characters plus the two curly brackets. Everything done operationally is derived from that position rather than from the value of the character itself (a quick check of this mapping follows this list).
  3. I used the input parameter and return value datatypes as indicated in the original UDF that was rewritten by the O.P. Unless there was a good reason to go from VARCHAR(50) to VARCHAR(60), and from NUMERIC(18,3) to NUMERIC(18,2) (a good reason would be "they were wrong"), I would stick with the original signature / types.
  4. I added a period / decimal point to the end of the 3 numeric literals / constants: 100., -1., and 1.. This was not in my original version of this TVF (in the history of this answer) but I noticed some CONVERT_IMPLICIT calls in the XML execution plan (since 100 is an INT but the operation needs to be NUMERIC / DECIMAL) so I just took care of that ahead of time.
  5. I create a string character using the CHAR() function rather than passing a string version of a number (e.g. '2') into a CONVERT function (which was what I was originally doing, again in the history). This appears to be ever so slightly faster. Only a few milliseconds, but still.
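
As a quick sanity check of points 2 and 5 above (not part of the timing tests), the single CHARINDEX position yields both the replacement digit (via CHAR()) and the sign (anything past position 10 is negative):

SELECT tst.[LastChar],
       pos.[Position],
       CHAR(48 + ((pos.[Position] - 1) % 10)) AS [ReplacementDigit],
       IIF(pos.[Position] > 10, 'negative', 'positive') AS [Sign]
FROM  (VALUES ('{'), ('A'), ('I'), ('}'), ('J'), ('R'), ('p'), ('y')) tst([LastChar])
CROSS APPLY (SELECT CHARINDEX(tst.[LastChar] COLLATE Latin1_General_100_BIN2,
                              '{ABCDEFGHI}JKLMNOPQRpqrstuvwxy') AS [Position]) pos;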

TEST

Please note that I had to filter out rows ending with } since those cause the O.P.'s and Joe's TVFs to error. While my code handles the } correctly, I wanted to be consistent about which rows were being tested across the 3 versions. This is why the setup query generates slightly more rows (463,698) than the 448,740 noted above as actually being tested.

SET STATISTICS TIME ON;

DECLARE @Dummy DECIMAL(18, 3);
SELECT --@Dummy =  -- commented out = results to client; uncomment to not return results
cnvrtS.[AmountVerified]
FROM  dbo.TestVals vals
CROSS APPLY dbo.ConvertAmountVerified_Solomon(vals.[Col1]) cnvrtS
WHERE RIGHT(vals.[Col1], 1) <> '}'; -- filter out rows that cause error in O.P.'s code

SET STATISTICS TIME OFF;
GO

CPU time is lower when uncommenting the --@Dummy = (i.e. when dumping to a variable instead of returning the result set), and my TVF is still the fastest of the 3. But interestingly enough, when uncommenting the variable, the rankings change a little:

  • Joe's TVF: 3295 ms
  • O.P.'s TVF: 2240 ms
  • Solomon's TVF: 1203 ms

Not sure why the O.P.'s code would perform so much better in this scenario (whereas my and Joe's code only improved marginally), but it does seem consistent across many tests. And no, I did not look at execution plan differences as I don't have time to investigate that.

EVEN FASTERER

I have completed testing of the alternate approach, and it does provide a slight but definite improvement over what is shown above. The new approach uses SQLCLR, and it appears to scale better. I found that when adding the second column to the query, the T-SQL approach doubled in time. But when adding additional columns using a SQLCLR scalar UDF, the time went up, though not by the same amount as the single-column timing. Maybe there is some initial overhead in invoking the SQLCLR method (not associated with the overhead of the initial loading of the App Domain and of the Assembly into the App Domain), because the timings were (elapsed time, not CPU time):

  • 1 column: 1018 ms
  • 2 columns: 1750 - 1800 ms
  • 3 columns: 2500 - 2600 ms

So it's possible that the timing (of dumping to a variable, not returning the result set) has a 200 ms - 250 ms overhead and then 750 ms - 800 ms per instance time. CPU timings were: 950 ms, 1750 ms, and 2400 ms for 1, 2, and 3 instances of the UDF, respectively.

C# CODE

using System.Data.SqlTypes;
using Microsoft.SqlServer.Server;

public class Transformations
{
    private const string _CHARLIST_ = "{ABCDEFGHI}JKLMNOPQRpqrstuvwxy";

    [SqlFunction(IsDeterministic = true, IsPrecise = true,
        DataAccess = DataAccessKind.None, SystemDataAccess = SystemDataAccessKind.None)]
    public static SqlDouble ConvertAmountVerified_SQLCLR(
        [SqlFacet(MaxSize = 50)] SqlString Amt)
    {
        // Trim trailing spaces (mirrors the RTRIM in the T-SQL versions)
        string _Amount = Amt.Value.TrimEnd();

        // 0-based position of the trailing character in the sign/digit list (-1 if not found)
        int _LastCharIndex = (_Amount.Length - 1);
        int _Position = _CHARLIST_.IndexOf(_Amount[_LastCharIndex]);

        if (_Position >= 0)
        {
            // Replace the trailing character with the digit it encodes (position % 10)
            char[] _TempAmount = _Amount.ToCharArray();
            _TempAmount[_LastCharIndex] = char.ConvertFromUtf32(48 + (_Position % 10))[0];
            _Amount = new string(_TempAmount);
        }

        // Implied two decimal places
        decimal _Return = decimal.Parse(_Amount) / 100M;

        // Positions 10 and up ('}', 'J'-'R', 'p'-'y') indicate a negative amount
        if (_Position > 9)
        {
            _Return *= -1M;
        }

        return new SqlDouble((double)_Return);
    }
}
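
For completeness, the T-SQL wrapper for that method would look roughly like this. The assembly name [AmountConversions] is just a placeholder (the compiled DLL has to be cataloged with CREATE ASSEMBLY first); the class and method names come from the C# above, and SqlDouble maps to FLOAT:

CREATE FUNCTION dbo.ConvertAmountVerified_SQLCLR
(
    @Amt VARCHAR(50)
)
RETURNS FLOAT
AS EXTERNAL NAME [AmountConversions].[Transformations].[ConvertAmountVerified_SQLCLR];
GO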

I originally used SqlDecimal as the return type, but there is a performance penalty for using that as opposed to SqlDouble / FLOAT. Sometimes FLOAT has issues (due to it being an imprecise type), but I verified against the T-SQL TVF via the following query and no differences were detected:

SELECT cnvrtS.[AmountVerified],
       dbo.ConvertAmountVerified_SQLCLR(vals.[Col1])
FROM   dbo.TestVals vals
CROSS APPLY dbo.ConvertAmountVerified_Solomon(vals.[Col1]) cnvrtS
WHERE  cnvrtS.[AmountVerified] <> dbo.ConvertAmountVerified_SQLCLR(vals.[Col1]);

TEST

SET STATISTICS TIME ON;

DECLARE @Dummy DECIMAL(18, 3), @Dummy2 DECIMAL(18, 3), @Dummy3 DECIMAL(18, 3);
SELECT @Dummy = 
       dbo.ConvertAmountVerified_SQLCLR(vals.[Col1])
              , @Dummy2 =
       dbo.ConvertAmountVerified_SQLCLR(vals.[Col2])
              , @Dummy3 =
       dbo.ConvertAmountVerified_SQLCLR(vals.[Col3])
FROM  dbo.TestVals vals
WHERE RIGHT(vals.[Col1], 1) <> '}';

SET STATISTICS TIME OFF;