Sql-server – way to pass two SQL tables w/o JOIN for processing into R using MS SQL Server 2016

rsql serversql-server-2016

Is there a way to pass two SQL Server tables without a JOIN for processing into R using MS SQL Server 2016?

For Example, @input_data_1=TABLE, passes the 'TABLE' as a dataframe into R with default variable name InputDataSet.

Is there a way to somehow access other tables residing in SQL Server through R?

Best Answer

@input_data_1 can be a T-SQL statement, or a reference to a view masking a complex SQL statement, so things can really be as complicated as you make them. Here's some simple examples of how to combine tables of the same or different structures. You could conceivably do some filtering in the r script on the table source column (xsource):

DROP VIEW IF EXISTS dbo.vw_union1
DROP VIEW IF EXISTS dbo.vw_union2
DROP TABLE IF EXISTS dbo.table1
DROP TABLE IF EXISTS dbo.table2
DROP TABLE IF EXISTS dbo.table3
GO

CREATE TABLE dbo.table1 (
    col1 INT NOT NULL,
    col2 INT NOT NULL,

    CONSTRAINT PK_table1 PRIMARY KEY ( col1 )
)
GO

-- Same structure as table1
CREATE TABLE dbo.table2 (
    col1 INT NOT NULL,
    col2 INT NOT NULL,

    CONSTRAINT PK_table2 PRIMARY KEY ( col1 )
)
GO

-- Different structure to table1
CREATE TABLE dbo.table3 (
    col7 VARCHAR(5) NOT NULL,
    col8 DATE NOT NULL,

    CONSTRAINT PK_table3 PRIMARY KEY ( col7 )
)
GO

INSERT INTO dbo.table1 ( col1, col2 )
VALUES 
    ( 1, 2 ),
    ( 3, 4 )


INSERT INTO dbo.table2 ( col1, col2 )
VALUES 
    ( 1, 6 ),
    ( 3, 8 )

GO

INSERT INTO dbo.table3 ( col7, col8 )
VALUES 
    ( 'A', GETDATE() ),
    ( 'B', GETDATE() )

GO

CREATE VIEW dbo.vw_union1
AS
SELECT 'table1' AS xsource, col1, col2 
FROM dbo.table1
UNION ALL
SELECT 'table2' AS xsource, col1, col2 
FROM dbo.table2
GO


CREATE VIEW dbo.vw_union2
AS
SELECT 'table1' AS xsource, col1, col2 , NULL AS col7, NULL AS col8
FROM dbo.table1
UNION ALL
SELECT 'table3' AS xsource, NULL AS col1, NULL AS col2, col7, col8 
FROM dbo.table3
GO

-- Long SQL statement; same text as vw_union1
EXEC dbo.sp_execute_external_script
    @language = N'R',
    @input_data_1 = N'SELECT ''table1'' AS xsource, col1, col2 FROM dbo.table1 UNION ALL SELECT ''table2'' AS xsource, col1, col2 FROM dbo.table2',
    @script = N'OutputDataSet <- aggregate(InputDataSet$col2, by=list(InputDataSet$col1), FUN=median)'
GO

-- SELECT from view where structures are the same
EXEC dbo.sp_execute_external_script
    @language = N'R',
    @input_data_1 = N'SELECT xsource, col2 FROM dbo.vw_union1',
    @script = N'OutputDataSet <- aggregate(InputDataSet$col2, by=list(InputDataSet$xsource), FUN=median)'
GO


-- SELECT from view where structures are the different
EXEC dbo.sp_execute_external_script
    @language = N'R',
    @input_data_1 = N'SELECT xsource, col2 FROM dbo.vw_union2',
    @script = N'OutputDataSet <- aggregate(InputDataSet$col2, by=list(InputDataSet$xsource), FUN=median)'
GO

-- subset example
EXEC dbo.sp_execute_external_script
    @language = N'R',
    @input_data_1 = N'SELECT xsource, col2 FROM dbo.vw_union1',
    @script = N'InputDataSet <- subset( InputDataSet, xsource == "table1")
OutputDataSet <- aggregate(InputDataSet$col2, by=list(InputDataSet$xsource), FUN=median)'
GO

There is not an @input_data_2 or @input_data_n at this time.