Skip to content

Commit

Permalink
Merge pull request #347 from TomWhite-MedStar/Feature_345_datetime_ca…
Browse files Browse the repository at this point in the history
…lculation_in_spark

Added datediff() and dateadd() for hours/minutes/seconds for Spark, Oracle, PostgreSQL, Redshift
  • Loading branch information
schuemie authored Mar 19, 2024
2 parents 1fe7c22 + 3bfb529 commit 2cf1d91
Show file tree
Hide file tree
Showing 5 changed files with 179 additions and 12 deletions.
39 changes: 27 additions & 12 deletions inst/csv/replacementPatterns.csv
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ oracle,"DATEADD(m,@months,@date)","ADD_MONTHS(@date, @months)"
oracle,"DATEADD(year,@years,@date)","ADD_MONTHS(@date, 12 * @years)"
oracle,"DATEADD(yyyy,@years,@date)","ADD_MONTHS(@date, 12 * @years)"
oracle,"DATEADD(yy,@years,@date)","ADD_MONTHS(@date, 12 * @years)"
oracle,"DATEDIFF(second,@start, @end)","EXTRACT(SECOND FROM (@end - @start))"
oracle,"DATEDIFF(minute,@start, @end)","EXTRACT(MINUTE FROM (@end - @start))"
oracle,"DATEDIFF(hour,@start, @end)","EXTRACT(HOUR FROM (@end - @start))"
oracle,"DATEDIFF(day,@start, @end)",CEIL(CAST(@end AS DATE) - CAST(@start AS DATE))
oracle,"DATEDIFF(dd,@start, @end)",CEIL(CAST(@end AS DATE) - CAST(@start AS DATE))
oracle,"DATEDIFF(d,@start, @end)",CEIL(CAST(@end AS DATE) - CAST(@start AS DATE))
Expand Down Expand Up @@ -171,6 +174,9 @@ postgresql,"DATEADD(month,@months,@date)",(@date + @months*INTERVAL'1 month')
postgresql,"DATEADD(yy,@years,@date)",(@date + @years*INTERVAL'1 year')
postgresql,"DATEADD(yyyy,@years,@date)",(@date + @years*INTERVAL'1 year')
postgresql,"DATEADD(year,@years,@date)",(@date + @years*INTERVAL'1 year')
postgresql,"DATEDIFF(second,@start, @end)",EXTRACT(EPOCH FROM (@end - @start))
postgresql,"DATEDIFF(minute,@start, @end)",(EXTRACT(EPOCH FROM (@end - @start)) / 60)
postgresql,"DATEDIFF(hour,@start, @end)",(EXTRACT(EPOCH FROM (@end - @start)) / 3600)
postgresql,"DATEDIFF(d,@start, @end)",(CAST(@end AS DATE) - CAST(@start AS DATE))
postgresql,"DATEDIFF(dd,@start, @end)",(CAST(@end AS DATE) - CAST(@start AS DATE))
postgresql,"DATEDIFF(day,@start, @end)",(CAST(@end AS DATE) - CAST(@start AS DATE))
Expand Down Expand Up @@ -300,6 +306,7 @@ redshift,"DATEDIFF(n,@start,@end)","DATEDIFF(minute,@start,@end)"
redshift,"DATEDIFF(ss,@start,@end)","DATEDIFF(second,@start,@end)"
redshift,"DATEDIFF(mcs,@start,@end)","DATEDIFF(microsecond,@start,@end)"
redshift,"DATEDIFF_BIG(dd,@start,@end)","DATEDIFF(day,@start,@end)"
redshift,"DATEDIFF_BIG(day,@start,@end)","DATEDIFF(day,@start,@end)"
redshift,"DATEDIFF_BIG(m,@start,@end)","DATEDIFF(month,@start,@end)"
redshift,"DATEDIFF_BIG(mm,@start,@end)","DATEDIFF(month,@start,@end)"
redshift,"DATEDIFF_BIG(yyyy,@start,@end)","DATEDIFF(year,@start,@end)"
Expand All @@ -309,9 +316,12 @@ redshift,"DATEDIFF_BIG(q,@start,@end)","DATEDIFF(quarter,@start,@end)"
redshift,"DATEDIFF_BIG(wk,@start,@end)","DATEDIFF(week,@start,@end)"
redshift,"DATEDIFF_BIG(ww,@start,@end)","DATEDIFF(week,@start,@end)"
redshift,"DATEDIFF_BIG(hh,@start,@end)","DATEDIFF(hour,@start,@end)"
redshift,"DATEDIFF_BIG(hour,@start,@end)","DATEDIFF(hour,@start,@end)"
redshift,"DATEDIFF_BIG(mi,@start,@end)","DATEDIFF(minute,@start,@end)"
redshift,"DATEDIFF_BIG(minute,@start,@end)","DATEDIFF(minute,@start,@end)"
redshift,"DATEDIFF_BIG(n,@start,@end)","DATEDIFF(minute,@start,@end)"
redshift,"DATEDIFF_BIG(ss,@start,@end)","DATEDIFF(second,@start,@end)"
redshift,"DATEDIFF_BIG(second,@start,@end)","DATEDIFF(second,@start,@end)"
redshift,"DATEDIFF_BIG(mcs,@start,@end)","DATEDIFF(microsecond,@start,@end)"
redshift,"DATEPART(dd,@date)","DATEPART(day,@date)"
redshift,"DATEPART(m,@date)","DATEPART(month,@date)"
Expand Down Expand Up @@ -1070,23 +1080,28 @@ spark,"HASHBYTES('MD5',@a)","MD5(@a)"
spark,"CONVERT(VARBINARY, CONCAT('0x', @a), 1)","CAST(CONCAT('x', @a) AS BIT(32))"
spark,"CONVERT(DATE, @a)","TO_DATE(@a, 'yyyy-MM-dd')"
spark,"DATEPART(@part, @date)","DATE_PART('@part', @date)"
spark,"DATEADD(second,@seconds,@datetime)",(@datetime + INTERVAL @seconds second)
spark,"DATEADD(minute,@minutes,@datetime)",(@datetime + INTERVAL @minutes minute)
spark,"DATEADD(hour,@hours,@datetime)",(@datetime + INTERVAL @hours hour)
spark,"DATEADD(d,@days,@date)","date_add(@date, @days)"
spark,"DATEADD(dd,@days,@date)","date_add(@date, @days)"
spark,"DATEADD(day,@days,@date)","date_add(@date, @days)"
spark,"DATEADD(m,@months,@date)",(@date + INTERVAL @months month)
spark,"DATEADD(mm,@months,@date)",(@date + INTERVAL @months month)
spark,"DATEADD(month,@months,@date)",(@date + INTERVAL @months month)
spark,"DATEADD(yy,@years,@date)",(@date + INTERVAL @years year)
spark,"DATEADD(yyyy,@years,@date)",(@date + INTERVAL @years year)
spark,"DATEADD(year,@years,@date)",(@date + INTERVAL @years year)
spark,"DATEADD(second,@seconds,@datetime)",DATE_ADD(second,@seconds,@datetime)
spark,"DATEADD(minute,@minutes,@datetime)",DATE_ADD(minute,@minutes,@datetime)
spark,"DATEADD(hour,@hours,@datetime)",DATE_ADD(hour,@hours,@datetime)
spark,"DATEADD(d,@days,@date)",DATE_ADD(day,@days,@date)
spark,"DATEADD(dd,@days,@date)",DATE_ADD(day,@days,@date)
spark,"DATEADD(day,@days,@date)",DATE_ADD(day,@days,@date)
spark,"DATEADD(m,@months,@date)",DATE_ADD(month,@months,@date)
spark,"DATEADD(mm,@months,@date)",DATE_ADD(month,@months,@date)
spark,"DATEADD(month,@months,@date)",DATE_ADD(month,@months,@date)
spark,"DATEADD(yy,@years,@date)",DATE_ADD(year,@years,@date)
spark,"DATEADD(yyyy,@years,@date)",DATE_ADD(year,@years,@date)
spark,"DATEADD(year,@years,@date)",DATE_ADD(year,@years,@date)
spark,"date_add(@date, @(-?[0-9]+)a.0)","date_add(@date, @a)"
spark,INTERVAL @(-?[0-9]+)a.0,INTERVAL @a
spark,"DATEDIFF(second,@start,@end)","datediff(second,@end,@start)"
spark,"DATEDIFF(minute,@start,@end)","datediff(minute,@end,@start)"
spark,"DATEDIFF(hour,@start,@end)","datediff(hour,@end,@start)"
spark,"DATEDIFF(d,@start, @end)","datediff(@end,@start)"
spark,"DATEDIFF(dd,@start, @end)","datediff(@end,@start)"
spark,"DATEDIFF(day,@start, @end)","datediff(@end,@start)"
spark,"DATEDIFF(month,@start,@end)","datediff(month,@end,@start)"
spark,"DATEDIFF(year,@start,@end)","datediff(year,@end,@start)"
spark,"CONVERT(VARCHAR,@date,112)","@date"
spark,GETDATE(),CURRENT_DATE
spark,CAST(@a AS varchar(@b)) +,"SUBSTRING(CAST(@a AS string), 0, @b) ||"
Expand Down
24 changes: 24 additions & 0 deletions tests/testthat/test-translate-oracle.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,30 @@ test_that("translate sql server -> Oracle DATEDIFF", {
sql,
"SELECT CEIL(CAST(drug_era_end_date AS DATE) - CAST(drug_era_start_date AS DATE)) FROM drug_era;"
)

# DATEDIFF with the second / minute / hour date parts: each statement below is
# expected to be rewritten into an Oracle EXTRACT(<part> FROM (end - start))
# expression.
# NOTE(review): in Oracle, EXTRACT(SECOND|MINUTE|HOUR FROM <interval>) appears
# to return only that single field of the interval, not the total elapsed
# seconds/minutes/hours that SQL Server's DATEDIFF reports, and subtracting two
# DATE values presumably yields a number rather than an interval. Confirm the
# replacement pattern is what is intended before relying on these expectations.

# second -> EXTRACT(SECOND FROM ...)
sql <- translate("SELECT DATEDIFF(second,drug_era_start_date,drug_era_end_date) FROM drug_era;",
targetDialect = "oracle"
)
expect_equal_ignore_spaces(
sql,
"SELECT EXTRACT(SECOND FROM (drug_era_end_date - drug_era_start_date)) FROM drug_era;"
)

# minute -> EXTRACT(MINUTE FROM ...)
sql <- translate("SELECT DATEDIFF(minute,drug_era_start_date,drug_era_end_date) FROM drug_era;",
targetDialect = "oracle"
)
expect_equal_ignore_spaces(
sql,
"SELECT EXTRACT(MINUTE FROM (drug_era_end_date - drug_era_start_date)) FROM drug_era;"
)

# hour -> EXTRACT(HOUR FROM ...)
sql <- translate("SELECT DATEDIFF(hour,drug_era_start_date,drug_era_end_date) FROM drug_era;",
targetDialect = "oracle"
)
expect_equal_ignore_spaces(
sql,
"SELECT EXTRACT(HOUR FROM (drug_era_end_date - drug_era_start_date)) FROM drug_era;"
)
})

test_that("translate sql server -> Oracle DATEDIFF year", {
Expand Down
17 changes: 17 additions & 0 deletions tests/testthat/test-translate-postgresql.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,23 @@ test_that("translate sql server -> PostgreSQL date diff (month)", {
expect_equal_ignore_spaces(sql, "SELECT (extract(year from age(CAST(drug_era_end_date AS DATE), CAST(drug_era_start_date AS DATE)))*12 + extract(month from age(CAST(drug_era_end_date AS DATE), CAST(drug_era_start_date AS DATE)))) FROM drug_era;")
})

test_that("translate sql server -> PostgreSQL date diff (hour, minute, second)", {
  # Translate one SQL Server statement to PostgreSQL and compare it with the
  # expected text, ignoring whitespace differences.
  expectPostgresTranslation <- function(sourceSql, expectedSql) {
    translated <- translate(sourceSql, targetDialect = "postgresql")
    expect_equal_ignore_spaces(translated, expectedSql)
  }

  # hour: epoch difference divided by 3600
  expectPostgresTranslation(
    "SELECT DATEDIFF(hour,drug_exposure_start_datetime,drug_exposure_end_datetime) FROM drug_exposure;",
    "SELECT (EXTRACT(EPOCH FROM (drug_exposure_end_datetime - drug_exposure_start_datetime)) / 3600) FROM drug_exposure;"
  )

  # minute: epoch difference divided by 60
  expectPostgresTranslation(
    "SELECT DATEDIFF(minute,drug_exposure_start_datetime,drug_exposure_end_datetime) FROM drug_exposure;",
    "SELECT (EXTRACT(EPOCH FROM (drug_exposure_end_datetime - drug_exposure_start_datetime)) / 60) FROM drug_exposure;"
  )

  # second: the epoch difference itself
  expectPostgresTranslation(
    "SELECT DATEDIFF(second,drug_exposure_start_datetime,drug_exposure_end_datetime) FROM drug_exposure;",
    "SELECT EXTRACT(EPOCH FROM (drug_exposure_end_datetime - drug_exposure_start_datetime)) FROM drug_exposure;"
  )
})

test_that("translate sql server -> Postgres WITH SELECT", {
sql <- translate("WITH cte1 AS (SELECT a FROM b) SELECT c FROM cte1;",
targetDialect = "postgresql"
Expand Down
68 changes: 68 additions & 0 deletions tests/testthat/test-translate-redshift.R
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,14 @@ test_that("translate sql server -> RedShift DATEADD hh", {
sql,
"SELECT DATEADD(hour, CAST(3 as int), drug_era_end_date) FROM drug_era;"
)

sql <- translate("SELECT DATEADD(hour, 3, drug_era_end_date) FROM drug_era;",
targetDialect = "redshift"
)
expect_equal_ignore_spaces(
sql,
"SELECT DATEADD(hour, CAST(3 as int), drug_era_end_date) FROM drug_era;"
)
})

test_that("translate sql server -> RedShift DATEADD mi", {
Expand All @@ -236,6 +244,14 @@ test_that("translate sql server -> RedShift DATEADD mi", {
sql,
"SELECT DATEADD(minute, CAST(3 as int), drug_era_end_date) FROM drug_era;"
)

sql <- translate("SELECT DATEADD(minute, 3, drug_era_end_date) FROM drug_era;",
targetDialect = "redshift"
)
expect_equal_ignore_spaces(
sql,
"SELECT DATEADD(minute, CAST(3 as int), drug_era_end_date) FROM drug_era;"
)
})

test_that("translate sql server -> RedShift DATEADD ss", {
Expand All @@ -246,6 +262,14 @@ test_that("translate sql server -> RedShift DATEADD ss", {
sql,
"SELECT DATEADD(second, CAST(3 as int), drug_era_end_date) FROM drug_era;"
)

sql <- translate("SELECT DATEADD(second, 3, drug_era_end_date) FROM drug_era;",
targetDialect = "redshift"
)
expect_equal_ignore_spaces(
sql,
"SELECT DATEADD(second, CAST(3 as int), drug_era_end_date) FROM drug_era;"
)
})

test_that("translate sql server -> RedShift DATEADD mcs", {
Expand Down Expand Up @@ -367,6 +391,14 @@ test_that("translate sql server -> RedShift DATEDIFF hh", {
sql,
"SELECT DATEDIFF(hour, drug_era_start_date, drug_era_end_date) FROM drug_era;"
)

sql <- translate("SELECT DATEDIFF(hour, drug_era_start_date, drug_era_end_date) FROM drug_era;",
targetDialect = "redshift"
)
expect_equal_ignore_spaces(
sql,
"SELECT DATEDIFF(hour, drug_era_start_date, drug_era_end_date) FROM drug_era;"
)
})

test_that("translate sql server -> RedShift DATEDIFF mi", {
Expand All @@ -377,6 +409,14 @@ test_that("translate sql server -> RedShift DATEDIFF mi", {
sql,
"SELECT DATEDIFF(minute, drug_era_start_date, drug_era_end_date) FROM drug_era;"
)

sql <- translate("SELECT DATEDIFF(minute, drug_era_start_date, drug_era_end_date) FROM drug_era;",
targetDialect = "redshift"
)
expect_equal_ignore_spaces(
sql,
"SELECT DATEDIFF(minute, drug_era_start_date, drug_era_end_date) FROM drug_era;"
)
})

test_that("translate sql server -> RedShift DATEDIFF n", {
Expand All @@ -397,6 +437,14 @@ test_that("translate sql server -> RedShift DATEDIFF ss", {
sql,
"SELECT DATEDIFF(second, drug_era_start_date, drug_era_end_date) FROM drug_era;"
)

sql <- translate("SELECT DATEDIFF(second, drug_era_start_date, drug_era_end_date) FROM drug_era;",
targetDialect = "redshift"
)
expect_equal_ignore_spaces(
sql,
"SELECT DATEDIFF(second, drug_era_start_date, drug_era_end_date) FROM drug_era;"
)
})

test_that("translate sql server -> RedShift DATEDIFF mcs", {
Expand Down Expand Up @@ -554,6 +602,11 @@ test_that("translate sql server -> RedShift DATEPART dd", {
targetDialect = "redshift"
)
expect_equal_ignore_spaces(sql, "SELECT DATEPART(day, drug_era_end_date) FROM drug_era;")

sql <- translate("SELECT DATEPART(day, drug_era_end_date) FROM drug_era;",
targetDialect = "redshift"
)
expect_equal_ignore_spaces(sql, "SELECT DATEPART(day, drug_era_end_date) FROM drug_era;")
})

test_that("translate sql server -> RedShift DATEPART m", {
Expand Down Expand Up @@ -617,13 +670,23 @@ test_that("translate sql server -> RedShift DATEPART hh", {
targetDialect = "redshift"
)
expect_equal_ignore_spaces(sql, "SELECT DATEPART(hour, drug_era_end_date) FROM drug_era;")

sql <- translate("SELECT DATEPART(hour, drug_era_end_date) FROM drug_era;",
targetDialect = "redshift"
)
expect_equal_ignore_spaces(sql, "SELECT DATEPART(hour, drug_era_end_date) FROM drug_era;")
})

test_that("translate sql server -> RedShift DATEPART mi", {
  # Both the abbreviated (`mi`) and the spelled-out (`minute`) date part are
  # expected to produce the same RedShift statement.
  expectedSql <- "SELECT DATEPART(minute, drug_era_end_date) FROM drug_era;"
  sourceStatements <- c(
    "SELECT DATEPART(mi, drug_era_end_date) FROM drug_era;",
    "SELECT DATEPART(minute, drug_era_end_date) FROM drug_era;"
  )

  for (statement in sourceStatements) {
    translated <- translate(statement, targetDialect = "redshift")
    expect_equal_ignore_spaces(translated, expectedSql)
  }
})

test_that("translate sql server -> RedShift DATEPART n", {
Expand All @@ -638,6 +701,11 @@ test_that("translate sql server -> RedShift DATEPART ss", {
targetDialect = "redshift"
)
expect_equal_ignore_spaces(sql, "SELECT DATEPART(second, drug_era_end_date) FROM drug_era;")

sql <- translate("SELECT DATEPART(second, drug_era_end_date) FROM drug_era;",
targetDialect = "redshift"
)
expect_equal_ignore_spaces(sql, "SELECT DATEPART(second, drug_era_end_date) FROM drug_era;")
})

test_that("translate sql server -> RedShift DATEPART mcs", {
Expand Down
43 changes: 43 additions & 0 deletions tests/testthat/test-translate-spark.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,23 @@ test_that("translate sql server -> spark convert date", {


test_that("translate sql server -> spark dateadd", {
# Need custom translation pattern for negative intervals in Spark
sql <- translate("SELECT dateadd(second, -1 * 2, '2019-01-01 00:00:00')",
targetDialect = "spark"
)
expect_equal_ignore_spaces(sql, "SELECT ('2019-01-01 00:00:00' - INTERVAL 2 second)")

sql <- translate("SELECT dateadd(minute, -1 * 3, '2019-01-01 00:00:00')",
targetDialect = "spark"
)
expect_equal_ignore_spaces(sql, "SELECT ('2019-01-01 00:00:00' - INTERVAL 3 minute)")

sql <- translate("SELECT dateadd(hour, -1 * 4, '2019-01-01 00:00:00')",
targetDialect = "spark"
)
expect_equal_ignore_spaces(sql, "SELECT ('2019-01-01 00:00:00' - INTERVAL 4 hour)")

# Positive intervals have typical translation patterns
sql <- translate("SELECT dateadd(second, 1, '2019-01-01 00:00:00')",
targetDialect = "spark"
)
Expand Down Expand Up @@ -132,6 +149,32 @@ test_that("translate sql server -> spark datediff", {
targetDialect = "spark"
)
expect_equal_ignore_spaces(sql, "SELECT datediff('2019-01-02', '2019-01-01')")

# Unit-qualified DATEDIFF (second, minute, hour, month, year): each statement
# is expected to keep the unit as the first argument and to swap the two date
# arguments in the Spark output.
# NOTE(review): the three-argument Spark/Databricks datediff(unit, start, end)
# appears to take the start value before the end value, unlike the
# two-argument datediff(endDate, startDate). If so, the swapped order expected
# here would produce a negated result — confirm against the Spark SQL docs.

# second
sql <- translate("SELECT datediff(second, '2019-01-01', '2019-01-02')",
targetDialect = "spark"
)
expect_equal_ignore_spaces(sql, "SELECT datediff(second, '2019-01-02', '2019-01-01')")

# minute
sql <- translate("SELECT datediff(minute, '2019-01-01', '2019-01-02')",
targetDialect = "spark"
)
expect_equal_ignore_spaces(sql, "SELECT datediff(minute, '2019-01-02', '2019-01-01')")

# hour
sql <- translate("SELECT datediff(hour, '2019-01-01', '2019-01-02')",
targetDialect = "spark"
)
expect_equal_ignore_spaces(sql, "SELECT datediff(hour, '2019-01-02', '2019-01-01')")

# month
sql <- translate("SELECT datediff(month, '2019-01-01', '2019-01-02')",
targetDialect = "spark"
)
expect_equal_ignore_spaces(sql, "SELECT datediff(month, '2019-01-02', '2019-01-01')")

# year
sql <- translate("SELECT datediff(year, '2019-01-01', '2019-01-02')",
targetDialect = "spark"
)
expect_equal_ignore_spaces(sql, "SELECT datediff(year, '2019-01-02', '2019-01-01')")

})

test_that("translate sql server -> spark convert date", {
Expand Down

0 comments on commit 2cf1d91

Please sign in to comment.