-
Notifications
You must be signed in to change notification settings - Fork 100
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support microsecond precision during copy unload #492
base: master
Are you sure you want to change the base?
Changes from 3 commits
91398eb
52adb3e
285795a
0dedb0b
371a675
6151285
b373cd1
afed2e8
ded0e0d
f6b8e6c
86a0fa9
4698777
21f4a50
0be43cc
5e7bddd
4de6d57
aaff19f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,15 +41,18 @@ private[snowflake] object Conversions { | |
// Note - we use a pattern with timezone in the beginning, to make sure | ||
// parsing with PATTERN_NTZ fails for PATTERN_TZLTZ strings. | ||
// Note - for JDK 1.6, we use Z instead of XX for SimpleDateFormat | |
// Because SimpleDateFormat only supports milliseconds, | |
// we need to refactor this and handle the nanoseconds field separately | |
private val PATTERN_TZLTZ = | ||
if (System.getProperty("java.version").startsWith("1.6.")) { | ||
"Z yyyy-MM-dd HH:mm:ss.SSS" | ||
"Z yyyy-MM-dd HH:mm:ss." | ||
} else { | ||
"XX yyyy-MM-dd HH:mm:ss.SSS" | ||
"XX yyyy-MM-dd HH:mm:ss." | ||
} | ||
|
||
// For NTZ, Snowflake serializes w/o timezone | ||
private val PATTERN_NTZ = "yyyy-MM-dd HH:mm:ss.SSS" | ||
// and the nanoseconds field is handled separately during parsing | |
private val PATTERN_NTZ = "yyyy-MM-dd HH:mm:ss." | ||
|
||
// For DATE, simple ISO format | ||
private val PATTERN_DATE = "yyyy-MM-dd" | ||
|
@@ -193,8 +196,25 @@ private[snowflake] object Conversions { | |
* Parse a string exported from a Snowflake TIMESTAMP column | ||
*/ | ||
private def parseTimestamp(s: String, isInternalRow: Boolean): Any = { | ||
// Need to handle the nanoseconds field separately | |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you please add an internal parameter to enable/disable the change? It's enabled by default. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm.. do you mean to have a parameter such as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree users need to use this fix. This is why we can set the internal parameter as true by default. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the suggestion. Will push a new commit to add this parameter. |
||
// valueOf only works with yyyy-[m]m-[d]d hh:mm:ss[.f...] | ||
// so we need to do a little parsing | ||
val timestampRegex = """\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3,9}""".r | ||
|
||
val parsedTS = timestampRegex.findFirstMatchIn(s) match { | ||
case Some(ts) => ts.toString() | ||
case None => throw new IllegalArgumentException(s"Malformed timestamp $s") | ||
} | ||
|
||
val ts = java.sql.Timestamp.valueOf(parsedTS) | ||
val nanoFraction = ts.getNanos | ||
|
||
val res = new Timestamp(snowflakeTimestampFormat.parse(s).getTime) | ||
if (isInternalRow) DateTimeUtils.fromJavaTimestamp(res) | ||
|
||
res.setNanos(nanoFraction) | ||
// fromJavaTimestamp and Spark only support microsecond-level | |
// precision, so the nanosecond field has to be divided by 1000 | |
if (isInternalRow) (DateTimeUtils.fromJavaTimestamp(res) + nanoFraction/1000) | ||
else res | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -193,4 +193,88 @@ class ConversionsSuite extends FunSuite { | |
|
||
assert(expect == result.toString()) | ||
} | ||
|
||
test("Data with micro-seconds and nano-seconds precision should be correctly converted"){ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a unit test. It proves that it can parse timestamp with micro/nano-seconds. |
||
val convertRow = Conversions.createRowConverter[Row](TestUtils.testSchema) | ||
val doubleMin = Double.MinValue.toString | ||
val longMax = Long.MaxValue.toString | ||
// scalastyle:off | ||
val unicodeString = "Unicode是樂趣" | ||
// scalastyle:on | ||
|
||
val timestampString = "2014-03-01 00:00:01.123456" | ||
|
||
val expectedTimestampMicro: Timestamp = java.sql.Timestamp.valueOf(timestampString) | ||
|
||
val dateString = "2015-07-01" | ||
val expectedDate = TestUtils.toMillis(2015, 6, 1, 0, 0, 0) | ||
|
||
|
||
|
||
val timestampString2 = "2014-03-01 00:00:01.123456789" | ||
|
||
val expectedTimestampMicro2: Timestamp = java.sql.Timestamp.valueOf(timestampString2) | ||
|
||
val dateString2 = "2015-07-01" | ||
val expectedDate2 = TestUtils.toMillis(2015, 6, 1, 0, 0, 0) | ||
|
||
val convertedRow = convertRow( | ||
Array( | ||
"1", | ||
dateString, | ||
"123.45", | ||
doubleMin, | ||
"1.0", | ||
"42", | ||
longMax, | ||
"23", | ||
unicodeString, | ||
timestampString | ||
) | ||
) | ||
|
||
val expectedRow = Row( | ||
1.asInstanceOf[Byte], | ||
new Date(expectedDate), | ||
new java.math.BigDecimal("123.45"), | ||
Double.MinValue, | ||
1.0f, | ||
42, | ||
Long.MaxValue, | ||
23.toShort, | ||
unicodeString, | ||
expectedTimestampMicro | ||
) | ||
|
||
val convertedRow2 = convertRow( | ||
Array( | ||
"1", | ||
dateString2, | ||
"123.45", | ||
doubleMin, | ||
"1.0", | ||
"42", | ||
longMax, | ||
"23", | ||
unicodeString, | ||
timestampString2 | ||
) | ||
) | ||
|
||
val expectedRow2 = Row( | ||
1.asInstanceOf[Byte], | ||
new Date(expectedDate2), | ||
new java.math.BigDecimal("123.45"), | ||
Double.MinValue, | ||
1.0f, | ||
42, | ||
Long.MaxValue, | ||
23.toShort, | ||
unicodeString, | ||
expectedTimestampMicro2 | ||
) | ||
|
||
assert(convertedRow == expectedRow) | ||
assert(convertedRow2 == expectedRow2) | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This test case failed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@Mingli-Rui Yea sorry, haven't finished the change yet, should've mentioned this. Will push another commit this week.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It will be great to add new test cases instead of changing existing one, e.g.
test("testTimestamp copy unload")
.