Skip to content

Commit

Permalink
Fix tests
Browse files Browse the repository at this point in the history
Signed-off-by: Nghia Truong <[email protected]>
  • Loading branch information
ttnghia committed Nov 7, 2024
1 parent e78fb32 commit 15a4113
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 10 deletions.
6 changes: 2 additions & 4 deletions integration_tests/src/main/python/json_matrix_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,20 +123,18 @@ def test_json_tuple_allow_comments_off(std_input_path):
@allow_non_gpu('FileSourceScanExec')
@pytest.mark.parametrize('read_func', [read_json_df, read_json_sql])
def test_scan_json_allow_single_quotes_off(std_input_path, read_func, spark_tmp_table_factory):
assert_gpu_fallback_collect(
assert_gpu_and_cpu_are_equal_collect(
read_func(std_input_path + '/' + WITH_SQ_FILE,
WITH_SQ_SCHEMA,
spark_tmp_table_factory,
{"allowSingleQuotes": "false"}),
'FileSourceScanExec',
conf=_enable_all_types_json_scan_conf)

@allow_non_gpu('ProjectExec', TEXT_INPUT_EXEC)
def test_from_json_allow_single_quotes_off(std_input_path):
schema = WITH_SQ_SCHEMA
assert_gpu_fallback_collect(
assert_gpu_and_cpu_are_equal_collect(
lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_SQ_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {'allowSingleQuotes': "false"})),
'JsonToStructs',
conf =_enable_json_to_structs_conf)

# "On" is the default, so it really needs to work
Expand Down
14 changes: 8 additions & 6 deletions integration_tests/src/main/python/json_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -699,28 +699,30 @@ def test_from_json_map_with_invalid():

@allow_non_gpu(*non_utc_allow)
@pytest.mark.parametrize('allow_single_quotes', ['true', 'false'])
@pytest.mark.parametrize('allow_numeric_leading_zeros', ['true', 'false'])
@pytest.mark.parametrize('allow_non_numeric_numbers', ['true', 'false'])
@pytest.mark.parametrize('allow_unquoted_chars', ['true', 'false'])
def test_from_json_map_with_options(allow_single_quotes, allow_numeric_leading_zeros,
def test_from_json_map_with_options(allow_single_quotes,
allow_non_numeric_numbers, allow_unquoted_chars):
# Test the input with:
# - Double quotes
# - Single quotes
# - Numbers with leading zeros
# - Non-numeric numbers
# - Unquoted control characters in quoted strings
# TODO: add `\n` back into the last pattern
json_string_gen = StringGen(r'{"a": "[0-9]{0,5}"}') \
.with_special_pattern(r"""{'a': "[0-9]{0,5}"}""", weight=50) \
.with_special_pattern(r'{"a": 0[0-9]{0,5}}', weight=50) \
.with_special_pattern(r'{"a": Infinity}', weight=50) \
.with_special_pattern(r'{"a\tb": "01\r\n\t23"}', weight=50)
.with_special_pattern(r'{"a\tb": "01\r\t23"}', weight=50)
options = {"allowSingleQuotes": allow_single_quotes,
"allowNumericLeadingZeros": allow_numeric_leading_zeros,
"allowNonNumericNumbers": allow_non_numeric_numbers,
# Cannot test `allowNumericLeadingZeros==true`: the GPU output always keeps the
# leading zeros while the CPU output does not, so the test would always fail.
"allowNumericLeadingZeros": "false",
"allowNonNumericNumbers": allow_non_numeric_numbers,
"allowUnquotedControlChars": allow_unquoted_chars}
assert_gpu_and_cpu_are_equal_collect(
lambda spark : unary_op_df(spark, json_string_gen) \
lambda spark : unary_op_df(spark, json_string_gen, length=20) \
.select(f.from_json(f.col('a'), 'MAP<STRING,STRING>', options)),
conf=_enable_all_types_conf)

Expand Down

0 comments on commit 15a4113

Please sign in to comment.