-
Notifications
You must be signed in to change notification settings - Fork 57
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
SNOW-1727532 Set number of values for repeated fields #861
Changes from all commits
bef322d
4d9c70b
39cbea1
e928fed
d7dff95
31bdbdb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,10 @@ public class ClientBufferParameters { | |
|
||
private boolean isIcebergMode; | ||
|
||
private boolean enableDistinctValuesCount; | ||
|
||
private boolean enableValuesCount; | ||
|
||
/** | ||
* Private constructor used for test methods | ||
* | ||
|
@@ -38,13 +42,17 @@ private ClientBufferParameters( | |
Constants.BdecParquetCompression bdecParquetCompression, | ||
boolean enableNewJsonParsingLogic, | ||
Optional<Integer> maxRowGroups, | ||
boolean isIcebergMode) { | ||
boolean isIcebergMode, | ||
boolean enableDistinctValuesCount, | ||
boolean enableValuesCount) { | ||
this.maxChunkSizeInBytes = maxChunkSizeInBytes; | ||
this.maxAllowedRowSizeInBytes = maxAllowedRowSizeInBytes; | ||
this.bdecParquetCompression = bdecParquetCompression; | ||
this.enableNewJsonParsingLogic = enableNewJsonParsingLogic; | ||
this.maxRowGroups = maxRowGroups; | ||
this.isIcebergMode = isIcebergMode; | ||
this.enableDistinctValuesCount = enableDistinctValuesCount; | ||
this.enableValuesCount = enableValuesCount; | ||
} | ||
|
||
/** @param clientInternal reference to the client object where the relevant parameters are set */ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've never understood why clientInternal can or should be allowed to be null. Looks like a testcase author leaking their convenience needs into production code. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The clientInterTest need this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there's another ctor for testcases to pass in whatever booleans they want, so still unclear why there's a need to pass in a null clientInternal to this ctor. |
||
|
@@ -73,6 +81,14 @@ public ClientBufferParameters(SnowflakeStreamingIngestClientInternal clientInter | |
isIcebergMode | ||
? Optional.of(InternalParameterProvider.MAX_ROW_GROUP_COUNT_ICEBERG_MODE_DEFAULT) | ||
: Optional.empty(); | ||
this.enableDistinctValuesCount = | ||
clientInternal != null | ||
? clientInternal.getInternalParameterProvider().isEnableDistinctValuesCount() | ||
: InternalParameterProvider.ENABLE_DISTINCT_VALUES_COUNT_DEFAULT; | ||
this.enableValuesCount = | ||
clientInternal != null | ||
? clientInternal.getInternalParameterProvider().isEnableValuesCount() | ||
: InternalParameterProvider.ENABLE_VALUES_COUNT_DEFAULT; | ||
} | ||
|
||
/** | ||
|
@@ -87,14 +103,18 @@ public static ClientBufferParameters test_createClientBufferParameters( | |
Constants.BdecParquetCompression bdecParquetCompression, | ||
boolean enableNewJsonParsingLogic, | ||
Optional<Integer> maxRowGroups, | ||
boolean isIcebergMode) { | ||
boolean isIcebergMode, | ||
boolean enableDistinctValuesCount, | ||
boolean enableValuesCount) { | ||
return new ClientBufferParameters( | ||
maxChunkSizeInBytes, | ||
maxAllowedRowSizeInBytes, | ||
bdecParquetCompression, | ||
enableNewJsonParsingLogic, | ||
maxRowGroups, | ||
isIcebergMode); | ||
isIcebergMode, | ||
enableDistinctValuesCount, | ||
enableValuesCount); | ||
} | ||
|
||
public long getMaxChunkSizeInBytes() { | ||
|
@@ -125,6 +145,14 @@ public String getParquetMessageTypeName() { | |
return isIcebergMode ? PARQUET_MESSAGE_TYPE_NAME : BDEC_PARQUET_MESSAGE_TYPE_NAME; | ||
} | ||
|
||
public boolean isEnableDistinctValuesCount() { | ||
return enableDistinctValuesCount; | ||
} | ||
|
||
public boolean isEnableValuesCount() { | ||
return enableValuesCount; | ||
} | ||
|
||
public boolean isEnableDictionaryEncoding() { | ||
return isIcebergMode; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can we remove the getColumnDisplayName() method from the RowBufferStats class now?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
actually, is there a need to even make this change?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If a root column is a structured data type,
statsMap.get(columnName)
will trow NPE as we only stores leaf column stats. UsefieldIndex
instead for logging to avoid this.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please add this as a code comment.