diff --git a/ecl/hql/hqlatoms.cpp b/ecl/hql/hqlatoms.cpp index 6e798b7298b..c88fc79fdbe 100644 --- a/ecl/hql/hqlatoms.cpp +++ b/ecl/hql/hqlatoms.cpp @@ -183,6 +183,7 @@ IAtom * fieldsAtom; IAtom * __fileposAtom; IAtom * _signed_Atom; IAtom * filenameAtom; +IAtom * fileTypeAtom; IAtom * filepositionAtom; IAtom * _files_Atom; IAtom * filterAtom; @@ -661,6 +662,7 @@ MODULE_INIT(INIT_PRIORITY_HQLATOM) MAKEATOM(field); MAKEATOM(fields); MAKEATOM(filename); + MAKEATOM(fileType); MAKEATOM(__filepos); MAKESYSATOM(signed); MAKEATOM(fileposition); diff --git a/ecl/hql/hqlatoms.hpp b/ecl/hql/hqlatoms.hpp index 70eb07d144b..9aba3ba4c33 100644 --- a/ecl/hql/hqlatoms.hpp +++ b/ecl/hql/hqlatoms.hpp @@ -185,6 +185,7 @@ extern HQL_API IAtom * fewAtom; extern HQL_API IAtom * fieldAtom; extern HQL_API IAtom * fieldsAtom; extern HQL_API IAtom * filenameAtom; +extern HQL_API IAtom * fileTypeAtom; extern HQL_API IAtom * __fileposAtom; extern HQL_API IAtom * _signed_Atom; extern HQL_API IAtom * filepositionAtom; diff --git a/ecl/hql/hqlattr.cpp b/ecl/hql/hqlattr.cpp index af0e32c7e6a..0fc5eede29d 100644 --- a/ecl/hql/hqlattr.cpp +++ b/ecl/hql/hqlattr.cpp @@ -507,6 +507,7 @@ unsigned getOperatorMetaFlags(node_operator op) case no_thor: case no_flat: case no_pipe: + case no_filetype: case no_joined: case no_any: case no_xml: @@ -622,7 +623,7 @@ unsigned getOperatorMetaFlags(node_operator op) case no_unused6: case no_unused13: case no_unused14: case no_unused15: - case no_unused34: case no_unused35: case no_unused36: case no_unused37: case no_unused38: + case no_unused35: case no_unused36: case no_unused37: case no_unused38: case no_unused40: case no_unused41: case no_unused42: case no_unused43: case no_unused44: case no_unused45: case no_unused46: case no_unused47: case no_unused48: case no_unused49: case no_unused50: case no_unused52: case no_unused80: diff --git a/ecl/hql/hqlexpr.cpp b/ecl/hql/hqlexpr.cpp index 324fce4dd7b..2f0937f1bec 100644 --- a/ecl/hql/hqlexpr.cpp +++ b/ecl/hql/hqlexpr.cpp @@ -1604,6 +1604,7 @@ const char *getOpString(node_operator op) case no_csv: return "CSV"; case no_xml: return "XML"; case no_json: return "JSON"; + case no_filetype: return "TYPE"; case no_when: return "WHEN"; case no_priority: return "PRIORITY"; @@ -2020,7 +2021,7 @@ const char *getOpString(node_operator op) case no_unused6: case no_unused13: case no_unused14: case no_unused15: - case no_unused34: case no_unused35: case no_unused36: case no_unused37: case no_unused38: + case no_unused35: case no_unused36: case no_unused37: case no_unused38: case no_unused40: case no_unused41: case no_unused42: case no_unused43: case no_unused44: case no_unused45: case no_unused46: case no_unused47: case no_unused48: case no_unused49: case no_unused50: case no_unused52: case no_unused80: @@ -5095,6 +5096,7 @@ unsigned CHqlRealExpression::getCachedEclCRC() case no_csv: case no_xml: case no_json: + case no_filetype: case no_null: if (thisType && (thisType->getTypeCode() == type_null)) thisType = nullptr; diff --git a/ecl/hql/hqlexpr.hpp b/ecl/hql/hqlexpr.hpp index 66071721e4f..1dbc2601dea 100644 --- a/ecl/hql/hqlexpr.hpp +++ b/ecl/hql/hqlexpr.hpp @@ -358,7 +358,7 @@ enum node_operator : unsigned short { no_unlikely, no_inline, no_nwaydistribute, - no_unused34, + no_filetype, // File format/type information (input or output) no_unused35, no_unused36, no_unused37, diff --git a/ecl/hql/hqlgram.hpp b/ecl/hql/hqlgram.hpp index 58d3c735ac5..a1540c66aa1 100644 --- a/ecl/hql/hqlgram.hpp +++ b/ecl/hql/hqlgram.hpp @@ -766,6 +766,8 @@ class HqlGram : implements IErrorReceiver, public CInterface void checkValidPipeRecord(const attribute & errpos, IHqlExpression * record, IHqlExpression * attrs, IHqlExpression * expr); void checkValidLookupFlag(IHqlExpression * dataset, IHqlExpression * filename, attribute & atr); + void setPluggableModeExpr(attribute & targetAttr, attribute & pluginAttr, attribute & options); + void createAppendDictionaries(attribute & targetAttr, attribute & leftAttr, attribute & rightAttr, IAtom * kind); void createAppendFiles(attribute & targetAttr, attribute & leftAttr, attribute & rightAttr, IAtom * kind); IHqlExpression * createAppendFiles(attribute & filesAttr, IHqlExpression * _attrs); diff --git a/ecl/hql/hqlgram.y b/ecl/hql/hqlgram.y index 9e3e190beb7..43e4bf77a3e 100644 --- a/ecl/hql/hqlgram.y +++ b/ecl/hql/hqlgram.y @@ -3566,6 +3566,11 @@ outputFlag $3.unwindCommaList(args); $$.setExpr(createExprAttribute(jsonAtom, args), $1); } + | TYPE '(' UNKNOWN_ID attribs ')' + { + parser->setPluggableModeExpr($$, $3, $4); + $$.setPosition($1); + } | UPDATE { $$.setExpr(createComma(createAttribute(updateAtom), createAttribute(overwriteAtom)), $1); } @@ -10494,7 +10499,7 @@ mode : FLAT { $$.setExpr(createValue(no_flat, makeNullType())); } | CSV { $$.setExpr(createValue(no_csv, makeNullType())); } | CSV '(' csvOptions ')' - { + { HqlExprArray args; $3.unwindCommaList(args); $$.setExpr(createValue(no_csv, makeNullType(), args)); @@ -10542,6 +10547,10 @@ mode $$.setExpr(createValue(no_json, makeNullType(), args)); } | pipe + | TYPE '(' UNKNOWN_ID attribs ')' + { + parser->setPluggableModeExpr($$, $3, $4); + } ; dsOption diff --git a/ecl/hql/hqlgram2.cpp b/ecl/hql/hqlgram2.cpp index 999af21edaf..754437f0698 100644 --- a/ecl/hql/hqlgram2.cpp +++ b/ecl/hql/hqlgram2.cpp @@ -8992,6 +8992,36 @@ bool HqlGram::convertAllToAttribute(attribute &atr) return true; } +void HqlGram::setPluggableModeExpr(attribute & targetAttr, attribute & pluginAttr, attribute & options) +{ + const char * fileFormatStr = str(pluginAttr.getId()); + bool isSupportedFileType = false; + + // TODO: Hardcoded tests for known filetype plugins; should be + // an iteration through a list of available types, checking for names + isSupportedFileType = strisame(fileFormatStr, "parquet"); + + if (isSupportedFileType) + { + // Do we just cite the name of the plugin, or perhaps something + // like a unique ID from the plugin's factory (assuming there is one)? + OwnedHqlExpr fileTypeExp = createExprAttribute(fileTypeAtom, createConstant(fileFormatStr)); + + // Create an option list with the filetype first (easy to find while debugging) + HqlExprArray args; + args.append(*LINK(fileTypeExp)); + options.unwindCommaList(args); + targetAttr.setExpr(createValue(no_filetype, makeNullType(), args)); + } + else + { + // ERR_UNKNOWN_TYPE is generic; create file type-specific error? + reportError(ERR_UNKNOWN_TYPE, pluginAttr, "Unknown plugin file type %s", fileFormatStr); + options.release(); + // Set the target to something... + targetAttr.setExpr(createValue(no_thor, makeNullType())); + } +} void HqlGram::checkValidRecordMode(IHqlExpression * dataset, attribute & atr, attribute & modeattr) { diff --git a/ecl/hql/hqlir.cpp b/ecl/hql/hqlir.cpp index 11dfd32d759..6d78b5de9ec 100644 --- a/ecl/hql/hqlir.cpp +++ b/ecl/hql/hqlir.cpp @@ -291,7 +291,6 @@ const char * getOperatorIRText(node_operator op) EXPAND_CASE(no,unlikely); EXPAND_CASE(no,inline); EXPAND_CASE(no,nwaydistribute); - EXPAND_CASE(no,unused34); EXPAND_CASE(no,unused35); EXPAND_CASE(no,unused36); EXPAND_CASE(no,unused37); @@ -662,6 +661,7 @@ const char * getOperatorIRText(node_operator op) EXPAND_CASE(no,getenv); EXPAND_CASE(no,json); EXPAND_CASE(no,matched_injoin); + EXPAND_CASE(no,filetype); } return ""; diff --git a/ecl/hql/hqltrans.cpp b/ecl/hql/hqltrans.cpp index 164b43e4b98..1114743a108 100644 --- a/ecl/hql/hqltrans.cpp +++ b/ecl/hql/hqltrans.cpp @@ -2687,6 +2687,7 @@ bool onlyTransformOnce(IHqlExpression * expr) case no_csv: case no_xml: case no_json: + case no_filetype: case no_list: return (expr->numChildren() == 0); case no_select: diff --git a/ecl/hqlcpp/hqlsource.cpp b/ecl/hqlcpp/hqlsource.cpp index 16d6f900bfd..3c22a022168 100644 --- a/ecl/hqlcpp/hqlsource.cpp +++ b/ecl/hqlcpp/hqlsource.cpp @@ -632,7 +632,7 @@ static bool forceLegacyMapping(IHqlExpression * expr) class SourceBuilder { public: - SourceBuilder(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr, bool canReadGenerically) + SourceBuilder(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr, bool canReadGenerically, bool forceReadGenerically) : tableExpr(_tableExpr), newInputMapping(false), translator(_translator) { nameExpr.setown(foldHqlExpression(_nameExpr)); @@ -661,7 +661,7 @@ class SourceBuilder isUnfilteredCount = false; requiresOrderedMerge = false; genericDiskReads = translator.queryOptions().genericDiskReads; - genericDiskRead = genericDiskReads && canReadGenerically; + genericDiskRead = (genericDiskReads && canReadGenerically) || forceReadGenerically; rootSelfRow = NULL; activityKind = TAKnone; @@ -2847,8 +2847,8 @@ void SourceBuilder::gatherSteppingMeta(IHqlExpression * expr, SourceSteppingInfo class DiskReadBuilderBase : public SourceBuilder { public: - DiskReadBuilderBase(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr, bool canReadGenerically) - : SourceBuilder(_translator, _tableExpr, _nameExpr, canReadGenerically), monitors(_tableExpr, _translator, 0, true, true) + DiskReadBuilderBase(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr, bool canReadGenerically, bool forceReadGenerically) + : SourceBuilder(_translator, _tableExpr, _nameExpr, canReadGenerically, forceReadGenerically), monitors(_tableExpr, _translator, 0, true, true) { fpos.setown(getFilepos(tableExpr, false)); lfpos.setown(getFilepos(tableExpr, true)); @@ -2912,7 +2912,17 @@ void DiskReadBuilderBase::buildMembers(IHqlExpression * expr) if ((modeOp != no_thor) && (modeOp != no_flat)) { StringBuffer format; - format.append(getOpString(modeOp)).toLowerCase(); + if (modeOp != no_filetype) + { + format.append(getOpString(modeOp)).toLowerCase(); + } + else + { + // Pluggable file type; cite the file type name + IHqlExpression * fileType = queryAttributeChild(mode, fileTypeAtom, 0); + getStringValue(format, fileType); + format.toLowerCase(); + } instance->startctx.addQuotedF("virtual const char * queryFormat() { return \"%s\"; }", format.str()); } } @@ -3096,9 +3106,10 @@ class DiskReadBuilder : public DiskReadBuilderBase { public: DiskReadBuilder(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr) - : DiskReadBuilderBase(_translator, _tableExpr, _nameExpr, (_tableExpr->queryChild(2)->getOperator() != no_pipe)) + : DiskReadBuilderBase(_translator, _tableExpr, _nameExpr, (_tableExpr->queryChild(2)->getOperator() != no_pipe), (_tableExpr->queryChild(2)->getOperator() == no_filetype)) { extractCanMatch = (modeOp == no_thor) || (modeOp == no_flat) || + (modeOp == no_filetype) || ((modeOp == no_csv) && genericDiskRead); } @@ -3232,10 +3243,15 @@ void DiskReadBuilder::buildFormatOption(BuildCtx & ctx, IHqlExpression * name, I void DiskReadBuilder::buildFormatOptions(BuildCtx & fixedCtx, BuildCtx & dynCtx, IHqlExpression * expr) { + IHqlExpression * pluggableFileTypeAtom = expr->queryAttribute(fileTypeAtom); // null if pluggable file type not used + ForEachChild(i, expr) { IHqlExpression * cur = expr->queryChild(i); - if (cur->isAttribute()) + + // Skip if expression is a pluggable file type (we don't want it appearing as an option) + // or if it is not an attribute + if (cur != pluggableFileTypeAtom && cur->isAttribute()) { OwnedHqlExpr name = createConstant(str(cur->queryName())); if (cur->numChildren()) @@ -3398,7 +3414,7 @@ class DiskNormalizeBuilder : public DiskReadBuilderBase { public: DiskNormalizeBuilder(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr) - : DiskReadBuilderBase(_translator, _tableExpr, _nameExpr, false) + : DiskReadBuilderBase(_translator, _tableExpr, _nameExpr, false, false) { } @@ -3470,7 +3486,7 @@ class DiskAggregateBuilder : public DiskReadBuilderBase { public: DiskAggregateBuilder(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr) - : DiskReadBuilderBase(_translator, _tableExpr, _nameExpr, false) + : DiskReadBuilderBase(_translator, _tableExpr, _nameExpr, false, false) { failedFilterValue.clear(); } @@ -3538,7 +3554,7 @@ class DiskCountBuilder : public DiskReadBuilderBase { public: DiskCountBuilder(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr, node_operator _aggOp) - : DiskReadBuilderBase(_translator, _tableExpr, _nameExpr, false) + : DiskReadBuilderBase(_translator, _tableExpr, _nameExpr, false, false) { aggOp = _aggOp; isCompoundCount = true; @@ -3637,7 +3653,7 @@ class DiskGroupAggregateBuilder : public DiskReadBuilderBase { public: DiskGroupAggregateBuilder(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr) - : DiskReadBuilderBase(_translator, _tableExpr, _nameExpr, false) + : DiskReadBuilderBase(_translator, _tableExpr, _nameExpr, false, false) { failedFilterValue.clear(); } @@ -3708,7 +3724,7 @@ class ChildBuilderBase : public SourceBuilder { public: ChildBuilderBase(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr) - : SourceBuilder(_translator, _tableExpr, _nameExpr, false) + : SourceBuilder(_translator, _tableExpr, _nameExpr, false, false) { } @@ -3983,7 +3999,7 @@ class IndexReadBuilderBase : public SourceBuilder friend class MonitorRemovalTransformer; public: IndexReadBuilderBase(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr) - : SourceBuilder(_translator, _tableExpr, _nameExpr, false), + : SourceBuilder(_translator, _tableExpr, _nameExpr, false, false), monitors(_tableExpr, _translator, -(int)numPayloadFields(_tableExpr), false, getHintBool(_tableExpr, createValueSetsAtom, _translator.queryOptions().createValueSets)) { } @@ -4874,6 +4890,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityTable(BuildCtx & ctx, IHqlExpr case no_flat: case no_pipe: case no_csv: + case no_filetype: return doBuildActivityDiskRead(ctx, expr); case no_xml: case no_json: @@ -4889,7 +4906,7 @@ class FetchBuilder : public SourceBuilder { public: FetchBuilder(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr, IHqlExpression * _fetchExpr) - : SourceBuilder(_translator, _tableExpr, _nameExpr, false) + : SourceBuilder(_translator, _tableExpr, _nameExpr, false, false) { compoundExpr.set(_fetchExpr); fetchExpr.set(queryFetch(_fetchExpr)); @@ -4959,6 +4976,7 @@ void FetchBuilder::buildMembers(IHqlExpression * expr) } case no_xml: case no_json: + case no_filetype: break; default: translator.buildFormatCrcFunction(instance->classctx, "getDiskFormatCrc", physicalRecord); diff --git a/ecl/regress/filetypeplugin.ecl b/ecl/regress/filetypeplugin.ecl new file mode 100644 index 00000000000..2c40a90724b --- /dev/null +++ b/ecl/regress/filetypeplugin.ecl @@ -0,0 +1,43 @@ +/*############################################################################## + + HPCC SYSTEMS software Copyright (C) 2024 HPCC Systems®. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +############################################################################## */ + +// If option is FALSE or omitted, generic disk reads are off by default but +// the use of DATASET(,TYPE()) should enable generic disk reads for that +// one activity +// #OPTION('genericDiskReads', TRUE); + +NamesLayout := RECORD + STRING20 surname; + STRING10 forename; + INTEGER2 age := 25; +END; + +//------------------------------------------------------------------------------ + +namesTableParquet_1 := DATASET(DYNAMIC('~parquet_in::no_options'), NamesLayout, TYPE(PARQUET), OPT); +// OUTPUT(namesTableParquet_1, {namesTableParquet_1}, '~parquet_out::no_options', TYPE(PARQUET), OVERWRITE); +OUTPUT(namesTableParquet_1); + +namesTableParquet_2 := DATASET(DYNAMIC('~parquet_in::with_options'), NamesLayout, TYPE(PARQUET : RANDOMFLAG, FOO(TRUE), BAR('BAZ')), OPT); +// OUTPUT(namesTableParquet_2, {namesTableParquet_2}, '~parquet_out::with_options', TYPE(PARQUET : BACKWARDS(TRUE)), OVERWRITE); +OUTPUT(namesTableParquet_2); + +// Following is present just to see what the IR output looks like +testCSV_1 := DATASET(DYNAMIC('~csv_in::no_options'), NamesLayout, CSV, OPT); +OUTPUT(testCSV_1); +testCSV_2 := DATASET(DYNAMIC('~csv_in::with_options'), NamesLayout, CSV(HEADING(1)), OPT); +OUTPUT(testCSV_2);