Skip to content

Commit

Permalink
HCK-7920: implemented variant subtype resolve logic on REing from ins…
Browse files Browse the repository at this point in the history
…tance
  • Loading branch information
WilhelmWesser committed Sep 12, 2024
1 parent fd02aae commit 1d74b8f
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 5 deletions.
17 changes: 12 additions & 5 deletions reverse_engineering/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ const { parseDDLStatements } = require('./parseDDLStatements');
const { isSupportUnityCatalog } = require('./helpers/databricksHelper');
const unityTagsHelper = require('./helpers/unityTagsHelper');
const { adaptJsonSchema } = require('./adaptJsonSchema');
const { getVariantColumnsWithResolvedSubType } = require('./helpers/variantPropertiesSubTypeResolveHelper');

const DEFAULT_DATABRICKS_CATALOG_NAME = 'hive_metastore';

Expand Down Expand Up @@ -265,12 +266,14 @@ module.exports = {
logger,
);

const columnsOfTypeString = (tableData.properties || []).filter(
property => property.mode === 'string',
const columnsPotentiallyContainingJSON = (tableData.properties || []).filter(
property => property.mode === 'string' || property.mode === 'var',
);
const hasPotentiallyContainingJSONColumns = !dependencies.lodash.isEmpty(
columnsPotentiallyContainingJSON,
);
const hasColumnsOfTypeString = !dependencies.lodash.isEmpty(columnsOfTypeString);
let documents = [];
if (hasColumnsOfTypeString) {
if (hasPotentiallyContainingJSONColumns) {
progress({
message: 'Start getting documents from table',
containerName: 'databases',
Expand All @@ -280,10 +283,14 @@ module.exports = {
connectionInfo: connectionData,
dbName,
tableName: table.name,
fields: columnsOfTypeString,
fields: columnsPotentiallyContainingJSON,
recordSamplingSettings: data.recordSamplingSettings,
logger,
});
tableData.schema = getVariantColumnsWithResolvedSubType({
propertiesSchema: tableData.schema,
documents,
});
progress({
message: 'Documents retrieved successfully',
containerName: 'databases',
Expand Down
10 changes: 10 additions & 0 deletions reverse_engineering/helpers/columnsREHelper.js
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,14 @@ const handleType = typeContainer => {
...handleSubtype(typeContainer.val, 'map'),
};
}
if (typeContainer.type === 'variant') {
return {
type: 'document',
childType: 'variant',
variantType: 'JSON',
mode: 'var',
};
}
switch (typeContainer.type) {
case 'tinyint':
case 'smallint':
Expand Down Expand Up @@ -118,4 +126,6 @@ const reverseTableColumn = column => {
};
};

// NOTE(review): empty placeholder — accepts a type container, does nothing and returns undefined.
// It is not exported below, and `variant` columns are already mapped inside handleType;
// presumably safe to remove — TODO confirm there are no callers elsewhere in this file.
const handleVariantType = typeContainer => {};

module.exports = { reverseTableColumn };
37 changes: 37 additions & 0 deletions reverse_engineering/helpers/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,42 @@ const getTemplateDocByJsonSchema = schema => {
}, {});
};

/**
 * Returns the value that occurs most often in the list.
 *
 * Occurrences are counted in a Map, so values keep their original type
 * (a number stays a number, a boolean stays a boolean) and strings such as
 * "constructor" or "__proto__" cannot collide with inherited object keys.
 * When several values share the highest frequency, the one that appeared
 * first in the list wins.
 *
 * @param {Array<number | string | boolean>} list
 * @returns {number | string | boolean | undefined} the most frequent value, or undefined for an empty list
 */
const getMostFrequentValueInList = list => {
	const frequencyByItem = new Map();

	for (const item of list) {
		frequencyByItem.set(item, (frequencyByItem.get(item) || 0) + 1);
	}

	let mostFrequentItem;
	let highestFrequency = 0;

	// Map iterates in insertion order and the comparison is strict, so ties keep the earliest value.
	for (const [item, frequency] of frequencyByItem) {
		if (frequency > highestFrequency) {
			mostFrequentItem = item;
			highestFrequency = frequency;
		}
	}

	return mostFrequentItem;
};

module.exports = {
prepareNamesForInsertionIntoScalaCode,
splitTableAndViewNames,
Expand All @@ -110,4 +146,5 @@ module.exports = {
isSupportGettingListOfViews,
removeParentheses,
getTemplateDocByJsonSchema,
getMostFrequentValueInList,
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
const { getMostFrequentValueInList } = require('./utils');

/**
* @typedef { (string | number | boolean) } Primitive
*/

/**
 * Builds a copy of the properties schema in which every entry has been passed
 * through getVariantColumnWithResolvedSubType together with the sampled documents.
 *
 * @param {{
 *   propertiesSchema: object,
 *   documents: object[]
 * }} param
 * @returns {object} a new object with the same keys as propertiesSchema
 */
const getVariantColumnsWithResolvedSubType = ({ propertiesSchema, documents = [] }) => {
	const resolveEntry = ([propertyName, propertyData]) =>
		getVariantColumnWithResolvedSubType({ propertyName, propertyData, documents });

	const resolvedEntries = Object.entries(propertiesSchema).map(resolveEntry);

	return Object.fromEntries(resolvedEntries);
};

/**
 * Resolves the `subtype` of a single column from the sampled documents.
 *
 * Properties whose `mode` is not 'var' are returned untouched. For a 'var'
 * property, the value stored under `propertyName` in every document is parsed
 * as JSON, classified by type, and the most frequent type becomes the subtype.
 *
 * @param {{
 *   propertyName: string,
 *   propertyData: object,
 *   documents: object[]
 * }} param
 * @returns {[string, object]} the [name, data] entry, with `subtype` set when it could be resolved
 */
const getVariantColumnWithResolvedSubType = ({ propertyName, propertyData, documents = [] }) => {
	if (propertyData?.mode !== 'var') {
		return [propertyName, propertyData];
	}

	// No sampled rows means there is nothing to infer the subtype from;
	// leave the column as it is instead of failing on an empty type list.
	if (documents.length === 0) {
		return [propertyName, propertyData];
	}

	const propertyDocumentsRecords = documents.map(document => document[propertyName]);
	const parsedDocumentsRecords = propertyDocumentsRecords.map(getParsedVariantRecord);
	const parsedDocumentsRecordsTypes = parsedDocumentsRecords.map(getDocumentRecordType);
	const mostFrequentType = getMostFrequentValueInList(parsedDocumentsRecordsTypes);

	const updatedPropertyData = {
		...propertyData,
		subtype: mostFrequentType,
	};

	return [propertyName, updatedPropertyData];
};

/**
 * Parses a raw variant record as JSON on a best-effort basis.
 *
 * @param {string} record
 * @returns {Primitive | object | Array<Primitive>} the parsed value, or an empty object when the record is not valid JSON
 */
const getParsedVariantRecord = record => {
	// Fallback used whenever JSON.parse rejects the record.
	let parsedRecord = {};

	try {
		parsedRecord = JSON.parse(record);
	} catch {
		// Deliberately ignored: an unparsable record is reported as an empty object.
	}

	return parsedRecord;
};

/**
 * Classifies a parsed record: 'array' for arrays, 'object' for other non-null
 * objects, null for a null record, and the `typeof` result for everything else.
 *
 * @param {Primitive | object | Array<Primitive | object>} parsedRecord
 * @returns {string | null}
 */
const getDocumentRecordType = parsedRecord => {
	const jsType = typeof parsedRecord;

	if (jsType !== 'object') {
		return jsType;
	}

	// typeof null is 'object', so null has to be singled out explicitly.
	if (parsedRecord === null) {
		return null;
	}

	return Array.isArray(parsedRecord) ? 'array' : 'object';
};

// Only the schema-level resolver is exported; the per-column helpers defined above stay internal to this module.
module.exports = {
getVariantColumnsWithResolvedSubType,
};

0 comments on commit 1d74b8f

Please sign in to comment.