Skip to content

Commit

Permalink
Feature/hck 4350 datastax vector data type (#121)
Browse files Browse the repository at this point in the history
* types: add vector data type

* PP: add vector data type properties config

* vector: add snippet for default properties

* model-config: add 5.x option to db version list

* cassandra-driver: bump version to 4.7.2

* FE: add handle vector data type

* RE: add handle vector data type from instance

* RE: add handle vector data type from file

* custom-index: add support for the similarity_function option

* RE: handle the similarity_function option for custom indexes

* RE: add additional check for retrieved vector data type

* RE: remove define vector type for cassandra custom type
  • Loading branch information
serhii-filonenko authored Dec 28, 2023
1 parent f22bae5 commit ba82ccc
Show file tree
Hide file tree
Showing 341 changed files with 139,573 additions and 61,010 deletions.
4 changes: 4 additions & 0 deletions forward_engineering/helpers/indexHelper.js
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,10 @@ const getCustomOptions = (options, isSASI) => {
result.ascii = 'true';
}

if (options.similarity_function && !isSASI) {
result.similarity_function = `${options.similarity_function}`;
}

if(!isSASI){
return result;
}
Expand Down
18 changes: 18 additions & 0 deletions forward_engineering/helpers/typeHelper.js
Original file line number Diff line number Diff line change
Expand Up @@ -158,10 +158,18 @@ const getStructuralTypeHandler = (type, isNeedToBeFrozen, udtTypeMap) => {
return `map<${keyType}, ${valueType}>`;
};

// Renders the CQL declaration for a vector column as `vector<subtype, dimension>`.
// The element type is resolved through getValueTypeFromArray with "float" passed as
// its second argument; the dimension is resolved through getVectorDimension.
const vector = (propertyData, propertyName) => {
	const elementType = getValueTypeFromArray(propertyData, "float", udtTypeMap, propertyName);
	const size = getVectorDimension(propertyData);

	return `vector<${elementType}, ${size}>`;
};

return ifType(type)
("map", setFrozen(map))
("list", setFrozen(list))
("set", setFrozen(typeSet))
("vector", setFrozen(vector))
("tuple", tuple)
();
};
Expand Down Expand Up @@ -221,6 +229,16 @@ const getTypeByData = (propertyData, udtTypeMap, propertyName) => {
return getHandlerByType(type, udtTypeMap)(propertyData, propertyName);
};

/**
 * Resolves the dimension to emit in a `vector<subtype, dimension>` declaration.
 *
 * The earlier `!isNaN(+value)` check let `null`, `''` and `0` through, because unary
 * plus coerces `null` and `''` to 0; the raw value was then returned and interpolated
 * into the type string. Only a positive integer is accepted now; anything else falls
 * back to the default configured for the field's "dimension" property.
 *
 * @param {Object} propertyData - field definition; `dimension` and `type` are read
 * @returns {number|undefined} the numeric dimension, or the configured default value
 *   (undefined when no matching field config is found)
 */
const getVectorDimension = (propertyData) => {
	const dimension = Number(propertyData.dimension);

	if (Number.isInteger(dimension) && dimension > 0) {
		return dimension;
	}

	const config = getFieldConfig(propertyData.type, "dimension");

	return config?.defaultValue;
};

module.exports = {
getTypeByData
};
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"2.x",
"3.x",
"4.x",
"5.x",
"DSE 4.x",
"DSE 5.x",
"DSE 6.x",
Expand Down
37 changes: 37 additions & 0 deletions properties_pane/entity_level/entityLevelConfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,43 @@ making sure that you maintain a proper JSON format.
"propertyKeyword": "ascii",
"propertyType": "checkbox",
"propertyTooltip": "When set to true, SAI converts alphabetic, numeric, and symbolic characters that are not in the Basic Latin Unicode block (the first 127 ASCII characters) to the ASCII equivalent, if one exists."
},
{
"propertyName": "similarity_function",
"propertyKeyword": "similarity_function",
"propertyType": "select",
"propertyTooltip": "Vector search relies on computing the similarity or distance between vectors to identify relevant matches. The similarity function is used to compute the similarity between two vectors.",
"options": [
"",
"EUCLIDEAN",
"DOT_PRODUCT",
"COSINE"
],
"dependency": {
"type": "not",
"values": [
{
"level": "model",
"key": "dbVersion",
"value": "2.x"
},
{
"level": "model",
"key": "dbVersion",
"value": "3.x"
},
{
"level": "model",
"key": "dbVersion",
"value": "4.x"
},
{
"level": "model",
"key": "dbVersion",
"value": "DSE 4.x"
}
]
}
}
]
},
Expand Down
111 changes: 111 additions & 0 deletions properties_pane/field_level/fieldLevelConfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -1302,6 +1302,117 @@ making sure that you maintain a proper JSON format.
"additionalItems",
"comments"
],
"vector": [
"name",
"code",
"sampleName",
"schemaId",
"refPath",
"description",
{
"propertyKeyword": "type",
"typeDecorator": {
"value": "vctr",
"useAsTypeName": true
}
},
{
"propertyName": "Subtype",
"propertyKeyword": "subtype",
"shouldValidate": false,
"propertyType": "select",
"typeDecorator": {
"value": "flt",
"useAngleBrackets": true,
"dependency": {
"key": "subtype",
"value": "float"
}
},
"typeable": true,
"options": [
"float"
]
},
{
"propertyName": "Dimension",
"propertyKeyword": "dimension",
"propertyType": "numeric",
"valueType": "integer",
"allowNegative": false,
"typeDecorator": true,
"defaultValue": 1,
"maxValue": 8192
},
{
"propertyName": "Frozen",
"propertyKeyword": "frozen",
"shouldValidate": false,
"propertyType": "checkbox",
"valueType": "boolean"
},
"dependencies",
{
"propertyName": "Required",
"propertyKeyword": "required",
"shouldValidate": false,
"propertyType": "checkbox",
"valueType": "boolean",
"disabledOnCondition": [
{
"key": "primaryKey",
"value": true
}
]
},
{
"propertyName": "Primary key",
"propertyKeyword": "primaryKey",
"shouldValidate": false,
"propertyType": "checkbox",
"valueType": "boolean",
"hidden": true
},
{
"propertyName": "Clustering key",
"propertyKeyword": "compositeClusteringKey",
"shouldValidate": false,
"propertyType": "checkbox",
"valueType": "boolean",
"dependency": {
"key": "compositeClusteringKey",
"value": true
},
"disabledOnCondition": [
{
"key": "compositeClusteringKey",
"value": true
}
]
},
{
"propertyName": "Partition key",
"propertyKeyword": "compositePartitionKey",
"shouldValidate": false,
"propertyType": "checkbox",
"valueType": "boolean",
"dependency": {
"key": "compositePartitionKey",
"value": true
},
"disabledOnCondition": [
{
"key": "compositePartitionKey",
"value": true
}
]
},
"minItems",
"maxItems",
"uniqueItems",
"additionalItems",
"comments"
],
"___3": [],
"geospatial": [
"name",
Expand Down
1 change: 1 addition & 0 deletions properties_pane/model_level/modelLevelConfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ making sure that you maintain a proper JSON format.
"2.x",
"3.x",
"4.x",
"5.x",
"DSE 4.x",
"DSE 5.x",
"DSE 6.x",
Expand Down
3 changes: 2 additions & 1 deletion reverse_engineering/cassandraHelper.js
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,7 @@ module.exports = (_) => {
customOptions.case_sensitive = item.options.case_sensitive === 'true';
customOptions.normalize = item.options.normalize === 'true';
customOptions.ascii = item.options.ascii === 'true';
customOptions.similarity_function = item.options.similarity_function || '';
}

return {
Expand Down Expand Up @@ -922,7 +923,7 @@ module.exports = (_) => {
documents: data.records,
};

if (data.table.columns && data.table.columns.length) {
if (data?.table?.columns?.length) {
packageData.bucketInfo = getKeyspaceInfo(data.keyspaceName);
packageData.bucketInfo.UDFs = data.UDFs;
packageData.bucketInfo.UDAs = data.UDAs;
Expand Down
26 changes: 25 additions & 1 deletion reverse_engineering/cqlToCollectionsVisitor.js
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,9 @@ class Visitor extends CqlParserVisitor {
if(ctx.analyzedOption){
return {analyzed: ctx.analyzedOption.getText().toLowerCase() === `'true'`}
}
if(ctx.similarityFunctionOption){
return {similarity_function: ctx.similarityFunctionOption.getText().replaceAll('\'','')}
}
if(ctx.isLiteralOption){
return {isLiteral: ctx.isLiteralOption.getText().toLowerCase() === `'true'`}
}
Expand Down Expand Up @@ -678,7 +681,7 @@ class Visitor extends CqlParserVisitor {

visitDataType(ctx) {
const type = this.visit(ctx.dataTypeName());
const COMPLEX_TYPES = ['map', 'tuple', 'list', 'set'];
const COMPLEX_TYPES = ['map', 'tuple', 'list', 'set', 'vector'];
const hackoladeType = getTargetType(type);

const typeDescription = ctx.dataTypeDefinition();
Expand Down Expand Up @@ -713,6 +716,21 @@ class Visitor extends CqlParserVisitor {
};
}

if (hackoladeType.type === 'vector') {
const decimalLiteralContext = typeDescription.decimalLiteral();
const [dimension] = decimalLiteralContext ? this.visit(decimalLiteralContext) : [1];

return {
...hackoladeType,
subtype: description1.mode,
dimension: dimension,
properties: [{
...description1,
name: 'New column',
}],
};
}

return {
...hackoladeType,
subtype: `${hackoladeType.type}<${complexTypeMapper(description1.type || '')}>`,
Expand Down Expand Up @@ -809,6 +827,10 @@ class Visitor extends CqlParserVisitor {
return this.visit(ctx.dataType());
}

visitDecimalLiteral(ctx) {
return +ctx.getText();
}

visitBooleanLiteral(ctx) {
return (ctx.getText() || '').toLowerCase() === 'true';
}
Expand Down Expand Up @@ -975,6 +997,7 @@ const getTargetType = (type) => {
case "list":
case "set":
case "map":
case "vector":
return { type };
default:
return;
Expand All @@ -995,6 +1018,7 @@ const complexTypeMapper = type => {
case "list":
case "set":
case "map":
case "vector":
return type;
default:
return "udt";
Expand Down
9 changes: 9 additions & 0 deletions reverse_engineering/grammars/CqlLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,11 @@ K_NORMALIZE
: SQUOTE N O R M A L I Z E SQUOTE
;

// Option key `similarity_function`, accepted either bare or wrapped in single quotes.
// Letters are spelled through the per-letter fragments (S, I, M, ...), which are
// presumably case-insensitive - confirm against the fragment definitions.
K_SIMILARITY_FUNCTION
: S I M I L A R I T Y '_' F U N C T I O N
| SQUOTE S I M I L A R I T Y '_' F U N C T I O N SQUOTE
;

// Option key `analyzed`; only the single-quoted form is matched.
K_ANALYZED
: SQUOTE A N A L Y Z E D SQUOTE
;
Expand Down Expand Up @@ -1265,4 +1270,8 @@ OPERATOR_GTE

// Keyword USERS, matched via the per-letter fragments or the upper-case literal.
// NOTE(review): if the letter fragments are case-insensitive, the 'USERS' alternative
// is redundant - confirm against the fragment definitions before removing it.
K_USERS
: U S E R S | 'USERS'
;

// Keyword VECTOR, matched via the per-letter fragments or the upper-case literal.
// NOTE(review): if the letter fragments are case-insensitive, the 'VECTOR' alternative
// is redundant - confirm against the fragment definitions before removing it.
K_VECTOR
: V E C T O R | 'VECTOR'
;
11 changes: 9 additions & 2 deletions reverse_engineering/grammars/CqlParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,7 @@ customIndexOption
| kwAnalyzerClass COLON analyzerClassOption=stringLiteral COMMA?
| kwTokenizationLocale COLON tokenizationLocaleOption=stringLiteral COMMA?
| kwTokenizationSkipStopWords COLON tokenizationSkipStopWordsOption=stringLiteral COMMA?
| kwSimilarityFunction COLON similarityFunctionOption=stringLiteral COMMA?
;

createSearchIndex
Expand Down Expand Up @@ -897,10 +898,11 @@ dataTypeName
| K_VARINT
| K_TIMESTAMP
| K_UUID
| K_VECTOR
;

dataTypeDefinition
: syntaxBracketLa dataType (syntaxComma dataType)* syntaxBracketRa
: syntaxBracketLa dataType (syntaxComma (dataType | decimalLiteral))* syntaxBracketRa
;

orderDirection
Expand Down Expand Up @@ -1302,6 +1304,10 @@ kwCaseSensitive
: K_CASE_SENITIVE
;

// Parser-level wrapper around the K_SIMILARITY_FUNCTION token.
kwSimilarityFunction
: K_SIMILARITY_FUNCTION
;

// Parser-level wrapper around the K_INITCOND token.
kwInitcond
: K_INITCOND
;
Expand Down Expand Up @@ -1738,5 +1744,6 @@ id
K_TUPLE |
K_VARCHAR |
K_VARINT |
K_USERS
K_USERS |
K_VECTOR
;
Loading

0 comments on commit ba82ccc

Please sign in to comment.