Skip to content

Commit

Permalink
Nested workload (#353)
Browse files Browse the repository at this point in the history
Signed-off-by: Finn Roblin <[email protected]>
  • Loading branch information
finnroblin authored Aug 30, 2024
1 parent 45207c5 commit 6b9619b
Show file tree
Hide file tree
Showing 7 changed files with 230 additions and 1 deletion.
2 changes: 1 addition & 1 deletion vectorsearch/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ This workload allows the following parameters to be specified using `--workload-
| Name | Description |
|-----------------------------------------|-----------------------------------------------------------------------------------------------------------------------------|
| target_index_name | Name of index to add vectors to |
| target_field_name | Name of field to add vectors to |
| target_field_name | Name of field to add vectors to. For a nested field, use "." to separate the nested parent field from the vector field (e.g. `nested_field.target_field`) |
| target_index_body | Path to target index definition |
| target_index_primary_shards | Target index primary shards |
| target_index_replica_shards | Target index replica shards |
Expand Down
56 changes: 56 additions & 0 deletions vectorsearch/indices/nested/nested-faiss-index.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{#- Index body template: a nested object field wrapping a faiss HNSW knn_vector.
    target_field_name is split on "." into "<nested parent>.<vector field>".
    Optional shard/replica settings and HNSW parameters are emitted only when
    the corresponding workload parameter is supplied. -#}
{
  "settings": {
    "index": {
      "knn": true
      {%- if target_index_primary_shards is defined and target_index_primary_shards %}
      ,"number_of_shards": {{ target_index_primary_shards }}
      {%- endif %}
      {#- Only "is defined" is checked so that an explicit 0 replicas is honoured. #}
      {%- if target_index_replica_shards is defined %}
      ,"number_of_replicas": {{ target_index_replica_shards }}
      {%- endif %}
    }
  },
  "mappings": {
    "dynamic": "strict",
    "properties": {
      {% if id_field_name is defined and id_field_name != "_id" %}
      "{{id_field_name}}": {
        "type": "keyword"
      },
      {%- endif %}
      {% if target_field_name is defined and target_field_name %}
      "{{ target_field_name.split('.')[0] }}": {
        "type": "nested",
        "properties": {
          "{{ target_field_name.split('.')[1] }}": {
            "type": "knn_vector",
            "dimension": {{ target_index_dimension }},
            "method": {
              "name": "hnsw",
              "space_type": "{{ target_index_space_type }}",
              "engine": "faiss",
              "parameters": {
                {#- Each optional parameter emits its own leading comma when any
                    earlier parameter was rendered, so every combination of
                    ef_search / ef_construction / m yields valid JSON. #}
                {%- if hnsw_ef_search is defined and hnsw_ef_search %}
                "ef_search": {{ hnsw_ef_search }}
                {%- endif %}
                {%- if hnsw_ef_construction is defined and hnsw_ef_construction %}
                {%- if hnsw_ef_search is defined and hnsw_ef_search %}
                ,
                {%- endif %}
                "ef_construction": {{ hnsw_ef_construction }}
                {%- endif %}
                {%- if hnsw_m is defined and hnsw_m %}
                {%- if (hnsw_ef_search is defined and hnsw_ef_search) or (hnsw_ef_construction is defined and hnsw_ef_construction) %}
                ,
                {%- endif %}
                "m": {{ hnsw_m }}
                {%- endif %}
              }
            }
          }
        }
      }
      {%- endif %}
    }
  }
}
51 changes: 51 additions & 0 deletions vectorsearch/indices/nested/nested-lucene-index.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{#- Index body template: a nested object field wrapping a lucene HNSW knn_vector.
    target_field_name is split on "." into "<nested parent>.<vector field>".
    Optional shard/replica settings and HNSW parameters are emitted only when
    the corresponding workload parameter is supplied. -#}
{
  "settings": {
    "index": {
      "knn": true
      {%- if target_index_primary_shards is defined and target_index_primary_shards %}
      ,"number_of_shards": {{ target_index_primary_shards }}
      {%- endif %}
      {#- Only "is defined" is checked (matching the faiss and nmslib templates)
          so that an explicit 0 replicas is honoured instead of being dropped
          as a falsy value. #}
      {%- if target_index_replica_shards is defined %}
      ,"number_of_replicas": {{ target_index_replica_shards }}
      {%- endif %}
    }
  },
  "mappings": {
    "dynamic": "strict",
    "properties": {
      {% if id_field_name is defined and id_field_name != "_id" %}
      "{{id_field_name}}": {
        "type": "keyword"
      },
      {%- endif %}
      {% if target_field_name is defined and target_field_name %}
      "{{ target_field_name.split('.')[0] }}": {
        "type": "nested",
        "properties": {
          "{{ target_field_name.split('.')[1] }}": {
            "type": "knn_vector",
            "dimension": {{ target_index_dimension }},
            "method": {
              "name": "hnsw",
              "space_type": "{{ target_index_space_type }}",
              "engine": "lucene",
              "parameters": {
                {%- if hnsw_ef_construction is defined and hnsw_ef_construction %}
                "ef_construction": {{ hnsw_ef_construction }}
                {%- endif %}
                {%- if hnsw_m is defined and hnsw_m %}
                {#- Separator is needed only when ef_construction was rendered above. #}
                {%- if hnsw_ef_construction is defined and hnsw_ef_construction %}
                ,
                {%- endif %}
                "m": {{ hnsw_m }}
                {%- endif %}
              }
            }
          }
        }
      }
      {%- endif %}
    }
  }
}

53 changes: 53 additions & 0 deletions vectorsearch/indices/nested/nested-nmslib-index.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{#- Index body template: a nested object field wrapping an nmslib HNSW knn_vector.
    target_field_name is split on "." into "<nested parent>.<vector field>".
    ef_search is rendered as an index-level setting in this template; the
    remaining HNSW parameters are rendered under the field's method. -#}
{
  "settings": {
    "index": {
      "knn": true
      {%- if target_index_primary_shards is defined and target_index_primary_shards %}
      ,"number_of_shards": {{ target_index_primary_shards }}
      {%- endif %}
      {%- if target_index_replica_shards is defined %}
      ,"number_of_replicas": {{ target_index_replica_shards }}
      {%- endif %}
      {%- if hnsw_ef_search is defined and hnsw_ef_search %}
      ,"knn.algo_param.ef_search": {{ hnsw_ef_search }}
      {%- endif %}
    }
  },
  "mappings": {
    "dynamic": "strict",
    "properties": {
      {% if id_field_name is defined and id_field_name != "_id" %}
      "{{id_field_name}}": {
        "type": "keyword"
      },
      {%- endif %}
      {% if target_field_name is defined and target_field_name %}
      "{{ target_field_name.split('.')[0] }}": {
        "type": "nested",
        "properties": {
          "{{ target_field_name.split('.')[1] }}": {
            "type": "knn_vector",
            "dimension": {{ target_index_dimension }},
            "method": {
              "name": "hnsw",
              "space_type": "{{ target_index_space_type }}",
              "engine": "nmslib",
              "parameters": {
                {%- if hnsw_ef_construction is defined and hnsw_ef_construction %}
                "ef_construction": {{ hnsw_ef_construction }}
                {%- endif %}
                {%- if hnsw_m is defined and hnsw_m %}
                {#- Separator is needed only when ef_construction was rendered above. #}
                {%- if hnsw_ef_construction is defined and hnsw_ef_construction %},{%- endif %}
                "m": {{ hnsw_m }}
                {%- endif %}
              }
            }
          }
        }
      }
      {%- endif %}
    }
  }
}
23 changes: 23 additions & 0 deletions vectorsearch/params/nested/nested-faiss.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
  "target_index_name": "target_index",
  "target_field_name": "nested_field.target_field",
  "target_index_body": "indices/nested/nested-faiss-index.json",
  "target_index_dimension": 128,
  "target_index_bulk_size": 100,
  "target_index_bulk_index_data_set_format": "hdf5",
  "target_index_bulk_index_data_set_path": "/tmp/data-nested.hdf5",
  "target_index_bulk_indexing_clients": 10,

  "target_index_max_num_segments": 1,
  "target_index_space_type": "l2",

  "query_k": 5,
  "query_body": {
    "docvalue_fields": ["_id"],
    "stored_fields": "_none_"
  },

  "query_data_set_format": "hdf5",
  "query_data_set_path": "/tmp/data-nested.hdf5",
  "query_count": 10000
}
23 changes: 23 additions & 0 deletions vectorsearch/params/nested/nested-lucene.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
  "target_index_name": "target_index",
  "target_field_name": "nested_field.target_field",
  "target_index_body": "indices/nested/nested-lucene-index.json",
  "target_index_dimension": 128,
  "target_index_bulk_size": 100,
  "target_index_bulk_index_data_set_format": "hdf5",
  "target_index_bulk_index_data_set_path": "/tmp/data-nested.hdf5",
  "target_index_bulk_indexing_clients": 10,

  "target_index_max_num_segments": 1,
  "target_index_space_type": "l2",

  "query_k": 5,
  "query_body": {
    "docvalue_fields": ["_id"],
    "stored_fields": "_none_"
  },

  "query_data_set_format": "hdf5",
  "query_data_set_path": "/tmp/data-nested.hdf5",
  "query_count": 10000
}
23 changes: 23 additions & 0 deletions vectorsearch/params/nested/nested-nmslib.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
  "target_index_name": "target_index",
  "target_field_name": "nested_field.target_field",
  "target_index_body": "indices/nested/nested-nmslib-index.json",
  "target_index_dimension": 128,
  "target_index_bulk_size": 100,
  "target_index_bulk_index_data_set_format": "hdf5",
  "target_index_bulk_index_data_set_path": "/tmp/data-nested.hdf5",
  "target_index_bulk_indexing_clients": 10,

  "target_index_max_num_segments": 1,
  "target_index_space_type": "l2",

  "query_k": 5,
  "query_body": {
    "docvalue_fields": ["_id"],
    "stored_fields": "_none_"
  },

  "query_data_set_format": "hdf5",
  "query_data_set_path": "/tmp/data-nested.hdf5",
  "query_count": 10000
}

0 comments on commit 6b9619b

Please sign in to comment.