From 8524de9644577e3b3027be5716f017b4a08867f3 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Thu, 14 Dec 2023 10:26:29 +0100 Subject: [PATCH 1/3] Move 'query' field inside 'properties' Closes #134. Closes #106. --- .../invalid/misnamed-property.json | 8 +++-- .../invalid/multiple-types.json | 11 ++++-- .../valid/example-full.json | 8 +++-- .../valid/example-min.json | 12 +++++-- .../valid/multi-values.json | 4 ++- .../valid/text-processing-language.json | 4 ++- draft/index.html | 34 +++++++++++-------- draft/schemas/reconciliation-query-batch.json | 29 +++------------- 8 files changed, 62 insertions(+), 48 deletions(-) diff --git a/draft/examples/reconciliation-query-batch/invalid/misnamed-property.json b/draft/examples/reconciliation-query-batch/invalid/misnamed-property.json index 5781c9a..f51e78e 100644 --- a/draft/examples/reconciliation-query-batch/invalid/misnamed-property.json +++ b/draft/examples/reconciliation-query-batch/invalid/misnamed-property.json @@ -1,10 +1,12 @@ { "queries": [ { - "query": "Christel Hanewinckel", "type": "DifferentiatedPerson", "limit": 5, "props": [ + { + "v": "Christel Hanewinckel" + }, { "pid": "professionOrOccupation", "v": "Politik*" @@ -17,10 +19,12 @@ "type_strict": "should" }, { - "query": "Franz Thönnes", "type": "DifferentiatedPerson", "limit": 5, "props": [ + { + "v": "Franz Thönnes" + }, { "pid": "professionOrOccupation", "v": "Politik*" diff --git a/draft/examples/reconciliation-query-batch/invalid/multiple-types.json b/draft/examples/reconciliation-query-batch/invalid/multiple-types.json index b2a0f8d..ee00a4e 100644 --- a/draft/examples/reconciliation-query-batch/invalid/multiple-types.json +++ b/draft/examples/reconciliation-query-batch/invalid/multiple-types.json @@ -1,8 +1,15 @@ { "queries": [ { - "query": "Christel Hanewinckel", - "type": ["DifferentiatedPerson", "FictionalCharacter"], + "properties": [ + { + "v": "Christel Hanewinckel" + } + ], + "type": [ + "DifferentiatedPerson", + "FictionalCharacter" 
+ ], "limit": 5 } ] diff --git a/draft/examples/reconciliation-query-batch/valid/example-full.json b/draft/examples/reconciliation-query-batch/valid/example-full.json index 5ddacf5..fc2c6fa 100644 --- a/draft/examples/reconciliation-query-batch/valid/example-full.json +++ b/draft/examples/reconciliation-query-batch/valid/example-full.json @@ -1,10 +1,12 @@ { "queries": [ { - "query": "Christel Hanewinckel", "type": "DifferentiatedPerson", "limit": 5, "properties": [ + { + "v": "Christel Hanewinckel" + }, { "pid": "professionOrOccupation", "v": "Politik*" @@ -16,10 +18,12 @@ ] }, { - "query": "Franz Thönnes", "type": "DifferentiatedPerson", "limit": 5, "properties": [ + { + "v": "Franz Thönnes" + }, { "pid": "professionOrOccupation", "v": "Politik*" diff --git a/draft/examples/reconciliation-query-batch/valid/example-min.json b/draft/examples/reconciliation-query-batch/valid/example-min.json index 130e18b..785eda3 100644 --- a/draft/examples/reconciliation-query-batch/valid/example-min.json +++ b/draft/examples/reconciliation-query-batch/valid/example-min.json @@ -1,10 +1,18 @@ { "queries": [ { - "query": "Hans-Eberhard Urbaniak" + "properties": [ + { + "v": "Hans-Eberhard Urbaniak" + } + ] }, { - "query": "Ernst Schwanhold" + "properties": [ + { + "v": "Ernst Schwanhold" + } + ] } ] } diff --git a/draft/examples/reconciliation-query-batch/valid/multi-values.json b/draft/examples/reconciliation-query-batch/valid/multi-values.json index aec03de..caaa898 100644 --- a/draft/examples/reconciliation-query-batch/valid/multi-values.json +++ b/draft/examples/reconciliation-query-batch/valid/multi-values.json @@ -1,10 +1,12 @@ { "queries": [ { - "query": "Christel Hanewinckel", "type": "DifferentiatedPerson", "limit": 5, "properties": [ + { + "v": "Christel Hanewinckel" + }, { "pid": "professionOrOccupation", "v": [ diff --git a/draft/examples/reconciliation-query-batch/valid/text-processing-language.json 
b/draft/examples/reconciliation-query-batch/valid/text-processing-language.json index c16ce91..2f67c31 100644 --- a/draft/examples/reconciliation-query-batch/valid/text-processing-language.json +++ b/draft/examples/reconciliation-query-batch/valid/text-processing-language.json @@ -1,9 +1,11 @@ { "queries": [ { - "query": "Deng Shuping", "lang": "en", "properties": [ + { + "v": "Deng Shuping" + }, { "pid": "professionOrOccupation", "v": "art historian" diff --git a/draft/index.html b/draft/index.html index 4fde551..9405e10 100644 --- a/draft/index.html +++ b/draft/index.html @@ -478,29 +478,35 @@

Reconciliation Queries

Structure of a Reconciliation Query

A reconciliation query consists of the following fields. - At least one of query or properties must be supplied, but all other - fields are optional.

-
query
-
A query string, consisting of a non-empty string. - By supplying such a string, a client intends to search for entities with similar - names. The specifics of how this similarity is defined are determined by the service.
+
properties
+
An array of property assignments (at least one).
type
-
A type identifier. Supplying such a type allows users to restrict +
An optional type identifier. Supplying such a type allows users to restrict the search to entities which bear this type. Whether this restriction should be a hard constraint or simply induce a change on the reconciliation scores can be determined by the service. In particular, services MAY return candidates which do not belong to the supplied type;
limit
-
A limit on the number of candidates to return, which must be a positive integer;
-
properties
-
An array of objects, where each object maps a property identifier (in the pid field) - to one or more property values (in the v field). These are used to further filter the set of candidates (similar to a WHERE clause in SQL), - by allowing clients to specify other attributes of entities that should match, beyond their name in the query field. - How reconciliation services handle this further restriction ("must match all properties" or "should match some") and how it affects the score, is up to the service. - A reconciliation service that supports properties SHOULD provide a suggest service for discovering these properties;
+
An optional limit on the number of candidates to return, which must be a positive integer;

+

+ A property assignment specifies the expected value of a property on the entities to match. + These are used to filter the set of candidates (similar to a WHERE clause in SQL), + by allowing clients to specify an attribute of entities that should match. It consists of: +

+
pid
+
A property identifier. If this is not provided, then this signals that + the client intends to search for entities with similar names. The specifics of how this similarity + is defined are determined by the service.
+
v
+
one or more property values. +
+ + How reconciliation services handle this further restriction ("must match all properties" or "should match some") and how it affects the score, is up to the service. + A reconciliation service that supports properties SHOULD provide a suggest service for discovering these properties. +

A reconciliation query batch is an array of reconciliation queries.

diff --git a/draft/schemas/reconciliation-query-batch.json b/draft/schemas/reconciliation-query-batch.json index b2c967e..a776908 100644 --- a/draft/schemas/reconciliation-query-batch.json +++ b/draft/schemas/reconciliation-query-batch.json @@ -43,10 +43,6 @@ "items": { "type": "object", "properties": { - "query": { - "type": "string", - "description": "A string to be matched against the name of the entities" - }, "type": { "description": "A type identifier indicating which class of entities to restrict the search to", "type": "string" @@ -61,13 +57,14 @@ }, "properties": { "type": "array", - "description": "An optional list of property mappings to refine the query", + "minItems": 1, + "description": "A list of property mappings to select candidates", "items": { "type": "object", "properties": { "pid": { "type": "string", - "description": "The identifier of the property, whose values will be compared to the values supplied" + "description": "The identifier of the property, whose values will be compared to the values supplied. 
If absent, values will be matched against the entity names" }, "v": { "description": "A value (or array of values) to match against the property values associated with the property on each candidate", @@ -85,7 +82,6 @@ } }, "required": [ - "pid", "v" ] } @@ -100,23 +96,8 @@ ] } }, - "anyOf": [ - { - "required": [ - "query" - ] - }, - { - "required": [ - "properties" - ], - "properties": { - "properties": { - "type": "array", - "minItems": 1 - } - } - } + "required": [ + "properties" ], "additionalProperties": false } From 68186b00f9340777c7a7bc526d7b1e22ff5908e2 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Mon, 26 Feb 2024 12:10:32 +0100 Subject: [PATCH 2/3] Introduce match_type as suggested by Osma --- ...-properties.json => empty-conditions.json} | 2 +- ...property.json => misnamed-conditions.json} | 0 .../invalid/missing-match-type.json | 18 +++++++++++ .../invalid/multiple-types.json | 3 +- .../invalid/no-root-object.json | 16 ++++++++-- .../valid/example-full.json | 10 +++++-- .../valid/example-min.json | 6 ++-- .../valid/multi-values.json | 4 ++- .../valid/no-query-string.json | 30 ++++++++++++++----- .../valid/text-processing-language.json | 5 +++- draft/index.html | 18 ++++++----- draft/schemas/reconciliation-query-batch.json | 21 +++++++++---- 12 files changed, 101 insertions(+), 32 deletions(-) rename draft/examples/reconciliation-query-batch/invalid/{empty-properties.json => empty-conditions.json} (60%) rename draft/examples/reconciliation-query-batch/invalid/{misnamed-property.json => misnamed-conditions.json} (100%) create mode 100644 draft/examples/reconciliation-query-batch/invalid/missing-match-type.json diff --git a/draft/examples/reconciliation-query-batch/invalid/empty-properties.json b/draft/examples/reconciliation-query-batch/invalid/empty-conditions.json similarity index 60% rename from draft/examples/reconciliation-query-batch/invalid/empty-properties.json rename to 
draft/examples/reconciliation-query-batch/invalid/empty-conditions.json index c2c6072..e60930d 100644 --- a/draft/examples/reconciliation-query-batch/invalid/empty-properties.json +++ b/draft/examples/reconciliation-query-batch/invalid/empty-conditions.json @@ -1,7 +1,7 @@ { "queries": [ { - "properties": [] + "conditions": [] } ] } diff --git a/draft/examples/reconciliation-query-batch/invalid/misnamed-property.json b/draft/examples/reconciliation-query-batch/invalid/misnamed-conditions.json similarity index 100% rename from draft/examples/reconciliation-query-batch/invalid/misnamed-property.json rename to draft/examples/reconciliation-query-batch/invalid/misnamed-conditions.json diff --git a/draft/examples/reconciliation-query-batch/invalid/missing-match-type.json b/draft/examples/reconciliation-query-batch/invalid/missing-match-type.json new file mode 100644 index 0000000..843c25c --- /dev/null +++ b/draft/examples/reconciliation-query-batch/invalid/missing-match-type.json @@ -0,0 +1,18 @@ +{ + "queries": [ + { + "conditions": [ + { + "v": "Hans-Eberhard Urbaniak" + } + ] + }, + { + "conditions": [ + { + "v": "Ernst Schwanhold" + } + ] + } + ] +} diff --git a/draft/examples/reconciliation-query-batch/invalid/multiple-types.json b/draft/examples/reconciliation-query-batch/invalid/multiple-types.json index ee00a4e..d2f3fb7 100644 --- a/draft/examples/reconciliation-query-batch/invalid/multiple-types.json +++ b/draft/examples/reconciliation-query-batch/invalid/multiple-types.json @@ -1,8 +1,9 @@ { "queries": [ { - "properties": [ + "conditions": [ { + "match_type": "name", "v": "Christel Hanewinckel" } ], diff --git a/draft/examples/reconciliation-query-batch/invalid/no-root-object.json b/draft/examples/reconciliation-query-batch/invalid/no-root-object.json index 0435777..143d568 100644 --- a/draft/examples/reconciliation-query-batch/invalid/no-root-object.json +++ b/draft/examples/reconciliation-query-batch/invalid/no-root-object.json @@ -1,8 +1,20 @@ [ { - 
"properties": [{"pid":"uid", "v": "27eb892afbb2"}] + "conditions": [ + { + "match_type": "property", + "pid": "uid", + "v": "27eb892afbb2" + } + ] }, { - "properties": [{"pid":"uid", "v": "ab09da9dd37e"}] + "conditions": [ + { + "match_type": "property", + "pid": "uid", + "v": "ab09da9dd37e" + } + ] } ] diff --git a/draft/examples/reconciliation-query-batch/valid/example-full.json b/draft/examples/reconciliation-query-batch/valid/example-full.json index 2e20c2a..6c61a09 100644 --- a/draft/examples/reconciliation-query-batch/valid/example-full.json +++ b/draft/examples/reconciliation-query-batch/valid/example-full.json @@ -3,11 +3,13 @@ { "type": "DifferentiatedPerson", "limit": 5, - "properties": [ + "conditions": [ { + "match_type": "name", "v": "Christel Hanewinckel" }, { + "match_type": "property", "pid": "professionOrOccupation", "v": "Politik*", "required": false, @@ -15,6 +17,7 @@ "match_qualifier": "WildcardMatch" }, { + "match_type": "property", "pid": "affiliation", "v": "http://d-nb.info/gnd/2022139-3", "required": false, @@ -26,11 +29,13 @@ { "type": "DifferentiatedPerson", "limit": 5, - "properties": [ + "conditions": [ { + "match_type": "name", "v": "Franz Thönnes" }, { + "match_type": "property", "pid": "professionOrOccupation", "v": "Politik*", "required": false, @@ -38,6 +43,7 @@ "match_qualifier": "WildcardMatch" }, { + "match_type": "property", "pid": "affiliation", "v": "http://d-nb.info/gnd/2022139-3", "required": false, diff --git a/draft/examples/reconciliation-query-batch/valid/example-min.json b/draft/examples/reconciliation-query-batch/valid/example-min.json index 785eda3..8d5d776 100644 --- a/draft/examples/reconciliation-query-batch/valid/example-min.json +++ b/draft/examples/reconciliation-query-batch/valid/example-min.json @@ -1,15 +1,17 @@ { "queries": [ { - "properties": [ + "conditions": [ { + "match_type": "name", "v": "Hans-Eberhard Urbaniak" } ] }, { - "properties": [ + "conditions": [ { + "match_type": "name", "v": "Ernst 
Schwanhold" } ] diff --git a/draft/examples/reconciliation-query-batch/valid/multi-values.json b/draft/examples/reconciliation-query-batch/valid/multi-values.json index caaa898..f4ba3b2 100644 --- a/draft/examples/reconciliation-query-batch/valid/multi-values.json +++ b/draft/examples/reconciliation-query-batch/valid/multi-values.json @@ -3,11 +3,13 @@ { "type": "DifferentiatedPerson", "limit": 5, - "properties": [ + "conditions": [ { + "match_type": "name", "v": "Christel Hanewinckel" }, { + "match_type": "property", "pid": "professionOrOccupation", "v": [ "Politik*", diff --git a/draft/examples/reconciliation-query-batch/valid/no-query-string.json b/draft/examples/reconciliation-query-batch/valid/no-query-string.json index eb027b4..ee78785 100644 --- a/draft/examples/reconciliation-query-batch/valid/no-query-string.json +++ b/draft/examples/reconciliation-query-batch/valid/no-query-string.json @@ -1,8 +1,22 @@ -{"queries":[ - { - "properties": [{"pid":"uid", "v": "27eb892afbb2"}] - }, - { - "properties": [{"pid":"uid", "v": "ab09da9dd37e"}] - } -]} +{ + "queries": [ + { + "conditions": [ + { + "match_type": "property", + "pid": "uid", + "v": "27eb892afbb2" + } + ] + }, + { + "conditions": [ + { + "match_type": "property", + "pid": "uid", + "v": "ab09da9dd37e" + } + ] + } + ] +} diff --git a/draft/examples/reconciliation-query-batch/valid/text-processing-language.json b/draft/examples/reconciliation-query-batch/valid/text-processing-language.json index 2f67c31..dda0f9b 100644 --- a/draft/examples/reconciliation-query-batch/valid/text-processing-language.json +++ b/draft/examples/reconciliation-query-batch/valid/text-processing-language.json @@ -2,15 +2,18 @@ "queries": [ { "lang": "en", - "properties": [ + "conditions": [ { + "match_type": "name", "v": "Deng Shuping" }, { + "match_type": "property", "pid": "professionOrOccupation", "v": "art historian" }, { + "match_type": "property", "pid": "variantName", "v": "鄧淑蘋", "lang": "zh-Hant" diff --git 
a/draft/index.html b/draft/index.html index 19d0991..021f10b 100644 --- a/draft/index.html +++ b/draft/index.html @@ -482,8 +482,8 @@

Structure of a Reconciliation Query

A reconciliation query consists of the following fields.

-
properties
-
An array of property assignments (at least one).
+
conditions
+
An array of conditions (at least one).
type
An optional type identifier. Supplying such a type allows users to restrict the search to entities which bear this type. Whether this restriction should be a @@ -495,16 +495,18 @@

Structure of a Reconciliation Query

- A property assignment specifies the expected value of a property on the entities to match. - These are used to filter the set of candidates (similar to a WHERE clause in SQL), + A condition specifies a constraint that should be matched by the entities to return. + It is used to filter the set of candidates (similar to a WHERE clause in SQL), by allowing clients to specify an attribute of entities that should match. It consists of:

+
match_type
+
Either name or property, depending on whether the condition related to entity names or their properties.
pid
-
A property identifier. If this is not provided, then this signals that - the client intends to search for entities with similar names. The specifics of how this similarity - is defined are determined by the service.
+
A property identifier, to be provided if and only if the match_type is property.
v
-
one or more property values. +
one or more property values. If match_type is name, then this value is to + be matched to entity names, otherwise to the property values via the supplied pid. The specifics of how this matching + is performed are determined by the service.
required
An optional boolean indicating if a match for the property is required for an entity to enter the list of candidates (i.e. acting like a filter or a WHERE clause in SQL) or optional (i.e. only effecting the entity's rank in the list of candidates);
diff --git a/draft/schemas/reconciliation-query-batch.json b/draft/schemas/reconciliation-query-batch.json index 03a4af6..7bfe4ae 100644 --- a/draft/schemas/reconciliation-query-batch.json +++ b/draft/schemas/reconciliation-query-batch.json @@ -55,19 +55,27 @@ "type": "string", "description": "The text-processing language for the query" }, - "properties": { + "conditions": { "type": "array", "minItems": 1, - "description": "A list of property mappings to select candidates", + "description": "A list of conditions to select candidates", "items": { "type": "object", "properties": { + "match_type": { + "type": "string", + "description": "A string to indicate whether to match the supplied value to entity names or property values", + "enum": [ + "name", + "property" + ] + }, "pid": { "type": "string", - "description": "The identifier of the property, whose values will be compared to the values supplied. If absent, values will be matched against the entity names" + "description": "The identifier of the property, whose values will be compared to the values supplied. Required if 'match_type' is 'property'." 
}, "v": { - "description": "A value (or array of values) to match against the property values associated with the property on each candidate", + "description": "A value (or array of values) to match against the entity names or property values associated with the property on each candidate", "oneOf": [ { "$ref": "#/definitions/property_value" @@ -82,7 +90,7 @@ }, "required": { "type": "boolean", - "description": "A boolean indicating if a match for the property is required for an entity to enter the list of candidates" + "description": "A boolean indicating if a match of this condition is required for an entity to enter the list of candidates" }, "match_quantifier": { "type": "string", @@ -99,6 +107,7 @@ } }, "required": [ + "match_type", "v" ] } @@ -114,7 +123,7 @@ } }, "required": [ - "properties" + "conditions" ], "additionalProperties": false } From a2b49095e87f1b4a3cf4ef5ce894adba02a7de0c Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Thu, 14 Mar 2024 13:47:02 +0100 Subject: [PATCH 3/3] Update draft/index.html Co-authored-by: Fabian Steeg --- draft/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/draft/index.html b/draft/index.html index 021f10b..a9923b4 100644 --- a/draft/index.html +++ b/draft/index.html @@ -500,7 +500,7 @@

Structure of a Reconciliation Query

by allowing clients to specify an attribute of entities that should match. It consists of:
match_type
-
Either name or property, depending on whether the condition related to entity names or their properties.
+
Either name or property, depending on whether the condition relates to entity names or their properties.
pid
A property identifier, to be provided if and only if the match_type is property.
v