From 3162c3eb7e8c91071479bfeae0bcc99a4251136b Mon Sep 17 00:00:00 2001 From: missinglink Date: Tue, 1 Sep 2020 12:50:39 +0200 Subject: [PATCH] feat(venue_improvements): changes to how ampersands are parsed with a preference for venues over intersections in some cases --- classifier/IntersectionClassifier.js | 12 +++ classifier/scheme/person.js | 30 +++--- classifier/scheme/street.js | 92 +++++++++---------- classifier/scheme/street_name.js | 20 ++-- parser/AddressParser.js | 2 +- .../dictionaries/libpostal/en/place_names.txt | 4 +- test/intersection.test.js | 23 +++-- test/venue.usa.test.js | 16 ++++ 8 files changed, 118 insertions(+), 81 deletions(-) diff --git a/classifier/IntersectionClassifier.js b/classifier/IntersectionClassifier.js index 30b1fb4c..e0282a07 100644 --- a/classifier/IntersectionClassifier.js +++ b/classifier/IntersectionClassifier.js @@ -18,6 +18,7 @@ class IntersectionClassifier extends PhraseClassifier { // blacklist // delete this.index.corner + // index defined in code below, no dictionary files loaded: this.index['&'] = true this.index.and = true this.index.und = true @@ -39,6 +40,17 @@ class IntersectionClassifier extends PhraseClassifier { // use an inverted index for full token matching as it's O(1) if (this.index.hasOwnProperty(span.norm)) { + // do not classify 'and' sandwiched by two 'PlaceClassification' + // as an 'IntersectionClassification'. + // eg.
'Bar & Restaurant' + if ( + ['&', 'and', 'und'].includes(span.norm) && + prev.classifications.hasOwnProperty('PlaceClassification') && + next.classifications.hasOwnProperty('PlaceClassification') + ) { + return + } + // classify phrase span.classify(new IntersectionClassification(1)) diff --git a/classifier/scheme/person.js b/classifier/scheme/person.js index a3a8c841..d58fa579 100644 --- a/classifier/scheme/person.js +++ b/classifier/scheme/person.js @@ -9,11 +9,11 @@ module.exports = [ scheme: [ { is: ['GivenNameClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['GivenNameClassification'], - not: ['StreetClassification', 'StreetPrefixClassification', 'StopWordClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'StreetPrefixClassification', 'StopWordClassification'] } ] }, @@ -24,11 +24,11 @@ module.exports = [ scheme: [ { is: ['GivenNameClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['SurnameClassification'], - not: ['StreetClassification', 'StreetPrefixClassification', 'StopWordClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'StreetPrefixClassification', 'StopWordClassification'] } ] }, @@ -39,15 +39,15 @@ module.exports = [ scheme: [ { is: ['GivenNameClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['StopWordClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['SurnameClassification'], - not: ['StreetClassification', 'StreetPrefixClassification', 'StopWordClassification'] + not: 
['PunctuationClassification', 'StreetClassification', 'StreetPrefixClassification', 'StopWordClassification'] } ] }, @@ -58,15 +58,15 @@ module.exports = [ scheme: [ { is: ['GivenNameClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['GivenNameClassification', 'SurnameClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['SurnameClassification'], - not: ['StreetClassification', 'StreetPrefixClassification', 'StopWordClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'StreetPrefixClassification', 'StopWordClassification'] } ] }, @@ -77,11 +77,11 @@ module.exports = [ scheme: [ { is: ['GivenNameClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['AlphaClassification'], - not: ['StreetClassification', 'StreetPrefixClassification', 'StopWordClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'StreetPrefixClassification', 'StopWordClassification'] } ] }, @@ -92,15 +92,15 @@ module.exports = [ scheme: [ { is: ['GivenNameClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['StopWordClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['AlphaClassification'], - not: ['StreetClassification', 'StreetPrefixClassification', 'StopWordClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'StreetPrefixClassification', 'StopWordClassification'] } ] } diff --git 
a/classifier/scheme/street.js b/classifier/scheme/street.js index 6e135e7f..2d30b832 100644 --- a/classifier/scheme/street.js +++ b/classifier/scheme/street.js @@ -8,11 +8,11 @@ module.exports = [ scheme: [ { is: ['AlphaClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['StreetSuffixClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] } ] }, @@ -23,11 +23,11 @@ module.exports = [ scheme: [ { is: ['StreetPrefixClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['AlphaClassification', 'PersonClassification', 'StreetNameClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] } ] }, @@ -38,11 +38,11 @@ module.exports = [ scheme: [ { is: ['OrdinalClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['StreetSuffixClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] } ] }, @@ -53,11 +53,11 @@ module.exports = [ scheme: [ { is: ['NumericClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['StreetSuffixClassification'], - not: ['StreetClassification', 'IntersectionClassification', 'RoadTypeClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification', 'RoadTypeClassification'] } ] 
}, @@ -68,11 +68,11 @@ module.exports = [ scheme: [ { is: ['StopWordClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['PlaceClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] } ] }, @@ -83,11 +83,11 @@ module.exports = [ scheme: [ { is: ['DirectionalClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['OrdinalClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] } ] }, @@ -98,11 +98,11 @@ module.exports = [ scheme: [ { is: ['PersonalSuffixClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['StreetSuffixClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] } ] }, @@ -113,11 +113,11 @@ module.exports = [ scheme: [ { is: ['StopWordClassification'], - not: ['IntersectionClassification'] + not: ['PunctuationClassification', 'IntersectionClassification'] }, { is: ['StreetClassification'], - not: ['StopWordClassification'] + not: ['PunctuationClassification', 'StopWordClassification'] } ] }, @@ -128,11 +128,11 @@ module.exports = [ scheme: [ { is: ['PersonClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['StreetSuffixClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: 
['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] } ] }, @@ -143,15 +143,15 @@ module.exports = [ scheme: [ { is: ['PersonClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['PersonalSuffixClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['StreetSuffixClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] } ] }, @@ -162,15 +162,15 @@ module.exports = [ scheme: [ { is: ['StreetPrefixClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['StopWordClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['AlphaClassification', 'PersonClassification'], - not: ['StreetClassification', 'StreetPrefixClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'StreetPrefixClassification'] } ] }, @@ -181,19 +181,19 @@ module.exports = [ scheme: [ { is: ['StreetPrefixClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['StopWordClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['StopWordClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, 
{ is: ['AlphaClassification'], - not: ['StreetClassification', 'StreetPrefixClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'StreetPrefixClassification'] } ] }, @@ -204,15 +204,15 @@ module.exports = [ scheme: [ { is: ['StreetPrefixClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['PersonalTitleClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['AlphaClassification', 'GivenNameClassification', 'PersonClassification'], - not: ['StreetClassification', 'StreetPrefixClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'StreetPrefixClassification'] } ] }, @@ -223,19 +223,19 @@ module.exports = [ scheme: [ { is: ['StreetPrefixClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['StopWordClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['PersonalTitleClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['AlphaClassification', 'GivenNameClassification', 'PersonClassification'], - not: ['StreetClassification', 'StreetPrefixClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'StreetPrefixClassification'] } ] }, @@ -246,15 +246,15 @@ module.exports = [ scheme: [ { is: ['StreetPrefixClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: 
['GivenNameClassification', 'AlphaClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['SurnameClassification'], - not: ['StreetClassification', 'StreetPrefixClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'StreetPrefixClassification'] } ] }, @@ -265,11 +265,11 @@ module.exports = [ scheme: [ { is: ['AlphaClassification'], - not: ['StreetClassification', 'IntersectionClassification', 'StopWordClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification', 'StopWordClassification'] }, { is: ['StreetClassification'], - not: ['DirectionalClassification'] + not: ['PunctuationClassification', 'DirectionalClassification'] } ] }, @@ -295,11 +295,11 @@ module.exports = [ scheme: [ { is: ['AlphaClassification'], - not: ['StreetClassification', 'IntersectionClassification', 'StopWordClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification', 'StopWordClassification'] }, { is: ['StreetClassification'], - not: ['DirectionalClassification'] + not: ['PunctuationClassification', 'DirectionalClassification'] } ] }, @@ -310,11 +310,11 @@ module.exports = [ scheme: [ { is: ['StreetClassification'], - not: ['DirectionalClassification'] + not: ['PunctuationClassification', 'DirectionalClassification'] }, { is: ['DirectionalClassification'], - not: ['StreetClassification', 'IntersectionClassification', 'EndTokenSingleCharacterClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification', 'EndTokenSingleCharacterClassification'] } ] }, @@ -325,11 +325,11 @@ module.exports = [ scheme: [ { is: ['DirectionalClassification'], - not: ['StreetClassification', 'IntersectionClassification', 'EndTokenSingleCharacterClassification'] + not: ['PunctuationClassification', 'StreetClassification', 
'IntersectionClassification', 'EndTokenSingleCharacterClassification'] }, { is: ['StreetClassification'], - not: ['DirectionalClassification'] + not: ['PunctuationClassification', 'DirectionalClassification'] } ] } diff --git a/classifier/scheme/street_name.js b/classifier/scheme/street_name.js index 9b59f771..4706a5f3 100644 --- a/classifier/scheme/street_name.js +++ b/classifier/scheme/street_name.js @@ -8,11 +8,11 @@ module.exports = [ scheme: [ { is: ['StopWordClassification'], - not: ['DirectionalClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'DirectionalClassification', 'IntersectionClassification'] }, { is: ['AlphaClassification', 'PersonClassification'], - not: ['StreetClassification', 'IntersectionClassification', 'StreetSuffixClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification', 'StreetSuffixClassification'] } ] }, @@ -23,15 +23,15 @@ module.exports = [ scheme: [ { is: ['AlphaClassification'], - not: ['StreetClassification', 'IntersectionClassification', 'StopWordClassification', 'StreetPrefixClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification', 'StopWordClassification', 'StreetPrefixClassification'] }, { is: ['StopWordClassification'], - not: ['DirectionalClassification'] + not: ['PunctuationClassification', 'DirectionalClassification'] }, { is: ['AlphaClassification', 'PersonClassification'], - not: ['StreetClassification', 'IntersectionClassification', 'StreetSuffixClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification', 'StreetSuffixClassification'] } ] }, @@ -42,15 +42,15 @@ module.exports = [ scheme: [ { is: ['StopWordClassification'], - not: ['IntersectionClassification'] + not: ['PunctuationClassification', 'IntersectionClassification'] }, { is: ['NumericClassification'], - not: ['PostcodeClassification'] + not: ['PunctuationClassification', 
'PostcodeClassification'] }, { is: ['AlphaClassification'], - not: ['StreetClassification', 'IntersectionClassification', 'LocalityClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification', 'LocalityClassification'] } ] }, @@ -61,11 +61,11 @@ module.exports = [ scheme: [ { is: ['StreetNameClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] }, { is: ['StreetNameClassification'], - not: ['StreetClassification', 'IntersectionClassification'] + not: ['PunctuationClassification', 'StreetClassification', 'IntersectionClassification'] } ] } diff --git a/parser/AddressParser.js b/parser/AddressParser.js index 522905c0..916ad035 100644 --- a/parser/AddressParser.js +++ b/parser/AddressParser.js @@ -63,6 +63,7 @@ class AddressParser extends Parser { new StopWordClassifier(), // phrase classifiers + new PlaceClassifier(), new IntersectionClassifier(), new PersonClassifier(), new GivenNameClassifier(), @@ -70,7 +71,6 @@ class AddressParser extends Parser { new PersonalSuffixClassifier(), new PersonalTitleClassifier(), new ChainClassifier(), - new PlaceClassifier(), new WhosOnFirstClassifier(), // composite classifiers diff --git a/resources/pelias/dictionaries/libpostal/en/place_names.txt b/resources/pelias/dictionaries/libpostal/en/place_names.txt index b04238da..fceb2298 100644 --- a/resources/pelias/dictionaries/libpostal/en/place_names.txt +++ b/resources/pelias/dictionaries/libpostal/en/place_names.txt @@ -3,4 +3,6 @@ cathedral stop !dist building -field \ No newline at end of file +field +home +pub diff --git a/test/intersection.test.js b/test/intersection.test.js index a113282c..c1fd7794 100644 --- a/test/intersection.test.js +++ b/test/intersection.test.js @@ -62,18 +62,25 @@ const testcase = (test, common) => { assert('carrer con', [{ street: 'carrer con' }]) // no street suffix - assert('foo & bar', [ 
+ assert('foo @ bar', [ { street: 'foo' }, { street: 'bar' } ]) + + // ambiguous query containing ampersand (venue/intersection) + // note: we chose to prefer a venue solution in + // these cases over the intersection solution. + assert('foo & bar', [ + [{ venue: 'foo & bar' }], + [{ street: 'foo' }, { street: 'bar' }] + ], false) assert('foo and bar', [ - { street: 'foo' }, { street: 'bar' } - ]) + [{ venue: 'foo and bar' }], + [{ street: 'foo' }, { street: 'bar' }] + ], false) assert('foo at bar', [ - { street: 'foo' }, { street: 'bar' } - ]) - assert('foo @ bar', [ - { street: 'foo' }, { street: 'bar' } - ]) + [{ venue: 'foo at bar' }], + [{ street: 'foo' }, { street: 'bar' }] + ], false) // missing street suffix - alpha assert('main st & side ave', [ diff --git a/test/venue.usa.test.js b/test/venue.usa.test.js index 4a5566e1..e0f783b8 100644 --- a/test/venue.usa.test.js +++ b/test/venue.usa.test.js @@ -15,6 +15,22 @@ const testcase = (test, common) => { assert('philadelphia museum of art', [ { venue: 'philadelphia museum of art' } ]) + + // common venue suffixes containing ampersand + assert('Bar & Grill', [{ venue: 'Bar & Grill' }]) + assert('Restaurant & Bar', [{ venue: 'Restaurant & Bar' }]) + assert('Cafe & Pub', [{ venue: 'Cafe & Pub' }]) + + // venue names containing ampersand + assert('Andy\'s Bar & Grill', [ + { venue: 'Andy\'s Bar & Grill' } + ]) + assert('Adams Family Restaurant & Bar', [ + { venue: 'Adams Family Restaurant & Bar' } + ]) + // assert('Kells Irish Restaurant & Pub', [ + // { venue: 'Kells Irish Restaurant & Pub' } + // ]) } module.exports.all = (tape, common) => {