From 14c5be7777713231f1d52b69be860b5e77159110 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Mon, 9 Sep 2024 11:47:50 +0200 Subject: [PATCH 1/2] feat(street): improved matching of directional prefixes --- classifier/scheme/street.js | 8 ++++---- resources/pelias/dictionaries/libpostal/de/stopwords.txt | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 resources/pelias/dictionaries/libpostal/de/stopwords.txt diff --git a/classifier/scheme/street.js b/classifier/scheme/street.js index d06f1a89..55dc0fe5 100644 --- a/classifier/scheme/street.js +++ b/classifier/scheme/street.js @@ -107,17 +107,17 @@ module.exports = [ ] }, { - // Am Falkplatz + // N Main confidence: 0.98, Class: StreetClassification, scheme: [ { - is: ['StopWordClassification'], - not: ['IntersectionClassification'] + is: ['DirectionalClassification'], + not: ['IntersectionClassification', 'StreetClassification'] }, { is: ['StreetClassification'], - not: ['StopWordClassification'] + not: ['IntersectionClassification', 'DirectionalClassification'] } ] }, diff --git a/resources/pelias/dictionaries/libpostal/de/stopwords.txt b/resources/pelias/dictionaries/libpostal/de/stopwords.txt new file mode 100644 index 00000000..55b8489d --- /dev/null +++ b/resources/pelias/dictionaries/libpostal/de/stopwords.txt @@ -0,0 +1 @@ +!am \ No newline at end of file From e98ff9c7b4a535972dcf65caceb4bb73139bfff0 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Mon, 9 Sep 2024 11:57:22 +0200 Subject: [PATCH 2/2] feat(fr): bis/ter housenumber suffixes --- classifier/scheme/subdivision.js | 19 +++++++++++++++++++ parser/AddressParser.js | 1 + .../dictionaries/libpostal/fr/stopwords.txt | 2 ++ test/address.fra.test.js | 16 ++++++++++++++++ test/address.usa.test.js | 8 ++++---- 5 files changed, 42 insertions(+), 4 deletions(-) create mode 100644 classifier/scheme/subdivision.js create mode 100644 resources/pelias/dictionaries/libpostal/fr/stopwords.txt diff --git a/classifier/scheme/subdivision.js 
b/classifier/scheme/subdivision.js new file mode 100644 index 00000000..c691a953 --- /dev/null +++ b/classifier/scheme/subdivision.js @@ -0,0 +1,19 @@ +const HouseNumberClassification = require('../../classification/HouseNumberClassification') + +module.exports = [ + { + // 10 bis / 10 ter + confidence: 0.99, + Class: HouseNumberClassification, + scheme: [ + { + is: ['HouseNumberClassification'], + not: ['IntersectionClassification'] + }, + { + is: ['StopWordClassification'], + not: ['IntersectionClassification', 'PunctuationClassification'] + } + ] + } +] diff --git a/parser/AddressParser.js b/parser/AddressParser.js index 34a84cf7..f88eff48 100644 --- a/parser/AddressParser.js +++ b/parser/AddressParser.js @@ -78,6 +78,7 @@ class AddressParser extends Parser { new CompositeClassifier(require('../classifier/scheme/street')), new CompositeClassifier(require('../classifier/scheme/venue')), new CompositeClassifier(require('../classifier/scheme/intersection')), + new CompositeClassifier(require('../classifier/scheme/subdivision')), // additional classifiers which act on unclassified tokens new CentralEuropeanStreetNameClassifier() diff --git a/resources/pelias/dictionaries/libpostal/fr/stopwords.txt b/resources/pelias/dictionaries/libpostal/fr/stopwords.txt new file mode 100644 index 00000000..8b54a1b1 --- /dev/null +++ b/resources/pelias/dictionaries/libpostal/fr/stopwords.txt @@ -0,0 +1,2 @@ +bis +ter \ No newline at end of file diff --git a/test/address.fra.test.js b/test/address.fra.test.js index 2544b78a..197965dd 100644 --- a/test/address.fra.test.js +++ b/test/address.fra.test.js @@ -117,6 +117,22 @@ const testcase = (test, common) => { assert(`Esplanade de la Liberté`, [{ street: 'Esplanade de la Liberté' }]) assert(`Esplanade du Géneral de Gaulle`, [{ street: 'Esplanade du Géneral de Gaulle' }]) assert(`Esplanade Méditerranée`, [{ street: 'Esplanade Méditerranée' }]) + + // bis/ter housenumber suffixes + assert(`1 bis Av. 
Amélie, 92320 Châtillon, France`, [ + { housenumber: '1 bis' }, + { street: 'Av. Amélie' }, + { postcode: '92320' }, + { locality: 'Châtillon' }, + { country: 'France' } + ]) + assert(`1 ter Av. Amélie, 92320 Châtillon, France`, [ + { housenumber: '1 ter' }, + { street: 'Av. Amélie' }, + { postcode: '92320' }, + { locality: 'Châtillon' }, + { country: 'France' } + ]) } module.exports.all = (tape, common) => { diff --git a/test/address.usa.test.js b/test/address.usa.test.js index 50abb10a..8d84a505 100644 --- a/test/address.usa.test.js +++ b/test/address.usa.test.js @@ -217,23 +217,23 @@ const testcase = (test, common) => { ]], false) // NYC Boroughs - assert('866 E 178th St, Bronx, NY 10460, USA', [[ + assert('866 E 178th St, Bronx, NY 10460, USA', [ { housenumber: '866' }, { street: 'E 178th St' }, { locality: 'Bronx' }, { region: 'NY' }, { postcode: '10460' }, { country: 'USA' } - ]], false) + ]) - assert('866 E 178th St, Staten Island, NY 10460, USA', [[ + assert('866 E 178th St, Staten Island, NY 10460, USA', [ { housenumber: '866' }, { street: 'E 178th St' }, { locality: 'Staten Island' }, { region: 'NY' }, { postcode: '10460' }, { country: 'USA' } - ]], false) + ]) // 'Massachusetts' and 'MA' should be interchangeable and both // forms should allow 'Boston' to be parsed as a locality.