Skip to content

Commit

Permalink
Merge pull request #849 from spencermountain/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
spencermountain authored Jun 21, 2021
2 parents 85aa1b0 + 836b174 commit f94dff6
Show file tree
Hide file tree
Showing 21 changed files with 2,658 additions and 2,024 deletions.
2 changes: 1 addition & 1 deletion builds/compromise-tokenize.js

Large diffs are not rendered by default.

1,647 changes: 935 additions & 712 deletions builds/compromise.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion builds/compromise.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion builds/compromise.mjs

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ While all _Major_ releases should be reviewed, our only two _large_ releases are
-->

#### 13.11.3 [June 2021]

- **[fix]** - regex backtracking issue \#847 (thanks @srubin)
- misc tagging fixes
- update deps

#### 13.11.2 [May 2021]

- **[fix]** - verbphrase conjugation fixes
Expand Down
1 change: 1 addition & 0 deletions data/nouns/singulars.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ module.exports = [
'us dollar',
'us state',
'way',
'press release',

//double-consonant rule
'bottle',
Expand Down
1 change: 1 addition & 0 deletions data/nouns/uncountables.js
Original file line number Diff line number Diff line change
Expand Up @@ -176,4 +176,5 @@ module.exports = [
'wool',
'monogamy',
'polygamy',
'national security',
]
2 changes: 1 addition & 1 deletion data/people/maleNames.js
Original file line number Diff line number Diff line change
Expand Up @@ -862,7 +862,7 @@ module.exports = [
'robert',
'roberto',
'rocco',
'rocky',
// 'rocky',
'roderick',
'rodger',
'rodney',
Expand Down
1 change: 1 addition & 0 deletions data/verbs/infinitives.js
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,7 @@ module.exports = [
'talk',
'tighten',
'target',
'take part',
'taste',
'tend',
'test',
Expand Down
2,878 changes: 1,597 additions & 1,281 deletions package-lock.json

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"author": "Spencer Kelly <[email protected]> (http://spencermounta.in)",
"name": "compromise",
"description": "modest natural language processing",
"version": "13.11.2",
"version": "13.11.3",
"main": "./builds/compromise.js",
"unpkg": "./builds/compromise.min.js",
"module": "./builds/compromise.mjs",
Expand Down Expand Up @@ -77,22 +77,22 @@
"efrt-unpack": "2.2.0"
},
"devDependencies": {
"@babel/core": "7.14.0",
"@babel/preset-env": "7.14.1",
"@babel/core": "7.14.6",
"@babel/preset-env": "7.14.5",
"@rollup/plugin-alias": "3.1.2",
"@rollup/plugin-commonjs": "18.1.0",
"@rollup/plugin-commonjs": "19.0.0",
"@rollup/plugin-json": "4.1.0",
"@rollup/plugin-node-resolve": "11.2.1",
"@rollup/plugin-node-resolve": "13.0.0",
"amble": "1.3.0",
"codecov": "3.8.2",
"efrt": "2.2.2",
"nyc": "^15.1.0",
"rollup": "2.47.0",
"rollup": "2.52.2",
"rollup-plugin-babel": "4.4.0",
"rollup-plugin-filesize-check": "0.0.1",
"rollup-plugin-terser": "7.0.2",
"shelljs": "0.8.4",
"tap-dancer": "0.3.1",
"tap-dancer": "0.3.2",
"tape": "5.2.2"
},
"eslintIgnore": [
Expand Down
4 changes: 2 additions & 2 deletions rollup.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ export default [
babelrc: false,
presets: ['@babel/preset-env'],
}),
sizeCheck({ expect: 351, warn: 10 }),
sizeCheck({ expect: 391, warn: 10 }),
],
},
{
Expand All @@ -75,7 +75,7 @@ export default [
presets: ['@babel/preset-env'],
}),
terser(),
sizeCheck({ expect: 183, warn: 10 }),
sizeCheck({ expect: 190, warn: 10 }),
],
},
]
55 changes: 47 additions & 8 deletions scratch.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,56 @@ const nlp = require('./src/index')
// nlp.extend(require('./plugins/sentences/src'))
// const text = require('/Users/spencer/mountain/compromise/scripts/perf/flame/_sotu-text.js')
// const fmt = iso => (iso ? spacetime(iso).format('{day-short} {nice} {year}') : '-')
// nlp.verbose(true)
nlp.verbose(true)

// let doc = nlp('the dogs chased')
// let doc = nlp('i do not really yell').debug()
// let doc = nlp('i did not really yell').debug()

let doc = nlp(`http:subdomain.cool.com/`).debug()
// doc.verbs().toPresentTense()
doc.verbs().toPastTense()
console.log(doc.text())
let arr = [
// `drinks and food fuel shopping at new Saks`,
// `litigation costs`,
// `the dog, whose skip was Frank`,
// `on non-Microsoft operating systems,`,
// `it’s a myth that uncovered wounds heal faster`,
// `a national security issue `,
// `I have no excuse, but surprise and fear `,
// `in the woods, sketching, boating, fishing`,
// `formal thought patterns `,
// `every parenting system`,
// `with Scotland winning 49 matches `,
// `come into the coach and [take] part`,
// `There’s a big plum tree growing on it close to the line fence .`,
// `become overly weakened`,
// `a completely beaten man`,
// `the said card`,
// `one super strong character`,
// `we charged back`,
// `for suspected terrorists`,
// `for discounted beauty items`,
// `the number of suspected terrorists`,
// `number of registered party members`,
// `rely on bottled water`,
// `will have waited until release`,
// `selling like hot cakes`,
// `have given up on reason`,

// let doc = nlp('I will never do that')
// doc.verbs().toInfinitive()
// console.log(doc.text())
// `you have some valid points`,
// `for some reason`,
// `dirty tricks`,
// `press release`,
// `the same type of shouts`,
// `the same kind of shouts`,
// `they are essential to expand`,
// `had a rocky release`,
// `doing better for fights`,
// `might get better aim`,
`i think tipping blows`,
]
// `won’t take extra damage`,
// `called the taylor rule`,
// `naive and chubby cheeked`,
// `he’s devastated`,

let doc = nlp(arr[0]).debug()
// nlp(`I have to say the value `).debug()
4 changes: 2 additions & 2 deletions src/01-tokenizer/01-sentences.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,14 @@ const testHasEllipse = function (str, suffix) {
return hasEllipse.test(str)
}

const testHasLetter = function (str, suffix, prefixHasLetter) {
const testHasLetter = function (suffix, prefixHasLetter) {
return prefixHasLetter || hasLetter.test(suffix)
}

/** does this look like a sentence? */
const isSentence = function (str, suffix, abbrevs, prefixContext) {
// must have a letter
prefixContext.hasLetter = testHasLetter(str, suffix, prefixContext.hasLetter)
prefixContext.hasLetter = testHasLetter(suffix, prefixContext.hasLetter)
if (!prefixContext.hasLetter) {
return false
}
Expand Down
4 changes: 2 additions & 2 deletions src/02-tagger/01-init/01-lexicon.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const underOver = /^(under|over)-?/
const underOver = /^(under|over)-?.{3}/

/** match a word-sequence, like 'super bowl' in the lexicon */
const tryMultiple = function (terms, t, world) {
Expand Down Expand Up @@ -61,7 +61,7 @@ const checkLexicon = function (terms, world) {
}
// prefix strip: try to match 'take' for 'undertake'
if (underOver.test(str) === true) {
let noPrefix = str.replace(underOver, '')
let noPrefix = str.replace(/^(under|over)-?/, '')
if (lex.hasOwnProperty(noPrefix) === true) {
terms[t].tag(lex[noPrefix], 'noprefix-lexicon', world)
}
Expand Down
11 changes: 11 additions & 0 deletions src/02-tagger/04-correction/matches/03-adjective.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,17 @@ let list = [
{ match: '#Copula #Adjective? [(out|in|through)]$', group: 0, tag: 'Adjective', reason: 'still-out' },
// shut the door
{ match: '^[#Adjective] (the|your) #Noun', group: 0, tag: 'Infinitive', reason: 'shut-the' },
// the said card
{ match: 'the [said] #Noun', group: 0, tag: 'Adjective', reason: 'the-said-card' },
// a myth that uncovered wounds heal
{
match: '#Noun (that|which|whose) [#PastTense && !#Copula] #Noun',
group: 0,
tag: 'Adjective',
reason: 'that-past-noun',
},
// the very fed character
// { match: `#Determiner #Adverb [#PastTense] #Noun`, group: 0, tag: 'Adjective', reason: 'very-x-noun' },
]

module.exports = list
26 changes: 26 additions & 0 deletions src/02-tagger/04-correction/matches/04-noun.js
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,32 @@ module.exports = [
{ match: `to #PresentTense #Noun [#PresentTense] #Preposition`, group: 0, tag: 'Noun', reason: 'gas-exchange' },
// a comdominium, or simply condo
{ match: `a #Noun+ or #Adverb+? [#Verb]`, group: 0, tag: 'Noun', reason: 'noun-or-noun' },
// operating system
{ match: `[#Gerund] system`, group: 0, tag: 'Noun', reason: 'operating-system' },
// waited until release
{
match: `#PastTense (until|as|through|without) [#PresentTense]`,
group: 0,
tag: 'Noun',
reason: 'waited-until-release',
},
// selling like hot cakes
{ match: `#Gerund like #Adjective? [#PresentTense]`, group: 0, tag: 'Plural', reason: 'like-hot-cakes' },

// some valid reason
{ match: `some #Adjective [#PresentTense]`, group: 0, tag: 'Noun', reason: 'some-reason' },
// for some reason
{ match: `for some [#PresentTense]`, group: 0, tag: 'Noun', reason: 'for-some-reason' },
// same kind of shouts
{ match: `(same|some|the|that|a) kind of [#PresentTense]`, group: 0, tag: 'Noun', reason: 'some-kind-of' },
// a type of shout
{ match: `(same|some|the|that|a) type of [#PresentTense]`, group: 0, tag: 'Noun', reason: 'some-type-of' },
// doing better for fights
{ match: `#Gerund #Adjective #Preposition [#PresentTense]`, group: 0, tag: 'Noun', reason: 'doing-better-for-x' },
// get better aim
{ match: `(get|got|have|had) #Comparative [#PresentTense]`, group: 0, tag: 'Noun', reason: 'got-better-aim' },
// i think tipping sucks
{ match: `#Pronoun #Infinitive [#Gerund] #PresentTense`, group: 0, tag: 'Noun', reason: 'tipping-sucks' },

// in various sensory functions
// {
Expand Down
2 changes: 2 additions & 0 deletions src/02-tagger/04-correction/matches/05-adverb.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,6 @@ module.exports = [
// kinda sparkly
{ match: `#Adverb [#Adverb]$`, group: 0, tag: 'Adjective', reason: 'kinda-sparkly' },
{ match: `#Adverb [#Adverb] (and|or|then)`, group: 0, tag: 'Adjective', reason: 'kinda-sparkly-and' },
// super strong
{ match: `[super] #Adjective #Noun`, group: 0, tag: 'Adverb', reason: 'super-strong' },
]
10 changes: 9 additions & 1 deletion src/02-tagger/04-correction/matches/07-verbs.js
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ let list = [
{ match: '#Pronoun [#Adjective] #Determiner #Adjective? #Noun', group: 0, tag: 'Verb', reason: 'he-adj-the' },

//is eager to go
{ match: '#Copula [#Adjective to] #Verb', group: 0, tag: 'Verb', reason: 'adj-to' },
{ match: '#Copula [#Adjective] to #Verb', group: 0, tag: 'Verb', reason: 'adj-to' },
// open the door
{ match: '[open] #Determiner', group: 0, tag: 'Infinitive', reason: 'open-the' },
// compromises are possible
Expand All @@ -149,6 +149,14 @@ let list = [
{ match: '[shit] (#Determiner|#Possessive|them)', group: 0, tag: 'Verb', reason: 'swear1-verb' },
{ match: '[damn] (#Determiner|#Possessive|them)', group: 0, tag: 'Verb', reason: 'swear2-verb' },
{ match: '[fuck] (#Determiner|#Possessive|them)', group: 0, tag: 'Verb', reason: 'swear3-verb' },
// become overly weakened
{ match: '(become|fall|grow) #Adverb? [#PastTense]', group: 0, tag: 'Adjective', reason: 'overly-weakened' },
// a completely beaten man
{ match: '(a|an) #Adverb [#Participle] #Noun', group: 0, tag: 'Adjective', reason: 'completely-beaten' },
// whose name was
{ match: 'whose [#PresentTense] #Copula', group: 0, tag: 'Noun', reason: 'whos-name-was' },
// give up on reason
{ match: `#PhrasalVerb #PhrasalVerb #Preposition [#PresentTense]`, group: 0, tag: 'Noun', reason: 'given-up-on-x' }, // wants to be #Particle
]

module.exports = list
Loading

0 comments on commit f94dff6

Please sign in to comment.