Skip to content

Commit

Permalink
feat(*): drop usage of as helper from vectorOf and test
Browse files Browse the repository at this point in the history
Co-authored-by: Rachna <[email protected]>
  • Loading branch information
sanjayaksaxena and rachnachakraborty committed Feb 19, 2024
1 parent d5dfc21 commit 29a35b1
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 5 deletions.
20 changes: 15 additions & 5 deletions src/wink-nlp.js
Original file line number Diff line number Diff line change
Expand Up @@ -433,11 +433,21 @@ var nlp = function ( theModel, pipe, wordVectorsJSON = null ) {
methods.its = itsHelpers;
methods.as = asHelpers;
// Vector of a token method.
const dummyRDD = Object.create( null );
dummyRDD.wordVectors = wordVectorsJSON;
methods.vectorOf = function ( word ) {
if ( typeof word !== 'string' ) throw Error( 'winkNLP: input word must be of type string.' );
return asHelpers.vector( [ word ], dummyRDD );
/**
 * Returns the vector of `word` from the loaded word vectors. The lookup is
 * case-insensitive (the word is lower-cased before the lookup); a word that is
 * not present in the vectors yields the unknown-word vector (`unkVector`).
 *
 * @param {string} word whose vector is required.
 * @param {boolean} [safe=true] when true, only the elements up to and including
 * index `l2NormIndex` are returned; when false, the entire stored array is returned.
 * @returns {number[]} a copy of the vector, so callers may mutate it freely.
 * @throws {Error} if `word` is not a string, or if word vectors were not loaded.
 */
methods.vectorOf = function ( word, safe = true ) {
  if ( typeof word !== 'string' ) {
    throw Error( 'winkNLP: input word must be of type string.' );
  }

  // `wordVectorsJSON` defaults to `null`; report that clearly rather than
  // failing with "Cannot read properties of null".
  if ( wordVectorsJSON === null || wordVectorsJSON === undefined ) {
    throw Error( 'winkNLP: word vectors are not loaded.' );
  }

  const vectors = wordVectorsJSON.vectors;
  const key = word.toLowerCase();
  // Own-property check: words such as "constructor" or "__proto__" must not
  // resolve to members inherited from Object.prototype, which are not arrays.
  const isKnown = Object.prototype.hasOwnProperty.call( vectors, key );
  const tv = ( isKnown ) ? vectors[ key ] : wordVectorsJSON.unkVector;

  // If unsafe, return the entire array.
  return ( safe ) ? tv.slice( 0, wordVectorsJSON.l2NormIndex + 1 ) : tv.slice();
}; // vectorOf()

return methods;
Expand Down
16 changes: 16 additions & 0 deletions test/wink-nlp-specs.js
Original file line number Diff line number Diff line change
Expand Up @@ -630,4 +630,20 @@ describe( 'vectorOf method', function () {
expect( myNLP.vectorOf( 'UNK$$$' ) ).to.deep.equal( zeroVector );
expect( doc2.tokens().out( its.value, as.vector) ).to.deep.equal( zeroVector );
} );

it( 'with safe=false and UNK$$$, array\'s length === 102 & last element === -1', function () {
  // Expected raw unknown-word vector: 101 zeros followed by a trailing -1.
  const expected = Array.from( { length: 102 }, ( _, idx ) => ( ( idx === 101 ) ? -1 : 0 ) );
  const rawUnk = myNLP.vectorOf( 'UNK$$$', false );
  expect( rawUnk ).to.deep.equal( expected );
  expect( rawUnk.length ).to.deep.equal( 102 );
} );

it( 'with safe=false and "the" word, array\'s length === 102 & last element === 0', function () {
  // The raw vector is the safe vector plus one trailing element; that element is 0
  // because "the" is the first word in the word vectors, i.e. the most often used word.
  const expected = [ ...myNLP.vectorOf( 'the' ), 0 ];
  const rawThe = myNLP.vectorOf( 'the', false );
  expect( rawThe ).to.deep.equal( expected );
  expect( rawThe.length ).to.deep.equal( 102 );
} );
} );

0 comments on commit 29a35b1

Please sign in to comment.