Skip to content

Commit

Permalink
Remove dash from words like eco-health
Browse files Browse the repository at this point in the history
  • Loading branch information
bendangelo committed Dec 8, 2023
1 parent 1984190 commit 37bab49
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
2 changes: 1 addition & 1 deletion lib/keyphrase.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class Keyphrase
autoload :Stoplist, "keyphrase/stoplist"

CLEAN_REGEX = /([^a-zA-Z0-9\'\- \.]|(?<!\w)\.)/ # don't remove ' because it might be part of a stop word
BLACKLIST_REGEX = /(?:^|\s)[^a-zA-Z\-]+\b|\'/ # remove words with no letters, ie 123.23.12. And last chance to remove '
BLACKLIST_REGEX = /(?:^|\s)[^a-zA-Z]+\b|\'|\-/ # remove words that contain no letters (e.g. 123.23.12); also the last chance to strip any remaining ' and -
CLEAN_SPACES_REGEX = /\s+/
SENTENCES_REGEX = /[+!?,;:&\[\]\{\}\<\>\=\/\n\t\\"\\(\\)\u2019\u2013\|]|-(?!\w)|'(?=s)|(?<!\s)\.(?![a-zA-Z0-9])|(?<!\w)\#(?=\w)/u

Expand Down
4 changes: 2 additions & 2 deletions spec/keyphrase_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
it "should remove non-words" do
result = Keyphrase.analyse "re-move these's: 's & + ! @ # $ % ^ & * ( ) \\ | [ ] { } / ? ~ ` = - _ , . 🎄 £ tommy'humanity fff' 'fff don't"

expect(result.keys).to eq ["tommy humanity fff fff", "re-move"]
expect(result.keys).to eq ["tommy humanity fff fff", "re move"]
end

it do
Expand All @@ -51,7 +51,7 @@
result = Keyphrase.analyse "The Lofi Roman Empire - Music Of Ancient Rome | sleep, study, meditation
My homage to ancient Rome. Lo-Fi Beats + the Roman Empire works suprisingly well (:"

expect(result.keys).to eq ["Roman Empire works suprisingly", "Lofi Roman Empire", "Lo-Fi Beats", "Ancient Rome", "Music", "sleep", "study", "meditation", "homage"]
expect(result.keys).to eq ["Roman Empire works suprisingly", "Lofi Roman Empire", "Lo Fi Beats", "Ancient Rome", "Music", "sleep", "study", "meditation", "homage"]
end

it "should split hashtags into own keywords" do
Expand Down

0 comments on commit 37bab49

Please sign in to comment.