Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Break lifetime entanglement of TextExtract, TFIDF and Jieba #100

Merged
merged 12 commits into from
Apr 11, 2024
10 changes: 5 additions & 5 deletions benches/jieba_benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
extern crate criterion;

use criterion::{black_box, Criterion, Throughput};
- use jieba_rs::{Jieba, KeywordExtract, TextRank, TokenizeMode, TFIDF};
+ use jieba_rs::{Jieba, KeywordExtract, TextRank, TfIdf, TokenizeMode};
use lazy_static::lazy_static;

#[cfg(unix)]
Expand All @@ -11,8 +11,8 @@ static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;

lazy_static! {
static ref JIEBA: Jieba = Jieba::new();
- static ref TFIDF_EXTRACTOR: TFIDF<'static> = TFIDF::new_with_jieba(&JIEBA);
- static ref TEXTRANK_EXTRACTOR: TextRank<'static> = TextRank::new_with_jieba(&JIEBA);
+ static ref TFIDF_EXTRACTOR: TfIdf = TfIdf::default();
+ static ref TEXTRANK_EXTRACTOR: TextRank = TextRank::default();
}
static SENTENCE: &str = "我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。";

Expand Down Expand Up @@ -55,10 +55,10 @@ fn criterion_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("keywords");
group.throughput(Throughput::Bytes(SENTENCE.len() as u64));
group.bench_function("tfidf", |b| {
- b.iter(|| TFIDF_EXTRACTOR.extract_tags(black_box(SENTENCE), 3, Vec::new()))
+ b.iter(|| TFIDF_EXTRACTOR.extract_keywords(&JIEBA, black_box(SENTENCE), 3, Vec::new()))
});
group.bench_function("textrank", |b| {
- b.iter(|| TEXTRANK_EXTRACTOR.extract_tags(black_box(SENTENCE), 3, Vec::new()))
+ b.iter(|| TEXTRANK_EXTRACTOR.extract_keywords(&JIEBA, black_box(SENTENCE), 3, Vec::new()))
});
group.finish();
}
Expand Down
2 changes: 1 addition & 1 deletion build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::path::Path;
fn main() {
let path = Path::new(&env::var("OUT_DIR").unwrap()).join("hmm_prob.rs");
let hmm_file = File::open("src/data/hmm.model").expect("cannot open hmm.model");
- let mut file = BufWriter::new(File::create(&path).unwrap());
+ let mut file = BufWriter::new(File::create(path).unwrap());
let reader = BufReader::new(hmm_file);
let mut lines = reader.lines().map(|x| x.unwrap()).skip_while(|x| x.starts_with('#'));
let prob_start = lines.next().unwrap();
Expand Down
Loading
Loading