-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathpercolator.h
86 lines (68 loc) · 3.49 KB
/
percolator.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
// See https://www.youtube.com/watch?v=f4lqBb1d7no&list=PLcGKfGEEONaDzd0Hkn2f1talsTu1HLDYu&index=21
// Describes the Predicate Index that Twitter employs to reduce the number of distinct rules it has to
// attempt to match against a new tweet.
#include "common.h"
#include "compilation_ctx.h"
#include "queries.h"
namespace Trinity {
// Interface through which a percolator_query inspects one document.
// Derive from it and override the two match_*() methods below.
//
// Terms are handed to you as 16-bit ids; you can access the actual term via
// percolator_query::term_by_index(id).
//
// You can e.g reset state, and then match()
struct percolator_document_proxy {
    // Polymorphic base: a virtual destructor makes it safe to destroy a
    // derived proxy through a pointer/reference to this interface.
    virtual ~percolator_document_proxy() = default;

    // Called with a single term id.
    // NOTE(review): presumably should return true iff the document contains
    // that term -- confirm against percolator_query::exec().
    virtual bool match_term(const uint16_t term) = 0;

    // Called with a pointer to `cnt` term ids.
    // NOTE(review): presumably should return true iff the document contains
    // those terms as a phrase (in order) -- confirm against
    // percolator_query::exec().
    virtual bool match_phrase(const uint16_t *, const uint16_t cnt) = 0;
};
class percolator_query final {
protected:
struct CCTX final
: public compilation_ctx {
std::unordered_map<str8_t, uint16_t> localMap;
std::vector<str8_t> allTerms; // we need to keep track of those here
uint16_t resolve_query_term(const str8_t term) override final {
const auto res = localMap.emplace(term, 0); // intern string
if (res.second) {
res.first->second = localMap.size();
const_cast<str8_t *>(&res.first->first)->Set(allocator.CopyOf(term.data(), term.size()), term.size());
EXPECT(allTerms.size() == localMap.size() - 1);
allTerms.emplace_back(res.first->first);
}
return res.first->second;
}
} comp_ctx;
exec_node root;
protected:
bool exec(const exec_node, percolator_document_proxy &) const;
public:
auto term_by_index(const uint16_t idx) const {
return comp_ctx.allTerms[idx - 1];
}
auto &distinct_terms() noexcept {
return comp_ctx.allTerms;
}
const auto &distinct_terms() const noexcept {
return comp_ctx.allTerms;
}
public:
// After compilation, you can access all distinct terms, i.e all distinct terms you may be
// interested in, in a document, via distinct_terms()
percolator_query(const Trinity::query &q) {
if (!q) {
root.fp = ENT::constfalse;
return;
}
root = compile_query(q.root, comp_ctx);
if (root.fp == ENT::constfalse || root.fp == ENT::dummyop)
root.fp = ENT::constfalse;
else
group_execnodes(root, comp_ctx.allocator);
}
percolator_query() {
root.fp = ENT::constfalse;
}
operator bool() const noexcept {
return root.fp != ENT::constfalse && root.fp != ENT::dummyop;
}
bool match(percolator_document_proxy &) const; // percolator_document_proxy is not const, because you may want to do whatever there
};
} // namespace Trinity