-
Notifications
You must be signed in to change notification settings - Fork 279
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Inverted index reader and writer: part 2 (#423)
- Loading branch information
Showing
9 changed files
with
202 additions
and
21 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
name: Subtask | ||
description: "Propose a subtask for infinity" | ||
title: "[Subtask]: " | ||
labels: [subtask] | ||
|
||
body: | ||
- type: textarea | ||
attributes: | ||
label: Parent Issue | ||
description: Write the ID of the parent issue | ||
placeholder: "Parent issue: #" | ||
validations: | ||
required: true | ||
|
||
- type: textarea | ||
attributes: | ||
label: Detail of Subtask | ||
description: | | ||
Describe the functions that this subtask should implement | ||
validations: | ||
required: true | ||
|
||
- type: textarea | ||
attributes: | ||
label: Describe the implementation you've considered | ||
description: A clear and concise description of the implementation you've considered or investigated. | ||
validations: | ||
required: false | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
// Copyright(C) 2023 InfiniFlow, Inc. All rights reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// https://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
module; | ||
|
||
import stl; | ||
import third_party; | ||
import config; | ||
import infinity_context; | ||
import analyzer; | ||
import chinese_analyzer; | ||
import standard_analyzer; | ||
import ngram_analyzer; | ||
|
||
module analyzer_pool; | ||
|
||
namespace infinity { | ||
|
||
constexpr StringView CHINESE = "chinese"; | ||
constexpr StringView STANDARD = "standard"; | ||
constexpr StringView NGRAM = "ngram"; | ||
|
||
constexpr u64 basis = 0xCBF29CE484222325ull; | ||
constexpr u64 prime = 0x100000001B3ull; | ||
|
||
constexpr u64 Str2Int(char const *str, u64 last_value = basis) { return *str ? Str2Int(str + 1, (*str ^ last_value) * prime) : last_value; } | ||
|
||
void AnalyzerPool::Set(const StringView &name) { | ||
Analyzer *try_analyzer = cache_[name].get(); | ||
if (!try_analyzer) { | ||
switch (Str2Int(name.data())) { | ||
case Str2Int(CHINESE.data()): { | ||
String path = InfinityContext::instance().config()->resource_dict_path(); | ||
UniquePtr<ChineseAnalyzer> analyzer = MakeUnique<ChineseAnalyzer>(Move(path)); | ||
if (analyzer->Load()) | ||
cache_[CHINESE] = Move(analyzer); | ||
} break; | ||
case Str2Int(STANDARD.data()): { | ||
UniquePtr<StandardAnalyzer> analyzer = MakeUnique<StandardAnalyzer>(); | ||
cache_[STANDARD] = Move(analyzer); | ||
} break; | ||
case Str2Int(NGRAM.data()): { | ||
u32 ngram = 2; /// TODO config | ||
UniquePtr<NGramAnalyzer> analyzer = MakeUnique<NGramAnalyzer>(ngram); | ||
cache_[NGRAM] = Move(analyzer); | ||
} break; | ||
default: | ||
break; | ||
} | ||
} | ||
} | ||
|
||
UniquePtr<Analyzer> AnalyzerPool::Get(const StringView &name) { | ||
Analyzer *analyzer = cache_[name].get(); | ||
if (!analyzer) | ||
return nullptr; | ||
switch (Str2Int(name.data())) { | ||
case Str2Int(CHINESE.data()): { | ||
return MakeUnique<ChineseAnalyzer>(*reinterpret_cast<ChineseAnalyzer *>(analyzer)); | ||
} break; | ||
case Str2Int(STANDARD.data()): { | ||
return MakeUnique<StandardAnalyzer>(*reinterpret_cast<StandardAnalyzer *>(analyzer)); | ||
} break; | ||
case Str2Int(NGRAM.data()): { | ||
return MakeUnique<NGramAnalyzer>(*reinterpret_cast<NGramAnalyzer *>(analyzer)); | ||
} break; | ||
default: | ||
return nullptr; | ||
} | ||
} | ||
|
||
} // namespace infinity |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
// Copyright(C) 2023 InfiniFlow, Inc. All rights reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// https://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
module; | ||
|
||
import stl; | ||
import singleton; | ||
import third_party; | ||
import analyzer; | ||
export module analyzer_pool; | ||
|
||
namespace infinity { | ||
|
||
// Singleton pool of analyzer instances keyed by analyzer name. | ||
// Set() creates and caches an analyzer for a name; Get() hands out a copy of it. | ||
export class AnalyzerPool : public Singleton<AnalyzerPool> { | ||
public: | ||
// Map from analyzer name to the cached analyzer instance. | ||
// NOTE(review): keys are non-owning StringView values, so the characters behind a | ||
// stored key must stay valid for as long as the entry remains in the map. | ||
using CacheType = FlatHashMap<StringView, UniquePtr<Analyzer>>; | ||
|
||
// Returns a newly allocated copy of the analyzer cached under `name`, | ||
// or nullptr when no analyzer is cached for it or the name is not recognised. | ||
UniquePtr<Analyzer> Get(const StringView &name); | ||
|
||
// Creates and caches the analyzer identified by `name` when none is cached yet; | ||
// names that are not recognised are ignored. | ||
void Set(const StringView &name); | ||
|
||
private: | ||
// Cached analyzers that Get() copies from. | ||
CacheType cache_; | ||
}; | ||
|
||
} // namespace infinity |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters