-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Load the necessary algorithms dynamically
- Loading branch information
1 parent
c82c887
commit c81514c
Showing
20 changed files
with
116 additions
and
89 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +0,0 @@ | ||
from bdikit.schema_matching.best.algorithms.valentine import * | ||
from bdikit.schema_matching.best.algorithms.gpt import * | ||
from bdikit.schema_matching.best.algorithms.contrastivelearning import * | ||
from bdikit.schema_matching.best.algorithms.twophase import * | ||
from bdikit.schema_matching.best.algorithms.maxvalsim import * | ||
This file was deleted.
Oops, something went wrong.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,45 +1,63 @@ | ||
import importlib | ||
from enum import Enum | ||
from typing import Mapping, Any, Type | ||
from typing import Mapping, Any | ||
from bdikit.schema_matching.best.base import BaseSchemaMatcher | ||
from bdikit.schema_matching.best import ( | ||
SimFloodSchemaMatcher, | ||
ComaSchemaMatcher, | ||
CupidSchemaMatcher, | ||
DistributionBasedSchemaMatcher, | ||
JaccardSchemaMatcher, | ||
GPTSchemaMatcher, | ||
ContrastiveLearningSchemaMatcher, | ||
TwoPhaseSchemaMatcher, | ||
MaxValSimSchemaMatcher, | ||
) | ||
|
||
|
||
class SchemaMatchers(Enum):
    """Registry of the supported schema matching algorithms.

    Each member pairs the public algorithm name with the dotted import path
    of the class that implements it. The path is stored as a string so the
    implementing module is imported only when that algorithm is requested.
    """

    SIMFLOOD = (
        "similarity_flooding",
        "bdikit.schema_matching.best.valentine.SimFloodSchemaMatcher",
    )
    COMA = (
        "coma",
        "bdikit.schema_matching.best.valentine.ComaSchemaMatcher",
    )
    CUPID = (
        "cupid",
        "bdikit.schema_matching.best.valentine.CupidSchemaMatcher",
    )
    DISTRIBUTION_BASED = (
        "distribution_based",
        "bdikit.schema_matching.best.valentine.DistributionBasedSchemaMatcher",
    )
    # The implementing class is named JaccardSchemaMatcher (no "Distance");
    # a wrong class name here only fails later, at getattr() time.
    JACCARD_DISTANCE = (
        "jaccard_distance",
        "bdikit.schema_matching.best.valentine.JaccardSchemaMatcher",
    )
    GPT = ("gpt", "bdikit.schema_matching.best.gpt.GPTSchemaMatcher")
    CT_LEARNING = (
        "ct_learning",
        "bdikit.schema_matching.best.contrastivelearning.ContrastiveLearningSchemaMatcher",
    )
    TWO_PHASE = (
        "two_phase",
        "bdikit.schema_matching.best.twophase.TwoPhaseSchemaMatcher",
    )
    MAX_VAL_SIM = (
        "max_val_sim",
        "bdikit.schema_matching.best.maxvalsim.MaxValSimSchemaMatcher",
    )

    def __init__(self, matcher_name: str, matcher_path: str):
        # Enum unpacks each member's tuple value into these two arguments.
        self.matcher_name = matcher_name
        self.matcher_path = matcher_path

    @staticmethod
    def get_matcher(matcher_name: str, **matcher_kwargs: Any) -> "BaseSchemaMatcher":
        """Instantiate the schema matcher registered under ``matcher_name``.

        Args:
            matcher_name: Public algorithm name, e.g. ``"coma"``.
            **matcher_kwargs: Keyword arguments forwarded unchanged to the
                matcher class constructor.

        Returns:
            A new instance of the class the registered path points to.

        Raises:
            ValueError: If ``matcher_name`` is not a registered algorithm.
        """
        if matcher_name not in matchers:
            names = ", ".join(matchers)
            raise ValueError(
                f"The {matcher_name} algorithm is not supported. "
                f"Supported algorithms are: {names}"
            )

        # Load the class dynamically: split "pkg.module.Class" at the last dot.
        module_path, class_name = matchers[matcher_name].rsplit(".", 1)
        module = importlib.import_module(module_path)

        return getattr(module, class_name)(**matcher_kwargs)


# Lookup table from algorithm name to dotted class path, built once at import.
matchers = {method.matcher_name: method.matcher_path for method in SchemaMatchers}
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
from bdikit.schema_matching.topk.algorithms.contrastivelearning import * | ||
|
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,26 +1,34 @@ | ||
import importlib | ||
from enum import Enum | ||
from typing import Mapping, Any, Type | ||
from typing import Mapping, Any | ||
from bdikit.schema_matching.topk.base import BaseTopkSchemaMatcher | ||
from bdikit.schema_matching.topk import CLTopkSchemaMatcher | ||
|
||
|
||
class TopkMatchers(Enum):
    """Registry of the supported top-k schema matching algorithms.

    Each member pairs the public algorithm name with the dotted import path
    of the class that implements it. The path is stored as a string so the
    implementing module is imported only when that algorithm is requested.
    """

    CT_LEARNING = (
        "ct_learning",
        "bdikit.schema_matching.topk.contrastivelearning.CLTopkSchemaMatcher",
    )

    def __init__(self, matcher_name: str, matcher_path: str):
        # Enum unpacks each member's tuple value into these two arguments.
        self.matcher_name = matcher_name
        self.matcher_path = matcher_path

    @staticmethod
    def get_matcher(
        matcher_name: str, **matcher_kwargs: Any
    ) -> "BaseTopkSchemaMatcher":
        """Instantiate the top-k matcher registered under ``matcher_name``.

        Args:
            matcher_name: Public algorithm name, e.g. ``"ct_learning"``.
            **matcher_kwargs: Keyword arguments forwarded unchanged to the
                matcher class constructor.

        Returns:
            A new instance of the class the registered path points to.

        Raises:
            ValueError: If ``matcher_name`` is not a registered algorithm.
        """
        if matcher_name not in matchers:
            names = ", ".join(matchers)
            raise ValueError(
                f"The {matcher_name} algorithm is not supported. "
                f"Supported algorithms are: {names}"
            )

        # Load the class dynamically: split "pkg.module.Class" at the last dot.
        module_path, class_name = matchers[matcher_name].rsplit(".", 1)
        module = importlib.import_module(module_path)

        return getattr(module, class_name)(**matcher_kwargs)


# Lookup table from algorithm name to dotted class path, built once at import.
matchers = {method.matcher_name: method.matcher_path for method in TopkMatchers}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +0,0 @@ | ||
from bdikit.value_matching.algorithms.polyfuzz import * | ||
from bdikit.value_matching.algorithms.gpt import * | ||
Empty file.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,36 +1,44 @@ | ||
import importlib | ||
from enum import Enum | ||
from typing import Mapping, Any, Type | ||
from typing import Mapping, Any | ||
from bdikit.value_matching.base import BaseValueMatcher | ||
from bdikit.value_matching import ( | ||
GPTValueMatcher, | ||
TFIDFValueMatcher, | ||
EditDistanceValueMatcher, | ||
EmbeddingValueMatcher, | ||
FastTextValueMatcher, | ||
) | ||
|
||
|
||
class ValueMatchers(Enum):
    """Registry of the supported value matching algorithms.

    Each member pairs the public algorithm name with the dotted import path
    of the class that implements it. The path is stored as a string so the
    implementing module is imported only when that algorithm is requested.
    """

    TFIDF = ("tfidf", "bdikit.value_matching.polyfuzz.TFIDFValueMatcher")
    EDIT = (
        "edit_distance",
        "bdikit.value_matching.polyfuzz.EditDistanceValueMatcher",
    )
    EMBEDDINGS = (
        "embedding",
        "bdikit.value_matching.polyfuzz.EmbeddingValueMatcher",
    )
    FASTTEXT = (
        "fasttext",
        "bdikit.value_matching.polyfuzz.FastTextValueMatcher",
    )
    GPT = ("gpt", "bdikit.value_matching.gpt.GPTValueMatcher")

    def __init__(self, matcher_name: str, matcher_path: str):
        # Enum unpacks each member's tuple value into these two arguments.
        self.matcher_name = matcher_name
        self.matcher_path = matcher_path

    @staticmethod
    def get_matcher(matcher_name: str, **matcher_kwargs: Any) -> "BaseValueMatcher":
        """Instantiate the value matcher registered under ``matcher_name``.

        Args:
            matcher_name: Public algorithm name, e.g. ``"tfidf"``.
            **matcher_kwargs: Keyword arguments forwarded unchanged to the
                matcher class constructor.

        Returns:
            A new instance of the class the registered path points to.

        Raises:
            ValueError: If ``matcher_name`` is not a registered algorithm.
        """
        if matcher_name not in matchers:
            names = ", ".join(matchers)
            raise ValueError(
                f"The {matcher_name} algorithm is not supported. "
                f"Supported algorithms are: {names}"
            )

        # Load the class dynamically: split "pkg.module.Class" at the last dot.
        module_path, class_name = matchers[matcher_name].rsplit(".", 1)
        module = importlib.import_module(module_path)

        return getattr(module, class_name)(**matcher_kwargs)


# Lookup table from algorithm name to dotted class path, built once at import.
matchers = {method.matcher_name: method.matcher_path for method in ValueMatchers}
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters