-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
llmexample is now also an OAEI matcher
- Loading branch information
Showing
4 changed files
with
196 additions
and
65 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
72 changes: 72 additions & 0 deletions
72
...main/java/de/uni_mannheim/informatik/dws/melt/examples/llm_transformers/OLaLaForOAEI.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
package de.uni_mannheim.informatik.dws.melt.examples.llm_transformers; | ||
|
||
import de.uni_mannheim.informatik.dws.melt.matching_base.IMatcher; | ||
import de.uni_mannheim.informatik.dws.melt.matching_jena.MatcherPipelineYAAAJenaConstructor; | ||
import de.uni_mannheim.informatik.dws.melt.matching_jena.TextExtractor; | ||
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.elementlevel.HighPrecisionMatcher; | ||
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.filter.BadHostsFilter; | ||
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.filter.ConfidenceFilter; | ||
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.filter.extraction.NaiveDescendingExtractor; | ||
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.metalevel.AddAlignmentMatcher; | ||
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.metalevel.ConfidenceCombiner; | ||
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.util.StringProcessing; | ||
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.util.textExtractors.TextExtractorOnlyLabel; | ||
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.util.textExtractors.TextExtractorSet; | ||
import de.uni_mannheim.informatik.dws.melt.matching_ml.python.nlptransformers.LLMBinaryFilter; | ||
import de.uni_mannheim.informatik.dws.melt.matching_ml.python.nlptransformers.SentenceTransformersMatcher; | ||
import de.uni_mannheim.informatik.dws.melt.yet_another_alignment_api.Alignment; | ||
import java.util.Properties; | ||
import org.apache.jena.ontology.OntModel; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
/** | ||
* | ||
*/ | ||
public class OLaLaForOAEI implements IMatcher<OntModel,Alignment,Properties> { | ||
|
||
@Override | ||
public Alignment match(OntModel source, OntModel target, Alignment inputAlignment, Properties parameters) throws Exception { | ||
|
||
SentenceTransformersMatcher biEncoder = new SentenceTransformersMatcher( | ||
TextExtractor.appendStringPostProcessing(new TextExtractorSet(), StringProcessing::normalizeOnlyCamelCaseAndUnderscore), | ||
"multi-qa-mpnet-base-dot-v1"//"all-MiniLM-L6-v2" | ||
); | ||
biEncoder.setMultipleTextsToMultipleExamples(true); | ||
biEncoder.setTopK(5); | ||
//biEncoder.setTransformersCache(transformersCache); | ||
biEncoder.addResourceFilter(SentenceTransformersPredicateBadHosts.class); | ||
|
||
|
||
//String model = "TaylorAI/Flash-Llama-7B"; | ||
String model = "upstage/Llama-2-70b-instruct-v2"; | ||
|
||
LLMBinaryFilter llmTransformersFilter = new LLMBinaryFilter( | ||
new TextExtractorOnlyLabel(), | ||
model, | ||
CLIOptions.PREDEFINED_PROMPTS.get(7)); | ||
llmTransformersFilter.setMultipleTextsToMultipleExamples(true); | ||
//llmTransformersFilter.setTransformersCache(transformersCache); | ||
llmTransformersFilter | ||
.addGenerationArgument("max_new_tokens", 10) | ||
.addGenerationArgument("temperature", 0.0); | ||
llmTransformersFilter.addLoadingArguments(LLMConfiguration.getConfiguration(model).getLoadingArguments()); | ||
|
||
MatcherPipelineYAAAJenaConstructor highPrecision = new MatcherPipelineYAAAJenaConstructor( | ||
new HighPrecisionMatcher(), | ||
new BadHostsFilter() | ||
); | ||
Alignment highPrecisionAlignment = highPrecision.match(source, target, inputAlignment, parameters); | ||
|
||
MatcherPipelineYAAAJenaConstructor matcher = new MatcherPipelineYAAAJenaConstructor( | ||
biEncoder, | ||
llmTransformersFilter, | ||
new ConfidenceCombiner(LLMBinaryFilter.class), | ||
new AddAlignmentMatcher(highPrecisionAlignment), | ||
new NaiveDescendingExtractor(), | ||
new ConfidenceFilter(0.5) | ||
); | ||
|
||
return matcher.match(source, target, inputAlignment, parameters); | ||
} | ||
} |