From 65240a84cbc8f1349a59d52f609f003920262bc0 Mon Sep 17 00:00:00 2001 From: Victor Alfaro Date: Tue, 30 Jul 2024 14:57:05 -0600 Subject: [PATCH] #28813: applying feedback --- .../com/dotcms/ai/api/CompletionsAPIImpl.java | 2 +- .../com/dotcms/ai/api/EmbeddingsAPIImpl.java | 2 +- .../java/com/dotcms/ai/api/EmbeddingsRunner.java | 2 +- .../java/com/dotcms/ai/model/OpenAIModel.java | 8 +++++++- .../java/com/dotcms/ai/model/OpenAIModels.java | 6 ++++++ .../java/com/dotcms/ai/util/EncodingUtil.java | 16 ++++++++++------ .../com/dotcms/ai/viewtool/EmbeddingsTool.java | 2 +- 7 files changed, 27 insertions(+), 11 deletions(-) diff --git a/dotCMS/src/main/java/com/dotcms/ai/api/CompletionsAPIImpl.java b/dotCMS/src/main/java/com/dotcms/ai/api/CompletionsAPIImpl.java index 3a69c6935a76..f52f874142c1 100644 --- a/dotCMS/src/main/java/com/dotcms/ai/api/CompletionsAPIImpl.java +++ b/dotCMS/src/main/java/com/dotcms/ai/api/CompletionsAPIImpl.java @@ -204,7 +204,7 @@ private String getTextPrompt(final String prompt, final String supportingContent } private int countTokens(final String testString) { - return EncodingUtil.registry + return EncodingUtil.REGISTRY .getEncodingForModel(config.get().getModel().getCurrentModel()) .map(enc -> enc.countTokens(testString)) .orElseThrow(() -> new DotRuntimeException("Encoder not found")); diff --git a/dotCMS/src/main/java/com/dotcms/ai/api/EmbeddingsAPIImpl.java b/dotCMS/src/main/java/com/dotcms/ai/api/EmbeddingsAPIImpl.java index 2c49fac5efe2..78d3e618c149 100644 --- a/dotCMS/src/main/java/com/dotcms/ai/api/EmbeddingsAPIImpl.java +++ b/dotCMS/src/main/java/com/dotcms/ai/api/EmbeddingsAPIImpl.java @@ -327,7 +327,7 @@ public Tuple2> pullOrGenerateEmbeddings(final String conten return cachedEmbeddings; } - final List tokens = EncodingUtil.encoding.get().encode(content); + final List tokens = EncodingUtil.ENCODING.get().encode(content); if (tokens.isEmpty()) { debugLogger(this.getClass(), () -> String.format("No tokens for content ID '%s' were 
encoded: %s", contentId, content)); return Tuple.of(0, List.of()); diff --git a/dotCMS/src/main/java/com/dotcms/ai/api/EmbeddingsRunner.java b/dotCMS/src/main/java/com/dotcms/ai/api/EmbeddingsRunner.java index d2929074b97d..6f352c67ccd4 100644 --- a/dotCMS/src/main/java/com/dotcms/ai/api/EmbeddingsRunner.java +++ b/dotCMS/src/main/java/com/dotcms/ai/api/EmbeddingsRunner.java @@ -68,7 +68,7 @@ public void run() { int totalTokens = 0; for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) { final String sentence = cleanContent.substring(start, end); - final int tokenCount = EncodingUtil.encoding.get().countTokens(sentence); + final int tokenCount = EncodingUtil.ENCODING.get().countTokens(sentence); totalTokens += tokenCount; if (totalTokens < splitAtTokens) { diff --git a/dotCMS/src/main/java/com/dotcms/ai/model/OpenAIModel.java b/dotCMS/src/main/java/com/dotcms/ai/model/OpenAIModel.java index 3ff86d8ad44c..eeebccd7c12f 100644 --- a/dotCMS/src/main/java/com/dotcms/ai/model/OpenAIModel.java +++ b/dotCMS/src/main/java/com/dotcms/ai/model/OpenAIModel.java @@ -5,7 +5,13 @@ import java.io.Serializable; -public class OpenAIModel implements Serializable { +/** + * Represents an OpenAI model with details such as ID, object type, creation timestamp, and owner. + * This class is immutable and uses Jackson annotations for JSON serialization and deserialization. 
+ * + * @author vico + */ + public class OpenAIModel implements Serializable { private final String id; private final String object; diff --git a/dotCMS/src/main/java/com/dotcms/ai/model/OpenAIModels.java b/dotCMS/src/main/java/com/dotcms/ai/model/OpenAIModels.java index faa691b6a9c1..1c851628489d 100644 --- a/dotCMS/src/main/java/com/dotcms/ai/model/OpenAIModels.java +++ b/dotCMS/src/main/java/com/dotcms/ai/model/OpenAIModels.java @@ -6,6 +6,12 @@ import java.io.Serializable; import java.util.List; +/** + * Represents a collection of OpenAI models with details such as the type of object and the list of models. + * This class is immutable and uses Jackson annotations for JSON serialization and deserialization. + * + * @author vico + */ public class OpenAIModels implements Serializable { private final String object; diff --git a/dotCMS/src/main/java/com/dotcms/ai/util/EncodingUtil.java b/dotCMS/src/main/java/com/dotcms/ai/util/EncodingUtil.java index 0a4258959cd7..5aa3d12ab0d9 100644 --- a/dotCMS/src/main/java/com/dotcms/ai/util/EncodingUtil.java +++ b/dotCMS/src/main/java/com/dotcms/ai/util/EncodingUtil.java @@ -6,14 +6,18 @@ import com.knuddels.jtokkit.api.EncodingRegistry; import io.vavr.Lazy; +/** + * Utility class for handling encoding operations related to AI models. + * It provides a registry for encoding and a lazy-loaded encoding instance based on the current model. + * The class uses the ConfigService to retrieve the current model configuration. 
+ */ public class EncodingUtil { - public static final EncodingRegistry registry = Encodings.newDefaultEncodingRegistry(); + public static final EncodingRegistry REGISTRY = Encodings.newDefaultEncodingRegistry(); + public static final String MODEL = ConfigService.INSTANCE.config().getEmbeddingsModel().getCurrentModel(); + public static final Lazy<Encoding> ENCODING = Lazy.of(() -> REGISTRY.getEncodingForModel(MODEL).get()); - public static final String model = ConfigService.INSTANCE.config().getEmbeddingsModel().getCurrentModel(); - - public static Lazy<Encoding> encoding = Lazy.of(()-> - registry.getEncodingForModel(model).get() - ); + private EncodingUtil() { + } } diff --git a/dotCMS/src/main/java/com/dotcms/ai/viewtool/EmbeddingsTool.java b/dotCMS/src/main/java/com/dotcms/ai/viewtool/EmbeddingsTool.java index aa2813d123d0..4411ca1cd0fd 100644 --- a/dotCMS/src/main/java/com/dotcms/ai/viewtool/EmbeddingsTool.java +++ b/dotCMS/src/main/java/com/dotcms/ai/viewtool/EmbeddingsTool.java @@ -51,7 +51,7 @@ public void init(Object initData) { * @return The number of tokens in the prompt, or -1 if no encoding is found for the model. */ public int countTokens(final String prompt) { - return EncodingUtil.registry + return EncodingUtil.REGISTRY .getEncodingForModel(appConfig.getModel().getCurrentModel()) .map(encoding -> encoding.countTokens(prompt)) .orElse(-1);