add javadoc

Signed-off-by: zhichao-aws <[email protected]>
opensearch-project · Sep 26, 2023 · b48091f · b48091f
1 parent a47c8b6
commit b48091f
Show file tree

Hide file tree

Showing 4 changed files with 32 additions and 3 deletions.
diff --git a/src/main/java/org/opensearch/neuralsearch/processor/NLPProcessor.java b/src/main/java/org/opensearch/neuralsearch/processor/NLPProcessor.java
@@ -26,9 +26,11 @@
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.ImmutableMap;
 
-// The abstract class for text processing use cases. Users provide a field name map
-// and a model id. During ingestion, the processor will use the corresponding model
-// to inference the input texts, and set the target fields according to the field name map.
+/**
+ * The abstract class for text processing use cases. Users provide a field name map and a model id.
+ * During ingestion, the processor will use the corresponding model to run inference on the input texts,
+ * and set the target fields according to the field name map.
+ */
 @Log4j2
 public abstract class NLPProcessor extends AbstractProcessor {
 

diff --git a/src/main/java/org/opensearch/neuralsearch/processor/SparseEncodingProcessor.java b/src/main/java/org/opensearch/neuralsearch/processor/SparseEncodingProcessor.java
@@ -17,6 +17,10 @@
 import org.opensearch.neuralsearch.ml.MLCommonsClientAccessor;
 import org.opensearch.neuralsearch.util.TokenWeightUtil;
 
+/**
+ * This processor performs sparse encoding on user-provided input text. model_id indicates which model to use,
+ * and field_map indicates which fields need sparse encoding and the corresponding keys for the sparse encoding results.
+ */
 @Log4j2
 public class SparseEncodingProcessor extends NLPProcessor {
 

diff --git a/src/main/java/org/opensearch/neuralsearch/query/SparseEncodingQueryBuilder.java b/src/main/java/org/opensearch/neuralsearch/query/SparseEncodingQueryBuilder.java
@@ -42,6 +42,12 @@
 
 import com.google.common.annotations.VisibleForTesting;
 
+/**
+ * SparseEncodingQueryBuilder is responsible for handling "sparse_encoding" query types. It uses an ML SPARSE_ENCODING model
+ * or SPARSE_TOKENIZE model to produce a Map with String keys and Float values for input text. Then it will be transformed
+ * to Lucene FeatureQuery wrapped by Lucene BooleanQuery.
+ */
+
 @Log4j2
 @Getter
 @Setter
@@ -66,6 +72,12 @@ public static void initialize(MLCommonsClientAccessor mlClient) {
     private String modelId;
     private Supplier<Map<String, Float>> queryTokensSupplier;
 
+    /**
+     * Constructor from stream input
+     *
+     * @param in StreamInput to initialize object from
+     * @throws IOException thrown if unable to read from input stream
+     */
     public SparseEncodingQueryBuilder(StreamInput in) throws IOException {
         super(in);
         this.fieldName = in.readString();
@@ -98,6 +110,9 @@ protected void doXContent(XContentBuilder xContentBuilder, Params params) throws
      *    "model_id": "string"
      *  }
      *
+     * @param parser XContentParser
+     * @return SparseEncodingQueryBuilder
+     * @throws IOException can be thrown by parser
      */
     public static SparseEncodingQueryBuilder fromXContent(XContentParser parser) throws IOException {
         SparseEncodingQueryBuilder sparseEncodingQueryBuilder = new SparseEncodingQueryBuilder();

diff --git a/src/main/java/org/opensearch/neuralsearch/util/TokenWeightUtil.java b/src/main/java/org/opensearch/neuralsearch/util/TokenWeightUtil.java
@@ -11,6 +11,12 @@
 import java.util.Map;
 import java.util.stream.Collectors;
 
+/**
+ * Utility class for working with sparse_encoding queries and ingest processor.
+ * Used to fetch the (token, weight) Map from the response returned by {@link org.opensearch.neuralsearch.ml.MLCommonsClientAccessor}.
+ *
+ */
+
 public class TokenWeightUtil {
     public static String RESPONSE_KEY = "response";
 
@@ -36,6 +42,8 @@ public class TokenWeightUtil {
      *         { TOKEN_WEIGHT_MAP}
      *         ]
      *     }]
+     *
+     * @param mapResultList {@link List} of {@link Map}s which is the response from {@link org.opensearch.neuralsearch.ml.MLCommonsClientAccessor}
      */
     public static List<Map<String, Float>> fetchListOfTokenWeightMap(List<Map<String, ?>> mapResultList) {
         List<Object> results = new ArrayList<>();