Skip to content

Commit

Permalink
add javadoc
Browse files Browse the repository at this point in the history
Signed-off-by: zhichao-aws <[email protected]>
  • Loading branch information
zhichao-aws committed Sep 26, 2023
1 parent a47c8b6 commit b48091f
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,11 @@
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;

// The abstract class for text processing use cases. Users provide a field name map
// and a model id. During ingestion, the processor will use the corresponding model
// to inference the input texts, and set the target fields according to the field name map.
/**
* The abstract class for text processing use cases. Users provide a field name map and a model id.
* During ingestion, the processor uses the corresponding model to run inference on the input texts
* and sets the target fields according to the field name map.
*/
@Log4j2
public abstract class NLPProcessor extends AbstractProcessor {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
import org.opensearch.neuralsearch.ml.MLCommonsClientAccessor;
import org.opensearch.neuralsearch.util.TokenWeightUtil;

/**
* This processor is used for sparse encoding of user input text. model_id indicates which model to use,
* and field_map indicates which fields need sparse encoding and the corresponding keys for the sparse encoding results.
*/
@Log4j2
public class SparseEncodingProcessor extends NLPProcessor {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@

import com.google.common.annotations.VisibleForTesting;

/**
* SparseEncodingQueryBuilder is responsible for handling "sparse_encoding" query types. It uses an ML SPARSE_ENCODING model
* or SPARSE_TOKENIZE model to produce a Map with String keys and Float values for input text. The map is then transformed
* into a Lucene FeatureQuery wrapped in a Lucene BooleanQuery.
*/

@Log4j2
@Getter
@Setter
Expand All @@ -66,6 +72,12 @@ public static void initialize(MLCommonsClientAccessor mlClient) {
private String modelId;
private Supplier<Map<String, Float>> queryTokensSupplier;

/**
* Constructor from stream input
*
* @param in StreamInput to initialize object from
* @throws IOException thrown if unable to read from input stream
*/
public SparseEncodingQueryBuilder(StreamInput in) throws IOException {
super(in);
this.fieldName = in.readString();
Expand Down Expand Up @@ -98,6 +110,9 @@ protected void doXContent(XContentBuilder xContentBuilder, Params params) throws
* "model_id": "string"
* }
*
* @param parser XContentParser
* @return SparseEncodingQueryBuilder
* @throws IOException can be thrown by parser
*/
public static SparseEncodingQueryBuilder fromXContent(XContentParser parser) throws IOException {
SparseEncodingQueryBuilder sparseEncodingQueryBuilder = new SparseEncodingQueryBuilder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@
import java.util.Map;
import java.util.stream.Collectors;

/**
* Utility class for working with sparse_encoding queries and ingest processor.
* Used to fetch the (token, weight) Map from the response returned by {@link org.opensearch.neuralsearch.ml.MLCommonsClientAccessor}
*
*/

public class TokenWeightUtil {
public static String RESPONSE_KEY = "response";

Expand All @@ -36,6 +42,8 @@ public class TokenWeightUtil {
* { TOKEN_WEIGHT_MAP}
* ]
* }]
*
* @param mapResultList {@link List} of {@link Map} which is the response from {@link org.opensearch.neuralsearch.ml.MLCommonsClientAccessor}
* @return {@link List} of (token, weight) {@link Map} fetched from the response
*/
public static List<Map<String, Float>> fetchListOfTokenWeightMap(List<Map<String, ?>> mapResultList) {
List<Object> results = new ArrayList<>();
Expand Down

0 comments on commit b48091f

Please sign in to comment.