diff --git a/server/src/main/java/org/opensearch/index/mapper/FieldTypeInference.java b/server/src/main/java/org/opensearch/index/mapper/FieldTypeInference.java
new file mode 100644
index 0000000000000..f649e3c1ca251
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/mapper/FieldTypeInference.java
@@ -0,0 +1,174 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.mapper;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.opensearch.common.xcontent.XContentFactory;
+import org.opensearch.common.xcontent.json.JsonXContent;
+import org.opensearch.core.common.bytes.BytesReference;
+import org.opensearch.core.xcontent.XContentBuilder;
+import org.opensearch.search.lookup.SourceLookup;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * This class performs type inference by analyzing the _source documents. It uses a random sample of documents to infer the field type, similar to dynamic mapping type guessing logic.
+ * Unlike guessing based on the first document, where field could be missing, this method generates a random sample to make a more accurate inference.
+ * This approach is especially useful for handling missing fields, which is common in nested fields within derived fields of object types.
+ *
+ * <p>The sample size should be chosen carefully to ensure a high probability of selecting at least one document where the field is present.
+ * However, it's essential to strike a balance because a large sample size can lead to performance issues since each sample document's _source field is loaded and examined until the field is found.
+ *
+ * <p>Determining the sample size (S) is akin to deciding how many balls to draw from a bin, ensuring a high probability ({@code >= P}) of drawing at least one green ball (documents with the field) from a mixture of R red balls (documents without the field) and G green balls:
+ * <pre>{@code
+ * P >= 1 - C(R, S) / C(R + G, S)
+ * }</pre>
+ * Here, C() represents the binomial coefficient.
+ * For a high confidence level, we aim for P >= 0.95. For example, with 10^7 documents where the field is present in 2% of them, the sample size S should be around 149 to achieve a probability of 0.95.
+ */
+public class FieldTypeInference {
+ private final IndexReader indexReader; // handed to the random source-values sampler in infer()
+ private final String indexName; // stored as passed to the constructor
+ private final MapperService mapperService; // stored as passed to the constructor
+ // TODO expose using an index setting
+ private int sampleSize; // starts at DEFAULT_SAMPLE_SIZE; changed only through setSampleSize()
+ private static final int DEFAULT_SAMPLE_SIZE = 150; // initial value assigned to sampleSize by the constructor
+ private static final int MAX_SAMPLE_SIZE_ALLOWED = 1000; // cap applied by setSampleSize()
+
+ /**
+  * Creates an inference helper that starts out with the default sample size.
+  *
+  * @param indexName     name of the index, stored as given
+  * @param mapperService mapper service, stored as given
+  * @param indexReader   reader passed on to the document sampler by {@link #infer(ValueFetcher)}
+  */
+ public FieldTypeInference(String indexName, MapperService mapperService, IndexReader indexReader) {
+     // The assignments are independent of each other; the sample size starts at its default.
+     this.sampleSize = DEFAULT_SAMPLE_SIZE;
+     this.indexReader = indexReader;
+     this.mapperService = mapperService;
+     this.indexName = indexName;
+ }
+
+ /**
+  * Sets the number of documents to sample, capped at {@code MAX_SAMPLE_SIZE_ALLOWED}.
+  *
+  * @param sampleSize requested sample size; a value above the cap is replaced by the cap
+  */
+ public void setSampleSize(int sampleSize) {
+     final boolean overCap = sampleSize > MAX_SAMPLE_SIZE_ALLOWED;
+     this.sampleSize = overCap ? MAX_SAMPLE_SIZE_ALLOWED : sampleSize;
+ }
+
+ /**
+  * Returns the sample size currently in effect.
+  *
+  * @return the current value of the {@code sampleSize} field
+  */
+ public int getSampleSize() {
+     return this.sampleSize;
+ }
+
+ public Mapper infer(ValueFetcher valueFetcher) throws IOException {
+ RandomSourceValuesGenerator valuesGenerator = new RandomSourceValuesGenerator(sampleSize, indexReader, valueFetcher);
+ Mapper inferredMapper = null;
+ while (inferredMapper == null && valuesGenerator.hasNext()) {
+ List