diff --git a/rag-springai-openai-llm/pom.xml b/rag-springai-openai-llm/pom.xml
index 4f04292..87c257d 100644
--- a/rag-springai-openai-llm/pom.xml
+++ b/rag-springai-openai-llm/pom.xml
@@ -43,7 +43,7 @@
org.springframework.ai
- spring-ai-pdf-document-reader
+ spring-ai-tika-document-reader
org.springframework.retry
diff --git a/rag-springai-openai-llm/src/main/java/com/learning/ai/llmragwithspringai/config/AppConfig.java b/rag-springai-openai-llm/src/main/java/com/learning/ai/llmragwithspringai/config/AppConfig.java
index 4d459fb..6ea1760 100644
--- a/rag-springai-openai-llm/src/main/java/com/learning/ai/llmragwithspringai/config/AppConfig.java
+++ b/rag-springai-openai-llm/src/main/java/com/learning/ai/llmragwithspringai/config/AppConfig.java
@@ -3,8 +3,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.reader.ExtractedTextFormatter;
-import org.springframework.ai.reader.pdf.PagePdfDocumentReader;
-import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
+import org.springframework.ai.reader.tika.TikaDocumentReader;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.beans.factory.annotation.Value;
@@ -18,7 +17,7 @@
public class AppConfig {
private static final Logger log = LoggerFactory.getLogger(AppConfig.class);
- @Value("classpath:Rohit_Gurunath_Sharma.pdf")
+ @Value("classpath:Rohit_Gurunath_Sharma.docx")
private Resource resource;
@Bean
@@ -30,14 +29,11 @@ TokenTextSplitter tokenTextSplitter() {
ApplicationRunner runner(VectorStore vectorStore, JdbcTemplate template, TokenTextSplitter tokenTextSplitter) {
return args -> {
log.info("Loading file(s) as Documents");
- PdfDocumentReaderConfig config = PdfDocumentReaderConfig.builder()
- .withPageExtractedTextFormatter(new ExtractedTextFormatter.Builder()
- .withNumberOfBottomTextLinesToDelete(3)
- .withNumberOfTopPagesToSkipBeforeDelete(1)
- .build())
- .withPagesPerDocument(1)
+ ExtractedTextFormatter textFormatter = ExtractedTextFormatter.builder()
+ .withNumberOfBottomTextLinesToDelete(3)
+ .withNumberOfTopPagesToSkipBeforeDelete(1)
.build();
- PagePdfDocumentReader pagePdfDocumentReader = new PagePdfDocumentReader(resource, config);
+ TikaDocumentReader pagePdfDocumentReader = new TikaDocumentReader(resource, textFormatter);
template.update("delete from vector_store");
vectorStore.accept(tokenTextSplitter.apply(pagePdfDocumentReader.get()));
log.info("Loaded document to database.");
diff --git a/rag-springai-openai-llm/src/main/resources/Rohit_Gurunath_Sharma.docx b/rag-springai-openai-llm/src/main/resources/Rohit_Gurunath_Sharma.docx
new file mode 100644
index 0000000..4bdba1a
Binary files /dev/null and b/rag-springai-openai-llm/src/main/resources/Rohit_Gurunath_Sharma.docx differ
diff --git a/rag-springai-openai-llm/src/main/resources/Rohit_Gurunath_Sharma.pdf b/rag-springai-openai-llm/src/main/resources/Rohit_Gurunath_Sharma.pdf
deleted file mode 100644
index 5ef9e87..0000000
Binary files a/rag-springai-openai-llm/src/main/resources/Rohit_Gurunath_Sharma.pdf and /dev/null differ
diff --git a/rag-springai-openai-llm/src/test/java/com/learning/ai/llmragwithspringai/LlmRagWithSpringAiApplicationIntTest.java b/rag-springai-openai-llm/src/test/java/com/learning/ai/llmragwithspringai/LlmRagWithSpringAiApplicationIntTest.java
index 16b62ae..55d1977 100644
--- a/rag-springai-openai-llm/src/test/java/com/learning/ai/llmragwithspringai/LlmRagWithSpringAiApplicationIntTest.java
+++ b/rag-springai-openai-llm/src/test/java/com/learning/ai/llmragwithspringai/LlmRagWithSpringAiApplicationIntTest.java
@@ -1,9 +1,7 @@
package com.learning.ai.llmragwithspringai;
import static io.restassured.RestAssured.given;
-import static org.hamcrest.Matchers.containsString;
-import static org.hamcrest.Matchers.hasSize;
-import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.*;
import com.learning.ai.llmragwithspringai.config.AbstractIntegrationTest;
import io.restassured.RestAssured;
@@ -30,7 +28,18 @@ void testRag() {
.get("/api/ai/chat")
.then()
.statusCode(200)
- .body("response", containsString("2007 T20 World Cup and the 2013 ICC Champions Trophy"));
+ .body("response", containsString("2007 T20 World Cup"))
+ .body("response", containsString("2013 ICC Champions Trophy"));
+ }
+
+ @Test
+ void testRag2() {
+ given().param("question", "Who is successful IPL captain")
+ .when()
+ .get("/api/ai/chat")
+ .then()
+ .statusCode(200)
+ .body("response", containsString("Rohit Sharma"));
}
@Test