diff --git a/rag-springai-openai-llm/pom.xml b/rag-springai-openai-llm/pom.xml index 4f04292..87c257d 100644 --- a/rag-springai-openai-llm/pom.xml +++ b/rag-springai-openai-llm/pom.xml @@ -43,7 +43,7 @@ org.springframework.ai - spring-ai-pdf-document-reader + spring-ai-tika-document-reader org.springframework.retry diff --git a/rag-springai-openai-llm/src/main/java/com/learning/ai/llmragwithspringai/config/AppConfig.java b/rag-springai-openai-llm/src/main/java/com/learning/ai/llmragwithspringai/config/AppConfig.java index 4d459fb..6ea1760 100644 --- a/rag-springai-openai-llm/src/main/java/com/learning/ai/llmragwithspringai/config/AppConfig.java +++ b/rag-springai-openai-llm/src/main/java/com/learning/ai/llmragwithspringai/config/AppConfig.java @@ -3,8 +3,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.ai.reader.ExtractedTextFormatter; -import org.springframework.ai.reader.pdf.PagePdfDocumentReader; -import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig; +import org.springframework.ai.reader.tika.TikaDocumentReader; import org.springframework.ai.transformer.splitter.TokenTextSplitter; import org.springframework.ai.vectorstore.VectorStore; import org.springframework.beans.factory.annotation.Value; @@ -18,7 +17,7 @@ public class AppConfig { private static final Logger log = LoggerFactory.getLogger(AppConfig.class); - @Value("classpath:Rohit_Gurunath_Sharma.pdf") + @Value("classpath:Rohit_Gurunath_Sharma.docx") private Resource resource; @Bean @@ -30,14 +29,11 @@ TokenTextSplitter tokenTextSplitter() { ApplicationRunner runner(VectorStore vectorStore, JdbcTemplate template, TokenTextSplitter tokenTextSplitter) { return args -> { log.info("Loading file(s) as Documents"); - PdfDocumentReaderConfig config = PdfDocumentReaderConfig.builder() - .withPageExtractedTextFormatter(new ExtractedTextFormatter.Builder() - .withNumberOfBottomTextLinesToDelete(3) - .withNumberOfTopPagesToSkipBeforeDelete(1) - .build()) - .withPagesPerDocument(1) + ExtractedTextFormatter textFormatter = ExtractedTextFormatter.builder() + .withNumberOfBottomTextLinesToDelete(3) + .withNumberOfTopPagesToSkipBeforeDelete(1) .build(); - PagePdfDocumentReader pagePdfDocumentReader = new PagePdfDocumentReader(resource, config); + TikaDocumentReader pagePdfDocumentReader = new TikaDocumentReader(resource, textFormatter); template.update("delete from vector_store"); vectorStore.accept(tokenTextSplitter.apply(pagePdfDocumentReader.get())); log.info("Loaded document to database."); diff --git a/rag-springai-openai-llm/src/main/resources/Rohit_Gurunath_Sharma.docx b/rag-springai-openai-llm/src/main/resources/Rohit_Gurunath_Sharma.docx new file mode 100644 index 0000000..4bdba1a Binary files /dev/null and b/rag-springai-openai-llm/src/main/resources/Rohit_Gurunath_Sharma.docx differ diff --git a/rag-springai-openai-llm/src/main/resources/Rohit_Gurunath_Sharma.pdf b/rag-springai-openai-llm/src/main/resources/Rohit_Gurunath_Sharma.pdf deleted file mode 100644 index 5ef9e87..0000000 Binary files a/rag-springai-openai-llm/src/main/resources/Rohit_Gurunath_Sharma.pdf and /dev/null differ diff --git a/rag-springai-openai-llm/src/test/java/com/learning/ai/llmragwithspringai/LlmRagWithSpringAiApplicationIntTest.java b/rag-springai-openai-llm/src/test/java/com/learning/ai/llmragwithspringai/LlmRagWithSpringAiApplicationIntTest.java index 16b62ae..55d1977 100644 --- a/rag-springai-openai-llm/src/test/java/com/learning/ai/llmragwithspringai/LlmRagWithSpringAiApplicationIntTest.java +++ b/rag-springai-openai-llm/src/test/java/com/learning/ai/llmragwithspringai/LlmRagWithSpringAiApplicationIntTest.java @@ -1,9 +1,7 @@ package com.learning.ai.llmragwithspringai; import static io.restassured.RestAssured.given; -import static org.hamcrest.Matchers.containsString; -import static org.hamcrest.Matchers.hasSize; -import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.*; import com.learning.ai.llmragwithspringai.config.AbstractIntegrationTest; import io.restassured.RestAssured; @@ -30,7 +28,18 @@ void testRag() { .get("/api/ai/chat") .then() .statusCode(200) - .body("response", containsString("2007 T20 World Cup and the 2013 ICC Champions Trophy")); + .body("response", containsString("2007 T20 World Cup")) + .body("response", containsString("2013 ICC Champions Trophy")); + } + + @Test + void testRag2() { + given().param("question", "Who is successful IPL captain") + .when() + .get("/api/ai/chat") + .then() + .statusCode(200) + .body("response", containsString("Rohit Sharma")); } @Test