Skip to content

Commit

Permalink
feat: adds test cases and CI
Browse files Browse the repository at this point in the history
  • Loading branch information
rajadilipkolli committed Feb 12, 2024
1 parent d0de0c0 commit 85c3aa7
Show file tree
Hide file tree
Showing 11 changed files with 287 additions and 171 deletions.
39 changes: 39 additions & 0 deletions .github/workflows/llm-rag-with-langchain4j-spring-boot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
name: llm-rag-with-langchain4j-spring-boot CI Build

on:
  push:
    paths:
      - "llm-rag-with-langchain4j-spring-boot/**"
    branches: [main]
  pull_request:
    paths:
      - "llm-rag-with-langchain4j-spring-boot/**"
    types:
      - opened
      - synchronize
      - reopened

jobs:
  build:
    name: Run Unit & Integration Tests
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: llm-rag-with-langchain4j-spring-boot
    strategy:
      matrix:
        distribution: [ 'temurin' ]
        java: [ '21' ]
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis

      - name: Set up JDK ${{ matrix.java }}
        # NOTE(review): the scraped page rendered this ref as "actions/[email protected]"
        # (email obfuscation); the intended action is setup-java pinned to the v4
        # major tag — confirm the exact tag against the repository.
        uses: actions/setup-java@v4
        with:
          java-version: ${{ matrix.java }}
          distribution: ${{ matrix.distribution }}
          cache: 'maven'
      - name: Build and analyze
        run: ./mvnw clean verify
2 changes: 1 addition & 1 deletion ai-opensearch-langchain4j/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
<relativePath/> <!-- lookup parent from repository -->
</parent>

<groupId>org.example</groupId>
<groupId>org.example.ai</groupId>
<artifactId>ai-opensearch-langchain4j</artifactId>
<version>1.0.0-SNAPSHOT</version>

Expand Down
2 changes: 0 additions & 2 deletions llm-rag-with-langchain4j-spring-boot/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,5 @@ services:
environment:
PGADMIN_DEFAULT_EMAIL: ${PGADMIN_DEFAULT_EMAIL:-pgadmin4@pgadmin.org}  # NOTE(review): the scrape obfuscated this default email; confirm against the repo
PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_DEFAULT_PASSWORD:-admin}
volumes:
- ./servers.json:/pgadmin4/servers.json
ports:
- "${PGADMIN_PORT:-5050}:80"
39 changes: 39 additions & 0 deletions llm-rag-with-langchain4j-spring-boot/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

<properties>
<java.version>17</java.version>
<spotless.version>2.43.0</spotless.version>
</properties>

<dependencies>
Expand Down Expand Up @@ -74,6 +75,21 @@
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-testcontainers</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>postgresql</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

<build>
Expand All @@ -86,6 +102,29 @@
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>com.diffplug.spotless</groupId>
<artifactId>spotless-maven-plugin</artifactId>
<version>${spotless.version}</version>
<configuration>
<java>
<palantirJavaFormat>
<version>2.39.0</version>
</palantirJavaFormat>
<importOrder />
<removeUnusedImports />
<formatAnnotations />
</java>
</configuration>
<executions>
<execution>
<phase>compile</phase>
<goals>
<goal>check</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
<repositories>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
package com.learning.ai;

import com.learning.ai.config.CustomerSupportAgent;
import org.springframework.boot.ApplicationRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.Bean;

import java.util.Map;

// Boot entry point for the RAG demo. Also registers a startup runner that
// sends one fixed demo question to the CustomerSupportAgent and prints the
// reply to stdout.
@SpringBootApplication
public class LLMRagWithSpringBoot {

public static void main(String[] args) {
SpringApplication.run(LLMRagWithSpringBoot.class, args);
}

// Runs once after the context starts: asks the RAG-backed agent a canned
// question and prints the answer wrapped in a single-entry map.
@Bean
ApplicationRunner interactiveChatRunner(CustomerSupportAgent agent) {
return args -> {
var response = agent.chat("what should I know about the transition to consumer direct care network washington?");
System.out.println(Map.of("response", response));
};
}
}
package com.learning.ai;

import com.learning.ai.config.CustomerSupportAgent;
import java.util.Map;
import org.springframework.boot.ApplicationRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.Bean;

/**
 * Boot entry point for the LLM RAG demo application.
 */
@SpringBootApplication
public class LLMRagWithSpringBoot {

    public static void main(String[] args) {
        SpringApplication.run(LLMRagWithSpringBoot.class, args);
    }

    /**
     * Fires a single canned question at the support agent once the context
     * is up, then prints the reply (wrapped in a one-entry map) to stdout.
     */
    @Bean
    ApplicationRunner interactiveChatRunner(CustomerSupportAgent agent) {
        return args -> {
            var question = "what should I know about the transition to consumer direct care network washington?";
            var answer = agent.chat(question);
            System.out.println(Map.of("response", answer));
        };
    }
}
Original file line number Diff line number Diff line change
@@ -1,99 +1,112 @@
package com.learning.ai.config;

import com.zaxxer.hikari.HikariDataSource;
import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.DocumentSplitter;
import dev.langchain4j.data.document.parser.apache.pdfbox.ApachePdfBoxDocumentParser;
import dev.langchain4j.data.document.splitter.DocumentSplitters;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.memory.chat.MessageWindowChatMemory;
import dev.langchain4j.model.chat.ChatLanguageModel;
import dev.langchain4j.model.embedding.AllMiniLmL6V2EmbeddingModel;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.model.openai.OpenAiTokenizer;
import dev.langchain4j.rag.content.retriever.ContentRetriever;
import dev.langchain4j.rag.content.retriever.EmbeddingStoreContentRetriever;
import dev.langchain4j.service.AiServices;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.EmbeddingStoreIngestor;
import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.io.Resource;
import org.springframework.core.io.ResourceLoader;

import javax.sql.DataSource;
import java.io.IOException;

import static dev.langchain4j.data.document.loader.FileSystemDocumentLoader.loadDocument;
import static dev.langchain4j.model.openai.OpenAiModelName.GPT_3_5_TURBO;

// Wires the RAG pipeline: the AI service facade, the pgvector embedding
// store (populated from a bundled PDF at startup), and the content retriever.
@Configuration(proxyBeanMethods = false)
public class AIConfig {

// AI service combining the chat model, a 20-message conversation memory
// window, and retrieval-augmented context. Tool support is stubbed out.
@Bean
CustomerSupportAgent customerSupportAgent(ChatLanguageModel chatLanguageModel,
// ChatTools bookingTools,
ContentRetriever contentRetriever) {
return AiServices.builder(CustomerSupportAgent.class)
.chatLanguageModel(chatLanguageModel)
.chatMemory(MessageWindowChatMemory.withMaxMessages(20))
// .tools(bookingTools)
.contentRetriever(contentRetriever)
.build();
}

@Bean
ContentRetriever contentRetriever(EmbeddingStore<TextSegment> embeddingStore, EmbeddingModel embeddingModel) {

// You will need to adjust these parameters to find the optimal setting, which will depend on two main factors:
// - The nature of your data
// - The embedding model you are using
int maxResults = 1;
double minScore = 0.6;

return EmbeddingStoreContentRetriever.builder()
.embeddingStore(embeddingStore)
.embeddingModel(embeddingModel)
.maxResults(maxResults)
.minScore(minScore)
.build();
}

// Local (in-process) embedding model; produces 384-dimensional vectors,
// matching the dimension configured on the store below.
@Bean
EmbeddingModel embeddingModel() {
return new AllMiniLmL6V2EmbeddingModel();
}

@Bean
EmbeddingStore<TextSegment> embeddingStore(EmbeddingModel embeddingModel, ResourceLoader resourceLoader, DataSource dataSource) throws IOException {

// Normally, you would already have your embedding store filled with your data.
// However, for the purpose of this demonstration, we will:

HikariDataSource hikariDataSource = (HikariDataSource) dataSource;
// 1. Create a Postgres embedding store
// dimension of the embedding is 384 (all-minilm) and 1536 (openai)
// NOTE(review): host/port/database are hardcoded here and only credentials
// come from the DataSource — this only works against localhost:5432/vector_store.
EmbeddingStore<TextSegment> embeddingStore = PgVectorEmbeddingStore.builder().host("localhost").port(5432)
.user(hikariDataSource.getUsername()).password(hikariDataSource.getPassword()).database("vector_store")
.table("ai_vector_store").dimension(384).build();

// 2. Load an example document (medicaid-wa-faqs.pdf)
Resource pdfResource = resourceLoader.getResource("classpath:medicaid-wa-faqs.pdf");
Document document = loadDocument(pdfResource.getFile().toPath(), new ApachePdfBoxDocumentParser());

// 3. Split the document into segments 500 tokens each
// 4. Convert segments into embeddings
// 5. Store embeddings into embedding store
// All this can be done manually, but we will use EmbeddingStoreIngestor to automate this:
DocumentSplitter documentSplitter = DocumentSplitters.recursive(500, 0, new OpenAiTokenizer(GPT_3_5_TURBO));
EmbeddingStoreIngestor ingestor = EmbeddingStoreIngestor.builder()
.documentSplitter(documentSplitter)
.embeddingModel(embeddingModel)
.embeddingStore(embeddingStore)
.build();
ingestor.ingest(document);

return embeddingStore;
}
}
package com.learning.ai.config;

import static dev.langchain4j.data.document.loader.FileSystemDocumentLoader.loadDocument;
import static dev.langchain4j.model.openai.OpenAiModelName.GPT_3_5_TURBO;

import com.zaxxer.hikari.HikariDataSource;
import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.DocumentSplitter;
import dev.langchain4j.data.document.parser.apache.pdfbox.ApachePdfBoxDocumentParser;
import dev.langchain4j.data.document.splitter.DocumentSplitters;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.memory.chat.MessageWindowChatMemory;
import dev.langchain4j.model.chat.ChatLanguageModel;
import dev.langchain4j.model.embedding.AllMiniLmL6V2EmbeddingModel;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.model.openai.OpenAiTokenizer;
import dev.langchain4j.rag.content.retriever.ContentRetriever;
import dev.langchain4j.rag.content.retriever.EmbeddingStoreContentRetriever;
import dev.langchain4j.service.AiServices;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.EmbeddingStoreIngestor;
import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore;
import java.io.IOException;
import java.net.URI;
import javax.sql.DataSource;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.io.Resource;
import org.springframework.core.io.ResourceLoader;

/**
 * Wires the RAG pipeline: an AI service backed by a chat model, a pgvector
 * embedding store populated from a bundled PDF at startup, and a content
 * retriever that feeds relevant segments into each conversation.
 */
@Configuration(proxyBeanMethods = false)
public class AIConfig {

    /**
     * AI service facade combining the chat model, a 20-message window of
     * conversation memory, and retrieval-augmented context. Tool support
     * (ChatTools) is wired out for now.
     */
    @Bean
    CustomerSupportAgent customerSupportAgent(
            ChatLanguageModel chatLanguageModel,
            // ChatTools bookingTools,
            ContentRetriever contentRetriever) {
        return AiServices.builder(CustomerSupportAgent.class)
                .chatLanguageModel(chatLanguageModel)
                .chatMemory(MessageWindowChatMemory.withMaxMessages(20))
                // .tools(bookingTools)
                .contentRetriever(contentRetriever)
                .build();
    }

    /**
     * Retriever that returns at most one segment per query, filtered by a
     * minimum relevance score.
     */
    @Bean
    ContentRetriever contentRetriever(EmbeddingStore<TextSegment> embeddingStore, EmbeddingModel embeddingModel) {

        // You will need to adjust these parameters to find the optimal setting, which will depend on two main factors:
        // - The nature of your data
        // - The embedding model you are using
        int maxResults = 1;
        double minScore = 0.6;

        return EmbeddingStoreContentRetriever.builder()
                .embeddingStore(embeddingStore)
                .embeddingModel(embeddingModel)
                .maxResults(maxResults)
                .minScore(minScore)
                .build();
    }

    /**
     * Local (in-process) embedding model producing 384-dimensional vectors,
     * matching the dimension configured on the store below.
     */
    @Bean
    EmbeddingModel embeddingModel() {
        return new AllMiniLmL6V2EmbeddingModel();
    }

    /**
     * Builds a pgvector-backed embedding store whose connection settings are
     * derived from the application's DataSource, then ingests the bundled
     * example PDF into it.
     *
     * @throws IOException if the classpath PDF resource cannot be read
     * @throws IllegalStateException if the JDBC URL cannot be parsed
     */
    @Bean
    EmbeddingStore<TextSegment> embeddingStore(
            EmbeddingModel embeddingModel, ResourceLoader resourceLoader, DataSource dataSource) throws IOException {

        // Normally, you would already have your embedding store filled with your data.
        // However, for the purpose of this demonstration, we will:

        HikariDataSource hikariDataSource = (HikariDataSource) dataSource;
        String jdbcUrl = hikariDataSource.getJdbcUrl();
        // JDBC URLs have the form "jdbc:postgresql://host:port/database".
        // java.net.URI cannot parse the "jdbc:" prefix, so strip it explicitly
        // (was a magic substring(5)) and fail fast with a clear message instead
        // of an opaque StringIndexOutOfBounds/NPE on unexpected URLs.
        if (jdbcUrl == null || !jdbcUrl.startsWith("jdbc:")) {
            throw new IllegalStateException("Unsupported JDBC URL: " + jdbcUrl);
        }
        URI uri = URI.create(jdbcUrl.substring("jdbc:".length()));
        String host = uri.getHost();
        int dbPort = uri.getPort();
        String path = uri.getPath();
        if (host == null || path == null || path.length() < 2) {
            throw new IllegalStateException("Cannot derive host/database from JDBC URL: " + jdbcUrl);
        }
        // 1. Create a Postgres embedding store
        // dimension of the embedding is 384 (all-minilm) and 1536 (openai)
        EmbeddingStore<TextSegment> embeddingStore = PgVectorEmbeddingStore.builder()
                .host(host)
                .port(dbPort != -1 ? dbPort : 5432) // URI reports -1 when the URL omits the port
                .user(hikariDataSource.getUsername())
                .password(hikariDataSource.getPassword())
                .database(path.substring(1)) // drop the leading '/' of the URI path
                .table("ai_vector_store")
                .dimension(384)
                .build();

        // 2. Load an example document (medicaid-wa-faqs.pdf)
        Resource pdfResource = resourceLoader.getResource("classpath:medicaid-wa-faqs.pdf");
        Document document = loadDocument(pdfResource.getFile().toPath(), new ApachePdfBoxDocumentParser());

        // 3. Split the document into segments 500 tokens each
        // 4. Convert segments into embeddings
        // 5. Store embeddings into embedding store
        // All this can be done manually, but we will use EmbeddingStoreIngestor to automate this:
        DocumentSplitter documentSplitter = DocumentSplitters.recursive(500, 0, new OpenAiTokenizer(GPT_3_5_TURBO));
        EmbeddingStoreIngestor ingestor = EmbeddingStoreIngestor.builder()
                .documentSplitter(documentSplitter)
                .embeddingModel(embeddingModel)
                .embeddingStore(embeddingStore)
                .build();
        ingestor.ingest(document);

        return embeddingStore;
    }
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package com.learning.ai.config;

import org.springframework.stereotype.Component;

// Empty placeholder — presumably intended to hold LangChain4j tool methods
// (see the commented-out .tools(bookingTools) wiring in AIConfig); TODO confirm.
@Component
public class ChatTools {
}
package com.learning.ai.config;

import org.springframework.stereotype.Component;

// Empty placeholder — presumably intended to hold LangChain4j tool methods
// (see the commented-out .tools(bookingTools) wiring in AIConfig); TODO confirm.
@Component
public class ChatTools {}
Loading

0 comments on commit 85c3aa7

Please sign in to comment.