From 17fccc9ce780ff1703cdec29b44180bb4a2f0712 Mon Sep 17 00:00:00 2001 From: Preston Vasquez Date: Fri, 8 Nov 2024 11:44:35 -0700 Subject: [PATCH] Add README.md and example --- .../mongovector-vectorstore-example/README.md | 40 +++ .../docker-compose.yml | 19 ++ .../mongovector-vectorstore-example/go.mod | 23 ++ .../mongovector-vectorstore-example/go.sum | 128 +++++++++ .../mongovector_vectorstore_example.go | 242 ++++++++++++++++++ vectorstores/mongovector/README.md | 26 ++ 6 files changed, 478 insertions(+) create mode 100644 examples/mongovector-vectorstore-example/README.md create mode 100644 examples/mongovector-vectorstore-example/docker-compose.yml create mode 100644 examples/mongovector-vectorstore-example/go.mod create mode 100644 examples/mongovector-vectorstore-example/go.sum create mode 100644 examples/mongovector-vectorstore-example/mongovector_vectorstore_example.go create mode 100644 vectorstores/mongovector/README.md diff --git a/examples/mongovector-vectorstore-example/README.md b/examples/mongovector-vectorstore-example/README.md new file mode 100644 index 000000000..d26aef370 --- /dev/null +++ b/examples/mongovector-vectorstore-example/README.md @@ -0,0 +1,40 @@ +# Using MongoDB Atlas as a Vector Store with OpenAI Embeddings + +This project illustrates how to leverage MongoDB as a vector store for performing similarity searches, utilizing OpenAI embeddings within a Go application. It integrates the LangChainGo library, OpenAI's API, and MongoDB to create an efficient vector database for semantic search. + + +For more information on getting started with MongoDB Atlas, visit the [MongoDB Atlas Getting Started Guide](https://www.mongodb.com/products/platform/atlas-database). You can also use the following Docker image to containerize a free (M0) tier: [MongoDB Atlas Local](https://hub.docker.com/r/mongodb/mongodb-atlas-local). + +## What This Tutorial Covers + +1. 
**MongoDB Setup:** + - Connects to a MongoDB Atlas instance using a specified connection string. + - Automatically checks for and creates a vector search index on the collection if it is not already present, ensuring compatibility with OpenAI's embedding model. + +2. **OpenAI Embeddings Initialization:** + - Establishes an embeddings client through the OpenAI API. + - Requires the OpenAI API key to be set as an environment variable for authentication. + +3. **Creating the Vector Store:** + - Connects to the MongoDB database and sets up a vector store that utilizes OpenAI embeddings for document representation. + +4. **Inserting Sample Data:** + - Adds a collection of documents (cities) along with their metadata into the vector store. + - Each document contains information such as the city name, population, and area. + +5. **Executing Similarity Searches:** + - Demonstrates various types of similarity searches, including: + a. A basic search for documents related to "japan". + b. A search for cities in South America that meet a specified score threshold. + c. A search that combines score thresholds with metadata filtering. + +## Running the Example + +1. Configure your environment by setting the MongoDB URI and OpenAI API key: + ```bash + export MONGODB_URI= + export OPENAI_API_KEY= + ``` + +2. To run against the bundled `docker-compose.yml` instead of a hosted cluster, start the local Atlas container with `docker-compose up -d` and set `MONGODB_URI` to `mongodb://localhost:27017/?directConnection=true` (the Go driver rejects a URI without the `mongodb://` scheme). + +3. 
Run the program from this directory: `go run .` diff --git a/examples/mongovector-vectorstore-example/docker-compose.yml b/examples/mongovector-vectorstore-example/docker-compose.yml new file mode 100644 index 000000000..4cf39afca --- /dev/null +++ b/examples/mongovector-vectorstore-example/docker-compose.yml @@ -0,0 +1,19 @@ +version: '3.8' + +services: + mongodb-atlas-local: + image: mongodb/mongodb-atlas-local:latest + container_name: mongodb-atlas-local + ports: + - "27017:27017" + environment: + DO_NOT_TRACK: 1 # Set to 1 to opt out of telemetry + volumes: + - ./init-scripts:/docker-entrypoint-initdb.d # Directory for initialization scripts + - ./logs:/var/log/mongodb # Directory for logs (optional) + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" + diff --git a/examples/mongovector-vectorstore-example/go.mod b/examples/mongovector-vectorstore-example/go.mod new file mode 100644 index 000000000..ecfebcf5b --- /dev/null +++ b/examples/mongovector-vectorstore-example/go.mod @@ -0,0 +1,23 @@ +module github.com/tmc/langchaingo/examples/mongovector-vectorstore-example + +go 1.23.1 + +require ( + github.com/tmc/langchaingo v0.1.13-pre.0 + go.mongodb.org/mongo-driver/v2 v2.0.0-beta1 +) + +require ( + github.com/dlclark/regexp2 v1.10.0 // indirect + github.com/golang/snappy v0.0.4 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/klauspost/compress v1.17.6 // indirect + github.com/pkoukk/tiktoken-go v0.1.6 // indirect + github.com/xdg-go/pbkdf2 v1.0.0 // indirect + github.com/xdg-go/scram v1.1.2 // indirect + github.com/xdg-go/stringprep v1.0.4 // indirect + github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect + golang.org/x/crypto v0.23.0 // indirect + golang.org/x/sync v0.7.0 // indirect + golang.org/x/text v0.15.0 // indirect +) diff --git a/examples/mongovector-vectorstore-example/go.sum b/examples/mongovector-vectorstore-example/go.sum new file mode 100644 index 000000000..09c036566 --- /dev/null +++ 
b/examples/mongovector-vectorstore-example/go.sum @@ -0,0 +1,128 @@ +cloud.google.com/go v0.114.0 h1:OIPFAdfrFDFO2ve2U7r/H5SwSbBzEdrBdE7xkgwc+kY= +cloud.google.com/go v0.114.0/go.mod h1:ZV9La5YYxctro1HTPug5lXH/GefROyW8PPD4T8n9J8E= +cloud.google.com/go/aiplatform v1.68.0 h1:EPPqgHDJpBZKRvv+OsB3cr0jYz3EL2pZ+802rBPcG8U= +cloud.google.com/go/aiplatform v1.68.0/go.mod h1:105MFA3svHjC3Oazl7yjXAmIR89LKhRAeNdnDKJczME= +cloud.google.com/go/auth v0.5.1 h1:0QNO7VThG54LUzKiQxv8C6x1YX7lUrzlAa1nVLF8CIw= +cloud.google.com/go/auth v0.5.1/go.mod h1:vbZT8GjzDf3AVqCcQmqeeM32U9HBFc32vVVAbwDsa6s= +cloud.google.com/go/auth/oauth2adapt v0.2.2 h1:+TTV8aXpjeChS9M+aTtN/TjdQnzJvmzKFt//oWu7HX4= +cloud.google.com/go/auth/oauth2adapt v0.2.2/go.mod h1:wcYjgpZI9+Yu7LyYBg4pqSiaRkfEK3GQcpb7C/uyF1Q= +cloud.google.com/go/compute/metadata v0.3.0 h1:Tz+eQXMEqDIKRsmY3cHTL6FVaynIjX2QxYC4trgAKZc= +cloud.google.com/go/compute/metadata v0.3.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1hT1fIilQDwofLpJ20k= +cloud.google.com/go/iam v1.1.8 h1:r7umDwhj+BQyz0ScZMp4QrGXjSTI3ZINnpgU2nlB/K0= +cloud.google.com/go/iam v1.1.8/go.mod h1:GvE6lyMmfxXauzNq8NbgJbeVQNspG+tcdL/W8QO1+zE= +cloud.google.com/go/longrunning v0.5.7 h1:WLbHekDbjK1fVFD3ibpFFVoyizlLRl73I7YKuAKilhU= +cloud.google.com/go/longrunning v0.5.7/go.mod h1:8GClkudohy1Fxm3owmBGid8W0pSgodEMwEAztp38Xng= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0= +github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= +github.com/go-logr/logr v1.4.1/go.mod 
h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o= +github.com/google/s2a-go v0.1.7/go.mod h1:50CgR4k1jNlWBu4UfS4AcfhVe1r6pdZPygJ3R8F0Qdw= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/googleapis/enterprise-certificate-proxy v0.3.2 h1:Vie5ybvEvT75RniqhfFxPRy3Bf7vr3h0cechB90XaQs= +github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0= +github.com/googleapis/gax-go/v2 v2.12.4 h1:9gWcmF85Wvq4ryPFvGFaOgPIs1AQX0d0bcbGw4Z96qg= +github.com/googleapis/gax-go/v2 v2.12.4/go.mod h1:KYEYLorsnIGDi/rPC8b5TdlB9kbKoFubselGIoBMCwI= +github.com/klauspost/compress v1.17.6 h1:60eq2E/jlfwQXtvZEeBUYADs+BwKBWURIY+Gj2eRGjI= +github.com/klauspost/compress v1.17.6/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= +github.com/pkoukk/tiktoken-go v0.1.6 h1:JF0TlJzhTbrI30wCvFuiw6FzP2+/bR+FIxUdgEAcUsw= +github.com/pkoukk/tiktoken-go 
v0.1.6/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/tmc/langchaingo v0.1.13-pre.0 h1:bmeNREQX433Ys4gggx5AYnJxP/tZX7/vTTMAZbMnbeQ= +github.com/tmc/langchaingo v0.1.13-pre.0/go.mod h1:EeervIv/DNYhSfQSMaql20wMFvhgF7lDaVaatp8lVPw= +github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= +github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= +github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= +github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4= +github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8= +github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= +github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d h1:splanxYIlg+5LfHAM6xpdFEAYOk8iySO56hMFq6uLyA= +github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +go.mongodb.org/mongo-driver/v2 v2.0.0-beta1 h1:vwKMYa9FCX1OW7efPaH0FUaD6o+WC0kiC7VtHtNX7UU= +go.mongodb.org/mongo-driver/v2 v2.0.0-beta1/go.mod h1:pfndQmffp38kKjbwVfoavadsdC0Nsg/qb+INK01PNaM= +go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= +go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.51.0 h1:A3SayB3rNyt+1S6qpI9mHPkeHTZbD7XILEqWnYZb2l0= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc 
v0.51.0/go.mod h1:27iA5uvhuRNmalO+iEUdVn5ZMj2qy10Mm+XRIpRmyuU= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0 h1:Xs2Ncz0gNihqu9iosIZ5SkBbWo5T8JhhLJFMQL1qmLI= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0/go.mod h1:vy+2G/6NvVMpwGX/NyLqcC41fxepnuKHk16E6IZUcJc= +go.opentelemetry.io/otel v1.26.0 h1:LQwgL5s/1W7YiiRwxf03QGnWLb2HW4pLiAhaA5cZXBs= +go.opentelemetry.io/otel v1.26.0/go.mod h1:UmLkJHUAidDval2EICqBMbnAd0/m2vmpf/dAM+fvFs4= +go.opentelemetry.io/otel/metric v1.26.0 h1:7S39CLuY5Jgg9CrnA9HHiEjGMF/X2VHvoXGgSllRz30= +go.opentelemetry.io/otel/metric v1.26.0/go.mod h1:SY+rHOI4cEawI9a7N1A4nIg/nTQXe1ccCNWYOJUrpX4= +go.opentelemetry.io/otel/trace v1.26.0 h1:1ieeAUb4y0TE26jUFrCIXKpTuVK7uJGN9/Z/2LP5sQA= +go.opentelemetry.io/otel/trace v1.26.0/go.mod h1:4iDxvGDQuUkHve82hJJ8UqrwswHYsZuWCBllGV2U2y0= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= +golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= 
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= +golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod 
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/api v0.183.0 h1:PNMeRDwo1pJdgNcFQ9GstuLe/noWKIc89pRWRLMvLwE= +google.golang.org/api v0.183.0/go.mod h1:q43adC5/pHoSZTx5h2mSmdF7NcyfW9JuDyIOJAgS9ZQ= +google.golang.org/genproto v0.0.0-20240528184218-531527333157 h1:u7WMYrIrVvs0TF5yaKwKNbcJyySYf+HAIFXxWltJOXE= +google.golang.org/genproto v0.0.0-20240528184218-531527333157/go.mod h1:ubQlAQnzejB8uZzszhrTCU2Fyp6Vi7ZE5nn0c3W8+qQ= +google.golang.org/genproto/googleapis/api v0.0.0-20240604185151-ef581f913117 h1:+rdxYoE3E5htTEWIe15GlN6IfvbURM//Jt0mmkmm6ZU= +google.golang.org/genproto/googleapis/api v0.0.0-20240604185151-ef581f913117/go.mod h1:OimBR/bc1wPO9iV4NC2bpyjy3VnAwZh5EBPQdtaE5oo= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240604185151-ef581f913117 h1:1GBuWVLM/KMVUv1t1En5Gs+gFZCNd360GGb4sSxtrhU= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240604185151-ef581f913117/go.mod h1:EfXuqaE1J41VCDicxHzUDm+8rk+7ZdXzHV0IhO/I6s0= +google.golang.org/grpc v1.64.0 h1:KH3VH9y/MgNQg1dE7b3XfVK0GsPSIzJwdF617gUSbvY= +google.golang.org/grpc v1.64.0/go.mod h1:oxjF8E3FBnjp+/gVFYdWacaLDx9na1aqy9oovLpxQYg= +google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= +google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= 
+sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= diff --git a/examples/mongovector-vectorstore-example/mongovector_vectorstore_example.go b/examples/mongovector-vectorstore-example/mongovector_vectorstore_example.go new file mode 100644 index 000000000..dbf3af67e --- /dev/null +++ b/examples/mongovector-vectorstore-example/mongovector_vectorstore_example.go @@ -0,0 +1,242 @@ +package main + +import ( + "context" + "fmt" + "log" + "os" + "time" + + "github.com/tmc/langchaingo/embeddings" + "github.com/tmc/langchaingo/llms/openai" + "github.com/tmc/langchaingo/schema" + "github.com/tmc/langchaingo/vectorstores" + "github.com/tmc/langchaingo/vectorstores/mongovector" + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/mongo" + "go.mongodb.org/mongo-driver/v2/mongo/options" +)
+
+// main runs the tutorial and exits with a non-zero status if any step fails.
+func main() {
+	if err := run(context.Background()); err != nil {
+		log.Fatal(err)
+	}
+}
+
+// run executes the tutorial end to end: it validates the environment, connects
+// to MongoDB, makes sure the vector search index exists, stores the sample
+// documents and performs three similarity searches, printing each result.
+//
+// Failures are returned instead of being reported with log.Fatal in place:
+// log.Fatal calls os.Exit, which would skip the deferred client.Disconnect.
+func run(ctx context.Context) (err error) {
+	const (
+		openAIEmbeddingModel = "text-embedding-3-small"
+		openAIEmbeddingDim   = 1536
+		similarityAlgorithm  = "dotProduct"
+		indexDP1536          = "vector_index_dotProduct_1536"
+		databaseName         = "langchaingo-test"
+		collectionName       = "vstore"
+	)
+
+	if os.Getenv("OPENAI_API_KEY") == "" {
+		return fmt.Errorf("OPENAI_API_KEY required for this tutorial")
+	}
+
+	uri := os.Getenv("MONGODB_URI")
+	if uri == "" {
+		return fmt.Errorf("MONGODB_URI required and must point to a MongoDB Atlas Database")
+	}
+
+	// First create a client and ensure that a vector search index that supports
+	// OpenAI's embedding model exists on the example collection.
+	client, err := mongo.Connect(options.Client().ApplyURI(uri))
+	if err != nil {
+		return fmt.Errorf("failed to connect to server: %w", err)
+	}
+
+	defer func() {
+		// Surface a disconnect failure only when nothing else went wrong, so
+		// the first error is the one that gets reported.
+		if derr := client.Disconnect(ctx); derr != nil && err == nil {
+			err = fmt.Errorf("error disconnecting the client: %w", derr)
+		}
+	}()
+
+	coll := client.Database(databaseName).Collection(collectionName)
+
+	exists, err := searchIndexExists(ctx, coll, indexDP1536)
+	if err != nil {
+		return fmt.Errorf("failed to check for the search index: %w", err)
+	}
+
+	if !exists {
+		field := vectorField{
+			Type:          "vector",
+			Path:          "plot_embedding", // Default path
+			NumDimensions: openAIEmbeddingDim,
+			Similarity:    similarityAlgorithm,
+		}
+
+		if _, err := createVectorSearchIndex(ctx, coll, indexDP1536, field); err != nil {
+			return fmt.Errorf("failed to create index: %w", err)
+		}
+	}
+
+	// Create an embeddings client using the OpenAI API. Requires environment
+	// variable OPENAI_API_KEY to be set.
+	llm, err := openai.New(openai.WithEmbeddingModel(openAIEmbeddingModel))
+	if err != nil {
+		return fmt.Errorf("failed to create an embeddings client: %w", err)
+	}
+
+	embedder, err := embeddings.NewEmbedder(llm)
+	if err != nil {
+		return fmt.Errorf("failed to create an embedder: %w", err)
+	}
+
+	// A Store is a wrapper for mongo.Collection, since adding and searching
+	// vectors is collection-specific.
+	store := mongovector.New(coll, embedder, mongovector.WithIndex(indexDP1536))
+
+	// Add documents to the MongoDB Atlas Database vector store.
+	if _, err := store.AddDocuments(ctx, sampleCities()); err != nil {
+		return fmt.Errorf("error adding documents: %w", err)
+	}
+
+	// search runs one similarity search and prints the matching documents.
+	search := func(query string, numDocs int, opts ...vectorstores.Option) error {
+		docs, err := store.SimilaritySearch(ctx, query, numDocs, opts...)
+		if err != nil {
+			return fmt.Errorf("similarity search for %q failed: %w", query, err)
+		}
+
+		fmt.Println(docs)
+
+		return nil
+	}
+
+	// Search for similar documents.
+	if err := search("japan", 1); err != nil {
+		return err
+	}
+
+	// Search for similar documents using score threshold.
+	if err := search("only cities in south america", 10,
+		vectorstores.WithScoreThreshold(0.80)); err != nil {
+		return err
+	}
+
+	// Search for similar documents using score threshold and metadata filter.
+	filter := map[string]any{
+		"$and": []map[string]any{
+			{
+				"area": map[string]any{
+					"$gte": 1000,
+				},
+			},
+			{
+				"population": map[string]any{
+					"$gte": 15.5,
+				},
+			},
+		},
+	}
+
+	return search("only cities in south america", 10,
+		vectorstores.WithScoreThreshold(0.80),
+		vectorstores.WithFilters(filter))
+}
+
+// sampleCities returns the city documents, each with "population" and "area"
+// metadata, that the tutorial stores before searching.
+func sampleCities() []schema.Document {
+	return []schema.Document{
+		{
+			PageContent: "Tokyo",
+			Metadata: map[string]any{
+				"population": 38,
+				"area":       2190,
+			},
+		},
+		{
+			PageContent: "Paris",
+			Metadata: map[string]any{
+				"population": 11,
+				"area":       105,
+			},
+		},
+		{
+			PageContent: "London",
+			Metadata: map[string]any{
+				"population": 9.5,
+				"area":       1572,
+			},
+		},
+		{
+			PageContent: "Santiago",
+			Metadata: map[string]any{
+				"population": 6.9,
+				"area":       641,
+			},
+		},
+		{
+			PageContent: "Buenos Aires",
+			Metadata: map[string]any{
+				"population": 15.5,
+				"area":       203,
+			},
+		},
+		{
+			PageContent: "Rio de Janeiro",
+			Metadata: map[string]any{
+				"population": 13.7,
+				"area":       1200,
+			},
+		},
+		{
+			PageContent: "Sao Paulo",
+			Metadata: map[string]any{
+				"population": 22.6,
+				"area":       1523,
+			},
+		},
+	}
+}
+
+// vectorField defines the fields of an index used for vector search.
+type vectorField struct {
+	Type          string `bson:"type,omitempty"`          // index field type, e.g. "vector"
+	Path          string `bson:"path,omitempty"`          // document field that holds the embedding
+	NumDimensions int    `bson:"numDimensions,omitempty"` // length of the stored embedding vectors
+	Similarity    string `bson:"similarity,omitempty"`    // similarity function, e.g. "dotProduct"
+}
+
+// createVectorSearchIndex creates a vector search index named idxName on coll
+// from the provided fields and returns the name reported by the server. It
+// blocks, polling every five seconds, until the index is listed as queryable.
+//
+// An error is returned if the index cannot be created, if listing the search
+// indexes fails, or if ctx is done before the index becomes queryable.
+func createVectorSearchIndex(
+	ctx context.Context,
+	coll *mongo.Collection,
+	idxName string,
+	fields ...vectorField,
+) (string, error) {
+	const pollInterval = 5 * time.Second
+
+	def := struct {
+		Fields []vectorField `bson:"fields"`
+	}{
+		Fields: fields,
+	}
+
+	view := coll.SearchIndexes()
+
+	siOpts := options.SearchIndexes().SetName(idxName).SetType("vectorSearch")
+	searchName, err := view.CreateOne(ctx, mongo.SearchIndexModel{Definition: def, Options: siOpts})
+	if err != nil {
+		return "", fmt.Errorf("failed to create the search index: %w", err)
+	}
+
+	// Await the creation of the index.
+	for {
+		cursor, err := view.List(ctx, options.SearchIndexes().SetName(searchName))
+		if err != nil {
+			return "", fmt.Errorf("failed to list search indexes: %w", err)
+		}
+
+		// cursor.Current is only valid after Next has returned true.
+		ready := cursor.Next(ctx) && indexReady(cursor.Current, searchName)
+		err = cursor.Err()
+
+		// A fresh cursor is opened on every poll, so release this one now. The
+		// close error is ignored on purpose: the status has already been read.
+		_ = cursor.Close(ctx)
+
+		if err != nil {
+			return "", fmt.Errorf("failed to read search index status: %w", err)
+		}
+
+		if ready {
+			return searchName, nil
+		}
+
+		// Wait before polling again, but give up as soon as ctx is done.
+		select {
+		case <-ctx.Done():
+			return "", fmt.Errorf("waiting for search index %q: %w", searchName, ctx.Err())
+		case <-time.After(pollInterval):
+		}
+	}
+}
+
+// indexReady reports whether doc, a document produced by listing search
+// indexes, describes the index called name and marks it as queryable. Missing
+// or mistyped fields count as "not ready" instead of causing a panic.
+func indexReady(doc bson.Raw, name string) bool {
+	got, ok := doc.Lookup("name").StringValueOK()
+	if !ok || got != name {
+		return false
+	}
+
+	queryable, ok := doc.Lookup("queryable").BooleanOK()
+
+	return ok && queryable
+}
+
+// Check if the search index exists.
+// It reports true only when an index named idx is listed on coll and is marked
+// queryable. An index that is absent, or present but not yet queryable,
+// yields (false, nil); an error is returned only when listing or reading the
+// search indexes fails.
+func searchIndexExists(ctx context.Context, coll *mongo.Collection, idx string) (bool, error) {
+	view := coll.SearchIndexes()
+
+	siOpts := options.SearchIndexes().SetName(idx).SetType("vectorSearch")
+	cursor, err := view.List(ctx, siOpts)
+	if err != nil {
+		return false, fmt.Errorf("failed to list search indexes: %w", err)
+	}
+
+	// Best-effort cleanup: the answer is already known once the cursor has
+	// been read, so a close error is deliberately ignored.
+	defer func() { _ = cursor.Close(ctx) }()
+
+	// cursor.Current is only populated after Next returns true; reading it
+	// before advancing the cursor looks up fields on an empty document.
+	for cursor.Next(ctx) {
+		name, ok := cursor.Current.Lookup("name").StringValueOK()
+		if !ok || name != idx {
+			continue
+		}
+
+		if queryable, ok := cursor.Current.Lookup("queryable").BooleanOK(); ok && queryable {
+			return true, nil
+		}
+	}
+
+	if err := cursor.Err(); err != nil {
+		return false, fmt.Errorf("failed to read search indexes: %w", err)
+	}
+
+	return false, nil
+}
diff --git a/vectorstores/mongovector/README.md b/vectorstores/mongovector/README.md new file mode 100644 index 000000000..ca1a3496d --- /dev/null +++ b/vectorstores/mongovector/README.md @@ -0,0 +1,26 @@ +# MongoVector: MongoDB Vector Store for Embeddings + +`mongovector` provide a way for users to read and write to a [MongoDB Atlas Database](https://www.mongodb.com/products/platform/atlas-database) as a vector store using the [MongoDB Go Driver](https://github.com/mongodb/mongo-go-driver) and a supported embedding service. + +## Project Goals +The goal of this project is to enable users to interact with an Atlas cluster as a vector database. The `mongovector` package is designed to meet the following requirements: +- **Embedding-Agnostic**: The package allows users to embed data using various services, including OpenAI, Ollama, Mistral, and others. +- **VectorStore Interface Implementation**: The package implements the `VectorStore` interface, providing methods to add documents and perform similarity searches. + +## Features + +- **Document Storage**: Easily add documents to the MongoDB vector store with their embeddings. +- **Similarity Search**: Perform similarity searches based on user-defined queries and retrieve relevant documents. +- **Customizable Options**: Configure various options for embedding and searching, including score thresholds and filters. + +## Installation + +To use the `mongovector` package, ensure you have Go installed on your machine. 
You can then install the package using the following command: + +```bash +go get github.com/tmc/langchaingo/vectorstores/mongovector@v0.1.13-pre.0 +``` + +## Docker + +You can also use the following Docker image to containerize a free (M0) tier: [MongoDB Atlas Local](https://hub.docker.com/r/mongodb/mongodb-atlas-local).