From 439fc1ebc980fea603b9d450d13fa05a78f9eebe Mon Sep 17 00:00:00 2001
From: loopsaaage <maskerprc@gmail.com>
Date: Tue, 10 Sep 2024 15:22:31 +0800
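Subject: [PATCH] Fix log messages, trim imports, document qdrant setup

* Replace the copy-pasted "Error in vector search" log text in
  embedding.py, text_process.py and vector_search.py with messages that
  match each handler's HTTP error detail.
* Log an error before raising ValueError on a missing collection or an
  empty embedding result.
* Remove imports from vector_search.py and common/utils.py.
* Add qdrant (docker) setup steps to README.CN.md.
* Delete tests/test_health.py.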

---
 README.CN.md                                     | 13 ++++++++++++-
 infra_ai_service/api/ai_enhance/embedding.py     |  5 +++--
 infra_ai_service/api/ai_enhance/text_process.py  |  2 +-
 infra_ai_service/api/ai_enhance/vector_search.py |  7 +++----
 infra_ai_service/api/common/utils.py             |  4 ++--
 tests/test_health.py                             | 15 ---------------
 6 files changed, 21 insertions(+), 25 deletions(-)
 delete mode 100644 tests/test_health.py

diff --git a/README.CN.md b/README.CN.md
index 03b0b35..b236a0f 100644
--- a/README.CN.md
+++ b/README.CN.md
@@ -1,6 +1,5 @@
 # 注意事项
 
-
 ## 前置条件
 - 需要安装本地psql数据库
 ```shell
@@ -12,6 +11,18 @@ ALTER USER postgres PASSWORD 'postgres';
 # 建立数据库（db）
 CREATE DATABASE db;
 ```
+- 需要安装qdrant本地服务
+```shell
+# docker环境准备
+sudo apt update
+sudo apt install docker.io
+sudo systemctl start docker
+sudo systemctl enable docker
+# 安装
+sudo docker run -p 6333:6333 -d --name qdrant qdrant/qdrant
+# 测试
+curl http://localhost:6333
+```
 
 ## 本地启动
 ```shell
diff --git a/infra_ai_service/api/ai_enhance/embedding.py b/infra_ai_service/api/ai_enhance/embedding.py
index 52c13c2..12f5910 100644
--- a/infra_ai_service/api/ai_enhance/embedding.py
+++ b/infra_ai_service/api/ai_enhance/embedding.py
@@ -30,6 +30,7 @@ async def embed_text(input_data: TextInput):
         # 生成嵌入
         embeddings = list(fastembed_model.embed([input_data.content]))
         if not embeddings:
+            logger.error("Failed to generate embedding")
             raise ValueError("Failed to generate embedding")
 
         embedding_vector = embeddings[0]
@@ -51,7 +52,7 @@ async def embed_text(input_data: TextInput):
 
         return EmbeddingOutput(id=point_id, embedding=embedding_vector.tolist())
     except Exception as e:
-        logger.error(f"Error in vector search: {str(e)}", exc_info=True)
+        logger.error(f"Error processing embedding: {str(e)}", exc_info=True)
         raise HTTPException(status_code=400,
                             detail=f"Error processing embedding: {str(e)}")
 
@@ -66,6 +67,6 @@ async def get_collection_status():
             "status": "ready" if collection_info.status == "green" else "not ready"
         }
     except Exception as e:
-        logger.error(f"Error in vector search: {str(e)}", exc_info=True)
+        logger.error(f"Error getting collection status: {str(e)}", exc_info=True)
         raise HTTPException(status_code=400,
                             detail=f"Error getting collection status: {str(e)}")
diff --git a/infra_ai_service/api/ai_enhance/text_process.py b/infra_ai_service/api/ai_enhance/text_process.py
index bdc07b5..8b044d5 100644
--- a/infra_ai_service/api/ai_enhance/text_process.py
+++ b/infra_ai_service/api/ai_enhance/text_process.py
@@ -28,5 +28,5 @@ async def process_text(input_data: TextInput):
         modified_text = clean_text(input_data.content)
         return TextOutput(modified_content=modified_text)
     except Exception as e:
-        logger.error(f"Error in vector search: {str(e)}", exc_info=True)
+        logger.error(f"Error processing text: {str(e)}", exc_info=True)
         raise HTTPException(status_code=400, detail=f"Error processing text: {str(e)}")
diff --git a/infra_ai_service/api/ai_enhance/vector_search.py b/infra_ai_service/api/ai_enhance/vector_search.py
index 75bb4b9..8dcb54b 100644
--- a/infra_ai_service/api/ai_enhance/vector_search.py
+++ b/infra_ai_service/api/ai_enhance/vector_search.py
@@ -1,9 +1,6 @@
 from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel
 from typing import List
-from fastembed.embedding import DefaultEmbedding
-from qdrant_client import QdrantClient
-from qdrant_client.http.models import Distance, VectorParams
 from infra_ai_service.api.common.utils import setup_qdrant_environment
 import logging
 
@@ -35,11 +32,13 @@ async def vector_search(input_data: SearchInput):
     try:
         # 检查集合是否存在
         if not qdrant_client.get_collection(collection_name):
+            logger.error(f"Collection {collection_name} does not exist")
             raise ValueError(f"Collection {collection_name} does not exist")
 
         # 生成查询文本的嵌入
         query_vector = list(fastembed_model.embed([input_data.query_text]))
         if not query_vector:
+            logger.error("Failed to generate query embedding")
             raise ValueError("Failed to generate query embedding")
 
         # 执行向量搜索
@@ -62,5 +61,5 @@ async def vector_search(input_data: SearchInput):
 
         return SearchOutput(results=results)
     except Exception as e:
-        logger.error(f"Error in vector search: {str(e)}", exc_info=True)
+        logger.error(f"Error performing vector search: {str(e)}", exc_info=True)
         raise HTTPException(status_code=500, detail=f"Error performing vector search: {str(e)}")
diff --git a/infra_ai_service/api/common/utils.py b/infra_ai_service/api/common/utils.py
index aa88512..796477d 100644
--- a/infra_ai_service/api/common/utils.py
+++ b/infra_ai_service/api/common/utils.py
@@ -1,7 +1,7 @@
-from fastapi import APIRouter, HTTPException
+from fastapi import HTTPException
 from fastembed.embedding import DefaultEmbedding
 from qdrant_client import QdrantClient
-from qdrant_client.http.models import Distance, VectorParams, PointStruct
+from qdrant_client.http.models import Distance, VectorParams
 
 
 def setup_qdrant_environment():
diff --git a/tests/test_health.py b/tests/test_health.py
deleted file mode 100644
index 8fbf5a8..0000000
--- a/tests/test_health.py
+++ /dev/null
@@ -1,15 +0,0 @@
-import pytest
-from fastapi import FastAPI, status
-from httpx import AsyncClient
-
-@pytest.mark.asyncio
-async def test_health(client: AsyncClient, app: FastAPI) -> None:
-    """
-    Checks the health endpoint.
-
-    :param client: client for the infra_ai_service.
-    :param app: current FastAPI application.
-    """
-    url = app.url_path_for("health")
-    response = await client.get(url)
-    assert response.status_code == status.HTTP_200_OK