
Commit 1aa8e62: 1.3.7 release

skier233 committed Aug 28, 2024
1 parent ba19bcc
Showing 11 changed files with 102 additions and 54 deletions.
20 changes: 10 additions & 10 deletions Dockerfile
@@ -1,5 +1,5 @@
 # Use an official Python runtime as a parent image with CUDA support
-FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04
+FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

 # Set environment variables
 ENV DEBIAN_FRONTEND=noninteractive
@@ -18,24 +18,24 @@ RUN apt-get update && apt-get install -y \
     git \
     && rm -rf /var/lib/apt/lists/*

-# Install Python 3.11
+# Install Python 3.12
 RUN apt-get update && apt-get install -y software-properties-common && \
     add-apt-repository ppa:deadsnakes/ppa && \
-    apt-get update && apt-get install -y python3.11 python3.11-distutils && \
+    apt-get update && apt-get install -y python3.12 python3.12-distutils && \
     wget https://bootstrap.pypa.io/get-pip.py && \
-    python3.11 get-pip.py && \
+    python3.12 get-pip.py && \
     rm get-pip.py

-# Set Python 3.11 as the default
-RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1
+# Set Python 3.12 as the default
+RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1

 # Install pip dependencies
 COPY requirements.txt .
-RUN python3.11 -m pip install -r requirements.txt
+RUN python3.12 -m pip install -r requirements.txt

 # Copy the wheel file and install it
-COPY dist/ai_processing-0.0.0-cp311-cp311-linux_x86_64.whl /tmp/
-RUN python3.11 -m pip install /tmp/ai_processing-0.0.0-cp311-cp311-linux_x86_64.whl
+COPY dist/ai_processing-0.0.0-cp312-cp312-linux_x86_64.whl /tmp/
+RUN python3.12 -m pip install /tmp/ai_processing-0.0.0-cp312-cp312-linux_x86_64.whl

 # Expose the port FastAPI runs on
 EXPOSE 8000
@@ -44,4 +44,4 @@ EXPOSE 8000
 WORKDIR /app

 # Command to run the server.py script
-CMD ["python3.11", "server.py"]
+CMD ["python3.12", "server.py"]
2 changes: 1 addition & 1 deletion config/version.yaml
@@ -1 +1 @@
-VERSION: 1.3.6
+VERSION: 1.3.7
16 changes: 9 additions & 7 deletions environment-linux.yml
@@ -4,18 +4,20 @@ channels:
   - pytorch
   - nvidia
 dependencies:
-  - python=3.11
-  - pytorch=2.3.1
-  - torchvision=0.18.1
-  - pytorch-cuda=12.1
+  - python=3.12
+  - pytorch=2.4.0
+  - torchvision=0.19.0
+  - pytorch-cuda=12.4
   - pyyaml
   - pip
   - pip:
-    - ./dist/ai_processing-0.0.0-cp311-cp311-linux_x86_64.whl # [win]
+    - ./dist/ai_processing-0.0.0-cp312-cp312-linux_x86_64.whl # [win]
     - decord
-    - fastapi==0.111.0
+    - fastapi==0.112.2
     - pycryptodomex==3.20.0
-    - uvicorn==0.29.0
+    - pydantic==2.8.2
+    - pydantic_core==2.20.1
+    - uvicorn
     - requests
     - aiohttp
     - colorlog
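The pins above have to move in lockstep: pytorch 2.4.0 pairs with torchvision 0.19.0 and pytorch-cuda 12.4, and the cp312 wheel tag matches python=3.12. A quick post-install sanity check, as a sketch (expected values taken from the pins above):

    import torch
    import torchvision

    # Run inside the ai_model_server environment after `conda env create`.
    print(torch.__version__)          # expected to start with 2.4.0
    print(torchvision.__version__)    # expected to start with 0.19.0
    print(torch.version.cuda)         # expected: 12.4
    print(torch.cuda.is_available())  # True when the CUDA runtime is healthy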
16 changes: 9 additions & 7 deletions environment-windows.yml
@@ -4,18 +4,20 @@ channels:
   - pytorch
   - nvidia
 dependencies:
-  - python=3.11
-  - pytorch=2.3.1
-  - torchvision=0.18.1
-  - pytorch-cuda=12.1
+  - python=3.12
+  - pytorch=2.4.0
+  - torchvision=0.19.0
+  - pytorch-cuda=12.4
   - pyyaml
   - pip
   - pip:
-    - ./dist/ai_processing-0.0.0-cp311-cp311-win_amd64.whl # [win]
+    - ./dist/ai_processing-0.0.0-cp312-cp312-win_amd64.whl # [win]
     - decord
-    - fastapi==0.111.0
+    - fastapi==0.112.2
     - pycryptodomex==3.20.0
-    - uvicorn==0.29.0
+    - pydantic==2.8.2
+    - pydantic_core==2.20.1
+    - uvicorn
     - requests
     - aiohttp
     - colorlog
2 changes: 2 additions & 0 deletions install.ps1
@@ -1,3 +1,5 @@
+conda deactivate
+conda remove -n ai_model_server --all
 conda env create -f environment-windows.yml
 conda activate ai_model_server
 python server.py
2 changes: 2 additions & 0 deletions install.sh
@@ -1,6 +1,8 @@
 #!/bin/bash

 # Activate conda environment and start server
+conda deactivate
+conda remove -n ai_model_server --all
 conda env create -f environment-linux.yml
 conda activate ai_model_server
 python server.py
9 changes: 9 additions & 0 deletions lib/async_lib/async_processing.py
@@ -56,6 +56,15 @@ def __init__(self, model):
         self.workers_started = False
         self.failed_loading = False

+    def update_values_from_child_model(self):
+        self.instance_count = self.model.instance_count
+        if self.model.max_queue_size is None:
+            self.queue = asyncio.Queue()
+        else:
+            self.queue = asyncio.Queue(maxsize=self.model.max_queue_size)
+        self.max_batch_size = self.model.max_batch_size
+        self.max_batch_waits = self.model.max_batch_waits
+
     async def add_to_queue(self, data):
         await self.queue.put(data)
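One reason update_values_from_child_model builds a fresh queue rather than resizing the old one: asyncio.Queue takes maxsize only at construction and exposes it as a read-only property afterwards. A minimal sketch (sizes assumed for illustration):

    import asyncio

    q = asyncio.Queue(maxsize=2)  # bounded: await q.put(...) waits once 2 items are queued
    # q.maxsize has no setter, so changing the bound means replacing the
    # queue object, which is exactly what update_values_from_child_model does:
    q = asyncio.Queue(maxsize=8)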
4 changes: 3 additions & 1 deletion lib/logging/logger.py
@@ -1,6 +1,7 @@
 import datetime
 import logging
 import os
+import sys
 import colorlog


@@ -17,12 +18,13 @@ def setup_logger(name, level='INFO'):
     # Use the timestamp to create a unique log file for each session
     log_file = f"{log_dir}/log_{timestamp}.log"
     # Create a handler for writing to the log file
-    file_handler = logging.FileHandler(log_file)
+    file_handler = logging.FileHandler(log_file, encoding='utf-8')
     file_formatter = logging.Formatter('%(asctime)s|(%(filename)s)[%(levelname)s]:%(message)s')
     file_handler.setFormatter(file_formatter)

     # Create a handler for writing to the console
     console_handler = logging.StreamHandler()
+    console_handler.setStream(open(sys.stdout.fileno(), mode='w', encoding='utf-8', buffering=1))
     # Define a color scheme for the log levels
     log_colors = {
         'DEBUG': 'cyan',
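Both encoding changes guard against the same failure: on a console or filesystem that defaults to a legacy code page (commonly cp1252 on Windows), logging non-ASCII text raises UnicodeEncodeError. A small sketch of the kind of message this protects, assuming the setup_logger above:

    logger = setup_logger("logger", level="INFO")
    # Without encoding='utf-8' on both handlers, a cp1252 console or log
    # file can fail on non-ASCII characters like these:
    logger.info("Finished processing tag: café ✓")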
36 changes: 22 additions & 14 deletions lib/model/ai_model.py
@@ -16,6 +16,7 @@ def __init__(self, configValues):
         self.model_return_tags = configValues.get("model_return_tags", False)
         self.model_return_confidence = configValues.get("model_return_confidence", False)
         self.device = configValues.get("device", None)
+        self.fill_to_batch = configValues.get("fill_to_batch_size", True)
         if self.model_file_name is None:
             raise ValueError("model_file_name is required for models of type model")
         self.model = None
@@ -24,14 +25,18 @@ def __init__(self, configValues):
         else:
             self.localdevice = torch.device(self.device)

+        self.update_batch_with_mutli_models(1)
+
+    def update_batch_with_mutli_models(self, model_count):
+        batch_multipliers = [1.0, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3]
         if self.batch_size_per_VRAM_GB is not None:
+            batch_size_temp = self.batch_size_per_VRAM_GB * batch_multipliers[model_count - 1]
             gpuMemory = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
-            scaledBatchSize = custom_round(self.batch_size_per_VRAM_GB * gpuMemory)
-            if self.max_model_batch_size == -1:
-                self.max_model_batch_size = scaledBatchSize
-                self.max_batch_size = scaledBatchSize
-                self.max_queue_size = scaledBatchSize
-                self.logger.debug(f"Setting batch size to {scaledBatchSize} based on VRAM size of {gpuMemory} GB")
+            scaledBatchSize = custom_round(batch_size_temp * gpuMemory)
+            self.max_model_batch_size = scaledBatchSize
+            self.max_batch_size = scaledBatchSize
+            self.max_queue_size = scaledBatchSize
+            self.logger.debug(f"Setting batch size to {scaledBatchSize} based on VRAM size of {gpuMemory} GB for model {self.model_file_name}")



@@ -67,8 +72,9 @@ async def worker_function(self, data):

     async def load(self):
         if self.model is None:
+            self.logger.info(f"Loading model {self.model_file_name} with batch size {self.max_model_batch_size}, {self.max_queue_size}, {self.max_batch_size}")
             if self.model_license_name is None:
-                self.model = PythonModel(f"./models/{self.model_file_name}.pt", self.max_model_batch_size, self.device)
+                self.model = PythonModel(f"./models/{self.model_file_name}.pt", self.max_model_batch_size, self.device, self.fill_to_batch)
             else:
                 from ai_processing import ModelRunner
                 self.model = ModelRunner(f"./models/{self.model_file_name}.pt.enc", f"./models/{self.model_license_name}.lic", self.max_model_batch_size, self.device)
@@ -94,11 +100,13 @@ def get_index_to_tag_mapping(path):
     return index_to_tag

 def custom_round(value):
-    # Calculate the difference between the value and the next highest integer
-    difference = -value % 1
-    # If the difference is less than or equal to 0.1, round up
-    if difference <= 0.1:
-        return int(value) + 1
-    # Otherwise, round down
+    if value < 8:
+        return int(value)
+    # Calculate the remainder when the value is divided by 8
+    remainder = int(value) % 8
+    # If the remainder is less than or equal to 5, round down
+    if remainder <= 5:
+        return int(value) - remainder
+    # Otherwise, round up
     else:
-        return int(value)
+        return int(value) + (8 - remainder)
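The rewritten custom_round trades the old near-ceiling rounding for alignment to multiples of 8, which suits GPU batch shapes. A quick sketch of the new behavior (inputs chosen for illustration):

    assert custom_round(6.9) == 6    # below 8: plain truncation
    assert custom_round(13.0) == 8   # 13 % 8 == 5 -> round down
    assert custom_round(14.0) == 16  # 14 % 8 == 6 -> round up
    assert custom_round(48.0) == 48  # already a multiple of 8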
12 changes: 11 additions & 1 deletion lib/model/ai_model_python/python_model.py
@@ -3,9 +3,10 @@
 import torch

 class PythonModel:
-    def __init__(self, path, batch_size, device):
+    def __init__(self, path, batch_size, device, fill_batch_size):
         self.model_path = path
         self.max_batch_size = batch_size
+        self.fill_batch_size = fill_batch_size
         if device:
             self.device = torch.device(device)
         else:
@@ -17,6 +18,12 @@ def __init__(self, path, batch_size, device):

     def run_model(self, preprocessed_images, applySigmoid):
         preprocessed_images = preprocessed_images.to(self.device)
+        original_batch_size = preprocessed_images.size(0)
+        if self.fill_batch_size:
+            if preprocessed_images.size(0) < self.max_batch_size:
+                padding_size = self.max_batch_size - original_batch_size
+                padding = torch.zeros((padding_size, *preprocessed_images.shape[1:]), device=self.device)
+                preprocessed_images = torch.cat([preprocessed_images, padding], dim=0)
         if self.device.type == 'cuda' and preprocessed_images.dtype != torch.float16:
             preprocessed_images = preprocessed_images.half() # Convert to half precision
         with torch.no_grad():
@@ -27,6 +34,9 @@ def run_model(self, preprocessed_images, applySigmoid):
             output = self.model(preprocessed_images)
         if applySigmoid:
             output = torch.sigmoid(output)
+
+        # Remove the outputs corresponding to the padding images
+        output = output[:original_batch_size]
         return output.cpu()

     def process_images(self, preprocessed_images, applySigmoid = True):
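A standalone sketch of the new padding path (shapes and batch size assumed): padding every partial batch up to max_batch_size keeps the tensor shape the model sees constant, and slicing with original_batch_size discards the dummy rows before results are returned.

    import torch

    max_batch_size = 8                    # assumed model batch size
    images = torch.randn(3, 3, 224, 224)  # a partial batch of 3 images
    original = images.size(0)

    padding = torch.zeros((max_batch_size - original, *images.shape[1:]))
    batch = torch.cat([images, padding], dim=0)
    print(batch.shape)                    # torch.Size([8, 3, 224, 224])
    # output = model(batch)[:original]    # run, then keep only the real rows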
37 changes: 24 additions & 13 deletions lib/model/model_manager.py
@@ -12,6 +12,7 @@ class ModelManager:
     def __init__(self):
         self.models = {}
         self.logger = logging.getLogger("logger")
+        self.ai_models = []

     def get_or_create_model(self, modelName):
         if modelName not in self.models:
@@ -23,21 +24,31 @@ def create_model(self, modelName):
             raise ValueError("Model names must be strings that are the name of the model config file!")
         model_config_path = f"./config/models/{modelName}.yaml"
         try:
-            model = model_factory(load_config(model_config_path))
+            model = self.model_factory(load_config(model_config_path))
         except Exception as e:
            self.logger.error(f"Error loading model {model_config_path}: {e}")
             return None
         return model

-def model_factory(model_config):
-    match model_config["type"]:
-        case "video_preprocessor":
-            return ModelProcessor(VideoPreprocessorModel(model_config))
-        case "image_preprocessor":
-            return ModelProcessor(ImagePreprocessorModel(model_config))
-        case "model":
-            return ModelProcessor(AIModel(model_config))
-        case "python":
-            return ModelProcessor(PythonModel(model_config))
-        case _:
-            raise ValueError(f"Model type {model_config['type']} not recognized!")
+    def model_factory(self, model_config):
+        match model_config["type"]:
+            case "video_preprocessor":
+                return ModelProcessor(VideoPreprocessorModel(model_config))
+            case "image_preprocessor":
+                return ModelProcessor(ImagePreprocessorModel(model_config))
+            case "model":
+                model_processor = ModelProcessor(AIModel(model_config))
+                self.ai_models.append(model_processor)
+                model_count = len(self.ai_models)
+                if model_count > 1:
+                    for model_processor in self.ai_models:
+                        ai_model = model_processor.model
+                        ai_model.update_batch_with_mutli_models(model_count)
+                        model_processor.update_values_from_child_model()
+
+
+                return model_processor
+            case "python":
+                return ModelProcessor(PythonModel(model_config))
+            case _:
+                raise ValueError(f"Model type {model_config['type']} not recognized!")
