Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
[run]
# Coverage measurement configuration
source = .
omit =
    # Exclude test files
    test_*.py
    # Exclude preload script
    preload_models.py
    # Exclude virtual environments
    venv/*
    .venv/*
    env/*
    # Exclude system/package files
    */site-packages/*
    */dist-packages/*

[report]
# Reporting options
precision = 2
show_missing = True
skip_covered = False

# Lines matching any of these regexes are excluded from coverage.
# NOTE: coverage searches each regex as a substring of the line.
exclude_lines =
    # Default excludes
    pragma: no cover
    def __repr__
    raise AssertionError
    raise NotImplementedError
    if __name__ == .__main__.:
    if TYPE_CHECKING:
    # Match both @abstractmethod and @abc.abstractmethod.
    # (A bare "@abstract" missed the qualified @abc.abstractmethod form and
    # would also match any unrelated decorator whose name starts with
    # "abstract".)
    @(abc\.)?abstractmethod

[html]
directory = htmlcov
94 changes: 94 additions & 0 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
name: Build and Publish Docker Image

on:
  push:
    branches: [ main ]
    tags: [ 'v*.*.*' ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:
    inputs:
      embedding_model:
        description: 'Embedding model to use'
        required: false
        default: 'multi-qa-MiniLM-L6-cos-v1'
      tokenizer_model:
        description: 'Tokenizer model to use'
        required: false
        default: 'sentence-transformers/multi-qa-MiniLM-L6-cos-v1'

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
      # attest-build-provenance requires BOTH id-token: write and
      # attestations: write; without the latter the attestation step fails.
      id-token: write
      attestations: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Login is skipped on PRs: the image is only loaded locally, not pushed.
      - name: Log into registry ${{ env.REGISTRY }}
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract Docker metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=semver,pattern={{major}}
            type=sha,prefix=sha-
            type=raw,value=latest,enable={{is_default_branch}}

      # Dispatch inputs are routed through `env:` rather than being
      # interpolated with ${{ }} directly inside the `run:` script — direct
      # interpolation of user-controlled input into a shell script is a
      # script-injection hazard. The `inputs` context is empty on push/PR
      # events, so the shell ${VAR:-default} fallback reproduces the original
      # event-based defaults exactly.
      - name: Set build args
        id: build-args
        env:
          INPUT_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
          INPUT_TOKENIZER_MODEL: ${{ inputs.tokenizer_model }}
        run: |
          echo "EMBEDDING_MODEL=${INPUT_EMBEDDING_MODEL:-multi-qa-MiniLM-L6-cos-v1}" >> "$GITHUB_ENV"
          echo "TOKENIZER_MODEL=${INPUT_TOKENIZER_MODEL:-sentence-transformers/multi-qa-MiniLM-L6-cos-v1}" >> "$GITHUB_ENV"

      # PRs build and load the image locally (smoke test); other events push.
      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v5
        with:
          context: .
          push: ${{ github.event_name != 'pull_request' }}
          load: ${{ github.event_name == 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          build-args: |
            EMBEDDING_MODEL=${{ env.EMBEDDING_MODEL }}
            TOKENIZER_MODEL=${{ env.TOKENIZER_MODEL }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          platforms: linux/amd64

      - name: Generate artifact attestation
        if: github.event_name != 'pull_request'
        uses: actions/attest-build-provenance@v1
        with:
          subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          subject-digest: ${{ steps.build-and-push.outputs.digest }}
          push-to-registry: true
62 changes: 62 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
name: Tests

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main, develop ]
  workflow_dispatch:

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted so 3.10 is not parsed as the float 3.1
        python-version: ["3.9", "3.10", "3.11"]

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements-dev.txt

      - name: Lint with flake8 (optional)
        run: |
          # Install flake8 for basic linting
          pip install flake8
          # Stop the build if there are Python syntax errors or undefined names
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude=venv,env,.venv,.git,__pycache__
          # Exit-zero treats all errors as warnings
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude=venv,env,.venv,.git,__pycache__
        continue-on-error: true

      # NOTE(review): the coverage steps below expect coverage.xml / htmlcov,
      # which requires pytest-cov to be configured (e.g. addopts in pytest
      # config or requirements-dev.txt) — confirm against the repo.
      - name: Run tests with pytest
        run: |
          pytest

      # Upload once per matrix run (from the 3.10 job) to avoid duplicates.
      - name: Upload coverage reports
        uses: codecov/codecov-action@v4
        if: matrix.python-version == '3.10'
        with:
          # `files` replaces the `file` input deprecated in codecov-action v4
          files: ./coverage.xml
          flags: unittests
          name: codecov-umbrella
          fail_ci_if_error: false
        continue-on-error: true

      - name: Archive coverage report
        uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.10'
        with:
          name: coverage-report
          path: htmlcov/
          retention-days: 30
35 changes: 35 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,36 @@
# Virtual environments (all common directory layouts)
.venv
venv/
env/
ENV/

# Python bytecode and compiled extension artifacts
__pycache__/
*.py[cod]
*$py.class
*.so
.Python

# Testing and coverage output (pytest / coverage.py / hypothesis)
.pytest_cache/
.coverage
.coverage.*
htmlcov/
coverage.xml
*.cover
.hypothesis/

# IDE and editor files
.vscode/
.idea/
*.swp
*.swo
*~

# OS metadata files
.DS_Store
Thumbs.db

# Environment variables (keep secrets out of version control)
.env
.env.local
37 changes: 31 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,12 +1,26 @@
# Use the official Python image as a base image
FROM python:3.10-slim

# Build arguments for model configuration
ARG EMBEDDING_MODEL=multi-qa-MiniLM-L6-cos-v1
ARG TOKENIZER_MODEL=sentence-transformers/multi-qa-MiniLM-L6-cos-v1

# Set environment variables
ENV PYTHONUNBUFFERED 1
ENV PYTHONUNBUFFERED=1
# Set HuggingFace cache directory to bundle models in the image
ENV HF_HOME=/app/.cache/huggingface
ENV TRANSFORMERS_CACHE=/app/.cache/huggingface
ENV SENTENCE_TRANSFORMERS_HOME=/app/.cache/huggingface
# Set model to use at runtime (from build arg)
ENV EMBEDDING_MODEL=${EMBEDDING_MODEL}
ENV TOKENIZER_MODEL=${TOKENIZER_MODEL}

# Set the working directory in the container
WORKDIR /app

# Create cache directory with proper permissions
RUN mkdir -p /app/.cache/huggingface

# Copy the requirements.txt file into the container
COPY requirements.txt .

Expand All @@ -16,13 +30,24 @@ RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://
# Install dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Pre-download models for offline availability
# This must happen BEFORE copying application code to ensure models are cached
COPY preload_models.py .
RUN python preload_models.py "${EMBEDDING_MODEL}" "${TOKENIZER_MODEL}" && rm preload_models.py

# Copy the Python script into the container
COPY embeddings.py .
COPY main.py .

# Run the web service on container startup. Here we use the gunicorn
# webserver, with one worker process and 8 threads.
# For environments with multiple CPU cores, increase the number of workers
# to be equal to the cores available.
# Timeout is set to 0 to disable the timeouts of the workers to allow Cloud Run to handle instance scaling.
CMD exec gunicorn --bind :$PORT --workers 1 --threads 8 --timeout 0 main:app
# webserver with optimized configuration for medium concurrency (10-50 requests).
#
# Configuration:
# - 2 workers: Utilizes multiple CPU cores (each worker loads model separately)
# - 8 threads per worker: Handles concurrent requests (total 16 concurrent capacity)
# - Timeout 0: Allows Cloud Run to handle instance scaling
#
# NOTE: Each worker loads the model independently (~200MB RAM per worker).
# For Cloud Run, ensure you allocate at least 1GB RAM and 2 vCPUs.
# Adjust workers based on your CPU allocation: workers = (2 x $num_cores)
CMD exec gunicorn --bind :$PORT --workers 2 --threads 8 --timeout 0 --worker-class gthread main:app
Loading
Loading