Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
[run]
# Coverage measurement configuration
source = .
omit =
    # Exclude test files
    test_*.py
    # Exclude preload script
    preload_models.py
    # Exclude virtual environments
    venv/*
    .venv/*
    env/*
    # Exclude system/package files
    */site-packages/*
    */dist-packages/*

[report]
# Reporting options
precision = 2
show_missing = True
skip_covered = False

# Lines matching any of these regexes are excluded from coverage.
# NOTE: coverage searches each regex as a substring of the line.
exclude_lines =
    # Default excludes
    pragma: no cover
    def __repr__
    raise AssertionError
    raise NotImplementedError
    if __name__ == .__main__.:
    if TYPE_CHECKING:
    # Match both @abstractmethod and @abc.abstractmethod.
    # (A bare "@abstract" missed the qualified @abc.abstractmethod form and
    # would also match any unrelated decorator whose name starts with
    # "abstract".)
    @(abc\.)?abstractmethod

[html]
directory = htmlcov
94 changes: 94 additions & 0 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
name: Build and Publish Docker Image

on:
  push:
    branches: [ main ]
    tags: [ 'v*.*.*' ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:
    inputs:
      embedding_model:
        description: 'Embedding model to use'
        required: false
        default: 'multi-qa-MiniLM-L6-cos-v1'
      tokenizer_model:
        description: 'Tokenizer model to use'
        required: false
        default: 'sentence-transformers/multi-qa-MiniLM-L6-cos-v1'

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
      # attest-build-provenance requires BOTH id-token: write and
      # attestations: write; without the latter the attestation step fails.
      id-token: write
      attestations: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Login is skipped on PRs: the image is only loaded locally, not pushed.
      - name: Log into registry ${{ env.REGISTRY }}
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract Docker metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=semver,pattern={{major}}
            type=sha,prefix=sha-
            type=raw,value=latest,enable={{is_default_branch}}

      # Dispatch inputs are routed through `env:` rather than being
      # interpolated with ${{ }} directly inside the `run:` script — direct
      # interpolation of user-controlled input into a shell script is a
      # script-injection hazard. The `inputs` context is empty on push/PR
      # events, so the shell ${VAR:-default} fallback reproduces the original
      # event-based defaults exactly.
      - name: Set build args
        id: build-args
        env:
          INPUT_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
          INPUT_TOKENIZER_MODEL: ${{ inputs.tokenizer_model }}
        run: |
          echo "EMBEDDING_MODEL=${INPUT_EMBEDDING_MODEL:-multi-qa-MiniLM-L6-cos-v1}" >> "$GITHUB_ENV"
          echo "TOKENIZER_MODEL=${INPUT_TOKENIZER_MODEL:-sentence-transformers/multi-qa-MiniLM-L6-cos-v1}" >> "$GITHUB_ENV"

      # PRs build and load the image locally (smoke test); other events push.
      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v5
        with:
          context: .
          push: ${{ github.event_name != 'pull_request' }}
          load: ${{ github.event_name == 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          build-args: |
            EMBEDDING_MODEL=${{ env.EMBEDDING_MODEL }}
            TOKENIZER_MODEL=${{ env.TOKENIZER_MODEL }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          platforms: linux/amd64

      - name: Generate artifact attestation
        if: github.event_name != 'pull_request'
        uses: actions/attest-build-provenance@v1
        with:
          subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          subject-digest: ${{ steps.build-and-push.outputs.digest }}
          push-to-registry: true
62 changes: 62 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
name: Tests

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main, develop ]
  workflow_dispatch:

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted so 3.10 is not parsed as the float 3.1
        python-version: ["3.9", "3.10", "3.11"]

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements-dev.txt

      - name: Lint with flake8 (optional)
        run: |
          # Install flake8 for basic linting
          pip install flake8
          # Stop the build if there are Python syntax errors or undefined names
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude=venv,env,.venv,.git,__pycache__
          # Exit-zero treats all errors as warnings
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude=venv,env,.venv,.git,__pycache__
        continue-on-error: true

      # NOTE(review): the coverage steps below expect coverage.xml / htmlcov,
      # which requires pytest-cov to be configured (e.g. addopts in pytest
      # config or requirements-dev.txt) — confirm against the repo.
      - name: Run tests with pytest
        run: |
          pytest

      # Upload once per matrix run (from the 3.10 job) to avoid duplicates.
      - name: Upload coverage reports
        uses: codecov/codecov-action@v4
        if: matrix.python-version == '3.10'
        with:
          # `files` replaces the `file` input deprecated in codecov-action v4
          files: ./coverage.xml
          flags: unittests
          name: codecov-umbrella
          fail_ci_if_error: false
        continue-on-error: true

      - name: Archive coverage report
        uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.10'
        with:
          name: coverage-report
          path: htmlcov/
          retention-days: 30
35 changes: 35 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,36 @@
# Virtual environments (all common directory layouts)
.venv
venv/
env/
ENV/

# Python bytecode and compiled extension artifacts
__pycache__/
*.py[cod]
*$py.class
*.so
.Python

# Testing and coverage output (pytest / coverage.py / hypothesis)
.pytest_cache/
.coverage
.coverage.*
htmlcov/
coverage.xml
*.cover
.hypothesis/

# IDE and editor files
.vscode/
.idea/
*.swp
*.swo
*~

# OS metadata files
.DS_Store
Thumbs.db

# Environment variables (keep secrets out of version control)
.env
.env.local
37 changes: 31 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,12 +1,26 @@
# Use the official Python image as a base image
FROM python:3.10-slim

# Build arguments for model configuration
ARG EMBEDDING_MODEL=multi-qa-MiniLM-L6-cos-v1
ARG TOKENIZER_MODEL=sentence-transformers/multi-qa-MiniLM-L6-cos-v1

# Set environment variables
ENV PYTHONUNBUFFERED 1
ENV PYTHONUNBUFFERED=1
# Set HuggingFace cache directory to bundle models in the image
ENV HF_HOME=/app/.cache/huggingface
ENV TRANSFORMERS_CACHE=/app/.cache/huggingface
ENV SENTENCE_TRANSFORMERS_HOME=/app/.cache/huggingface
# Set model to use at runtime (from build arg)
ENV EMBEDDING_MODEL=${EMBEDDING_MODEL}
ENV TOKENIZER_MODEL=${TOKENIZER_MODEL}

# Set the working directory in the container
WORKDIR /app

# Create cache directory with proper permissions
RUN mkdir -p /app/.cache/huggingface

# Copy the requirements.txt file into the container
COPY requirements.txt .

Expand All @@ -16,13 +30,24 @@ RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://
# Install dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Pre-download models for offline availability
# This must happen BEFORE copying application code to ensure models are cached
COPY preload_models.py .
RUN python preload_models.py "${EMBEDDING_MODEL}" "${TOKENIZER_MODEL}" && rm preload_models.py

# Copy the Python script into the container
COPY embeddings.py .
COPY main.py .

# Run the web service on container startup. Here we use the gunicorn
# webserver, with one worker process and 8 threads.
# For environments with multiple CPU cores, increase the number of workers
# to be equal to the cores available.
# Timeout is set to 0 to disable the timeouts of the workers to allow Cloud Run to handle instance scaling.
CMD exec gunicorn --bind :$PORT --workers 1 --threads 8 --timeout 0 main:app
# webserver with optimized configuration for medium concurrency (10-50 requests).
#
# Configuration:
# - 2 workers: Utilizes multiple CPU cores (each worker loads model separately)
# - 8 threads per worker: Handles concurrent requests (total 16 concurrent capacity)
# - Timeout 0: Allows Cloud Run to handle instance scaling
#
# NOTE: Each worker loads the model independently (~200MB RAM per worker).
# For Cloud Run, ensure you allocate at least 1GB RAM and 2 vCPUs.
# Adjust workers based on your CPU allocation: workers = (2 x $num_cores)
CMD exec gunicorn --bind :$PORT --workers 2 --threads 8 --timeout 0 --worker-class gthread main:app
Loading
Loading