Commit 6581122

Merge pull request #1 from JSv4/feature/offline-models-testing-ghcr
Add offline models, test coverage, and GitHub Container Registry support
2 parents 097ddce + 33c8907 commit 6581122

File tree

12 files changed: +1099 −11 lines

.coveragerc

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
[run]
# Coverage configuration
source = .
omit =
    # Exclude test files
    test_*.py
    # Exclude preload script
    preload_models.py
    # Exclude virtual environments
    venv/*
    .venv/*
    env/*
    # Exclude system/package files
    */site-packages/*
    */dist-packages/*

[report]
# Reporting options
precision = 2
show_missing = True
skip_covered = False

# Exclude lines from coverage
exclude_lines =
    # Default excludes
    pragma: no cover
    def __repr__
    raise AssertionError
    raise NotImplementedError
    if __name__ == .__main__.:
    if TYPE_CHECKING:
    @abstract

[html]
directory = htmlcov
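
This configuration is picked up automatically by coverage.py. A minimal local invocation consistent with the CI setup below might look like the following sketch; the pytest-cov flags are an assumption (the pytest/coverage wiring in requirements-dev.txt is not shown in this view), but they produce the coverage.xml and htmlcov/ outputs that the test workflow uploads:

    # Assumes pytest and pytest-cov are provided by requirements-dev.txt
    pip install -r requirements-dev.txt
    pytest --cov=. --cov-report=term-missing --cov-report=xml --cov-report=html
    # -> coverage.xml (Codecov upload) and htmlcov/ (archived artifact)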

.github/workflows/ (Build and Publish Docker Image workflow)

Lines changed: 94 additions & 0 deletions
@@ -0,0 +1,94 @@
name: Build and Publish Docker Image

on:
  push:
    branches: [ main ]
    tags: [ 'v*.*.*' ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:
    inputs:
      embedding_model:
        description: 'Embedding model to use'
        required: false
        default: 'multi-qa-MiniLM-L6-cos-v1'
      tokenizer_model:
        description: 'Tokenizer model to use'
        required: false
        default: 'sentence-transformers/multi-qa-MiniLM-L6-cos-v1'

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
      id-token: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log into registry ${{ env.REGISTRY }}
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract Docker metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=semver,pattern={{major}}
            type=sha,prefix=sha-
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Set build args
        id: build-args
        run: |
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            echo "EMBEDDING_MODEL=${{ github.event.inputs.embedding_model }}" >> $GITHUB_ENV
            echo "TOKENIZER_MODEL=${{ github.event.inputs.tokenizer_model }}" >> $GITHUB_ENV
          else
            echo "EMBEDDING_MODEL=multi-qa-MiniLM-L6-cos-v1" >> $GITHUB_ENV
            echo "TOKENIZER_MODEL=sentence-transformers/multi-qa-MiniLM-L6-cos-v1" >> $GITHUB_ENV
          fi

      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v5
        with:
          context: .
          push: ${{ github.event_name != 'pull_request' }}
          load: ${{ github.event_name == 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          build-args: |
            EMBEDDING_MODEL=${{ env.EMBEDDING_MODEL }}
            TOKENIZER_MODEL=${{ env.TOKENIZER_MODEL }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          platforms: linux/amd64

      - name: Generate artifact attestation
        if: github.event_name != 'pull_request'
        uses: actions/attest-build-provenance@v1
        with:
          subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          subject-digest: ${{ steps.build-and-push.outputs.digest }}
          push-to-registry: true
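
Once a push to main has gone through this workflow, the image can be pulled from GHCR. A sketch of typical usage, with <repository> standing in for the repository name (the exact value of github.repository is not spelled out in this view; GHCR paths are lowercase); PORT is required at runtime because the Dockerfile CMD binds gunicorn to :$PORT:

    # Placeholder image path: substitute the actual repository name
    docker pull ghcr.io/jsv4/<repository>:latest
    docker run -e PORT=8080 -p 8080:8080 ghcr.io/jsv4/<repository>:latest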

.github/workflows/test.yml

Lines changed: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
name: Tests

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main, develop ]
  workflow_dispatch:

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.9", "3.10", "3.11"]

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements-dev.txt

      - name: Lint with flake8 (optional)
        run: |
          # Install flake8 for basic linting
          pip install flake8
          # Stop the build if there are Python syntax errors or undefined names
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude=venv,env,.venv,.git,__pycache__
          # Exit-zero treats all errors as warnings
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude=venv,env,.venv,.git,__pycache__
        continue-on-error: true

      - name: Run tests with pytest
        run: |
          pytest

      - name: Upload coverage reports
        uses: codecov/codecov-action@v4
        if: matrix.python-version == '3.10'
        with:
          file: ./coverage.xml
          flags: unittests
          name: codecov-umbrella
          fail_ci_if_error: false
        continue-on-error: true

      - name: Archive coverage report
        uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.10'
        with:
          name: coverage-report
          path: htmlcov/
          retention-days: 30
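
The same checks can be reproduced locally before pushing; the commands below mirror the workflow steps above, with flake8 installed separately exactly as in the lint step:

    python -m pip install --upgrade pip
    pip install -r requirements-dev.txt flake8
    # Hard-error pass for syntax errors and undefined names, then an advisory pass
    flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude=venv,env,.venv,.git,__pycache__
    flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude=venv,env,.venv,.git,__pycache__
    pytest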

.gitignore

Lines changed: 35 additions & 0 deletions
@@ -1 +1,36 @@
+# Virtual environments
 .venv
+venv/
+env/
+ENV/
+
+# Python artifacts
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+
+# Testing and coverage
+.pytest_cache/
+.coverage
+.coverage.*
+htmlcov/
+coverage.xml
+*.cover
+.hypothesis/
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Environment variables
+.env
+.env.local

Dockerfile

Lines changed: 31 additions & 6 deletions
@@ -1,12 +1,26 @@
 # Use the official Python image as a base image
 FROM python:3.10-slim
 
+# Build arguments for model configuration
+ARG EMBEDDING_MODEL=multi-qa-MiniLM-L6-cos-v1
+ARG TOKENIZER_MODEL=sentence-transformers/multi-qa-MiniLM-L6-cos-v1
+
 # Set environment variables
-ENV PYTHONUNBUFFERED 1
+ENV PYTHONUNBUFFERED=1
+# Set HuggingFace cache directory to bundle models in the image
+ENV HF_HOME=/app/.cache/huggingface
+ENV TRANSFORMERS_CACHE=/app/.cache/huggingface
+ENV SENTENCE_TRANSFORMERS_HOME=/app/.cache/huggingface
+# Set model to use at runtime (from build arg)
+ENV EMBEDDING_MODEL=${EMBEDDING_MODEL}
+ENV TOKENIZER_MODEL=${TOKENIZER_MODEL}
 
 # Set the working directory in the container
 WORKDIR /app
 
+# Create cache directory with proper permissions
+RUN mkdir -p /app/.cache/huggingface
+
 # Copy the requirements.txt file into the container
 COPY requirements.txt .
 
@@ -16,13 +30,24 @@ RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://
 # Install dependencies
 RUN pip install --no-cache-dir -r requirements.txt
 
+# Pre-download models for offline availability
+# This must happen BEFORE copying application code to ensure models are cached
+COPY preload_models.py .
+RUN python preload_models.py "${EMBEDDING_MODEL}" "${TOKENIZER_MODEL}" && rm preload_models.py
+
 # Copy the Python script into the container
 COPY embeddings.py .
 COPY main.py .
 
 # Run the web service on container startup. Here we use the gunicorn
-# webserver, with one worker process and 8 threads.
-# For environments with multiple CPU cores, increase the number of workers
-# to be equal to the cores available.
-# Timeout is set to 0 to disable the timeouts of the workers to allow Cloud Run to handle instance scaling.
-CMD exec gunicorn --bind :$PORT --workers 1 --threads 8 --timeout 0 main:app
+# webserver with optimized configuration for medium concurrency (10-50 requests).
+#
+# Configuration:
+# - 2 workers: Utilizes multiple CPU cores (each worker loads model separately)
+# - 8 threads per worker: Handles concurrent requests (total 16 concurrent capacity)
+# - Timeout 0: Allows Cloud Run to handle instance scaling
+#
+# NOTE: Each worker loads the model independently (~200MB RAM per worker).
+# For Cloud Run, ensure you allocate at least 1GB RAM and 2 vCPUs.
+# Adjust workers based on your CPU allocation: workers = (2 x $num_cores)
+CMD exec gunicorn --bind :$PORT --workers 2 --threads 8 --timeout 0 --worker-class gthread main:app
