Skip to content

Commit 228c898

Browse files
refactor: Move EasyOCR models to lfs (#329)
* Extend LFS support to prebuilt_artifacts/models
* Move EasyOCR model artifacts to Git LFS
* fix: EasyOCR model installation script
* fix: update EasyOCR model paths in installation script
* fix: simplify EasyOCR model installation script by removing existing file check
* Fix easyocr model installation script to work with LFS models
* Update EasyOCR model installation script print statement for clarity
1 parent 2e029b0 commit 228c898

File tree

6 files changed

+19
-16
lines changed

6 files changed

+19
-16
lines changed

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
prebuilt_artifacts/* filter=lfs diff=lfs merge=lfs -text
2+
prebuilt_artifacts/models/* filter=lfs diff=lfs merge=lfs -text

llm-service/runtime.Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ WORKDIR /app
1111
RUN uv sync -n -p /usr/local/bin/python3.12
1212

1313
RUN wget https://corretto.aws/downloads/latest/amazon-corretto-21-x64-linux-jdk.tar.gz -O amazon-corretto-21-x64-linux-jdk.tar.gz
14-
RUN wget https://github.com/cloudera/CML_AMP_RAG_Studio/releases/download/model_download/craft_mlt_25k.pth -O craft_mlt_25k.pth
15-
RUN wget https://github.com/cloudera/CML_AMP_RAG_Studio/releases/download/model_download/latin_g2.pth -O latin_g2.pth
14+
COPY --chown=cdsw:cdsw ../prebuilt_artifacts/models/craft_mlt_25k.pth /app/craft_mlt_25k.pth
15+
COPY --chown=cdsw:cdsw ../prebuilt_artifacts/models/latin_g2.pth /app/latin_g2.pth
1616
RUN wget https://github.com/qdrant/qdrant/releases/download/v1.11.3/qdrant-x86_64-unknown-linux-musl.tar.gz -O qdrant.tar.gz
1717

1818
USER root
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:4a5efbfb48b4081100544e75e1e2b57f8de3d84f213004b14b85fd4b3748db17
3+
size 83152330
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:aaa95be1c4a9cb3496879bed7c520886ce1164f89e026f0c54488394e74e8c55
3+
size 15406141

scripts/01_install_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,4 +64,4 @@
6464
["bash scripts/install_easyocr_model.sh"], shell=True, check=True
6565
)
6666
)
67-
print("Downloading EASYOCR models complete")
67+
print("Installing EASYOCR models complete")

scripts/install_easyocr_model.sh

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -37,20 +37,16 @@
3737
# BUSINESS ADVANTAGE OR UNAVAILABILITY, OR LOSS OR CORRUPTION OF
3838
# DATA.
3939
#
40-
41-
CRAFT_MLT_MODEL_URL=https://github.com/cloudera/CML_AMP_RAG_Studio/releases/download/model_download/craft_mlt_25k.pth
42-
LATIN_G2_URL=https://github.com/cloudera/CML_AMP_RAG_Studio/releases/download/model_download/latin_g2.pth
43-
44-
mkdir -p .EasyOCR/model
45-
cd .EasyOCR/model
46-
47-
if [ -f /app/craft_mlt_25k.pth ]; then
48-
cp /app/craft_mlt_25k.pth craft_mlt_25k.pth
49-
cp /app/latin_g2.pth latin_g2.pth
40+
if [ -z "$IS_COMPOSABLE" ]; then
41+
CRAFT_MLT_MODEL=/home/cdsw/prebuilt_artifacts/models/craft_mlt_25k.pth
42+
LATIN_G2_MODEL=/home/cdsw/prebuilt_artifacts/models/latin_g2.pth
5043
else
51-
wget --no-verbose -O craft_mlt_25k.pth ${CRAFT_MLT_MODEL_URL}
52-
wget --no-verbose -O latin_g2.pth ${LATIN_G2_URL}
44+
CRAFT_MLT_MODEL=/home/cdsw/rag-studio/prebuilt_artifacts/models/craft_mlt_25k.pth
45+
LATIN_G2_MODEL=/home/cdsw/rag-studio/prebuilt_artifacts/models/latin_g2.pth
5346
fi
5447

48+
mkdir -p .EasyOCR/model
49+
cd .EasyOCR/model
5550

56-
51+
cp ${CRAFT_MLT_MODEL} craft_mlt_25k.pth
52+
cp ${LATIN_G2_MODEL} latin_g2.pth

0 commit comments

Comments
 (0)