1414
1515import logging
1616import os
17+ import subprocess
1718import tempfile
1819import traceback
1920from pathlib import Path
4546logger = logging .getLogger (__name__ )
4647
4748
def ensure_file_downloaded (url : str , target_path : str ) -> None :
    """Download ``url`` to ``target_path`` unless the file already exists.

    The response body is streamed to a temporary file in the destination
    directory and atomically renamed into place, so a failed or interrupted
    transfer never leaves a truncated file at ``target_path`` (which the
    existence check below would otherwise mistake for a complete download).

    Args:
        url: HTTP(S) URL to fetch.
        target_path: Local filesystem path where the file should end up.

    Raises:
        requests.HTTPError: if the server responds with an error status.
        requests.Timeout: if connecting or reading stalls past the timeout.
    """
    if os .path .exists (target_path ):
        return

    # Lazy import: keeps module import cheap for callers that never download.
    import requests

    # stream=True avoids buffering the whole body in memory; the
    # (connect, read) timeout bounds the wait so a stalled server
    # cannot hang the caller forever.
    with requests .get (url , stream = True , timeout = (10 , 300 )) as response :
        response .raise_for_status ()
        # Write to a sibling temp file, then rename. os.replace is atomic on
        # the same filesystem, so readers never observe a half-written file.
        fd , tmp_path = tempfile .mkstemp (dir = os .path .dirname (target_path ) or "." )
        try :
            with os .fdopen (fd , "wb" ) as f :
                for chunk in response .iter_content (chunk_size = 1 << 20 ):
                    f .write (chunk )
            os .replace (tmp_path , target_path )
        except BaseException :
            # Best-effort cleanup of the partial temp file on any failure.
            try :
                os .remove (tmp_path )
            except OSError :
                pass
            raise
59+
60+
4861def write_model_config_files (model : ModelConfig , base_url : str , prod_env_path : Path ) -> None :
4962 """
5063 Write out the necessary model configuration files for HELM.
@@ -56,9 +69,7 @@ def write_model_config_files(model: ModelConfig, base_url: str, prod_env_path: P
5669 model_name : str = model .name
5770 # Use model.path for loading from HuggingFace, fallback to model.name if path is None
5871 model_path_or_name : str = model .path or model .name
59- print (f"Loading tokenizer for model: { model_path_or_name } " , flush = True )
6072 tokenizer = AutoTokenizer .from_pretrained (model_path_or_name , trust_remote_code = True )
61- print (f"Tokenizer loaded, max_length: { tokenizer .model_max_length } " , flush = True )
6273
6374 content : dict = {
6475 "model_deployments" : [
@@ -77,7 +88,6 @@ def write_model_config_files(model: ModelConfig, base_url: str, prod_env_path: P
7788 ]
7889 }
7990 deployments_path = prod_env_path / MODEL_DEPLOYMENTS_FILE_PATH
80- print (f"Writing model_deployments to { deployments_path } " , flush = True )
8191 write_yaml (content , deployments_path )
8292
8393 content = {
@@ -94,7 +104,6 @@ def write_model_config_files(model: ModelConfig, base_url: str, prod_env_path: P
94104 ]
95105 }
96106 metadata_path = prod_env_path / MODEL_METADATA_FILE_PATH
97- print (f"Writing model_metadata to { metadata_path } " , flush = True )
98107 write_yaml (content , metadata_path )
99108
100109 content = {
@@ -123,9 +132,7 @@ def get_runtime_env(self) -> dict:
123132 """
124133 Returns the runtime environment to run the evaluator on the Ray cluster.
125134 """
126- return build_runtime_env_for_packages (
127- extra = ["eval" , "tpu" ], pip_packages = ["crfm-helm@git+https://github.com/stanford-crfm/helm.git@local_vllm" ]
128- )
135+ return build_runtime_env_for_packages (extra = ["eval" , "tpu" ])
129136
130137 def evaluate (
131138 self ,
@@ -155,9 +162,9 @@ def evaluate(
155162 prod_env_path = Path (results_path ) / "prod_env"
156163 results_folder = Path (results_path ) / "run" / "results"
157164
158- try :
159- from helm .common .general import ensure_file_downloaded
165+ subprocess .check_call (["uv" , "pip" , "install" , "crfm-helm@git+https://github.com/stanford-crfm/helm.git" ])
160166
167+ try :
161168 # Download the run_entries files and schema files for the specified evals
162169 assert len (evals ) > 0 , "Please specify at least one eval to run."
163170 run_entries_files : list [str ] = []
@@ -266,9 +273,7 @@ def evaluate(
266273 },
267274 ),
268275 resources = ResourceConfig (cpu = 1 , ram = "4g" , device = CpuConfig (), replicas = 1 ),
269- environment = create_environment (
270- pip_packages = ["crfm-helm@git+https://github.com/stanford-crfm/helm.git@local_vllm" ], extras = ["eval" ]
271- ),
276+ environment = create_environment (extras = ["eval" ]),
272277 )
273278 job_id = cluster .launch (job_request )
274279 logger .info ("Started Helm task with job id %s" , job_id )
0 commit comments