@@ -24,10 +24,10 @@ def __init__(self, embedding_directory=None, tokenizer_data={}):
 
 class Qwen25_7BVLIModel(sd1_clip.SDClipModel):
     def __init__(self, device="cpu", layer="hidden", layer_idx=-1, dtype=None, attention_mask=True, model_options={}):
-        llama_scaled_fp8 = model_options.get("qwen_scaled_fp8", None)
-        if llama_scaled_fp8 is not None:
+        llama_quantization_metadata = model_options.get("llama_quantization_metadata", None)
+        if llama_quantization_metadata is not None:
             model_options = model_options.copy()
-            model_options["scaled_fp8"] = llama_scaled_fp8
+            model_options["quantization_metadata"] = llama_quantization_metadata
         super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"pad": 151643}, layer_norm_hidden_state=False, model_class=Qwen25_7BVLI, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)
 
 
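In this hunk, Qwen25_7BVLIModel.__init__ now reads a "llama_quantization_metadata" entry from model_options and forwards it to the base sd1_clip.SDClipModel under the "quantization_metadata" key, replacing the old "qwen_scaled_fp8"/"scaled_fp8" pair. A minimal sketch of how a caller might supply that option; the metadata payload and the loader that would produce it are hypothetical, not part of this diff:

# Sketch only: the dict value is a placeholder; in practice it would be whatever
# quantization metadata the checkpoint-loading code detects.
quant_metadata = {"format": "scaled_fp8"}  # hypothetical payload

clip_model = Qwen25_7BVLIModel(
    device="cpu",
    dtype=None,
    model_options={"llama_quantization_metadata": quant_metadata},
)
# __init__ copies model_options and re-keys the value as
# model_options["quantization_metadata"] before calling the SDClipModel constructor.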
@@ -56,12 +56,12 @@ def load_sd(self, sd):
         else:
             return super().load_sd(sd)
 
-def te(dtype_llama=None, llama_scaled_fp8=None):
+def te(dtype_llama=None, llama_quantization_metadata=None):
     class Kandinsky5TEModel_(Kandinsky5TEModel):
         def __init__(self, device="cpu", dtype=None, model_options={}):
-            if llama_scaled_fp8 is not None and "scaled_fp8" not in model_options:
+            if llama_quantization_metadata is not None:
                 model_options = model_options.copy()
-                model_options["qwen_scaled_fp8"] = llama_scaled_fp8
+                model_options["llama_quantization_metadata"] = llama_quantization_metadata
             if dtype_llama is not None:
                 dtype = dtype_llama
             super().__init__(device=device, dtype=dtype, model_options=model_options)
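The te() factory gets the same rename: the closure now accepts llama_quantization_metadata and injects it into model_options under that key, which is presumably what the Qwen25_7BVLIModel constructor above then reads. A rough usage sketch, with a hypothetical metadata value standing in for whatever the checkpoint loader provides:

# Sketch only: builds the specialized text-encoder class and instantiates it.
Kandinsky5TE = te(dtype_llama=None,
                  llama_quantization_metadata={"format": "scaled_fp8"})
text_encoder = Kandinsky5TE(device="cpu")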