
Commit 1a5dde8

utility to generate LoraConfig
Signed-off-by: Sukriti-Sharma4 <[email protected]>
1 parent 2682ef3 commit 1a5dde8

File tree

caikit_nlp/data_model/generation.py
caikit_nlp/modules/text_generation/peft_config.py
caikit_nlp/modules/text_generation/peft_prompt_tuning.py

3 files changed: +130 -66 lines

caikit_nlp/data_model/generation.py

Lines changed: 36 additions & 1 deletion
@@ -15,7 +15,7 @@
 """
 # Standard
 from enum import Enum
-from typing import List
+from typing import List, Union
 
 # First Party
 from caikit.core import DataObjectBase
@@ -73,6 +73,41 @@ class TuningConfig(DataObjectBase):
     # encoder_hidden_size: int # Optional - The hidden size of the prompt encoder.
 
 
+@caikit.core.dataobject(package="caikit_data_model.caikit_nlp")
+class LoraTuningConfig(DataObjectBase):
+    # LoRA attention dimension.
+    r: int
+    # The names of the modules to apply LoRA to.
+    target_modules: Union[List[str], str]
+    # The alpha parameter for LoRA scaling.
+    lora_alpha: int
+    # The dropout probability for LoRA layers.
+    lora_dropout: float
+    # Set this to True if the layer to replace stores weights like (fan_in, fan_out).
+    # For example, gpt-2 uses Conv1D, which stores weights like (fan_in, fan_out),
+    # and hence this should be set to True.
+    fan_in_fan_out: bool
+    # Bias type for LoRA. Can be 'none', 'all' or 'lora_only'.
+    # If 'all' or 'lora_only', the corresponding biases will be updated during training.
+    # Be aware that this means that, even when disabling the adapters,
+    # the model will not produce the same output
+    # as the base model would have without adaptation.
+    bias: str
+    # List of modules apart from LoRA layers to be set as trainable
+    # and saved in the final checkpoint.
+    modules_to_save: List[str]
+    # The layer indexes to transform. If this argument is specified,
+    # the LoRA transformations are applied only
+    # to the layer indexes in this list.
+    # If a single integer is passed,
+    # the LoRA transformation is applied to the layer at that index.
+    layers_to_transform: Union[List[int], int]
+    # The layer pattern name, used only if layers_to_transform is not None and
+    # the layer pattern is not in the common layers pattern.
+    layers_pattern: str
+    output_model_types: List[str]
+
+
 @caikit.core.dataobject(package="caikit_data_model.caikit_nlp")
 class ExponentialDecayLengthPenalty(DataObjectBase):
     start_index: int
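
For orientation, the LoraTuningConfig fields mirror the constructor arguments of Hugging Face's peft.LoraConfig (plus output_model_types, which is Caikit-specific). A minimal sketch of the corresponding peft object, with purely illustrative values rather than defaults from this repo:

from peft import LoraConfig, TaskType

# Illustrative values only; target_modules depends on the base model architecture.
example_lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],
    bias="none",
    task_type=TaskType.CAUSAL_LM,
)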

caikit_nlp/modules/text_generation/peft_config.py

Lines changed: 86 additions & 34 deletions
@@ -18,7 +18,7 @@
 import re
 
 # Third Party
-from peft import MultitaskPromptTuningInit
+from peft import LoraConfig, MultitaskPromptTuningInit
 from transformers import AutoConfig
 
 # First Party
@@ -51,10 +51,10 @@
 class TuningType(str, Enum):
     PROMPT_TUNING = "PROMPT_TUNING"
     MULTITASK_PROMPT_TUNING = "MULTITASK_PROMPT_TUNING"
+    LORA = "LORA"
     # MULTITASK_PREFIX_TUNING = "MULTITASK_PREFIX_TUNING"
     # P_TUNING = "P_TUNING"
     # PREFIX_TUNING = "PREFIX_TUNING"
-    # LORA = "LORA"
 
 
 def resolve_base_model(base_model, cls, torch_dtype):
@@ -99,7 +99,15 @@ def get_peft_config(
     tuning_type, tuning_config, base_model, cls, torch_dtype, verbalizer
 ):
 
-    if tuning_type not in TuningType._member_names_:
+    if isinstance(tuning_type, str):
+        tuning_type = TuningType(tuning_type)
+
+    error.type_check("<NLP65714993E>", TuningType, tuning_type=tuning_type)
+
+    if tuning_type not in [
+        TuningType.PROMPT_TUNING,
+        TuningType.MULTITASK_PROMPT_TUNING,
+    ]:
         raise NotImplementedError("{} tuning type not supported!".format(tuning_type))
 
     if tuning_config.prompt_tuning_init_method:
@@ -147,27 +155,7 @@ def get_peft_config(
     error.type_check("<NLP65714919E>", PretrainedModelBase, base_model=base_model)
 
     # Validate if tuned output model type is compatible with base model or not
-    if not tuning_config.output_model_types:
-        output_model_types = base_model.PROMPT_OUTPUT_TYPES
-    else:
-        # If the first element is not PromptOutputModelType, assume the entire list
-        # isn't and convert
-        if not isinstance(tuning_config.output_model_types[0], PromptOutputModelType):
-            output_model_types = []
-            for output_type in tuning_config.output_model_types:
-                output_model_types.append(PromptOutputModelType(output_type))
-        else:
-            output_model_types = tuning_config.output_model_types
-        error.value_check(
-            "<NLP36947542E>",
-            all(
-                output_type in base_model.PROMPT_OUTPUT_TYPES
-                for output_type in output_model_types
-            ),
-            "{} not supported for base model type {}".format(
-                output_model_types, base_model.MODEL_TYPE
-            ),
-        )
+    output_model_types = _get_output_types(tuning_config, base_model)
 
     error.value_check(
         "<NLP30542004E>",
@@ -185,16 +173,6 @@ def get_peft_config(
     # NOTE: Base model is a resource at this point
     task_type = base_model.TASK_TYPE
 
-    if isinstance(tuning_type, str):
-        error.value_check(
-            "<NLP65714994E>",
-            tuning_type in TuningType._member_names_,
-            f"Invalid tuning type [{tuning_type}]. Allowed types: "
-            f"[{TuningType._member_names_}]",
-        )
-        tuning_type = TuningType(tuning_type)
-    error.type_check("<NLP65714993E>", TuningType, tuning_type=tuning_type)
-
     # Coerce the passed model into a resource; if we have one, this is a noop
     # TODO: When splitting up this mono-module, use the configured resource
     # type of the concrete class to bootstrap
@@ -218,3 +196,77 @@ def get_peft_config(
     )
 
     return task_type, output_model_types, peft_config, tuning_type
+
+
+def _get_output_types(tuning_config, base_model):
+    "Validate and return output_model_types"
+    # Validate if tuned output model type is compatible with base model or not
+    if not tuning_config.output_model_types:
+        output_model_types = base_model.PROMPT_OUTPUT_TYPES
+    else:
+        # If the first element is not PromptOutputModelType, assume the entire list
+        # isn't and convert
+        if not isinstance(tuning_config.output_model_types[0], PromptOutputModelType):
+            output_model_types = []
+            for output_type in tuning_config.output_model_types:
+                output_model_types.append(PromptOutputModelType(output_type))
+        else:
+            output_model_types = tuning_config.output_model_types
+        error.value_check(
+            "<NLP36947542E>",
+            all(
+                output_type in base_model.PROMPT_OUTPUT_TYPES
+                for output_type in output_model_types
+            ),
+            "{} not supported for base model type {}".format(
+                output_model_types, base_model.MODEL_TYPE
+            ),
+        )
+    return output_model_types
+
+
+def _filter_params_for_prompt_config(prompt_config, params):
+    """Utility function to filter out required parameters for prompt_config
+    from `params`
+
+    Args:
+        prompt_config: PromptTuningConfig
+            Tuning config type, e.g., PromptTuningConfig
+        params: dict
+            Dictionary containing all the input training params
+
+    Returns:
+        dict:
+            Dictionary containing required params for prompt_config
+    """
+    # Inspect the underlying dataclass fields; we do this because the common super class
+    # used for multi/vanilla prompt/prefix tuning is a DataClass; we can't use __dict__
+    # because the dataclass fields are omitted.
+    allowed_keys = list(prompt_config.__dataclass_fields__.keys())
+    allowed_params = dict(filter(lambda x: x[0] in allowed_keys, params.items()))
+    log.info(
+        "<NLP18184771I>",
+        "[{}] config params not supported by provided tuning type!".format(
+            params.keys() - allowed_params.keys()
+        ),
+    )
+    return allowed_params
+
+
+def get_lora_config(tuning_type, tuning_config, base_model) -> LoraConfig:
+    """Creates Huggingface LoraConfig from Caikit tuning configuration."""
+    if isinstance(tuning_type, str):
+        tuning_type = TuningType(tuning_type)
+
+    if tuning_type != TuningType.LORA:
+        raise NotImplementedError("{} tuning type not supported!".format(tuning_type))
+
+    error.type_check("<NLP65714919E>", PretrainedModelBase, base_model=base_model)
+    # NOTE: Base model is a resource at this point
+    task_type = base_model.TASK_TYPE
+    config_kwargs = tuning_config.to_dict()
+    log.info("<NLP61012781I>", f"Parameters used: {config_kwargs}")
+    config_params = _filter_params_for_prompt_config(tuning_config, config_kwargs)
+    output_model_types = _get_output_types(tuning_config, base_model)
+    lora_config = LoraConfig(task_type=task_type, **config_params)
+    return task_type, output_model_types, lora_config, tuning_type
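
The _filter_params_for_prompt_config helper (used by get_lora_config above and by create_hf_tuning_config in peft_prompt_tuning.py) relies on the target config being a dataclass, so its accepted keys can be read from __dataclass_fields__. A standalone sketch of that filtering technique, using a hypothetical DemoTuningConfig and params dict for illustration:

from dataclasses import dataclass

@dataclass
class DemoTuningConfig:  # hypothetical stand-in for a tuning config dataclass
    num_virtual_tokens: int = 20
    task_type: str = "CAUSAL_LM"

params = {"num_virtual_tokens": 50, "task_type": "CAUSAL_LM", "learning_rate": 3e-4}
allowed_keys = list(DemoTuningConfig.__dataclass_fields__.keys())
allowed_params = dict(filter(lambda x: x[0] in allowed_keys, params.items()))
# allowed_params == {"num_virtual_tokens": 50, "task_type": "CAUSAL_LM"}
# params.keys() - allowed_params.keys() == {"learning_rate"}  -> logged as unsupported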

caikit_nlp/modules/text_generation/peft_prompt_tuning.py

Lines changed: 8 additions & 31 deletions
@@ -74,7 +74,12 @@
 )
 from ...toolkit.trainer_utils import validate_training_data
 from ...toolkit.verbalizer_utils import render_verbalizer
-from .peft_config import TuningType, get_peft_config, resolve_base_model
+from .peft_config import (
+    TuningType,
+    _filter_params_for_prompt_config,
+    get_peft_config,
+    resolve_base_model,
+)
 
 log = alog.use_channel("PEFT_PROMPT")
 error = error_handler.get(log)
@@ -99,10 +104,10 @@ class PeftPromptTuning(ModuleBase):
     tuning_type_to_huggingface = {
         TuningType.PROMPT_TUNING: PeftType.PROMPT_TUNING,
         TuningType.MULTITASK_PROMPT_TUNING: PeftType.MULTITASK_PROMPT_TUNING,
+        TuningType.LORA: PeftType.LORA,
         # TuningType.MULTITASK_PREFIX_TUNING: PeftType.MULTITASK_PREFIX_TUNING,
         # TuningType.P_TUNING: PeftType.P_TUNING,
         # TuningType.PREFIX_TUNING: PeftType.PREFIX_TUNING,
-        # TuningType.LORA: PeftType.LORA,
     }
 
     RANDOM_SEED = 73
@@ -856,7 +861,7 @@ def create_hf_tuning_config(
         elif tuning_type == TuningType.MULTITASK_PROMPT_TUNING:
             tuning_config_type = MultitaskPromptTuningConfig
 
-        config_params = cls._filter_params_for_prompt_config(
+        config_params = _filter_params_for_prompt_config(
            tuning_config_type, config_kwargs
         )
         log.info("<NLP41038481I>", f"Parameters used: {config_params}")
@@ -1150,34 +1155,6 @@ def _execute_train_loop(
         )
         return {"loss": training_loss_tracker}
 
-    @classmethod
-    def _filter_params_for_prompt_config(cls, prompt_config, params):
-        """Utility function to filter out required parameters for prompt_config
-        from `params`
-
-        Args:
-            prompt_config: PromptTuningConfig
-                Tuning config type, e.g., PromptTuningConfig
-            params: dict
-                Dictionary containing all the input training params
-
-        Returns:
-            dict:
-                Dictionary containing required params for prompt_config
-        """
-        # Inspect the underlying dataclass fields; we do this because the common super class
-        # used for multi/vanilla prompt/prefix tuning is a DataClass; we can't use __dict__
-        # because the dataclass fields are omitted.
-        allowed_keys = list(prompt_config.__dataclass_fields__.keys())
-        allowed_params = dict(filter(lambda x: x[0] in allowed_keys, params.items()))
-        log.info(
-            "<NLP18184771I>",
-            "[{}] config params not supported by provided tuning type!".format(
-                params.keys() - allowed_params.keys()
-            ),
-        )
-        return allowed_params
-
     @staticmethod
     def convert_peft_model_to_type(
         device: str, peft_model: PeftModel, torch_dtype=Union[str, torch.dtype]
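
Taken together, the three files give callers two parallel entry points: get_peft_config for the prompt-tuning variants and get_lora_config for LoRA, both returning (task_type, output_model_types, config, tuning_type). A hypothetical dispatch sketch (not part of this commit; build_tuning_config is an invented name) showing how a caller might choose between them:

from caikit_nlp.modules.text_generation.peft_config import (
    TuningType,
    get_lora_config,
    get_peft_config,
)

def build_tuning_config(
    tuning_type, tuning_config, base_model, cls=None, torch_dtype=None, verbalizer=None
):
    # Hypothetical helper, not defined in this commit.
    if isinstance(tuning_type, str):
        tuning_type = TuningType(tuning_type)
    if tuning_type == TuningType.LORA:
        return get_lora_config(tuning_type, tuning_config, base_model)
    return get_peft_config(
        tuning_type, tuning_config, base_model, cls, torch_dtype, verbalizer
    )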
