feat-routing (#26)

Browse files

- feat: set adapter based on prompt (71b163e3134eb40c1a6da775ae7d945134e986a7)
- fix: read prompts from config (51411ff21ad7889871b6465f876eb0a0ade0a7d0)
- fix: check for exact task names (b3c540ce62778ffa8b9e0fa69bbbaa1042fc337e)

Co-authored-by: Mohammad Kalim Akram <makram93@users.noreply.huggingface.co>

Files changed (3)

configuration_xlm_roberta.py +2 -0
modeling_lora.py +36 -14
modeling_xlm_roberta.py +3 -9

configuration_xlm_roberta.py CHANGED Viewed

@@ -23,6 +23,7 @@ class XLMRobertaFlashConfig(PretrainedConfig):
             use_cache=True,
             classifier_dropout=None,
             lora_adaptations=None,
             lora_rank=4,
             lora_dropout_p=0.0,
             lora_alpha=1,
@@ -55,6 +56,7 @@ class XLMRobertaFlashConfig(PretrainedConfig):
         self.classifier_dropout = classifier_dropout
         self.load_trained_adapters = load_trained_adapters
         self.lora_adaptations = lora_adaptations
         self.lora_rank = lora_rank
         self.lora_dropout_p = lora_dropout_p
         self.lora_alpha = lora_alpha

             use_cache=True,
             classifier_dropout=None,
             lora_adaptations=None,
+            lora_prompts=None,
             lora_rank=4,
             lora_dropout_p=0.0,
             lora_alpha=1,
         self.classifier_dropout = classifier_dropout
         self.load_trained_adapters = load_trained_adapters
         self.lora_adaptations = lora_adaptations
+        self.lora_prompts = lora_prompts
         self.lora_rank = lora_rank
         self.lora_dropout_p = lora_dropout_p
         self.lora_alpha = lora_alpha

modeling_lora.py CHANGED Viewed

@@ -14,9 +14,6 @@ from transformers import PretrainedConfig
 from .modeling_xlm_roberta import XLMRobertaFlashConfig, XLMRobertaModel, XLMRobertaPreTrainedModel
-LORA_NO_UPDATE = '__lora_no_update__'
 def initialized_weights(
     shape: Tuple[int], num_adaptations: int, init: str = "kaiming"
 ) -> torch.Tensor:
@@ -231,6 +228,16 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
             raise ValueError(
                 f'`lora_adaptations` must be a list and contain at least one element'
             )
         self._adaptation_map = {
             name: idx for idx, name in enumerate(self._lora_adaptations)
         }
@@ -332,9 +339,18 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
                 partial(LoRAParametrization.select_task_for_layer, task_idx=task_idx)
             )
-    def forward(self, *args, task: Union[str, None] = LORA_NO_UPDATE, **kwargs):
-        if task != LORA_NO_UPDATE:
-            self.current_task = task
         return self.roberta(*args, **kwargs)
@@ -355,7 +371,7 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
     def encode(
         self,
         *args,
-        task: Union[str, None] = LORA_NO_UPDATE,
         **kwargs,
     ) -> Union[List[torch.Tensor], np.ndarray, torch.Tensor]:
         """
@@ -364,18 +380,24 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
         task(`str`, *optional*, defaults to `LORA_NO_UPDATE`):
             Specifies the task for which the encoding is intended. This parameter controls the
             use of specialized LoRA adapters that are tuned for specific tasks. If `task` is set
-            to `LORA_NO_UPDATE`, there will be no update to the current task, retaining the
-            existing adapter configuration. If `task` is explicitly set to `None`, all LoRA
-            adapters are disabled, and the model reverts to its original, general-purpose weights.
-            If `task` is set to a specific LoRA adaptation, that adaptation is activated.
         """
-        if task != LORA_NO_UPDATE:
-            if not task:
                 warnings.warn(
                     f"Task-specific embeddings are disabled. To enable, specify the `task` "
                     f"argument with one of the supported tasks: {', '.join(self.config.lora_adaptations)}",
                     category=UserWarning,
                 )
-            self.current_task = task
         return self.roberta.encode(*args, **kwargs)

 from .modeling_xlm_roberta import XLMRobertaFlashConfig, XLMRobertaModel, XLMRobertaPreTrainedModel
 def initialized_weights(
     shape: Tuple[int], num_adaptations: int, init: str = "kaiming"
 ) -> torch.Tensor:
             raise ValueError(
                 f'`lora_adaptations` must be a list and contain at least one element'
             )
+        self._lora_prompts = config.lora_prompts
+        if (
+            not isinstance(self._lora_prompts, dict)
+            or len(self._lora_prompts) != len(self._lora_adaptations)
+            or not all([v in self._lora_adaptations for v in self._lora_prompts.keys()])
+        ):
+            raise ValueError(
+                f'`lora_prompts` must be a dict and contain the same number of elements '
+                f'as `lora_adaptations` with all keys in `lora_prompts` present in `lora_adaptations`.'
+        )
         self._adaptation_map = {
             name: idx for idx, name in enumerate(self._lora_adaptations)
         }
                 partial(LoRAParametrization.select_task_for_layer, task_idx=task_idx)
             )
+    def forward(self, *args, task_type: Union[str, None] = None, **kwargs):
+        if task_type:
+            self.current_task = task_type
+        else:
+            input_ids = kwargs["input_ids"]
+            input_text = self.roberta.tokenizer.decode(input_ids[0], skip_special_tokens=True)
+            for task_name, prompt in self._lora_prompts.items():
+                if input_text.startswith(prompt):
+                    self.current_task = task_name
+                    break
+            else:
+                self.current_task = None  # No task-specific adapter is found, just use the general-purpose weights
         return self.roberta(*args, **kwargs)
     def encode(
         self,
         *args,
+        task_type: Union[str, None] = None,
         **kwargs,
     ) -> Union[List[torch.Tensor], np.ndarray, torch.Tensor]:
         """
         task_type(`str`, *optional*, defaults to `None`):
             Specifies the task for which the encoding is intended. This parameter controls the
             use of specialized LoRA adapters that are tuned for specific tasks. If `task_type` is
+            set to a specific LoRA adaptation, that adaptation is activated. If it is left as
+            `None`, the task is inferred by matching the first input text against the prompts in
+            `lora_prompts`; if no prompt matches, all LoRA adapters are disabled, and the model
+            reverts to its original, general-purpose weights.
         """
+        if task_type:
+            self.current_task = task_type
+        else:  # infer the task from the input text
+            input_text = args[0][0] if isinstance(args[0], list) else args[0]  # take only the first sentence
+            for task_name, prompt in self._lora_prompts.items():
+                if input_text.startswith(prompt):
+                    self.current_task = task_name
+                    break
+            else:
                 warnings.warn(
                     f"Task-specific embeddings are disabled. To enable, specify the `task` "
                     f"argument with one of the supported tasks: {', '.join(self.config.lora_adaptations)}",
                     category=UserWarning,
                 )
+                self.current_task = None  # No task-specific adapter is found, just use the general-purpose weights
         return self.roberta.encode(*args, **kwargs)

modeling_xlm_roberta.py CHANGED Viewed

@@ -21,7 +21,7 @@ import torch.nn.functional as F
 import torch.utils.checkpoint
 from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
 from einops import rearrange
-from transformers import PretrainedConfig
 from transformers.modeling_utils import PreTrainedModel
 from transformers.modeling_outputs import MaskedLMOutput,SequenceClassifierOutput
 from transformers.models.xlm_roberta.modeling_xlm_roberta import XLMRobertaLMHead
@@ -440,7 +440,7 @@ class XLMRobertaModel(XLMRobertaPreTrainedModel):
         self.pooler = XLMRobertaPooler(config) if add_pooling_layer else None
         self.apply(partial(_init_weights, initializer_range=config.initializer_range))
     @torch.inference_mode()
     def encode(
@@ -492,12 +492,6 @@ class XLMRobertaModel(XLMRobertaPreTrainedModel):
             If convert_to_tensor, a stacked tensor is returned.
             If convert_to_numpy, a numpy matrix is returned.
         """
-        from transformers import AutoTokenizer
-        self.tokenizer = AutoTokenizer.from_pretrained(
-            self.name_or_path, trust_remote_code=True
-        )
         is_training = self.training
         self.eval()
@@ -1278,4 +1272,4 @@ class XLMRobertaForSequenceClassification(XLMRobertaPreTrainedModel):
             logits=logits,
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
-        )

 import torch.utils.checkpoint
 from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
 from einops import rearrange
+from transformers import PretrainedConfig, AutoTokenizer
 from transformers.modeling_utils import PreTrainedModel
 from transformers.modeling_outputs import MaskedLMOutput,SequenceClassifierOutput
 from transformers.models.xlm_roberta.modeling_xlm_roberta import XLMRobertaLMHead
         self.pooler = XLMRobertaPooler(config) if add_pooling_layer else None
         self.apply(partial(_init_weights, initializer_range=config.initializer_range))
+        self.tokenizer = AutoTokenizer.from_pretrained(self.name_or_path, trust_remote_code=True)
     @torch.inference_mode()
     def encode(
             If convert_to_tensor, a stacked tensor is returned.
             If convert_to_numpy, a numpy matrix is returned.
         """
         is_training = self.training
         self.eval()
             logits=logits,
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
+        )