katuni4ka
/

tiny-random-glm4

Feature Extraction

Model card | Files and versions | Community

fix-generation

#1

by echarlaix (HF staff) - opened Jul 5

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

Files changed (1) hide show

modeling_chatglm.py +11 -3

modeling_chatglm.py CHANGED Viewed

@@ -40,6 +40,9 @@ logger = logging.get_logger(__name__)
 _CHECKPOINT_FOR_DOC = "THUDM/ChatGLM"
 _CONFIG_FOR_DOC = "ChatGLMConfig"
 def default_init(cls, *args, **kwargs):
     return cls(*args, **kwargs)
@@ -809,9 +812,14 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
             standardize_cache_format: bool = False,
     ) -> Dict[str, Any]:
         # update past_key_values
-        model_kwargs["past_key_values"] = self._extract_past_from_model_output(
-            outputs, standardize_cache_format=standardize_cache_format
-        )
         # update attention mask
         if "attention_mask" in model_kwargs:

 _CHECKPOINT_FOR_DOC = "THUDM/ChatGLM"
 _CONFIG_FOR_DOC = "ChatGLMConfig"
+is_transformers_4_42_or_higher = int(transformers.__version__.split(".")[1]) >= 42
 def default_init(cls, *args, **kwargs):
     return cls(*args, **kwargs)
             standardize_cache_format: bool = False,
     ) -> Dict[str, Any]:
         # update past_key_values
+        if is_transformers_4_42_or_higher:
+            model_kwargs["past_key_values"] = self._extract_past_from_model_output(
+                outputs, standardize_cache_format=standardize_cache_format
+            )[1]
+        else:
+            model_kwargs["past_key_values"] = self._extract_past_from_model_output(
+                outputs, standardize_cache_format=standardize_cache_format
+            )
         # update attention mask
         if "attention_mask" in model_kwargs: