ClaudiaIoana550
/

try1_deploy_falcon

Text Generation

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

ClaudiaIoana550 committed on May 14

Commit

ca05bde

•

1 Parent(s): abe5d1c

Update handler.py

Files changed (1) hide show

handler.py +59 -0

handler.py CHANGED Viewed

@@ -8,6 +8,65 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 # Use bfloat16 only when the current CUDA device reports major compute
 # capability 8; every other device gets float16.
 # NOTE(review): devices with major capability 9 or higher also fall back to
 # float16 here - confirm whether `>= 8` was intended.
 dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
 class EndpointHandler:
   def __init__(self, model_path=""):
     tokenizer=AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

 # Use bfloat16 only when the current CUDA device reports major compute
 # capability 8; every other device gets float16.
 # NOTE(review): devices with major capability 9 or higher also fall back to
 # float16 here - confirm whether `>= 8` was intended.
 dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
+from transformers import (
+    StoppingCriteria,
+    StoppingCriteriaList,
+    pipeline,
+)
+from typing import List
+import torch
+class StopGenerationCriteria(StoppingCriteria):
+    def __init__(self, max_duplicate_sequences=3, max_repeated_words=2):
+        self.generated_sequences = set()
+        self.max_duplicate_sequences = max_duplicate_sequences
+        self.max_repeated_words = max_repeated_words
+    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
+        tokenizer=AutoTokenizer.from_pretrained("ClaudiaIoana550/try1_deploy_falcon", trust_remote_code=True)
+        generated_sequence = input_ids.tolist()
+        if len(generated_sequence[0]) >= 50:
+          sequen = generated_sequence[0][-30:]
+          s_mare = str(generated_sequence[0]).strip("[]")
+          s_mic = str(sequen).strip("[]")
+          count2 = 0
+          if s_mic in s_mare:
+            count2 = sum(1 for i in range(len(generated_sequence[0]) - len(sequen) + 1) if generated_sequence[0][i:i + len(sequen)] == sequen)
+          if count2 >= 2:
+            return True
+        generated_tokens = [tokenizer.decode(token_id) for token_id in input_ids[0]]
+        count = 1
+        prev_token = None
+        for token in generated_tokens:
+            if token == prev_token:
+                count += 1
+                if count > self.max_repeated_words:
+                    return True
+            else:
+                count = 1
+            prev_token = token
+        if len(self.generated_sequences) >= self.max_duplicate_sequences:
+            return True
+        return False
+# Example usage:
+# Define the maximum number of duplicate sequences and repeated words
+max_duplicate_sequences = 1
+max_repeated_words = 2
+# Create an instance of StopGenerationCriteria
+stop_criteria = StopGenerationCriteria(max_duplicate_sequences, max_repeated_words)
+# Add the custom stopping criteria to a StoppingCriteriaList
+stopping_criteria = StoppingCriteriaList([stop_criteria])
 class EndpointHandler:
   def __init__(self, model_path=""):
     tokenizer=AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)