aloobun committed
Commit: ade2fdb
Parent(s): 8142f98

Update README.md

Files changed (1): README.md (+75 -1)
README.md CHANGED
@@ -4,4 +4,78 @@ datasets:
  - teknium/openhermes
  language:
  - en
- ---
+ tags:
+ - llama
+ - llama-2
+ - instruct
+ - finetune
+ - OpenHermes
+ ---
+
+ ## llama2-7b-openhermes-15k-mini
+
+ - A 4-bit QLoRA fine-tune of llama-v2-guanaco on the teknium/openhermes dataset (a loading sketch follows below).
+ - Trained on a 15,000-row subset of the dataset.
+
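+ The card does not include loading code; below is a minimal sketch, assuming the fine-tuned weights live on the Hub under this card's repo id (an assumption inferred from the title) and using common 4-bit NF4 settings consistent with a QLoRA fine-tune, not settings confirmed by the card:
+
+ ```
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+
+ # Assumed repo id, inferred from the card title; replace with the actual id.
+ model_id = "aloobun/llama2-7b-openhermes-15k-mini"
+
+ # 4-bit NF4 quantization settings; an assumption matching a typical QLoRA setup.
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.float16,
+ )
+
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_id,
+     quantization_config=bnb_config,
+     device_map="auto",
+ )
+ ```
+
+ The `model` and `tokenizer` objects produced here are what the wrapper in the next section expects.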
+ ## Usage:
+
+ ```
+ from transformers import pipeline, logging
+
+ def text_gen_eval_wrapper(model, tokenizer, prompt, model_id=1, show_metrics=True, temp=0.7, max_length=200):
+     """
+     A wrapper function for inferencing, evaluating, and logging a text generation pipeline.
+
+     Parameters:
+         model (str or object): The model name or the initialized text generation model.
+         tokenizer (str or object): The tokenizer name or the initialized tokenizer for the model.
+         prompt (str): The input prompt text for text generation.
+         model_id (int, optional): An identifier for the model. Defaults to 1.
+         show_metrics (bool, optional): Whether to calculate and show evaluation metrics.
+             Defaults to True.
+         temp (float, optional): The sampling temperature. Defaults to 0.7.
+         max_length (int, optional): The maximum length of the generated text sequence.
+             Defaults to 200.
+
+     Returns:
+         generated_text (str): The text generated by the model.
+         metrics (dict): Evaluation metrics for the generated text (only if show_metrics is True).
+     """
+     # Suppress Hugging Face pipeline logging
+     logging.set_verbosity(logging.CRITICAL)
+
+     # Initialize the pipeline once, honoring the sampling arguments
+     pipe = pipeline(task="text-generation",
+                     model=model,
+                     tokenizer=tokenizer,
+                     max_length=max_length,
+                     do_sample=True,
+                     temperature=temp)
+
+     # Generate text using the llama-2 instruction template
+     result = pipe(f"<s>[INST] {prompt} [/INST]")
+     generated_text = result[0]['generated_text']
+
+     # Find the index of "[/INST] " in the generated text
+     index = generated_text.find("[/INST] ")
+     if index != -1:
+         # Extract the substring after "[/INST] ", i.e. the model's answer
+         substring_after_assistant = generated_text[index + len("[/INST] "):].strip()
+     else:
+         # If "[/INST] " is not found, fall back to the entire generated text
+         substring_after_assistant = generated_text.strip()
+
+     if show_metrics:
+         # Calculate evaluation metrics (run_metrics must be defined elsewhere)
+         metrics = run_metrics(substring_after_assistant, prompt, model_id)
+         return substring_after_assistant, metrics
+     else:
+         return substring_after_assistant
+
+
+ prompt = "Why can camels survive for long without water?"
+ generated_text = text_gen_eval_wrapper(model, tokenizer, prompt, show_metrics=False, max_length=250)
+ print(generated_text)
+ ```
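+
+ Note that `run_metrics` is referenced but never defined in this card. A hypothetical stand-in, included only so the wrapper runs end to end (not the author's implementation), could be:
+
+ ```
+ # Hypothetical placeholder for the undefined run_metrics helper; it reports
+ # crude word-count statistics rather than real evaluation metrics.
+ def run_metrics(generated_text, prompt, model_id):
+     return {
+         "model_id": model_id,
+         "prompt_word_count": len(prompt.split()),
+         "generated_word_count": len(generated_text.split()),
+     }
+ ```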