Files changed (1) hide show
  1. README.md +37 -15
README.md CHANGED
@@ -31,12 +31,37 @@ Run with [Ollama](https://github.com/ollama/ollama)
31
  ollama run NexaAIDev/octopus-v2-Q4_K_M
32
  ```
33
 
34
- # AWQ Quantization
35
- Python example:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  ```python
 
38
  from awq import AutoAWQForCausalLM
39
- from transformers import AutoTokenizer, GemmaForCausalLM
40
  import torch
41
  import time
42
  import numpy as np
@@ -51,28 +76,25 @@ def inference(input_text):
51
  start_time = time.time()
52
  generation_output = model.generate(
53
  tokens,
54
- do_sample=True,
55
- temperature=0.7,
56
- top_p=0.95,
57
- top_k=40,
58
  max_new_tokens=512
59
  )
60
  end_time = time.time()
 
 
61
 
62
- res = tokenizer.decode(generation_output[0])
63
- res = res.split(input_text)
64
  latency = end_time - start_time
65
- output_tokens = tokenizer.encode(res)
66
- num_output_tokens = len(output_tokens)
67
  throughput = num_output_tokens / latency
68
 
69
- return {"output": res[-1], "latency": latency, "throughput": throughput}
70
-
71
 
72
- model_id = "path/to/Octopus-v2-AWQ"
 
 
73
  model = AutoAWQForCausalLM.from_quantized(model_id, fuse_layers=True,
74
  trust_remote_code=False, safetensors=True)
75
- tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=False)
76
 
77
  prompts = ["Below is the query from the users, please call the correct function and generate the parameters to call the function.\n\nQuery: Can you take a photo using the back camera and save it to the default location? \n\nResponse:"]
78
 
 
31
  ollama run NexaAIDev/octopus-v2-Q4_K_M
32
  ```
33
 
34
+ Input example:
35
+
36
+ ```bash
37
+ "Below is the query from the users, please call the correct function and generate the parameters to call the function.\n\nQuery: Take a selfie for me with front camera \n\nResponse:"
38
+ ```
39
+
40
+ Output function example:
41
+
42
+ ```python
43
+ def get_trending_news(category=None, region='US', language='en', max_results=5):
44
+ """
45
+ Fetches trending news articles based on category, region, and language.
46
+
47
+ Parameters:
48
+ - category (str, optional): News category to filter by, by default use None for all categories. Optional to provide.
49
+ - region (str, optional): ISO 3166-1 alpha-2 country code for region-specific news, by default, uses 'US'. Optional to provide.
50
+ - language (str, optional): ISO 639-1 language code for article language, by default uses 'en'. Optional to provide.
51
+ - max_results (int, optional): Maximum number of articles to return, by default, uses 5. Optional to provide.
52
+
53
+ Returns:
54
+ - list[str]: A list of strings, each representing an article. Each string contains the article's heading and URL.
55
+ """
56
+ ```
57
+
58
+ ## AWQ Quantization
59
+
60
+ Input Python example:
61
 
62
  ```python
63
+ from transformers import AutoTokenizer
64
  from awq import AutoAWQForCausalLM
 
65
  import torch
66
  import time
67
  import numpy as np
 
76
  start_time = time.time()
77
  generation_output = model.generate(
78
  tokens,
79
+ do_sample=False,
80
+ temperature=0,
 
 
81
  max_new_tokens=512
82
  )
83
  end_time = time.time()
84
+ generated_sequence = generation_output[:, input_length:].tolist()
85
+ res = tokenizer.decode(generated_sequence[0])
86
 
 
 
87
  latency = end_time - start_time
88
+ num_output_tokens = len(generated_sequence[0])
 
89
  throughput = num_output_tokens / latency
90
 
91
+ return {"output": res, "latency": latency, "throughput": throughput}
 
92
 
93
+ model_id = "NexaAIDev/Octopus-v2-gguf-awq"
94
+ tokenizer = AutoTokenizer.from_pretrained(model_id,
95
+ trust_remote_code=False)
96
  model = AutoAWQForCausalLM.from_quantized(model_id, fuse_layers=True,
97
  trust_remote_code=False, safetensors=True)
 
98
 
99
  prompts = ["Below is the query from the users, please call the correct function and generate the parameters to call the function.\n\nQuery: Can you take a photo using the back camera and save it to the default location? \n\nResponse:"]
100