traversaal-ai committed
Commit
30fe948
1 Parent(s): 2c99f68

Update run.py

Files changed (1)
run.py +2 -70
run.py CHANGED
@@ -1,70 +1,2 @@
- import os
- import torch
- import gradio as gr
- from threading import Thread
- from transformers import TextIteratorStreamer
- from huggingface_hub import login
- from unsloth import FastLanguageModel
-
- max_seq_length = 2048  # Choose any; Unsloth supports RoPE scaling internally.
- load_in_4bit = True  # Use 4-bit quantization to reduce memory usage. Can be False.
-
- # Authenticate with the Hugging Face Hub to access the model.
- hf_token = os.getenv('hf_token')
- login(token=hf_token)
-
- model, tokenizer = FastLanguageModel.from_pretrained(
-     model_name="traversaal-llm-regional-languages/Unsloth_Urdu_Llama3_1_4bit_PF100",
-     max_seq_length=max_seq_length,
-     dtype=None,  # None auto-detects: float16 for Tesla T4/V100, bfloat16 for Ampere+.
-     load_in_4bit=load_in_4bit,
- )
- FastLanguageModel.for_inference(model)  # Enable Unsloth's fast inference mode.
-
- chat_prompt = """
- ### Instruction:
- You are a chatbot. Provide answers with your best knowledge in Urdu only. Don't say you don't know unless you really don't.
- ### Input:
- {prompt}
- ### Response:
- """
-
- def generate_response(query):
-     prompt = chat_prompt.format(prompt=query)
-     inputs = tokenizer([prompt], return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
-
-     # Stream decoded tokens as they are produced, skipping the prompt and special tokens.
-     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-
-     generation_kwargs = dict(
-         inputs,  # BatchEncoding is a mapping, so its tensors merge into the kwargs.
-         streamer=streamer,
-         max_new_tokens=1024,
-         do_sample=True,
-         top_p=0.95,
-         top_k=50,
-         temperature=0.7,
-         repetition_penalty=1.2,
-     )
-
-     # Run generation in a background thread so the streamer can be consumed here.
-     thread = Thread(target=model.generate, kwargs=generation_kwargs)
-     thread.start()
-
-     generated_text = ""
-     for new_text in streamer:
-         if new_text.endswith(tokenizer.eos_token):
-             new_text = new_text[:len(new_text) - len(tokenizer.eos_token)]
-         generated_text += new_text
-         yield generated_text
-
- iface = gr.Interface(
-     fn=generate_response,
-     inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
-     examples=['میں کراچی جانا چاہتا ہوں، وہاں کے کچھ بہترین مقامات کون سے ہیں؟', 'amazing food locations in Singapore', 'best activities in London'],
-     outputs="text",
-     title="Urdu Chatbot - Powered by traversaal-urdu-llama-3.1-8b",
-     description="Ask me anything in Urdu!",
- )
-
- iface.launch()
 
+ iface = gr.load('traversaal-internal/urdu-llama')
+ iface.launch()
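
With this change the Space no longer runs the model in-process: gr.load mirrors the interface of the hosted traversaal-internal/urdu-llama app, and generation happens upstream. As a minimal sketch, the same upstream Space could also be queried programmatically via gradio_client, assuming it is reachable by the caller and exposes the default /predict endpoint (both assumptions, not confirmed by this commit):

```python
from gradio_client import Client

# Sketch only: call the Space that gr.load() now mirrors.
# Assumes public access and the default "/predict" route; a private
# Space would additionally need hf_token=... when creating the Client.
client = Client("traversaal-internal/urdu-llama")
result = client.predict(
    "میں کراچی جانا چاہتا ہوں، وہاں کے کچھ بہترین مقامات کون سے ہیں؟",
    api_name="/predict",
)
print(result)
```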