kennethli319 and Aleksey Savin committed
Commit 3f7a128 (0 parents)

Duplicate from xsa-face/mpt-7b-instruct


Co-authored-by: Aleksey Savin <xsa-face@users.noreply.huggingface.co>

Files changed (4)
  1. .gitattributes +34 -0
  2. README.md +14 -0
  3. app.py +151 -0
  4. requirements.txt +2 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
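
These attribute rules route matching files through Git LFS, so heavyweight binaries (model weights, archives, serialized tensors) live in the repository as lightweight pointers rather than in the git history itself. As a hedged illustration (not part of this commit), Python's fnmatch approximates how these globs decide which files are LFS-tracked, including the quantized model file this Space downloads:

```python
# Illustrative sketch only: test file names against a few of the LFS patterns
# above. fnmatch approximates gitattributes glob semantics (it does not
# implement git's `**` rules exactly).
from fnmatch import fnmatch

lfs_patterns = ["*.bin", "*.safetensors", "*.ckpt", "*tfevents*"]

def is_lfs_tracked(path: str) -> bool:
    """Return True if the file's base name matches any LFS pattern."""
    name = path.rsplit("/", 1)[-1]
    return any(fnmatch(name, pat) for pat in lfs_patterns)

print(is_lfs_tracked("mpt-7b-instruct-q5_1-ggjt.bin"))  # True
print(is_lfs_tracked("app.py"))                         # False
```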
README.md ADDED
@@ -0,0 +1,14 @@
+ ---
+ title: 'Rustformers: Mpt 7b Instruct'
+ emoji: 🦀
+ colorFrom: yellow
+ colorTo: red
+ sdk: gradio
+ sdk_version: 3.32.0
+ app_file: app.py
+ pinned: false
+ license: mit
+ duplicated_from: xsa-face/mpt-7b-instruct
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
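
The YAML front matter is what actually configures the Space: Gradio 3.32.0 as the SDK, app.py as the entry point, an MIT license, and duplicated_from recording the source Space. As a hedged sketch (not part of this commit, and assuming PyYAML is installed), the block can be read back programmatically:

```python
# Hedged sketch: parse the Space config out of the README front matter.
import yaml  # pip install pyyaml

with open("README.md", encoding="utf-8") as f:
    text = f.read()

# The front matter sits between the first two "---" fences.
_, front_matter, _ = text.split("---", 2)
config = yaml.safe_load(front_matter)

print(config["sdk"], config["sdk_version"])  # gradio 3.32.0
print(config["duplicated_from"])             # xsa-face/mpt-7b-instruct
```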
app.py ADDED
@@ -0,0 +1,151 @@
+ import gradio as gr
+ from llm_rs import AutoModel, SessionConfig, GenerationConfig
+
+ repo_name = "rustformers/mpt-7b-ggml"
+ file_name = "mpt-7b-instruct-q5_1-ggjt.bin"
+
+ examples = [
+     "Write a travel blog about a 3-day trip to Thailand.",
+     "Tell me a short story about a robot that has a nice day.",
+     "Compose a tweet to congratulate rustformers on the launch of their HuggingFace Space.",
+     "Explain how a candle works to a 6-year-old in a few sentences.",
+     "What are some of the most common misconceptions about birds?",
+     "Explain why the Rust programming language is so popular.",
+ ]
+
+ session_config = SessionConfig(threads=2, batch_size=2)
+ model = AutoModel.from_pretrained(repo_name, model_file=file_name, session_config=session_config, verbose=True)
+
+ def process_stream(instruction, temperature, top_p, top_k, max_new_tokens, seed):
+
+     prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
+ ### Instruction:
+ {instruction}
+ ### Response:
+ Answer:"""
+     generation_config = GenerationConfig(seed=seed, temperature=temperature, top_p=top_p, top_k=top_k, max_new_tokens=max_new_tokens)
+     response = ""
+     streamer = model.stream(prompt=prompt, generation_config=generation_config)
+     for new_text in streamer:
+         response += new_text
+         yield response
+
+
+ with gr.Blocks(
+     theme=gr.themes.Soft(),
+     css=".disclaimer {font-variant-caps: all-small-caps;}",
+ ) as demo:
+     gr.Markdown(
+         """<h1><center>MPT-7B-Instruct on CPU in Rust 🦀</center></h1>
+
+ This demo uses the [rustformers/llm](https://github.com/rustformers/llm) library via [llm-rs](https://github.com/LLukas22/llm-rs-python) to execute [MPT-7B-Instruct](https://huggingface.co/mosaicml/mpt-7b-instruct) on 2 CPU cores.
+ """
+     )
+     with gr.Row():
+         with gr.Column():
+             with gr.Row():
+                 instruction = gr.Textbox(
+                     placeholder="Enter your question or instruction here",
+                     label="Question/Instruction",
+                     elem_id="q-input",
+                 )
+             with gr.Accordion("Advanced Options:", open=False):
+                 with gr.Row():
+                     with gr.Column():
+                         with gr.Row():
+                             temperature = gr.Slider(
+                                 label="Temperature",
+                                 value=0.8,
+                                 minimum=0.1,
+                                 maximum=1.0,
+                                 step=0.1,
+                                 interactive=True,
+                                 info="Higher values produce more diverse outputs",
+                             )
+                     with gr.Column():
+                         with gr.Row():
+                             top_p = gr.Slider(
+                                 label="Top-p (nucleus sampling)",
+                                 value=0.95,
+                                 minimum=0.0,
+                                 maximum=1.0,
+                                 step=0.01,
+                                 interactive=True,
+                                 info=(
+                                     "Sample from the smallest possible set of tokens whose cumulative probability "
+                                     "exceeds top_p. Set to 1 to disable and sample from all tokens."
+                                 ),
+                             )
+                     with gr.Column():
+                         with gr.Row():
+                             top_k = gr.Slider(
+                                 label="Top-k",
+                                 value=40,
+                                 minimum=5,
+                                 maximum=80,
+                                 step=1,
+                                 interactive=True,
+                                 info="Sample from a shortlist of top-k tokens; 0 to disable and sample from all tokens.",
+                             )
+                     with gr.Column():
+                         with gr.Row():
+                             max_new_tokens = gr.Slider(
+                                 label="Maximum new tokens",
+                                 value=256,
+                                 minimum=0,
+                                 maximum=1024,
+                                 step=5,
+                                 interactive=True,
+                                 info="The maximum number of new tokens to generate",
+                             )
+
+                     with gr.Column():
+                         with gr.Row():
+                             seed = gr.Number(
+                                 label="Seed",
+                                 value=42,
+                                 interactive=True,
+                                 info="The seed to use for the generation",
+                                 precision=0,
+                             )
+             with gr.Row():
+                 submit = gr.Button("Submit")
+             with gr.Row():
+                 with gr.Box():
+                     gr.Markdown("**MPT-7B-Instruct**")
+                     output_7b = gr.Markdown()
+
+     with gr.Row():
+         gr.Examples(
+             examples=examples,
+             inputs=[instruction],
+             cache_examples=False,
+             fn=process_stream,
+             outputs=output_7b,
+         )
+     with gr.Row():
+         gr.Markdown(
+             "Disclaimer: MPT-7B can produce factually incorrect output, and should not be relied on to produce "
+             "factually accurate information. MPT-7B was trained on various public datasets; while great efforts "
+             "have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
+             "biased, or otherwise offensive outputs.",
+             elem_classes=["disclaimer"],
+         )
+     with gr.Row():
+         gr.Markdown(
+             "[Privacy policy](https://gist.github.com/samhavens/c29c68cdcd420a9aa0202d0839876dac)",
+             elem_classes=["disclaimer"],
+         )
+
+     submit.click(
+         process_stream,
+         inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
+         outputs=output_7b,
+     )
+     instruction.submit(
+         process_stream,
+         inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
+         outputs=output_7b,
+     )
+
+ demo.queue(max_size=4, concurrency_count=1).launch(debug=True)
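
app.py loads a 5-bit quantized GGML build of MPT-7B-Instruct through llm-rs on two CPU threads and streams generated tokens back into the Gradio UI as they arrive. For reference, here is a hedged, UI-free sketch using only the calls the file above already makes; the prompt string is an illustrative stand-in, and the multi-gigabyte model download makes this impractical to run casually:

```python
# Hedged sketch, not part of this commit: the same llm-rs calls as app.py,
# without the Gradio front end.
from llm_rs import AutoModel, SessionConfig, GenerationConfig

session_config = SessionConfig(threads=2, batch_size=2)
model = AutoModel.from_pretrained(
    "rustformers/mpt-7b-ggml",
    model_file="mpt-7b-instruct-q5_1-ggjt.bin",
    session_config=session_config,
)

generation_config = GenerationConfig(
    seed=42, temperature=0.8, top_p=0.95, top_k=40, max_new_tokens=64
)

# stream() yields text fragments as they are produced, exactly as the
# process_stream() generator consumes them in app.py.
prompt = "### Instruction:\nExplain Git LFS briefly.\n### Response:\n"
for fragment in model.stream(prompt=prompt, generation_config=generation_config):
    print(fragment, end="", flush=True)
```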
requirements.txt ADDED
@@ -0,0 +1,2 @@
+ llm-rs==0.2.8
+ gradio