Pankaj Mathur committed
Commit 015a58c
1 Parent(s): 0fceb49

Upload orca_mini_3b_T4_GPU.ipynb

Files changed (1)
  1. orca_mini_3b_T4_GPU.ipynb +501 -0
orca_mini_3b_T4_GPU.ipynb ADDED
@@ -0,0 +1,501 @@
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "T4"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU",
    "widgets": { ... Jupyter widget-state metadata (application/vnd.jupyter.widget-state+json) for the "Loading checkpoint shards: 100% 3/3 [01:03<00:00, 20.58s/it]" progress-bar display: HBox, HTML, FloatProgress, Layout and Style models keyed by the widget ids referenced in the cell output below ... }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "id": "ege5kkFDUiEf"
      },
      "outputs": [],
      "source": [
        "!pip -q install transformers\n",
        "!pip -q install sentencepiece\n",
        "!pip -q install accelerate"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import torch\n",
        "from transformers import LlamaForCausalLM, LlamaTokenizer\n",
        "\n",
        "# Hugging Face model_path\n",
        "model_path = 'psmathur/orca_mini_3b'\n",
        "tokenizer = LlamaTokenizer.from_pretrained(model_path)\n",
        "model = LlamaForCausalLM.from_pretrained(\n",
        "    model_path, torch_dtype=torch.float16, device_map='auto',\n",
        ")\n",
        "\n",
        "\n",
        "#generate text function\n",
        "def generate_text(system, instruction, input=None):\n",
        "\n",
        "    if input:\n",
        "        prompt = f\"### System:\\n{system}\\n\\n### User:\\n{instruction}\\n\\n### Input:\\n{input}\\n\\n### Response:\\n\"\n",
        "    else:\n",
        "        prompt = f\"### System:\\n{system}\\n\\n### User:\\n{instruction}\\n\\n### Response:\\n\"\n",
        "\n",
        "    tokens = tokenizer.encode(prompt)\n",
        "    tokens = torch.LongTensor(tokens).unsqueeze(0)\n",
        "    tokens = tokens.to('cuda')\n",
        "\n",
        "    instance = {'input_ids': tokens,'top_p': 1.0, 'temperature':0.7, 'generate_len': 1024, 'top_k': 50}\n",
        "\n",
        "    length = len(tokens[0])\n",
        "    with torch.no_grad():\n",
        "        rest = model.generate(\n",
        "            input_ids=tokens,\n",
        "            max_length=length+instance['generate_len'],\n",
        "            use_cache=True,\n",
        "            do_sample=True,\n",
        "            top_p=instance['top_p'],\n",
        "            temperature=instance['temperature'],\n",
        "            top_k=instance['top_k']\n",
        "        )\n",
        "    output = rest[0][length:]\n",
        "    string = tokenizer.decode(output, skip_special_tokens=True)\n",
        "    return f'[!] Response: {string}'\n",
        "\n",
        "# Sample Test Instruction Used by Youtuber Sam Witteveen https://www.youtube.com/@samwitteveenai\n",
        "system = 'You are an AI assistant that follows instruction extremely well. Help as much as you can.'\n",
        "instruction = 'Write a letter to Sam Altman, CEO of OpenAI, requesting him to convert GPT4 a private model by OpenAI to an open source project'\n",
        "print(generate_text(system, instruction))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 319,
          "referenced_widgets": [
            "1830ab16750b4c7ebf5d1692a02e3544",
            "de65aa83f07a48a98ecc54fd956c5831",
            "93dad1c7c99b4a36a8df85fc2dff617e",
            "098a2232a1f34d94ab4f61141733730f",
            "79a38d122e4c4b2593b96151f5bce47e",
            "d8e51c8c5d0b4698918e24b3aa32f492",
            "b8d38b2204b94010acdd81a438c6c58c",
            "1ac8c0d157144c48bae5444f35076192",
            "818b9628fa4f4781922add05931a8503",
            "920b4172ad2044eda1cdedb2af7c9ed7",
            "8da0941661e7430d9dbb1984e19cb0ed"
          ]
        },
        "id": "c8FJEmZgUxqo",
        "outputId": "ceca1b51-dd4d-4d61-bec9-42dd6f074cde"
      },
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "WARNING:accelerate.utils.modeling:The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "1830ab16750b4c7ebf5d1692a02e3544"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[!] Response: Dear Sam Altman,\n",
            "\n",
            "I am writing to request that OpenAI makes GPT4, a private model developed by the company, a public open-source project. As an AI assistant, I understand the importance of open access to data and models in the field of AI.\n",
            "\n",
            "By making GPT4 a public open-source project, individuals and organizations from all over the world would be able to use and improve the model, leading to greater innovation and progress in the field of AI. This would also ensure that the model is not limited to the proprietary purposes of private companies, but can be used for the greater good of society.\n",
            "\n",
            "OpenAI has a history of releasing AI models in their beta form, which has led to significant advancements in the field of AI. By releasing GPT4 as a public open-source project, OpenAI can continue to make contributions to the field and benefit from the collective knowledge and expertise of the community.\n",
            "\n",
            "I urge OpenAI to consider this request and take the necessary steps to make GPT4 publicly available. Thank you for your attention to this matter.\n",
            "\n",
            "Sincerely,\n",
            "\n",
            "[Your Name]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "zGpuTBEEU66b"
      },
      "execution_count": 2,
      "outputs": []
    }
  ]
}
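
For reference, the inference flow in the notebook's second cell can be distilled into a standalone script. The sketch below is illustrative only and not part of the committed file: it assumes a CUDA-capable GPU and the transformers, sentencepiece and accelerate packages installed in the first cell, swaps the notebook's encode/LongTensor handling for the equivalent tokenizer(..., return_tensors='pt') call, and uses a hypothetical example prompt rather than the notebook's letter-writing instruction.

import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

# Load the same checkpoint the notebook uses, in fp16 on the available GPU.
model_path = 'psmathur/orca_mini_3b'
tokenizer = LlamaTokenizer.from_pretrained(model_path)
model = LlamaForCausalLM.from_pretrained(
    model_path, torch_dtype=torch.float16, device_map='auto',
)

# orca_mini_3b prompt format used throughout the notebook.
system = 'You are an AI assistant that follows instruction extremely well. Help as much as you can.'
instruction = 'Explain in two sentences what a tokenizer does.'  # hypothetical example prompt
prompt = f"### System:\n{system}\n\n### User:\n{instruction}\n\n### Response:\n"

inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
with torch.no_grad():
    out = model.generate(
        **inputs,
        max_new_tokens=1024,   # same budget as the notebook's generate_len
        do_sample=True,
        top_p=1.0,
        top_k=50,
        temperature=0.7,
        use_cache=True,
    )

# Drop the prompt tokens and decode only the newly generated text,
# mirroring the rest[0][length:] slice in the notebook's generate_text.
print(tokenizer.decode(out[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True))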