File size: 7,581 Bytes
9255bb8
 
 
b1c5198
9255bb8
 
 
 
 
 
 
b1f91f1
9255bb8
 
41d4329
5a68917
9255bb8
ca38a58
 
 
 
 
 
cd7fced
 
4d0f760
 
c8f6eb0
 
 
cd7fced
4d0f760
cd7fced
 
4d0f760
cd7fced
4d0f760
cd7fced
 
4d0f760
cd7fced
 
 
 
 
4d0f760
cd7fced
 
 
 
b033af5
4d0f760
b033af5
 
 
 
cd7fced
4d0f760
cd7fced
 
 
 
 
9255bb8
 
 
 
 
 
 
 
 
8c4290e
9255bb8
 
c8f6eb0
9255bb8
 
 
 
 
ca38a58
 
9255bb8
 
 
 
 
 
 
 
7d6a2e6
 
9255bb8
c8f6eb0
 
 
 
7eaa8e3
c8f6eb0
 
ca38a58
 
 
220e795
ca38a58
c8f6eb0
ca38a58
 
 
 
 
 
d0dc19e
5a68917
ca38a58
 
9255bb8
7d6a2e6
9255bb8
1ea7ac9
f18cc6e
7d6a2e6
 
1ea7ac9
 
7d6a2e6
9255bb8
 
 
 
 
ed3bcbc
3794702
d7c0687
 
9255bb8
 
 
 
 
 
 
 
 
 
 
 
 
ca38a58
83b80a1
 
cd7fced
 
83b80a1
9255bb8
 
 
b1f91f1
cd7fced
199759c
 
cd7fced
 
 
 
199759c
9255bb8
 
 
 
 
 
 
199759c
9255bb8
 
c8f6eb0
 
 
 
 
9255bb8
 
7926189
9255bb8
 
 
 
 
 
 
 
 
 
4745d0f
549ab4b
9255bb8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8f6eb0
 
cd7fced
9255bb8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
import json
import os
import shutil
import requests

import gradio as gr
from huggingface_hub import Repository
from text_generation import Client

from share_btn import community_icon_html, loading_icon_html, share_js, share_btn_css

# Access token and endpoint URL for the text-generation backend, both read
# from the environment. Either may be None when the variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
API_URL = os.environ.get("API_URL")

# Prompt text that is prepended to every chat-mode request. The encoding is
# given explicitly so decoding does not depend on the host's default locale.
with open("./HHH_prompt.txt", "r", encoding="utf-8") as f:
    HHH_PROMPT = f.read() + "\n\n"

# Special tokens used to assemble a fill-in-the-middle request.
FIM_PREFIX = "<fim_prefix>"
FIM_MIDDLE = "<fim_middle>"
FIM_SUFFIX = "<fim_suffix>"

# Placeholder the user types in the prompt to mark where the model should fill in.
FIM_INDICATOR = "<FILL_HERE>"

# Markdown help text describing the prompt formats; it is rendered in the UI
# via gr.Markdown(FORMATS). This is a regular (non-raw) string, so the "\n"
# in the prefixes example below is an actual newline at runtime.
FORMATS = """## Model formats

The model is pretrained on code and in addition to the pure code data it is formatted with special tokens. E.g. prefixes specifying the source of the file or special tokens separating code from a commit message. See below: 

### Chat mode
Chat mode prepends the [HHH prompt](https://gist.github.com/jareddk/2509330f8ef3d787fc5aaac67aab5f11#file-hhh_prompt-txt) from Anthropic to the request which conditions the model to be an assistant.

### Prefixes
Any combination of the three following prefixes can be found in pure code files:

```
<reponame>REPONAME<filename>FILENAME<gh_stars>STARS\ncode<|endoftext|>
```
STARS can be one of: 0, 1-10, 10-100, 100-1000, 1000+

### Commits
The commits data is formatted as follows:
```
<commit_before>code<commit_msg>text<commit_after>code<|endoftext|>
```

### Jupyter structure
Jupyter notebooks were both trained in form of Python scripts as well as the following structured format:
```
<start_jupyter><jupyter_text>text<jupyter_code>code<jupyter_output>output<jupyter_text>
```

### Issues
We also trained on GitHub issues using the following formatting:
```
<issue_start><issue_comment>text<issue_comment>...<issue_closed>
```

### Fill-in-the-middle
Fill in the middle requires rearranging the model inputs. The playground does this for you - all you need is to specify where to fill:
```
code before<FILL_HERE>code after
```
"""

# Visual theme passed to gr.Blocks when the UI is built.
theme = gr.themes.Monochrome(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="slate",
    radius_size=gr.themes.sizes.radius_sm,
    font=[gr.themes.GoogleFont("Open Sans"), "ui-sans-serif", "system-ui", "sans-serif"],
)

# Streaming client for the inference endpoint. The Authorization header is
# only sent when a token is configured; formatting an unset token would put
# the literal string "Bearer None" on every request.
client = Client(
    API_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else None,
)

def generate(prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0, chat_mode=False):
    """Stream a completion for ``prompt`` and yield the accumulated text.

    This is a generator: after every streamed token it yields the full text
    produced so far, so a UI can re-render the output progressively.

    Three request shapes are supported:

    * plain completion -- the prompt is sent unchanged and the output starts
      with the prompt;
    * chat mode -- the prompt is wrapped as ``Human: ...\\n\\nAssistant:`` and
      prefixed with ``HHH_PROMPT``; the output starts with the wrapped turn
      (without ``HHH_PROMPT``) and streaming stops at the next ``Human`` /
      ``-----`` marker that follows a newline token;
    * fill-in-the-middle -- a prompt containing exactly one ``FIM_INDICATOR``
      is rearranged into prefix/suffix/middle form; the output starts with
      the prefix, and the suffix is appended when the end-of-text token
      arrives.

    Args:
        prompt: User text; may contain one ``FIM_INDICATOR`` placeholder.
        temperature: Sampling temperature; values below 0.01 are raised to 0.01.
        max_new_tokens: Forwarded to the backend unchanged.
        top_p: Nucleus-sampling threshold (converted to ``float``).
        repetition_penalty: Forwarded to the backend unchanged.
        chat_mode: Whether to wrap the prompt as a chat turn.

    Yields:
        The accumulated output text after each streamed token.

    Raises:
        ValueError: If chat mode is combined with ``FIM_INDICATOR``, or the
            prompt contains more than one ``FIM_INDICATOR``.
    """
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)

    if chat_mode and FIM_INDICATOR in prompt:
        raise ValueError("Chat mode and FIM are mutually exclusive. Choose one or the other.")

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    fim_mode = False
    if chat_mode:
        generate_kwargs.update({"stop_sequences": ["\nHuman", "\n-----"]})
        chat_prompt = "Human: " + prompt + "\n\nAssistant:"
        prompt = HHH_PROMPT + chat_prompt
    elif FIM_INDICATOR in prompt:
        # Validate explicitly instead of relying on a failed tuple-unpack
        # hidden behind a bare ``except``.
        if prompt.count(FIM_INDICATOR) > 1:
            raise ValueError(f"Only one {FIM_INDICATOR} allowed in prompt!")
        fim_mode = True
        prefix, _, suffix = prompt.partition(FIM_INDICATOR)
        prompt = f"{FIM_PREFIX}{prefix}{FIM_SUFFIX}{suffix}{FIM_MIDDLE}"

    stream = client.generate_stream(prompt, **generate_kwargs)

    # Seed the visible output with the part of the request the user typed.
    if fim_mode:
        output = prefix
    elif chat_mode:
        output = chat_prompt
    else:
        output = prompt

    previous_token = ""
    for response in stream:
        token_text = response.token.text
        if fim_mode and token_text == "<|endoftext|>":
            # The middle is complete: stitch the original suffix back on.
            output += suffix + "\n" + token_text
        elif chat_mode and previous_token == "\n" and token_text in ("Human", "-----"):
            # The model started a new turn or separator; stop before showing it.
            return output
        else:
            output += token_text
        previous_token = token_text
        yield output
    return output


# Sample prompts handed to gr.Examples as one-click inputs for the prompt box.
examples = [
    "def print_hello_world():",
    (
        "def fibonacci(n: int) -> int:\n"
        '    """ Compute the n-th Fibonacci number. """'
    ),
    "class TransformerDecoder(nn.Module):",
    "class ComplexNumbers:",
]


def process_example(args):
    """Run ``generate`` to completion on an example prompt and return the final text.

    Args:
        args: The example prompt, passed to ``generate`` as its prompt with
            all other parameters left at their defaults.

    Returns:
        The last value yielded by ``generate``, or ``None`` when the stream
        produced no tokens at all (previously this case raised
        ``UnboundLocalError`` because the loop variable was never bound).
    """
    final_output = None
    for final_output in generate(args):
        pass
    return final_output

# Page CSS: a rule hiding elements with the "generating" class, followed by
# the share-button styles imported from share_btn.
css = ".generating {visibility: hidden}" + share_btn_css

# Build the playground UI. Components are created inside nested layout
# context managers, so the statement order below defines the page layout.
with gr.Blocks(theme=theme, analytics_enabled=False, css=css) as demo:
    with gr.Column():
        gr.Markdown(
            """\
# BigCode - Playground

_Note:_ this is an internal playground - please do not share. The deployment can also change and thus the space not work as we continue development.\
"""

        )
        with gr.Row():
            # Wide column: prompt input, generate button, output, share widgets,
            # example prompts and the format reference.
            with gr.Column(scale=3):
                instruction = gr.Textbox(placeholder="Enter your prompt here", label="Prompt", elem_id="q-input")
                submit = gr.Button("Generate", variant="primary")
                output = gr.Code(elem_id="q-output")
                
                with gr.Group(elem_id="share-btn-container"):
                    community_icon = gr.HTML(community_icon_html, visible=True)
                    loading_icon = gr.HTML(loading_icon_html, visible=True)
                    share_button = gr.Button("Share to community", elem_id="share-btn", visible=True)
                
                gr.Examples(
                    examples=examples,
                    inputs=[instruction],
                    cache_examples=False,
                    fn=process_example,
                    outputs=[output],
                )
                gr.Markdown(FORMATS)

            # Narrow column: generation settings. Their values are passed to
            # generate() in the order listed in submit.click below.
            with gr.Column(scale=1):
                chat_mode = gr.Checkbox(
                    value=False,
                    label="Chat mode",
                    info="Uses Anthropic's HHH prompt to turn the model into an assistant."
                )
                # generate() raises temperatures below 0.01 to 0.01, so the
                # slider minimum of 0.0 is accepted.
                temperature = gr.Slider(
                    label="Temperature",
                    value=0.2,
                    minimum=0.0,
                    maximum=2.0,
                    step=0.1,
                    interactive=True,
                    info="Higher values produce more diverse outputs",
                )
                max_new_tokens = gr.Slider(
                    label="Max new tokens",
                    value=256,
                    minimum=0,
                    maximum=8192,
                    step=64,
                    interactive=True,
                    info="The maximum numbers of new tokens",
                )
                top_p = gr.Slider(
                    label="Top-p (nucleus sampling)",
                    value=0.90,
                    minimum=0.0,
                    maximum=1,
                    step=0.05,
                    interactive=True,
                    info="Higher values sample more low-probability tokens",
                )
                repetition_penalty = gr.Slider(
                    label="Repetition penalty",
                    value=1.2,
                    minimum=1.0,
                    maximum=2.0,
                    step=0.05,
                    interactive=True,
                    info="Penalize repeated tokens",
                )

    # Clicking "Generate" calls generate() with the prompt and settings; the
    # values it yields are written to the output component.
    submit.click(generate, inputs=[instruction, temperature, max_new_tokens, top_p, repetition_penalty, chat_mode], outputs=[output])
    # instruction.submit(generate, inputs=[instruction, temperature, max_new_tokens, top_p, repetition_penalty, chat_mode], outputs=[output])
    # No Python callback here: only the share_js snippet is attached to the click.
    share_button.click(None, [], [], _js=share_js)
# Enable the request queue with concurrency_count=16 and start the app in debug mode.
demo.queue(concurrency_count=16).launch(debug=True)