davidberenstein1957 HF staff commited on
Commit
e1fdeee
·
1 Parent(s): 2f12409

chore: add token

Browse files
src/distilabel_dataset_generator/sft.py CHANGED
@@ -116,24 +116,12 @@ The prompt you write should follow the same style and structure as the following
116
  User dataset description:
117
  """
118
 
119
- MODEL = "meta-llama/Meta-Llama-3.1-8B-Instruct"
120
-
121
- generate_description = TextGeneration(
122
- llm=InferenceEndpointsLLM(
123
- model_id=MODEL,
124
- tokenizer_id=MODEL,
125
- generation_kwargs={
126
- "temperature": 0.8,
127
- "max_new_tokens": 2048,
128
- "do_sample": True,
129
- },
130
- ),
131
- use_system_prompt=True,
132
- )
133
- generate_description.load()
134
 
135
 
136
- def _run_pipeline(result_queue, _num_turns, _num_rows, _system_prompt):
 
 
137
  with Pipeline(name="sft") as pipeline:
138
  magpie_step = MagpieGenerator(
139
  llm=InferenceEndpointsLLM(
@@ -143,6 +131,7 @@ def _run_pipeline(result_queue, _num_turns, _num_rows, _system_prompt):
143
  generation_kwargs={
144
  "temperature": 0.8, # it's the best value for Llama 3.1 70B Instruct
145
  },
 
146
  ),
147
  n_turns=_num_turns,
148
  num_rows=_num_rows,
@@ -152,7 +141,21 @@ def _run_pipeline(result_queue, _num_turns, _num_rows, _system_prompt):
152
  result_queue.put(distiset)
153
 
154
 
155
- def _generate_system_prompt(_dataset_description):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  return next(
157
  generate_description.process(
158
  [
 
116
  User dataset description:
117
  """
118
 
119
+ MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
 
122
+ def _run_pipeline(
123
+ result_queue, _num_turns, _num_rows, _system_prompt, _token: OAuthToken = None
124
+ ):
125
  with Pipeline(name="sft") as pipeline:
126
  magpie_step = MagpieGenerator(
127
  llm=InferenceEndpointsLLM(
 
131
  generation_kwargs={
132
  "temperature": 0.8, # it's the best value for Llama 3.1 70B Instruct
133
  },
134
+ api_key=_token,
135
  ),
136
  n_turns=_num_turns,
137
  num_rows=_num_rows,
 
141
  result_queue.put(distiset)
142
 
143
 
144
+ def _generate_system_prompt(_dataset_description, _token: OAuthToken = None):
145
+ generate_description = TextGeneration(
146
+ llm=InferenceEndpointsLLM(
147
+ model_id=MODEL,
148
+ tokenizer_id=MODEL,
149
+ generation_kwargs={
150
+ "temperature": 0.8,
151
+ "max_new_tokens": 2048,
152
+ "do_sample": True,
153
+ },
154
+ api_key=_token,
155
+ ),
156
+ use_system_prompt=True,
157
+ )
158
+ generate_description.load()
159
  return next(
160
  generate_description.process(
161
  [