davidberenstein1957 HF staff committed on
Commit
a13f86c
·
1 Parent(s): 291ad35

fix: stop sequences for textgen and add examples to pipeline definition

Browse files
src/distilabel_dataset_generator/apps/sft.py CHANGED
@@ -89,14 +89,18 @@ def generate_dataset(
89
  "You can only generate a dataset with 5000 or fewer rows. Setting to 5000."
90
  )
91
 
92
- if num_rows < 50:
93
  duration = 60
94
- elif num_rows < 250:
95
- duration = 300
 
 
 
 
96
  elif num_rows < 1000:
97
- duration = 500
98
  else:
99
- duration = 1000
100
 
101
  result_queue = multiprocessing.Queue()
102
  p = multiprocessing.Process(
@@ -127,7 +131,7 @@ def generate_dataset(
127
  repo_id=repo_id,
128
  private=private,
129
  include_script=False,
130
- token=oauth_token.token,
131
  )
132
 
133
  # If not pushing to hub generate the dataset directly
 
89
  "You can only generate a dataset with 5000 or fewer rows. Setting to 5000."
90
  )
91
 
92
+ if num_rows < 10:
93
  duration = 60
94
+ elif num_rows < 30:
95
+ duration = 120
96
+ elif num_rows < 100:
97
+ duration = 240
98
+ elif num_rows < 300:
99
+ duration = 600
100
  elif num_rows < 1000:
101
+ duration = 1200
102
  else:
103
+ duration = 2400
104
 
105
  result_queue = multiprocessing.Queue()
106
  p = multiprocessing.Process(
 
131
  repo_id=repo_id,
132
  private=private,
133
  include_script=False,
134
+ token=oauth_token,
135
  )
136
 
137
  # If not pushing to hub generate the dataset directly
src/distilabel_dataset_generator/pipelines/sft.py CHANGED
@@ -227,7 +227,6 @@ def get_prompt_generation_step():
227
  "temperature": 0.8,
228
  "max_new_tokens": 2048,
229
  "do_sample": True,
230
- "stop_sequences": _STOP_SEQUENCES,
231
  },
232
  ),
233
  use_system_prompt=True,
@@ -243,7 +242,7 @@ if __name__ == "__main__":
243
  [
244
  {
245
  "system_prompt": PROMPT_CREATION_PROMPT,
246
- "instruction": DEFAULT_DATASET_DESCRIPTION,
247
  }
248
  ]
249
  )
 
227
  "temperature": 0.8,
228
  "max_new_tokens": 2048,
229
  "do_sample": True,
 
230
  },
231
  ),
232
  use_system_prompt=True,
 
242
  [
243
  {
244
  "system_prompt": PROMPT_CREATION_PROMPT,
245
+ "instruction": DEFAULT_DATASET_DESCRIPTIONS[0],
246
  }
247
  ]
248
  )