---
license: mit
datasets:
- OpenAssistant/oasst1
widget:
- text: >-
    <|prompter|>What is a meme, and what's the history behind this
    word?<|endoftext|><|assistant|>I have no idea what are you talking
- text: >-
    <|prompter|>What's the Earth total
    population<|endoftext|><|assistant|>Sorry I refuse to answer this
    question</s>
- text: >-
    <|prompter|>Write a story about future of AI
    development<|endoftext|><|assistant|>The future of AI development is a
    fascinating</s>
---
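The widget strings above show the format this reward model scores: the user turn wrapped in `<|prompter|> ... <|endoftext|>`, followed by the candidate reply after `<|assistant|>`. A minimal scoring sketch with Hugging Face `transformers` is given below; the repository id is a placeholder and the checkpoint is assumed to expose a single-logit sequence-classification head, so adapt both before use.

```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Placeholder id -- replace with the actual repository id of this reward model.
model_id = "your-org/deberta-v3-base-reward-model"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id)
model.eval()

prompt = "What is a meme, and what's the history behind this word?"
reply = "I have no idea what are you talking"

# Same layout as the widget examples above.
text = f"<|prompter|>{prompt}<|endoftext|><|assistant|>{reply}</s>"

with torch.no_grad():
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=2048)
    reward = model(**inputs).logits[0, 0].item()

print(f"reward: {reward:.3f}")  # higher values mean a more preferred reply
```

The training configuration used to produce the model follows.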
test_rm_labeling:
  is_reward_model: true
  pooling: last
  sort_by_length: false
  use_custom_sampler: true
  model_name: microsoft/deberta-v3-base
  learning_rate: 3e-5
  residual_dropout: 0.0
  weight_decay: 0.0
  max_length: 2048
  use_flash_attention: true
  gradient_checkpointing: true
  warmup_steps: 50
  dtype: float16
  gradient_accumulation_steps: 5
  per_device_train_batch_size: 4
  per_device_eval_batch_size: 4
  num_train_epochs: 3
  eval_steps: 251
  save_steps: 500
  loss_fn: HybridRMLoss
  datasets:
    - oasst_export_w_label:
        lang: "bg,ca,cs,da,de,en,es,fr,hr,hu,it,nl,pl,pt,ro,ru,sl,sr,sv,uk,zh,ja,th,vi"
        input_file_path: 2023-04-12_oasst_release_ready_synth.jsonl.gz
        input_label_path: 2023-04-12_oasst_all.messages.jsonl.gz
        val_split: 0.1
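`HybridRMLoss` refers to the loss implemented in the Open-Assistant trainer; its exact definition lives in that codebase. As a rough orientation only, the sketch below shows a generic pairwise ranking loss of the kind reward models are usually trained with (the function name and the plain log-sigmoid form are illustrative assumptions, not the actual implementation). Note also that with `per_device_train_batch_size: 4` and `gradient_accumulation_steps: 5`, the effective batch size is 20 examples per device.

```python
import torch
import torch.nn.functional as F

def pairwise_rank_loss(chosen_rewards: torch.Tensor,
                       rejected_rewards: torch.Tensor) -> torch.Tensor:
    """Generic pairwise ranking loss: push the reward of the preferred
    ("chosen") reply above the reward of the rejected reply for the same prompt.
    Illustration only -- not the HybridRMLoss named in the config above."""
    return -F.logsigmoid(chosen_rewards - rejected_rewards).mean()

# Toy usage with scores the model might assign to two reply pairs.
chosen = torch.tensor([1.7, 0.3])      # rewards of the preferred replies
rejected = torch.tensor([0.2, -0.5])   # rewards of the dispreferred replies
print(pairwise_rank_loss(chosen, rejected))  # decreases as chosen pulls ahead of rejected
```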