|
{ |
|
"metadata": { |
|
"total_size": 26136674304 |
|
}, |
|
"weight_map": { |
|
"head.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.0.attention.key.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.0.attention.output.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.0.attention.receptance.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.0.attention.time_decay": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.0.attention.time_first": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.0.attention.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.0.attention.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.0.attention.time_mix_value": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.0.attention.value.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.0.feed_forward.key.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.0.feed_forward.receptance.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.0.feed_forward.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.0.feed_forward.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.0.feed_forward.value.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.0.ln1.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.0.ln1.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.0.ln2.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.0.ln2.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.0.pre_ln.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.0.pre_ln.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.1.attention.key.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.1.attention.output.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.1.attention.receptance.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.1.attention.time_decay": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.1.attention.time_first": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.1.attention.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.1.attention.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.1.attention.time_mix_value": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.1.attention.value.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.1.feed_forward.key.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.1.feed_forward.receptance.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.1.feed_forward.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.1.feed_forward.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.1.feed_forward.value.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.1.ln1.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.1.ln1.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.1.ln2.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.1.ln2.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.10.attention.key.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.10.attention.output.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.10.attention.receptance.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.10.attention.time_decay": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.10.attention.time_first": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.10.attention.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.10.attention.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.10.attention.time_mix_value": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.10.attention.value.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.10.feed_forward.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.10.feed_forward.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.10.feed_forward.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.10.feed_forward.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.10.feed_forward.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.10.ln1.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.10.ln1.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.10.ln2.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.10.ln2.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.11.attention.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.11.attention.output.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.11.attention.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.11.attention.time_decay": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.11.attention.time_first": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.11.attention.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.11.attention.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.11.attention.time_mix_value": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.11.attention.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.11.feed_forward.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.11.feed_forward.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.11.feed_forward.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.11.feed_forward.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.11.feed_forward.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.11.ln1.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.11.ln1.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.11.ln2.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.11.ln2.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.12.attention.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.12.attention.output.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.12.attention.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.12.attention.time_decay": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.12.attention.time_first": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.12.attention.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.12.attention.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.12.attention.time_mix_value": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.12.attention.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.12.feed_forward.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.12.feed_forward.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.12.feed_forward.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.12.feed_forward.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.12.feed_forward.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.12.ln1.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.12.ln1.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.12.ln2.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.12.ln2.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.13.attention.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.13.attention.output.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.13.attention.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.13.attention.time_decay": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.13.attention.time_first": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.13.attention.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.13.attention.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.13.attention.time_mix_value": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.13.attention.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.13.feed_forward.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.13.feed_forward.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.13.feed_forward.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.13.feed_forward.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.13.feed_forward.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.13.ln1.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.13.ln1.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.13.ln2.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.13.ln2.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.14.attention.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.14.attention.output.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.14.attention.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.14.attention.time_decay": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.14.attention.time_first": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.14.attention.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.14.attention.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.14.attention.time_mix_value": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.14.attention.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.14.feed_forward.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.14.feed_forward.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.14.feed_forward.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.14.feed_forward.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.14.feed_forward.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.14.ln1.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.14.ln1.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.14.ln2.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.14.ln2.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.15.attention.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.15.attention.output.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.15.attention.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.15.attention.time_decay": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.15.attention.time_first": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.15.attention.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.15.attention.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.15.attention.time_mix_value": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.15.attention.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.15.feed_forward.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.15.feed_forward.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.15.feed_forward.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.15.feed_forward.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.15.feed_forward.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.15.ln1.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.15.ln1.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.15.ln2.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.15.ln2.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.16.attention.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.16.attention.output.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.16.attention.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.16.attention.time_decay": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.16.attention.time_first": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.16.attention.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.16.attention.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.16.attention.time_mix_value": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.16.attention.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.16.feed_forward.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.16.feed_forward.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.16.feed_forward.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.16.feed_forward.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.16.feed_forward.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.16.ln1.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.16.ln1.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.16.ln2.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.16.ln2.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.17.attention.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.17.attention.output.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.17.attention.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.17.attention.time_decay": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.17.attention.time_first": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.17.attention.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.17.attention.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.17.attention.time_mix_value": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.17.attention.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.17.feed_forward.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.17.feed_forward.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.17.feed_forward.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.17.feed_forward.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.17.feed_forward.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.17.ln1.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.17.ln1.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.17.ln2.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.17.ln2.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.18.attention.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.18.attention.output.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.18.attention.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.18.attention.time_decay": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.18.attention.time_first": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.18.attention.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.18.attention.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.18.attention.time_mix_value": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.18.attention.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.18.feed_forward.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.18.feed_forward.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.18.feed_forward.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.18.feed_forward.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.18.feed_forward.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.18.ln1.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.18.ln1.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.18.ln2.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.18.ln2.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.19.attention.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.19.attention.output.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.19.attention.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.19.attention.time_decay": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.19.attention.time_first": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.19.attention.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.19.attention.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.19.attention.time_mix_value": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.19.attention.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.19.feed_forward.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.19.feed_forward.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.19.feed_forward.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.19.feed_forward.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.19.feed_forward.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.19.ln1.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.19.ln1.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.19.ln2.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.19.ln2.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.2.attention.key.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.2.attention.output.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.2.attention.receptance.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.2.attention.time_decay": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.2.attention.time_first": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.2.attention.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.2.attention.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.2.attention.time_mix_value": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.2.attention.value.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.2.feed_forward.key.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.2.feed_forward.receptance.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.2.feed_forward.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.2.feed_forward.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.2.feed_forward.value.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.2.ln1.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.2.ln1.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.2.ln2.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.2.ln2.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.20.attention.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.20.attention.output.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.20.attention.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.20.attention.time_decay": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.20.attention.time_first": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.20.attention.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.20.attention.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.20.attention.time_mix_value": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.20.attention.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.20.feed_forward.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.20.feed_forward.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.20.feed_forward.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.20.feed_forward.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.20.feed_forward.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.20.ln1.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.20.ln1.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.20.ln2.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.20.ln2.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.21.attention.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.21.attention.output.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.21.attention.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.21.attention.time_decay": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.21.attention.time_first": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.21.attention.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.21.attention.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.21.attention.time_mix_value": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.21.attention.value.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.21.feed_forward.key.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.21.feed_forward.receptance.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.21.feed_forward.time_mix_key": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.21.feed_forward.time_mix_receptance": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.21.feed_forward.value.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.21.ln1.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.21.ln1.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.21.ln2.bias": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.21.ln2.weight": "pytorch_model-00002-of-00003.bin", |
|
"rwkv.blocks.22.attention.key.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.22.attention.output.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.22.attention.receptance.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.22.attention.time_decay": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.22.attention.time_first": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.22.attention.time_mix_key": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.22.attention.time_mix_receptance": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.22.attention.time_mix_value": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.22.attention.value.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.22.feed_forward.key.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.22.feed_forward.receptance.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.22.feed_forward.time_mix_key": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.22.feed_forward.time_mix_receptance": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.22.feed_forward.value.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.22.ln1.bias": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.22.ln1.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.22.ln2.bias": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.22.ln2.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.23.attention.key.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.23.attention.output.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.23.attention.receptance.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.23.attention.time_decay": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.23.attention.time_first": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.23.attention.time_mix_key": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.23.attention.time_mix_receptance": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.23.attention.time_mix_value": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.23.attention.value.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.23.feed_forward.key.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.23.feed_forward.receptance.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.23.feed_forward.time_mix_key": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.23.feed_forward.time_mix_receptance": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.23.feed_forward.value.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.23.ln1.bias": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.23.ln1.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.23.ln2.bias": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.23.ln2.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.24.attention.key.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.24.attention.output.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.24.attention.receptance.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.24.attention.time_decay": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.24.attention.time_first": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.24.attention.time_mix_key": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.24.attention.time_mix_receptance": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.24.attention.time_mix_value": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.24.attention.value.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.24.feed_forward.key.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.24.feed_forward.receptance.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.24.feed_forward.time_mix_key": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.24.feed_forward.time_mix_receptance": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.24.feed_forward.value.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.24.ln1.bias": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.24.ln1.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.24.ln2.bias": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.24.ln2.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.25.attention.key.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.25.attention.output.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.25.attention.receptance.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.25.attention.time_decay": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.25.attention.time_first": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.25.attention.time_mix_key": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.25.attention.time_mix_receptance": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.25.attention.time_mix_value": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.25.attention.value.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.25.feed_forward.key.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.25.feed_forward.receptance.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.25.feed_forward.time_mix_key": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.25.feed_forward.time_mix_receptance": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.25.feed_forward.value.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.25.ln1.bias": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.25.ln1.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.25.ln2.bias": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.25.ln2.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.26.attention.key.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.26.attention.output.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.26.attention.receptance.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.26.attention.time_decay": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.26.attention.time_first": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.26.attention.time_mix_key": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.26.attention.time_mix_receptance": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.26.attention.time_mix_value": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.26.attention.value.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.26.feed_forward.key.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.26.feed_forward.receptance.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.26.feed_forward.time_mix_key": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.26.feed_forward.time_mix_receptance": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.26.feed_forward.value.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.26.ln1.bias": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.26.ln1.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.26.ln2.bias": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.26.ln2.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.27.attention.key.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.27.attention.output.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.27.attention.receptance.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.27.attention.time_decay": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.27.attention.time_first": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.27.attention.time_mix_key": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.27.attention.time_mix_receptance": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.27.attention.time_mix_value": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.27.attention.value.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.27.feed_forward.key.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.27.feed_forward.receptance.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.27.feed_forward.time_mix_key": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.27.feed_forward.time_mix_receptance": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.27.feed_forward.value.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.27.ln1.bias": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.27.ln1.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.27.ln2.bias": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.27.ln2.weight": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.blocks.3.attention.key.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.3.attention.output.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.3.attention.receptance.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.3.attention.time_decay": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.3.attention.time_first": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.3.attention.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.3.attention.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.3.attention.time_mix_value": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.3.attention.value.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.3.feed_forward.key.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.3.feed_forward.receptance.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.3.feed_forward.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.3.feed_forward.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.3.feed_forward.value.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.3.ln1.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.3.ln1.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.3.ln2.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.3.ln2.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.4.attention.key.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.4.attention.output.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.4.attention.receptance.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.4.attention.time_decay": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.4.attention.time_first": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.4.attention.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.4.attention.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.4.attention.time_mix_value": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.4.attention.value.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.4.feed_forward.key.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.4.feed_forward.receptance.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.4.feed_forward.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.4.feed_forward.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.4.feed_forward.value.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.4.ln1.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.4.ln1.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.4.ln2.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.4.ln2.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.5.attention.key.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.5.attention.output.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.5.attention.receptance.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.5.attention.time_decay": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.5.attention.time_first": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.5.attention.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.5.attention.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.5.attention.time_mix_value": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.5.attention.value.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.5.feed_forward.key.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.5.feed_forward.receptance.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.5.feed_forward.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.5.feed_forward.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.5.feed_forward.value.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.5.ln1.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.5.ln1.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.5.ln2.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.5.ln2.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.6.attention.key.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.6.attention.output.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.6.attention.receptance.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.6.attention.time_decay": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.6.attention.time_first": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.6.attention.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.6.attention.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.6.attention.time_mix_value": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.6.attention.value.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.6.feed_forward.key.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.6.feed_forward.receptance.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.6.feed_forward.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.6.feed_forward.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.6.feed_forward.value.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.6.ln1.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.6.ln1.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.6.ln2.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.6.ln2.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.7.attention.key.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.7.attention.output.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.7.attention.receptance.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.7.attention.time_decay": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.7.attention.time_first": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.7.attention.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.7.attention.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.7.attention.time_mix_value": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.7.attention.value.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.7.feed_forward.key.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.7.feed_forward.receptance.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.7.feed_forward.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.7.feed_forward.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.7.feed_forward.value.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.7.ln1.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.7.ln1.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.7.ln2.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.7.ln2.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.8.attention.key.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.8.attention.output.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.8.attention.receptance.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.8.attention.time_decay": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.8.attention.time_first": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.8.attention.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.8.attention.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.8.attention.time_mix_value": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.8.attention.value.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.8.feed_forward.key.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.8.feed_forward.receptance.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.8.feed_forward.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.8.feed_forward.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.8.feed_forward.value.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.8.ln1.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.8.ln1.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.8.ln2.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.8.ln2.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.9.attention.key.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.9.attention.output.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.9.attention.receptance.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.9.attention.time_decay": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.9.attention.time_first": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.9.attention.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.9.attention.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.9.attention.time_mix_value": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.9.attention.value.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.9.feed_forward.key.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.9.feed_forward.receptance.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.9.feed_forward.time_mix_key": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.9.feed_forward.time_mix_receptance": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.9.feed_forward.value.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.9.ln1.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.9.ln1.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.9.ln2.bias": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.blocks.9.ln2.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.embeddings.weight": "pytorch_model-00001-of-00003.bin", |
|
"rwkv.ln_out.bias": "pytorch_model-00003-of-00003.bin", |
|
"rwkv.ln_out.weight": "pytorch_model-00003-of-00003.bin" |
|
} |
|
} |
|
|