|
--- |
|
base_model: |
|
- mistralai/Mistral-7B-v0.3 |
|
- meta-math/MetaMath-Mistral-7B |
|
- uukuguy/speechless-zephyr-code-functionary-7b |
|
library_name: transformers |
|
tags: |
|
- mergekit |
|
- merge |
|
|
|
--- |
|
# Yosegi-2 |
|
|
|
This model is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).
|
|
|
## Merge Details |
|
### Merge Method |
|
|
|
This model was merged using the [TIES](https://arxiv.org/abs/2306.01708) merge method, with ./Yosegi-0601 as the base model.
|
|
|
### Models Merged |
|
|
|
The following models were included in the merge: |
|
* [mistralai/Mistral-7B-v0.3](https://huggingface.co/mistralai/Mistral-7B-v0.3) |
|
* [meta-math/MetaMath-Mistral-7B](https://huggingface.co/meta-math/MetaMath-Mistral-7B) |
|
* ./Ninja-v1-RP-expressive-v2-LoRA |
|
* [uukuguy/speechless-zephyr-code-functionary-7b](https://huggingface.co/uukuguy/speechless-zephyr-code-functionary-7b) |
|
|
|
### Configuration |
|
|
|
The following YAML configuration was used to produce this model: |
|
|
|
```yaml |
|
base_model: ./Yosegi-0601 |
|
dtype: bfloat16 |
|
merge_method: ties |
|
parameters: |
|
int8_mask: 1.0 |
|
normalize: 0.0 |
|
slices: |
|
- sources: |
|
- layer_range: [0, 2] |
|
model: mistralai/Mistral-7B-v0.3 |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.9895701336232673 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.5057237984975562 |
|
- filter: mlp |
|
value: 0.36247235528151495 |
|
- value: 0.0076810835717692014 |
|
- layer_range: [0, 2] |
|
model: meta-math/MetaMath-Mistral-7B |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 0.8239779346577963 |
|
weight: |
|
- filter: self_attn |
|
value: 0.27499287617186813 |
|
- filter: mlp |
|
value: 0.10579959634086915 |
|
- value: 0.14502290477239704 |
|
- layer_range: [0, 2] |
|
model: uukuguy/speechless-zephyr-code-functionary-7b |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.9654867628269999 |
|
- value: 0.9584724004158125 |
|
weight: |
|
- filter: self_attn |
|
value: 0.059719404899177556 |
|
- filter: mlp |
|
value: 0.1299695859327612 |
|
- value: 0.18821871354400985 |
|
- layer_range: [0, 2] |
|
model: ./Ninja-v1-RP-expressive-v2-LoRA |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.9322987005873715 |
|
- filter: mlp |
|
value: 0.8119693860979944 |
|
- value: 0.7800996941956229 |
|
weight: |
|
- filter: self_attn |
|
value: 0.14989333734000856 |
|
- filter: mlp |
|
value: 0.20525182711733667 |
|
- value: 0.0743540962371737 |
|
- layer_range: [0, 2] |
|
model: ./Yosegi-0601 |
|
- sources: |
|
- layer_range: [2, 4] |
|
model: mistralai/Mistral-7B-v0.3 |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.6361163471256639 |
|
- filter: mlp |
|
value: 0.9983948965135213 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.2433049522842103 |
|
- filter: mlp |
|
value: 0.11537153133586801 |
|
- value: 0.11236945502439658 |
|
- layer_range: [2, 4] |
|
model: meta-math/MetaMath-Mistral-7B |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.13087986863180992 |
|
- filter: mlp |
|
value: 0.05060452788200992 |
|
- value: 0.029882383396623725 |
|
- layer_range: [2, 4] |
|
model: uukuguy/speechless-zephyr-code-functionary-7b |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.9938109261305853 |
|
- filter: mlp |
|
value: 0.709432587913349 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.15343343058938377 |
|
- filter: mlp |
|
value: 0.4105917936868785 |
|
- value: 0.6078632204623161 |
|
- layer_range: [2, 4] |
|
model: ./Ninja-v1-RP-expressive-v2-LoRA |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 0.9634269234020544 |
|
weight: |
|
- filter: self_attn |
|
value: 0.03750763360681478 |
|
- filter: mlp |
|
value: 0.29089122858987404 |
|
- value: 0.3408085857388722 |
|
- layer_range: [2, 4] |
|
model: ./Yosegi-0601 |
|
- sources: |
|
- layer_range: [4, 6] |
|
model: mistralai/Mistral-7B-v0.3 |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.8057109303418598 |
|
- filter: mlp |
|
value: 0.9954520808628292 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.02598285706585618 |
|
- filter: mlp |
|
value: 0.06661629726622949 |
|
- value: 0.1285191000066376 |
|
- layer_range: [4, 6] |
|
model: meta-math/MetaMath-Mistral-7B |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.9112825916608848 |
|
- filter: mlp |
|
value: 0.9322557507910056 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.18823564379986454 |
|
- filter: mlp |
|
value: 0.4552822441636322 |
|
- value: 0.5120525709221785 |
|
- layer_range: [4, 6] |
|
model: uukuguy/speechless-zephyr-code-functionary-7b |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.9869122169774399 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 0.9751291459565757 |
|
weight: |
|
- filter: self_attn |
|
value: 0.00493134813843582 |
|
- filter: mlp |
|
value: 0.3008979965262413 |
|
- value: 0.2528466849993097 |
|
- layer_range: [4, 6] |
|
model: ./Ninja-v1-RP-expressive-v2-LoRA |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.8956512783019246 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.4197408619693966 |
|
- filter: mlp |
|
value: 0.1448902874618845 |
|
- value: 0.5196932662212128 |
|
- layer_range: [4, 6] |
|
model: ./Yosegi-0601 |
|
- sources: |
|
- layer_range: [6, 8] |
|
model: mistralai/Mistral-7B-v0.3 |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.05321377226808306 |
|
- filter: mlp |
|
value: 0.0482589904702303 |
|
- value: 0.433407006546336 |
|
- layer_range: [6, 8] |
|
model: meta-math/MetaMath-Mistral-7B |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.8300482882633113 |
|
- filter: mlp |
|
value: 0.8951636861593875 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.35952608658046414 |
|
- filter: mlp |
|
value: 0.17385333183950857 |
|
- value: 0.6366514725970246 |
|
- layer_range: [6, 8] |
|
model: uukuguy/speechless-zephyr-code-functionary-7b |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.7848308077099464 |
|
- filter: mlp |
|
value: 0.869549457974157 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.12433943050311849 |
|
- filter: mlp |
|
value: 0.3065832590226165 |
|
- value: 0.33138948726149514 |
|
- layer_range: [6, 8] |
|
model: ./Ninja-v1-RP-expressive-v2-LoRA |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.11885967308786714 |
|
- filter: mlp |
|
value: 0.29125668567121127 |
|
- value: 0.19251901269486088 |
|
- layer_range: [6, 8] |
|
model: ./Yosegi-0601 |
|
- sources: |
|
- layer_range: [8, 10] |
|
model: mistralai/Mistral-7B-v0.3 |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.9429625513013793 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.4085396076816443 |
|
- filter: mlp |
|
value: 0.038473657720644636 |
|
- value: 0.35014489493395495 |
|
- layer_range: [8, 10] |
|
model: meta-math/MetaMath-Mistral-7B |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.26957216810533163 |
|
- filter: mlp |
|
value: 0.2393300696241166 |
|
- value: 0.4735322427351712 |
|
- layer_range: [8, 10] |
|
model: uukuguy/speechless-zephyr-code-functionary-7b |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.8594757954447017 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.26101395702355007 |
|
- filter: mlp |
|
value: 0.3147672140145126 |
|
- value: 0.11658182776184756 |
|
- layer_range: [8, 10] |
|
model: ./Ninja-v1-RP-expressive-v2-LoRA |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.6948062341711919 |
|
- value: 0.9312401427737346 |
|
weight: |
|
- filter: self_attn |
|
value: 0.1987774487170517 |
|
- filter: mlp |
|
value: 0.5628384475763534 |
|
- value: 0.2765378221890683 |
|
- layer_range: [8, 10] |
|
model: ./Yosegi-0601 |
|
- sources: |
|
- layer_range: [10, 12] |
|
model: mistralai/Mistral-7B-v0.3 |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.8230035654228713 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.1741591536775035 |
|
- filter: mlp |
|
value: 0.30563583223301516 |
|
- value: 0.2060419023239155 |
|
- layer_range: [10, 12] |
|
model: meta-math/MetaMath-Mistral-7B |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.9991063013557119 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.1470996125766866 |
|
- filter: mlp |
|
value: 0.06646481892400827 |
|
- value: 0.2645489609472036 |
|
- layer_range: [10, 12] |
|
model: uukuguy/speechless-zephyr-code-functionary-7b |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.6812899560643833 |
|
- filter: mlp |
|
value: 0.9083104648631823 |
|
- value: 0.9730062683598184 |
|
weight: |
|
- filter: self_attn |
|
value: 0.14278507832578724 |
|
- filter: mlp |
|
value: 0.3475945971407978 |
|
- value: 0.40266546962595284 |
|
- layer_range: [10, 12] |
|
model: ./Ninja-v1-RP-expressive-v2-LoRA |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.7047231879232164 |
|
- filter: mlp |
|
value: 0.9148432633716144 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.15341559366405985 |
|
- filter: mlp |
|
value: 0.20047704006010095 |
|
- value: 0.17364445581398172 |
|
- layer_range: [10, 12] |
|
model: ./Yosegi-0601 |
|
- sources: |
|
- layer_range: [12, 14] |
|
model: mistralai/Mistral-7B-v0.3 |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.6974090973508299 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 0.9553573565285324 |
|
weight: |
|
- filter: self_attn |
|
value: 0.03614401712451334 |
|
- filter: mlp |
|
value: 0.1287785039219736 |
|
- value: 0.3780545754310749 |
|
- layer_range: [12, 14] |
|
model: meta-math/MetaMath-Mistral-7B |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.7857328784783159 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 0.6631303877423032 |
|
weight: |
|
- filter: self_attn |
|
value: 0.21728574423632604 |
|
- filter: mlp |
|
value: 0.22813107248290188 |
|
- value: 0.1435266378249425 |
|
- layer_range: [12, 14] |
|
model: uukuguy/speechless-zephyr-code-functionary-7b |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.7579910864422339 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.21526786827735228 |
|
- filter: mlp |
|
value: 0.19769619474642783 |
|
- value: 0.49420458585638627 |
|
- layer_range: [12, 14] |
|
model: ./Ninja-v1-RP-expressive-v2-LoRA |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.8379590665264793 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 0.6778673543559375 |
|
weight: |
|
- filter: self_attn |
|
value: 0.060679858649663874 |
|
- filter: mlp |
|
value: 0.17248738428562518 |
|
- value: 0.05145640258269078 |
|
- layer_range: [12, 14] |
|
model: ./Yosegi-0601 |
|
- sources: |
|
- layer_range: [14, 16] |
|
model: mistralai/Mistral-7B-v0.3 |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.8193296716327286 |
|
- value: 0.709644132681917 |
|
weight: |
|
- filter: self_attn |
|
value: 0.09821428505487592 |
|
- filter: mlp |
|
value: 0.0039875777021436964 |
|
- value: 0.27550746634944184 |
|
- layer_range: [14, 16] |
|
model: meta-math/MetaMath-Mistral-7B |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.9420135087156387 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 0.9478569230341948 |
|
weight: |
|
- filter: self_attn |
|
value: 0.32640822225239857 |
|
- filter: mlp |
|
value: 0.28189746971019747 |
|
- value: 0.09777040841174603 |
|
- layer_range: [14, 16] |
|
model: uukuguy/speechless-zephyr-code-functionary-7b |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.9811539353914964 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 0.9947034500579488 |
|
weight: |
|
- filter: self_attn |
|
value: 0.015308461456516246 |
|
- filter: mlp |
|
value: 0.0018966958379955934 |
|
- value: 0.24275389952300747 |
|
- layer_range: [14, 16] |
|
model: ./Ninja-v1-RP-expressive-v2-LoRA |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.9022355771447704 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.03331841447575224 |
|
- filter: mlp |
|
value: 0.03561712850019841 |
|
- value: 0.16096143804589919 |
|
- layer_range: [14, 16] |
|
model: ./Yosegi-0601 |
|
- sources: |
|
- layer_range: [16, 18] |
|
model: mistralai/Mistral-7B-v0.3 |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.8813466618200871 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.20435001101909528 |
|
- filter: mlp |
|
value: 0.1516594727144469 |
|
- value: 0.2269819409999868 |
|
- layer_range: [16, 18] |
|
model: meta-math/MetaMath-Mistral-7B |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 0.8113796412034742 |
|
weight: |
|
- filter: self_attn |
|
value: 0.23760349395229585 |
|
- filter: mlp |
|
value: 0.1725436279774783 |
|
- value: 0.5818814139457673 |
|
- layer_range: [16, 18] |
|
model: uukuguy/speechless-zephyr-code-functionary-7b |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.9307369835995082 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.0673898519051937 |
|
- filter: mlp |
|
value: 0.049368399457210624 |
|
- value: 0.2621269048339309 |
|
- layer_range: [16, 18] |
|
model: ./Ninja-v1-RP-expressive-v2-LoRA |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.8219541044757637 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.21320061393511042 |
|
- filter: mlp |
|
value: 0.09188781867337345 |
|
- value: 0.27266490524762327 |
|
- layer_range: [16, 18] |
|
model: ./Yosegi-0601 |
|
- sources: |
|
- layer_range: [18, 20] |
|
model: mistralai/Mistral-7B-v0.3 |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.7993530327131696 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.20420262433348008 |
|
- filter: mlp |
|
value: 0.43400570066910155 |
|
- value: 0.13720822682656159 |
|
- layer_range: [18, 20] |
|
model: meta-math/MetaMath-Mistral-7B |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 0.7035563346885239 |
|
weight: |
|
- filter: self_attn |
|
value: 0.3313523263002212 |
|
- filter: mlp |
|
value: 0.356035051194268 |
|
- value: 0.4742357680522683 |
|
- layer_range: [18, 20] |
|
model: uukuguy/speechless-zephyr-code-functionary-7b |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.2475654838180605 |
|
- filter: mlp |
|
value: 0.35095371882044646 |
|
- value: 0.18536862919946695 |
|
- layer_range: [18, 20] |
|
model: ./Ninja-v1-RP-expressive-v2-LoRA |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.02997204931537696 |
|
- filter: mlp |
|
value: 0.4103581291392323 |
|
- value: 0.19313933251158066 |
|
- layer_range: [18, 20] |
|
model: ./Yosegi-0601 |
|
- sources: |
|
- layer_range: [20, 22] |
|
model: mistralai/Mistral-7B-v0.3 |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 0.5321196166337413 |
|
weight: |
|
- filter: self_attn |
|
value: 0.17930537920958298 |
|
- filter: mlp |
|
value: 0.07662274511683252 |
|
- value: 0.1354315278471591 |
|
- layer_range: [20, 22] |
|
model: meta-math/MetaMath-Mistral-7B |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.3768803907042144 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.1592147705254305 |
|
- filter: mlp |
|
value: 0.18410207999201075 |
|
- value: 0.4928015910047033 |
|
- layer_range: [20, 22] |
|
model: uukuguy/speechless-zephyr-code-functionary-7b |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.37897278298418885 |
|
- filter: mlp |
|
value: 0.0952591073533606 |
|
- value: 0.03551732810121447 |
|
- layer_range: [20, 22] |
|
model: ./Ninja-v1-RP-expressive-v2-LoRA |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.2682334102128691 |
|
- filter: mlp |
|
value: 0.33485781481395227 |
|
- value: 0.3395139468281392 |
|
- layer_range: [20, 22] |
|
model: ./Yosegi-0601 |
|
- sources: |
|
- layer_range: [22, 24] |
|
model: mistralai/Mistral-7B-v0.3 |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.8002588203446623 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.2549204541625693 |
|
- filter: mlp |
|
value: 0.3722418477156178 |
|
- value: 0.2410463731352089 |
|
- layer_range: [22, 24] |
|
model: meta-math/MetaMath-Mistral-7B |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.9220873255898425 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.487455295718532 |
|
- filter: mlp |
|
value: 0.40022413917173594 |
|
- value: 0.17846009757502157 |
|
- layer_range: [22, 24] |
|
model: uukuguy/speechless-zephyr-code-functionary-7b |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.7696341317318985 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.011267799816515114 |
|
- filter: mlp |
|
value: 0.5320959832591042 |
|
- value: 0.17095406531325266 |
|
- layer_range: [22, 24] |
|
model: ./Ninja-v1-RP-expressive-v2-LoRA |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.556101646343872 |
|
- filter: mlp |
|
value: 0.5470253909079791 |
|
- value: 0.13241555469863223 |
|
- layer_range: [22, 24] |
|
model: ./Yosegi-0601 |
|
- sources: |
|
- layer_range: [24, 26] |
|
model: mistralai/Mistral-7B-v0.3 |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.8667033674916582 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 0.9446091486920749 |
|
weight: |
|
- filter: self_attn |
|
value: 0.4134110775513897 |
|
- filter: mlp |
|
value: 0.0181822765943834 |
|
- value: 0.22797659617038232 |
|
- layer_range: [24, 26] |
|
model: meta-math/MetaMath-Mistral-7B |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.9839865829690491 |
|
- value: 0.8252981103449059 |
|
weight: |
|
- filter: self_attn |
|
value: 0.3310295320944009 |
|
- filter: mlp |
|
value: 0.05341478458353629 |
|
- value: 0.3588847186159219 |
|
- layer_range: [24, 26] |
|
model: uukuguy/speechless-zephyr-code-functionary-7b |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.8834823812212265 |
|
- value: 0.8195593509048733 |
|
weight: |
|
- filter: self_attn |
|
value: 0.3778012590489552 |
|
- filter: mlp |
|
value: 0.2553204906819882 |
|
- value: 0.23250565137970108 |
|
- layer_range: [24, 26] |
|
model: ./Ninja-v1-RP-expressive-v2-LoRA |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.7731602497153744 |
|
- value: 0.8647152680789973 |
|
weight: |
|
- filter: self_attn |
|
value: 0.1101209698118704 |
|
- filter: mlp |
|
value: 0.2399169741437055 |
|
- value: 0.32311925187355206 |
|
- layer_range: [24, 26] |
|
model: ./Yosegi-0601 |
|
- sources: |
|
- layer_range: [26, 28] |
|
model: mistralai/Mistral-7B-v0.3 |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.9508674341172941 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.4312865186270921 |
|
- filter: mlp |
|
value: 0.28336325917543326 |
|
- value: 0.051826325177477234 |
|
- layer_range: [26, 28] |
|
model: meta-math/MetaMath-Mistral-7B |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.8945725432745376 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.03524133636598346 |
|
- filter: mlp |
|
value: 0.21426126710725438 |
|
- value: 0.31724116335002545 |
|
- layer_range: [26, 28] |
|
model: uukuguy/speechless-zephyr-code-functionary-7b |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.7138130384877139 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.04890129864608137 |
|
- filter: mlp |
|
value: 0.3324333287494201 |
|
- value: 0.11533647335498036 |
|
- layer_range: [26, 28] |
|
model: ./Ninja-v1-RP-expressive-v2-LoRA |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 0.9200281327001997 |
|
weight: |
|
- filter: self_attn |
|
value: 0.300842776105564 |
|
- filter: mlp |
|
value: 0.08363140003203932 |
|
- value: 0.2538677006866867 |
|
- layer_range: [26, 28] |
|
model: ./Yosegi-0601 |
|
- sources: |
|
- layer_range: [28, 30] |
|
model: mistralai/Mistral-7B-v0.3 |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.7116000185808022 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.10977758983122704 |
|
- filter: mlp |
|
value: 0.1839207861311269 |
|
- value: 0.5426174846632369 |
|
- layer_range: [28, 30] |
|
model: meta-math/MetaMath-Mistral-7B |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.8412049419861911 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.3517232690814979 |
|
- filter: mlp |
|
value: 0.11878679655495025 |
|
- value: 0.432611353923264 |
|
- layer_range: [28, 30] |
|
model: uukuguy/speechless-zephyr-code-functionary-7b |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.7196182744068202 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.29848623969081 |
|
- filter: mlp |
|
value: 0.034661358236493495 |
|
- value: 0.3438376072572394 |
|
- layer_range: [28, 30] |
|
model: ./Ninja-v1-RP-expressive-v2-LoRA |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.051511204430449285 |
|
- filter: mlp |
|
value: 0.3617968383178797 |
|
- value: 0.2578690795635758 |
|
- layer_range: [28, 30] |
|
model: ./Yosegi-0601 |
|
- sources: |
|
- layer_range: [30, 32] |
|
model: mistralai/Mistral-7B-v0.3 |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.7971002248466003 |
|
- filter: mlp |
|
value: 0.8931695149333363 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.07401430804790136 |
|
- filter: mlp |
|
value: 0.00696466997386886 |
|
- value: 0.08295038526296711 |
|
- layer_range: [30, 32] |
|
model: meta-math/MetaMath-Mistral-7B |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 1.0 |
|
- filter: mlp |
|
value: 0.8158777337631619 |
|
- value: 0.8348784699583887 |
|
weight: |
|
- filter: self_attn |
|
value: 0.26799125918248423 |
|
- filter: mlp |
|
value: 0.08176923813129498 |
|
- value: 0.030317330226146508 |
|
- layer_range: [30, 32] |
|
model: uukuguy/speechless-zephyr-code-functionary-7b |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.8188850632365792 |
|
- filter: mlp |
|
value: 0.7463831519693573 |
|
- value: 0.6515317051533988 |
|
weight: |
|
- filter: self_attn |
|
value: 0.21122007850953434 |
|
- filter: mlp |
|
value: 0.1463362342258229 |
|
- value: 0.09176704194956312 |
|
- layer_range: [30, 32] |
|
model: ./Ninja-v1-RP-expressive-v2-LoRA |
|
parameters: |
|
density: |
|
- filter: self_attn |
|
value: 0.9313941807354906 |
|
- filter: mlp |
|
value: 1.0 |
|
- value: 1.0 |
|
weight: |
|
- filter: self_attn |
|
value: 0.1443680121177074 |
|
- filter: mlp |
|
value: 0.08309606396368145 |
|
- value: 0.37059044424517035 |
|
- layer_range: [30, 32] |
|
model: ./Yosegi-0601 |
|
|
|
``` |
|
|