---
base_model:
- mistralai/Mistral-7B-v0.3
- meta-math/MetaMath-Mistral-7B
- uukuguy/speechless-zephyr-code-functionary-7b
library_name: transformers
tags:
- mergekit
- merge
---
# Yosegi-2

This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).

## Merge Details
### Merge Method

This model was merged using the [TIES](https://arxiv.org/abs/2306.01708) merge method, with ./Yosegi-0601 as the base.

### Models Merged

The following models were included in the merge:
* [mistralai/Mistral-7B-v0.3](https://huggingface.co/mistralai/Mistral-7B-v0.3)
* [meta-math/MetaMath-Mistral-7B](https://huggingface.co/meta-math/MetaMath-Mistral-7B)
* ./Ninja-v1-RP-expressive-v2-LoRA
* [uukuguy/speechless-zephyr-code-functionary-7b](https://huggingface.co/uukuguy/speechless-zephyr-code-functionary-7b)

### Configuration

The following YAML configuration was used to produce this model:

```yaml
base_model: ./Yosegi-0601
dtype: bfloat16
merge_method: ties
parameters:
  int8_mask: 1.0
  normalize: 0.0
slices:
- sources:
  - layer_range: [0, 2]
    model: mistralai/Mistral-7B-v0.3
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.9895701336232673
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.5057237984975562
      - filter: mlp
        value: 0.36247235528151495
      - value: 0.0076810835717692014
  - layer_range: [0, 2]
    model: meta-math/MetaMath-Mistral-7B
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 0.8239779346577963
      weight:
      - filter: self_attn
        value: 0.27499287617186813
      - filter: mlp
        value: 0.10579959634086915
      - value: 0.14502290477239704
  - layer_range: [0, 2]
    model: uukuguy/speechless-zephyr-code-functionary-7b
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.9654867628269999
      - value: 0.9584724004158125
      weight:
      - filter: self_attn
        value: 0.059719404899177556
      - filter: mlp
        value: 0.1299695859327612
      - value: 0.18821871354400985
  - layer_range: [0, 2]
    model: ./Ninja-v1-RP-expressive-v2-LoRA
    parameters:
      density:
      - filter: self_attn
        value: 0.9322987005873715
      - filter: mlp
        value: 0.8119693860979944
      - value: 0.7800996941956229
      weight:
      - filter: self_attn
        value: 0.14989333734000856
      - filter: mlp
        value: 0.20525182711733667
      - value: 0.0743540962371737
  - layer_range: [0, 2]
    model: ./Yosegi-0601
- sources:
  - layer_range: [2, 4]
    model: mistralai/Mistral-7B-v0.3
    parameters:
      density:
      - filter: self_attn
        value: 0.6361163471256639
      - filter: mlp
        value: 0.9983948965135213
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.2433049522842103
      - filter: mlp
        value: 0.11537153133586801
      - value: 0.11236945502439658
  - layer_range: [2, 4]
    model: meta-math/MetaMath-Mistral-7B
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.13087986863180992
      - filter: mlp
        value: 0.05060452788200992
      - value: 0.029882383396623725
  - layer_range: [2, 4]
    model: uukuguy/speechless-zephyr-code-functionary-7b
    parameters:
      density:
      - filter: self_attn
        value: 0.9938109261305853
      - filter: mlp
        value: 0.709432587913349
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.15343343058938377
      - filter: mlp
        value: 0.4105917936868785
      - value: 0.6078632204623161
  - layer_range: [2, 4]
    model: ./Ninja-v1-RP-expressive-v2-LoRA
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 0.9634269234020544
      weight:
      - filter: self_attn
        value: 0.03750763360681478
      - filter: mlp
        value: 0.29089122858987404
      - value: 0.3408085857388722
  - layer_range: [2, 4]
    model: ./Yosegi-0601
- sources:
  - layer_range: [4, 6]
    model: mistralai/Mistral-7B-v0.3
    parameters:
      density:
      - filter: self_attn
        value: 0.8057109303418598
      - filter: mlp
        value: 0.9954520808628292
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.02598285706585618
      - filter: mlp
        value: 0.06661629726622949
      - value: 0.1285191000066376
  - layer_range: [4, 6]
    model: meta-math/MetaMath-Mistral-7B
    parameters:
      density:
      - filter: self_attn
        value: 0.9112825916608848
      - filter: mlp
        value: 0.9322557507910056
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.18823564379986454
      - filter: mlp
        value: 0.4552822441636322
      - value: 0.5120525709221785
  - layer_range: [4, 6]
    model: uukuguy/speechless-zephyr-code-functionary-7b
    parameters:
      density:
      - filter: self_attn
        value: 0.9869122169774399
      - filter: mlp
        value: 1.0
      - value: 0.9751291459565757
      weight:
      - filter: self_attn
        value: 0.00493134813843582
      - filter: mlp
        value: 0.3008979965262413
      - value: 0.2528466849993097
  - layer_range: [4, 6]
    model: ./Ninja-v1-RP-expressive-v2-LoRA
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.8956512783019246
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.4197408619693966
      - filter: mlp
        value: 0.1448902874618845
      - value: 0.5196932662212128
  - layer_range: [4, 6]
    model: ./Yosegi-0601
- sources:
  - layer_range: [6, 8]
    model: mistralai/Mistral-7B-v0.3
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.05321377226808306
      - filter: mlp
        value: 0.0482589904702303
      - value: 0.433407006546336
  - layer_range: [6, 8]
    model: meta-math/MetaMath-Mistral-7B
    parameters:
      density:
      - filter: self_attn
        value: 0.8300482882633113
      - filter: mlp
        value: 0.8951636861593875
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.35952608658046414
      - filter: mlp
        value: 0.17385333183950857
      - value: 0.6366514725970246
  - layer_range: [6, 8]
    model: uukuguy/speechless-zephyr-code-functionary-7b
    parameters:
      density:
      - filter: self_attn
        value: 0.7848308077099464
      - filter: mlp
        value: 0.869549457974157
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.12433943050311849
      - filter: mlp
        value: 0.3065832590226165
      - value: 0.33138948726149514
  - layer_range: [6, 8]
    model: ./Ninja-v1-RP-expressive-v2-LoRA
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.11885967308786714
      - filter: mlp
        value: 0.29125668567121127
      - value: 0.19251901269486088
  - layer_range: [6, 8]
    model: ./Yosegi-0601
- sources:
  - layer_range: [8, 10]
    model: mistralai/Mistral-7B-v0.3
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.9429625513013793
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.4085396076816443
      - filter: mlp
        value: 0.038473657720644636
      - value: 0.35014489493395495
  - layer_range: [8, 10]
    model: meta-math/MetaMath-Mistral-7B
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.26957216810533163
      - filter: mlp
        value: 0.2393300696241166
      - value: 0.4735322427351712
  - layer_range: [8, 10]
    model: uukuguy/speechless-zephyr-code-functionary-7b
    parameters:
      density:
      - filter: self_attn
        value: 0.8594757954447017
      - filter: mlp
        value: 1.0
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.26101395702355007
      - filter: mlp
        value: 0.3147672140145126
      - value: 0.11658182776184756
  - layer_range: [8, 10]
    model: ./Ninja-v1-RP-expressive-v2-LoRA
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.6948062341711919
      - value: 0.9312401427737346
      weight:
      - filter: self_attn
        value: 0.1987774487170517
      - filter: mlp
        value: 0.5628384475763534
      - value: 0.2765378221890683
  - layer_range: [8, 10]
    model: ./Yosegi-0601
- sources:
  - layer_range: [10, 12]
    model: mistralai/Mistral-7B-v0.3
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.8230035654228713
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.1741591536775035
      - filter: mlp
        value: 0.30563583223301516
      - value: 0.2060419023239155
  - layer_range: [10, 12]
    model: meta-math/MetaMath-Mistral-7B
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.9991063013557119
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.1470996125766866
      - filter: mlp
        value: 0.06646481892400827
      - value: 0.2645489609472036
  - layer_range: [10, 12]
    model: uukuguy/speechless-zephyr-code-functionary-7b
    parameters:
      density:
      - filter: self_attn
        value: 0.6812899560643833
      - filter: mlp
        value: 0.9083104648631823
      - value: 0.9730062683598184
      weight:
      - filter: self_attn
        value: 0.14278507832578724
      - filter: mlp
        value: 0.3475945971407978
      - value: 0.40266546962595284
  - layer_range: [10, 12]
    model: ./Ninja-v1-RP-expressive-v2-LoRA
    parameters:
      density:
      - filter: self_attn
        value: 0.7047231879232164
      - filter: mlp
        value: 0.9148432633716144
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.15341559366405985
      - filter: mlp
        value: 0.20047704006010095
      - value: 0.17364445581398172
  - layer_range: [10, 12]
    model: ./Yosegi-0601
- sources:
  - layer_range: [12, 14]
    model: mistralai/Mistral-7B-v0.3
    parameters:
      density:
      - filter: self_attn
        value: 0.6974090973508299
      - filter: mlp
        value: 1.0
      - value: 0.9553573565285324
      weight:
      - filter: self_attn
        value: 0.03614401712451334
      - filter: mlp
        value: 0.1287785039219736
      - value: 0.3780545754310749
  - layer_range: [12, 14]
    model: meta-math/MetaMath-Mistral-7B
    parameters:
      density:
      - filter: self_attn
        value: 0.7857328784783159
      - filter: mlp
        value: 1.0
      - value: 0.6631303877423032
      weight:
      - filter: self_attn
        value: 0.21728574423632604
      - filter: mlp
        value: 0.22813107248290188
      - value: 0.1435266378249425
  - layer_range: [12, 14]
    model: uukuguy/speechless-zephyr-code-functionary-7b
    parameters:
      density:
      - filter: self_attn
        value: 0.7579910864422339
      - filter: mlp
        value: 1.0
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.21526786827735228
      - filter: mlp
        value: 0.19769619474642783
      - value: 0.49420458585638627
  - layer_range: [12, 14]
    model: ./Ninja-v1-RP-expressive-v2-LoRA
    parameters:
      density:
      - filter: self_attn
        value: 0.8379590665264793
      - filter: mlp
        value: 1.0
      - value: 0.6778673543559375
      weight:
      - filter: self_attn
        value: 0.060679858649663874
      - filter: mlp
        value: 0.17248738428562518
      - value: 0.05145640258269078
  - layer_range: [12, 14]
    model: ./Yosegi-0601
- sources:
  - layer_range: [14, 16]
    model: mistralai/Mistral-7B-v0.3
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.8193296716327286
      - value: 0.709644132681917
      weight:
      - filter: self_attn
        value: 0.09821428505487592
      - filter: mlp
        value: 0.0039875777021436964
      - value: 0.27550746634944184
  - layer_range: [14, 16]
    model: meta-math/MetaMath-Mistral-7B
    parameters:
      density:
      - filter: self_attn
        value: 0.9420135087156387
      - filter: mlp
        value: 1.0
      - value: 0.9478569230341948
      weight:
      - filter: self_attn
        value: 0.32640822225239857
      - filter: mlp
        value: 0.28189746971019747
      - value: 0.09777040841174603
  - layer_range: [14, 16]
    model: uukuguy/speechless-zephyr-code-functionary-7b
    parameters:
      density:
      - filter: self_attn
        value: 0.9811539353914964
      - filter: mlp
        value: 1.0
      - value: 0.9947034500579488
      weight:
      - filter: self_attn
        value: 0.015308461456516246
      - filter: mlp
        value: 0.0018966958379955934
      - value: 0.24275389952300747
  - layer_range: [14, 16]
    model: ./Ninja-v1-RP-expressive-v2-LoRA
    parameters:
      density:
      - filter: self_attn
        value: 0.9022355771447704
      - filter: mlp
        value: 1.0
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.03331841447575224
      - filter: mlp
        value: 0.03561712850019841
      - value: 0.16096143804589919
  - layer_range: [14, 16]
    model: ./Yosegi-0601
- sources:
  - layer_range: [16, 18]
    model: mistralai/Mistral-7B-v0.3
    parameters:
      density:
      - filter: self_attn
        value: 0.8813466618200871
      - filter: mlp
        value: 1.0
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.20435001101909528
      - filter: mlp
        value: 0.1516594727144469
      - value: 0.2269819409999868
  - layer_range: [16, 18]
    model: meta-math/MetaMath-Mistral-7B
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 0.8113796412034742
      weight:
      - filter: self_attn
        value: 0.23760349395229585
      - filter: mlp
        value: 0.1725436279774783
      - value: 0.5818814139457673
  - layer_range: [16, 18]
    model: uukuguy/speechless-zephyr-code-functionary-7b
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.9307369835995082
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.0673898519051937
      - filter: mlp
        value: 0.049368399457210624
      - value: 0.2621269048339309
  - layer_range: [16, 18]
    model: ./Ninja-v1-RP-expressive-v2-LoRA
    parameters:
      density:
      - filter: self_attn
        value: 0.8219541044757637
      - filter: mlp
        value: 1.0
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.21320061393511042
      - filter: mlp
        value: 0.09188781867337345
      - value: 0.27266490524762327
  - layer_range: [16, 18]
    model: ./Yosegi-0601
- sources:
  - layer_range: [18, 20]
    model: mistralai/Mistral-7B-v0.3
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.7993530327131696
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.20420262433348008
      - filter: mlp
        value: 0.43400570066910155
      - value: 0.13720822682656159
  - layer_range: [18, 20]
    model: meta-math/MetaMath-Mistral-7B
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 0.7035563346885239
      weight:
      - filter: self_attn
        value: 0.3313523263002212
      - filter: mlp
        value: 0.356035051194268
      - value: 0.4742357680522683
  - layer_range: [18, 20]
    model: uukuguy/speechless-zephyr-code-functionary-7b
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.2475654838180605
      - filter: mlp
        value: 0.35095371882044646
      - value: 0.18536862919946695
  - layer_range: [18, 20]
    model: ./Ninja-v1-RP-expressive-v2-LoRA
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.02997204931537696
      - filter: mlp
        value: 0.4103581291392323
      - value: 0.19313933251158066
  - layer_range: [18, 20]
    model: ./Yosegi-0601
- sources:
  - layer_range: [20, 22]
    model: mistralai/Mistral-7B-v0.3
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 0.5321196166337413
      weight:
      - filter: self_attn
        value: 0.17930537920958298
      - filter: mlp
        value: 0.07662274511683252
      - value: 0.1354315278471591
  - layer_range: [20, 22]
    model: meta-math/MetaMath-Mistral-7B
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.3768803907042144
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.1592147705254305
      - filter: mlp
        value: 0.18410207999201075
      - value: 0.4928015910047033
  - layer_range: [20, 22]
    model: uukuguy/speechless-zephyr-code-functionary-7b
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.37897278298418885
      - filter: mlp
        value: 0.0952591073533606
      - value: 0.03551732810121447
  - layer_range: [20, 22]
    model: ./Ninja-v1-RP-expressive-v2-LoRA
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.2682334102128691
      - filter: mlp
        value: 0.33485781481395227
      - value: 0.3395139468281392
  - layer_range: [20, 22]
    model: ./Yosegi-0601
- sources:
  - layer_range: [22, 24]
    model: mistralai/Mistral-7B-v0.3
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.8002588203446623
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.2549204541625693
      - filter: mlp
        value: 0.3722418477156178
      - value: 0.2410463731352089
  - layer_range: [22, 24]
    model: meta-math/MetaMath-Mistral-7B
    parameters:
      density:
      - filter: self_attn
        value: 0.9220873255898425
      - filter: mlp
        value: 1.0
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.487455295718532
      - filter: mlp
        value: 0.40022413917173594
      - value: 0.17846009757502157
  - layer_range: [22, 24]
    model: uukuguy/speechless-zephyr-code-functionary-7b
    parameters:
      density:
      - filter: self_attn
        value: 0.7696341317318985
      - filter: mlp
        value: 1.0
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.011267799816515114
      - filter: mlp
        value: 0.5320959832591042
      - value: 0.17095406531325266
  - layer_range: [22, 24]
    model: ./Ninja-v1-RP-expressive-v2-LoRA
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.556101646343872
      - filter: mlp
        value: 0.5470253909079791
      - value: 0.13241555469863223
  - layer_range: [22, 24]
    model: ./Yosegi-0601
- sources:
  - layer_range: [24, 26]
    model: mistralai/Mistral-7B-v0.3
    parameters:
      density:
      - filter: self_attn
        value: 0.8667033674916582
      - filter: mlp
        value: 1.0
      - value: 0.9446091486920749
      weight:
      - filter: self_attn
        value: 0.4134110775513897
      - filter: mlp
        value: 0.0181822765943834
      - value: 0.22797659617038232
  - layer_range: [24, 26]
    model: meta-math/MetaMath-Mistral-7B
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.9839865829690491
      - value: 0.8252981103449059
      weight:
      - filter: self_attn
        value: 0.3310295320944009
      - filter: mlp
        value: 0.05341478458353629
      - value: 0.3588847186159219
  - layer_range: [24, 26]
    model: uukuguy/speechless-zephyr-code-functionary-7b
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.8834823812212265
      - value: 0.8195593509048733
      weight:
      - filter: self_attn
        value: 0.3778012590489552
      - filter: mlp
        value: 0.2553204906819882
      - value: 0.23250565137970108
  - layer_range: [24, 26]
    model: ./Ninja-v1-RP-expressive-v2-LoRA
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.7731602497153744
      - value: 0.8647152680789973
      weight:
      - filter: self_attn
        value: 0.1101209698118704
      - filter: mlp
        value: 0.2399169741437055
      - value: 0.32311925187355206
  - layer_range: [24, 26]
    model: ./Yosegi-0601
- sources:
  - layer_range: [26, 28]
    model: mistralai/Mistral-7B-v0.3
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.9508674341172941
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.4312865186270921
      - filter: mlp
        value: 0.28336325917543326
      - value: 0.051826325177477234
  - layer_range: [26, 28]
    model: meta-math/MetaMath-Mistral-7B
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.8945725432745376
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.03524133636598346
      - filter: mlp
        value: 0.21426126710725438
      - value: 0.31724116335002545
  - layer_range: [26, 28]
    model: uukuguy/speechless-zephyr-code-functionary-7b
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.7138130384877139
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.04890129864608137
      - filter: mlp
        value: 0.3324333287494201
      - value: 0.11533647335498036
  - layer_range: [26, 28]
    model: ./Ninja-v1-RP-expressive-v2-LoRA
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 0.9200281327001997
      weight:
      - filter: self_attn
        value: 0.300842776105564
      - filter: mlp
        value: 0.08363140003203932
      - value: 0.2538677006866867
  - layer_range: [26, 28]
    model: ./Yosegi-0601
- sources:
  - layer_range: [28, 30]
    model: mistralai/Mistral-7B-v0.3
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.7116000185808022
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.10977758983122704
      - filter: mlp
        value: 0.1839207861311269
      - value: 0.5426174846632369
  - layer_range: [28, 30]
    model: meta-math/MetaMath-Mistral-7B
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.8412049419861911
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.3517232690814979
      - filter: mlp
        value: 0.11878679655495025
      - value: 0.432611353923264
  - layer_range: [28, 30]
    model: uukuguy/speechless-zephyr-code-functionary-7b
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.7196182744068202
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.29848623969081
      - filter: mlp
        value: 0.034661358236493495
      - value: 0.3438376072572394
  - layer_range: [28, 30]
    model: ./Ninja-v1-RP-expressive-v2-LoRA
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.051511204430449285
      - filter: mlp
        value: 0.3617968383178797
      - value: 0.2578690795635758
  - layer_range: [28, 30]
    model: ./Yosegi-0601
- sources:
  - layer_range: [30, 32]
    model: mistralai/Mistral-7B-v0.3
    parameters:
      density:
      - filter: self_attn
        value: 0.7971002248466003
      - filter: mlp
        value: 0.8931695149333363
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.07401430804790136
      - filter: mlp
        value: 0.00696466997386886
      - value: 0.08295038526296711
  - layer_range: [30, 32]
    model: meta-math/MetaMath-Mistral-7B
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.8158777337631619
      - value: 0.8348784699583887
      weight:
      - filter: self_attn
        value: 0.26799125918248423
      - filter: mlp
        value: 0.08176923813129498
      - value: 0.030317330226146508
  - layer_range: [30, 32]
    model: uukuguy/speechless-zephyr-code-functionary-7b
    parameters:
      density:
      - filter: self_attn
        value: 0.8188850632365792
      - filter: mlp
        value: 0.7463831519693573
      - value: 0.6515317051533988
      weight:
      - filter: self_attn
        value: 0.21122007850953434
      - filter: mlp
        value: 0.1463362342258229
      - value: 0.09176704194956312
  - layer_range: [30, 32]
    model: ./Ninja-v1-RP-expressive-v2-LoRA
    parameters:
      density:
      - filter: self_attn
        value: 0.9313941807354906
      - filter: mlp
        value: 1.0
      - value: 1.0
      weight:
      - filter: self_attn
        value: 0.1443680121177074
      - filter: mlp
        value: 0.08309606396368145
      - value: 0.37059044424517035
  - layer_range: [30, 32]
    model: ./Yosegi-0601
```
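
For intuition about what the per-tensor `density` and `weight` parameters above control: TIES sparsifies each model's task vector (its delta from the base), keeping only the top `density` fraction of entries by magnitude, then resolves sign conflicts across models by majority vote before summing the weighted survivors. The following is a simplified single-tensor sketch of that procedure, not mergekit's actual implementation; `ties_merge` is a hypothetical helper written for illustration.

```python
import torch

def ties_merge(base, tuned, densities, weights):
    """Illustrative TIES merge for one tensor: trim, elect sign, disjoint merge."""
    deltas = []
    for t, density in zip(tuned, densities):
        delta = t - base  # task vector for this fine-tune
        # Trim: zero out everything below the top-k magnitude threshold.
        k = max(1, int(density * delta.numel()))
        threshold = delta.abs().flatten().topk(k).values.min()
        deltas.append(torch.where(delta.abs() >= threshold, delta, torch.zeros_like(delta)))
    stacked = torch.stack([w * d for w, d in zip(weights, deltas)])
    # Elect sign: per-parameter majority sign, decided by total weighted mass.
    sign = stacked.sum(dim=0).sign()
    # Disjoint merge: sum only the entries that agree with the elected sign.
    # (normalize: 0.0 in the config above means this sum is not renormalized.)
    merged = torch.where(stacked.sign() == sign, stacked, torch.zeros_like(stacked)).sum(dim=0)
    return base + merged

# Toy check with random tensors:
base = torch.zeros(4, 4)
tuned = [base + torch.randn(4, 4) for _ in range(3)]
print(ties_merge(base, tuned, densities=[0.5, 0.5, 0.5], weights=[0.3, 0.3, 0.4]))
```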
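
To reproduce the merge, the usual route is mergekit's CLI (something like `mergekit-yaml config.yaml ./Yosegi-2 --cuda`, with the YAML above saved as `config.yaml`). The sketch below instead follows the programmatic API from mergekit's README; it assumes the local checkpoints `./Yosegi-0601` and `./Ninja-v1-RP-expressive-v2-LoRA` referenced in the config are present in the working directory.

```python
import torch
import yaml

from mergekit.config import MergeConfiguration
from mergekit.merge import MergeOptions, run_merge

# Load the merge recipe shown above.
with open("config.yaml", "r", encoding="utf-8") as fp:
    merge_config = MergeConfiguration.model_validate(yaml.safe_load(fp))

run_merge(
    merge_config,
    out_path="./Yosegi-2",
    options=MergeOptions(
        cuda=torch.cuda.is_available(),  # merge on GPU when available
        copy_tokenizer=True,             # carry a tokenizer into the output
    ),
)
```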
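
## Usage

A minimal loading sketch with transformers; `bfloat16` matches the merge dtype, and the model id below is a placeholder, so substitute the actual repo id or a local path.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "path/to/Yosegi-2"  # placeholder: replace with the actual repo id or local path

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # same dtype the merge was performed in
    device_map="auto",
)

inputs = tokenizer("The quick brown fox", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```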