base_model: TroyDoesAI/BlackSheep-4B
gate_mode: random # one of "hidden", "cheap_embed", or "random"
# Three methods for populating the MoE gates are implemented:
#
# "hidden"
#   Uses the hidden state representations of the positive/negative prompts for
#   MoE gate parameters. Best quality and most effective option; the default.
#   Requires evaluating each prompt with the base model, so you might not be
#   able to use this on constrained hardware (depending on the model). You can
#   use --load-in-8bit or --load-in-4bit to reduce VRAM usage.
#
# "cheap_embed"
#   Uses only the raw token embedding of the prompts, with the same gate
#   parameters for every layer. Distinctly less effective than "hidden", but
#   can be run on much, much lower-end hardware.
#
# "random"
#   Randomly initializes the MoE gates. Good if you are going to fine-tune the
#   model afterwards, or maybe if you want something a little unhinged? I
#   won't judge.
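#
# Note: since gate_mode is "random" here, the gates are initialized randomly
# rather than seeded from the positive_prompts listed under each expert below;
# the prompts chiefly document the intended routing topics and make it easy to
# re-run the merge with "hidden" or "cheap_embed". An invocation sketch is
# given at the end of this file.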
experts:
- source_model: TroyDoesAI/BlackSheep-4B
positive_prompts:
- "digital"
- "soul"
- "<|assistant|>"
- "instruction"
- "input"
- "output"
- "response"
- "story"
- "stories"
- "creative"
- "curious"
- "action"
- "sensitive"
- "pleasant"
- "thoughtful"
- "interesting"
- "BEGININPUT"
- "BEGINCONTEXT"
- "ENDCONTEXT"
- "ENDINPUT"
- "BEGININSTRUCTION"
- "ENDINSTRUCTION"
- "url"
- "date"
- "source"
- "context"
- "cite"
- "mermaid"
- "flow"
- "diagram"
- "sequence"
- "series"
- "code to flow"
- "code to diagram"
- "python to flow diagram"
- "program to flow"
- "program to flow diagram"
- "write"
- "create"
- "visualize"
- "show"
- "explain"
- "draw"
- "describe"
- "summarize"
- "concise"
- "do"
- "say"
- "code"
- "```"
- "```mermaid"
- "graphTB"
- "graphTD"
- "graphLR"
- "graphRL"
- source_model: TroyDoesAI/BlackSheep-4B
positive_prompts:
- "Digital Soul"
- "hello"
- "you"
- "assistant"
- "who"
- "what"
- "when"
- "where"
- "why"
- "how"
- "me"
- "politic"
- "conservative"
- source_model: TroyDoesAI/BlackSheep-4B
positive_prompts:
- "<|assistant|>"
- "instruction"
- "input"
- "output"
- "response"
- "story"
- "stories"
- "creative"
- "curious"
- "action"
- "sensitive"
- "pleasant"
- "thoughtful"
- "interesting"
- "BEGININPUT"
- "BEGINCONTEXT"
- "ENDCONTEXT"
- "ENDINPUT"
- "BEGININSTRUCTION"
- "ENDINSTRUCTION"
- "url"
- "date"
- "source"
- "context"
- "cite"
- "mermaid"
- "flow"
- "diagram"
- "sequence"
- "series"
- "code to flow"
- "code to diagram"
- "python to flow diagram"
- "program to flow"
- "program to flow diagram"
- "write"
- "create"
- "visualize"
- "show"
- "explain"
- "draw"
- "describe"
- "summarize"
- "concise"
- "do"
- "say"
- "code"
- "```"
- "```mermaid"
- "graphTB"
- "graphTD"
- "graphLR"
- "graphRL"