allow different user and assistant end-token (#375)
* allow different user and assistant end-token
For models like Llama2, the EndToken is not the same for a userMessage
and an assistantMessage. This implements `userMessageEndToken` and
`assistantMessageEndToken`, which override the messageEndToken
behavior.
This PR also allows empty strings as userMessageToken and
assistantMessageToken and makes this the default. This adds additional
flexibility, which is required in the case of Llama2 where the first
userMessage is effectively different because of the system message.
Note that because `userMessageEndToken` and `assistantMessageToken` are
nearly always concatenated, it is almost redundant to have both. The
exception is `generateQuery` for websearch, which has several
consecutive user messages.
* Make model branding customizable based on env var (#345)
* rm open assistant branding
* Update SettingsModal.svelte
* make settings work with a dynamic list of models
* fixed types
---------
Co-authored-by: Nathan Sarrazin <sarrazin.nathan@gmail.com>
* trim and remove stop-suffixes from summary (#369)
The chat generation removes parameters.stop and <|endoftext|>
from the generated text, and additionally trims trailing whitespace.
This PR copies that behavior to the summarize functionality, when the
summary is produced by the chat model.
* add a login button when users are logged out (#381)
* add fallback to message end token if there's no specified tokens for user & assistant
---------
Co-authored-by: Florian Zimmermeister <flozi00.fz@gmail.com>
Co-authored-by: Nathan Sarrazin <sarrazin.nathan@gmail.com>
- README.md +2 -0
- src/lib/buildPrompt.ts +12 -11
- src/lib/server/models.ts +7 -3
- src/lib/server/websearch/generateQuery.ts +3 -3
- src/lib/server/websearch/summarizeWeb.ts +2 -2
@@ -121,6 +121,8 @@ MODELS=`[
|
|
121 |
"userMessageToken": "<|prompter|>", # This does not need to be a token, can be any string
|
122 |
"assistantMessageToken": "<|assistant|>", # This does not need to be a token, can be any string
|
123 |
"messageEndToken": "<|endoftext|>", # This does not need to be a token, can be any string
|
|
|
|
|
124 |
"preprompt": "Below are a series of dialogues between various people and an AI assistant. The AI tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble-but-knowledgeable. The assistant is happy to help with almost anything, and will do its best to understand exactly what is needed. It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer. That said, the assistant is practical and really does its best, and doesn't let caution get too much in the way of being useful.\n-----\n",
|
125 |
"promptExamples": [
|
126 |
{
|
|
|
121 |
"userMessageToken": "<|prompter|>", # This does not need to be a token, can be any string
|
122 |
"assistantMessageToken": "<|assistant|>", # This does not need to be a token, can be any string
|
123 |
"messageEndToken": "<|endoftext|>", # This does not need to be a token, can be any string
|
124 |
+
# "userMessageEndToken": "", # Applies only to user messages; when specified, messageEndToken has no effect on them. Can be any string.
|
125 |
+
# "assistantMessageEndToken": "", # Applies only to assistant messages; when specified, messageEndToken has no effect on them. Can be any string.
|
126 |
"preprompt": "Below are a series of dialogues between various people and an AI assistant. The AI tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble-but-knowledgeable. The assistant is happy to help with almost anything, and will do its best to understand exactly what is needed. It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer. That said, the assistant is practical and really does its best, and doesn't let caution get too much in the way of being useful.\n-----\n",
|
127 |
"promptExamples": [
|
128 |
{
|
@@ -13,18 +13,19 @@ export async function buildPrompt(
|
|
13 |
model: BackendModel,
|
14 |
webSearchId?: string
|
15 |
): Promise<string> {
|
|
|
|
|
|
|
16 |
const prompt =
|
17 |
messages
|
18 |
-
.map(
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
: model.messageEndToken
|
27 |
-
: "")
|
28 |
)
|
29 |
.join("") + model.assistantMessageToken;
|
30 |
|
@@ -41,7 +42,7 @@ export async function buildPrompt(
|
|
41 |
webPrompt =
|
42 |
model.assistantMessageToken +
|
43 |
`The following context was found while searching the internet: ${webSearch.summary}` +
|
44 |
-
model.
|
45 |
}
|
46 |
}
|
47 |
const finalPrompt =
|
|
|
13 |
model: BackendModel,
|
14 |
webSearchId?: string
|
15 |
): Promise<string> {
|
16 |
+
const userEndToken = model.userMessageEndToken ?? model.messageEndToken;
|
17 |
+
const assistantEndToken = model.assistantMessageEndToken ?? model.messageEndToken;
|
18 |
+
|
19 |
const prompt =
|
20 |
messages
|
21 |
+
.map((m) =>
|
22 |
+
m.from === "user"
|
23 |
+
? model.userMessageToken +
|
24 |
+
m.content +
|
25 |
+
(m.content.endsWith(userEndToken) ? "" : userEndToken)
|
26 |
+
: model.assistantMessageToken +
|
27 |
+
m.content +
|
28 |
+
(m.content.endsWith(assistantEndToken) ? "" : assistantEndToken)
|
|
|
|
|
29 |
)
|
30 |
.join("") + model.assistantMessageToken;
|
31 |
|
|
|
42 |
webPrompt =
|
43 |
model.assistantMessageToken +
|
44 |
`The following context was found while searching the internet: ${webSearch.summary}` +
|
45 |
+
model.assistantMessageEndToken;
|
46 |
}
|
47 |
}
|
48 |
const finalPrompt =
|
@@ -14,9 +14,11 @@ const modelsRaw = z
|
|
14 |
modelUrl: z.string().url().optional(),
|
15 |
datasetName: z.string().min(1).optional(),
|
16 |
datasetUrl: z.string().url().optional(),
|
17 |
-
userMessageToken: z.string()
|
18 |
-
|
19 |
-
|
|
|
|
|
20 |
preprompt: z.string().default(""),
|
21 |
prepromptUrl: z.string().url().optional(),
|
22 |
promptExamples: z
|
@@ -52,6 +54,8 @@ const modelsRaw = z
|
|
52 |
export const models = await Promise.all(
|
53 |
modelsRaw.map(async (m) => ({
|
54 |
...m,
|
|
|
|
|
55 |
id: m.id || m.name,
|
56 |
displayName: m.displayName || m.name,
|
57 |
preprompt: m.prepromptUrl ? await fetch(m.prepromptUrl).then((r) => r.text()) : m.preprompt,
|
|
|
14 |
modelUrl: z.string().url().optional(),
|
15 |
datasetName: z.string().min(1).optional(),
|
16 |
datasetUrl: z.string().url().optional(),
|
17 |
+
userMessageToken: z.string(),
|
18 |
+
userMessageEndToken: z.string().default(""),
|
19 |
+
assistantMessageToken: z.string(),
|
20 |
+
assistantMessageEndToken: z.string().default(""),
|
21 |
+
messageEndToken: z.string().default(""),
|
22 |
preprompt: z.string().default(""),
|
23 |
prepromptUrl: z.string().url().optional(),
|
24 |
promptExamples: z
|
|
|
54 |
export const models = await Promise.all(
|
55 |
modelsRaw.map(async (m) => ({
|
56 |
...m,
|
57 |
+
userMessageEndToken: m?.userMessageEndToken || m?.messageEndToken,
|
58 |
+
assistantMessageEndToken: m?.assistantMessageEndToken || m?.messageEndToken,
|
59 |
id: m.id || m.name,
|
60 |
displayName: m.displayName || m.name,
|
61 |
preprompt: m.prepromptUrl ? await fetch(m.prepromptUrl).then((r) => r.text()) : m.preprompt,
|
@@ -6,13 +6,13 @@ export async function generateQuery(messages: Message[], model: BackendModel) {
|
|
6 |
const promptSearchQuery =
|
7 |
model.userMessageToken +
|
8 |
"The following messages were written by a user, trying to answer a question." +
|
9 |
-
model.
|
10 |
messages
|
11 |
.filter((message) => message.from === "user")
|
12 |
-
.map((message) => model.userMessageToken + message.content + model.
|
13 |
model.userMessageToken +
|
14 |
"What plain-text english sentence would you input into Google to answer the last question? Answer with a short (10 words max) simple sentence." +
|
15 |
-
model.
|
16 |
model.assistantMessageToken +
|
17 |
"Query: ";
|
18 |
|
|
|
6 |
const promptSearchQuery =
|
7 |
model.userMessageToken +
|
8 |
"The following messages were written by a user, trying to answer a question." +
|
9 |
+
model.userMessageEndToken +
|
10 |
messages
|
11 |
.filter((message) => message.from === "user")
|
12 |
+
.map((message) => model.userMessageToken + message.content + model.userMessageEndToken) +
|
13 |
model.userMessageToken +
|
14 |
"What plain-text english sentence would you input into Google to answer the last question? Answer with a short (10 words max) simple sentence." +
|
15 |
+
model.userMessageEndToken +
|
16 |
model.assistantMessageToken +
|
17 |
"Query: ";
|
18 |
|
@@ -29,10 +29,10 @@ export async function summarizeWeb(content: string, query: string, model: Backen
|
|
29 |
.split(" ")
|
30 |
.slice(0, model.parameters?.truncate ?? 0)
|
31 |
.join(" ") +
|
32 |
-
model.
|
33 |
model.userMessageToken +
|
34 |
`The text above should be summarized to best answer the query: ${query}.` +
|
35 |
-
model.
|
36 |
model.assistantMessageToken +
|
37 |
"Summary: ";
|
38 |
|
|
|
29 |
.split(" ")
|
30 |
.slice(0, model.parameters?.truncate ?? 0)
|
31 |
.join(" ") +
|
32 |
+
model.userMessageEndToken +
|
33 |
model.userMessageToken +
|
34 |
`The text above should be summarized to best answer the query: ${query}.` +
|
35 |
+
model.userMessageEndToken +
|
36 |
model.assistantMessageToken +
|
37 |
"Summary: ";
|
38 |
|