nsarrazin HF staff Carolyn Marie committed on
Commit
6434339
·
unverified ·
1 Parent(s): aa18b4d

Refactor of websearch (#281)

Browse files

* broke up websearch into multiple endpoints

* Refactored loading to use the SvelteKit `load` function instead of client-side fetching

* lint

* Chat Logo Home Screen Bookmark icons for iOS (#279)

* prettier fix for #279

* fix eslint

---------

Co-authored-by: Carolyn Marie <99927612+CarolynM8ri3@users.noreply.github.com>

src/lib/components/OpenWebSearchResults.svelte CHANGED
@@ -7,41 +7,17 @@
7
 
8
  import EosIconsLoading from "~icons/eos-icons/loading";
9
 
10
- import { base } from "$app/paths";
11
- import { onMount } from "svelte";
12
-
13
  export let loading = false;
14
  export let classNames = "";
15
- export let webSearchId: string | undefined;
16
  export let webSearchMessages: WebSearchMessage[] = [];
17
 
18
  let detailsOpen: boolean;
19
  let error: boolean;
20
- onMount(() => {
21
- if (webSearchMessages.length === 0 && webSearchId) {
22
- fetch(`${base}/search/${webSearchId}`)
23
- .then((res) => res.json())
24
- .then((res) => {
25
- webSearchMessages = [...res.messages, { type: "result", id: webSearchId }];
26
- })
27
- .catch((err) => console.log(err));
28
- }
29
- });
30
  $: error = webSearchMessages.some((message) => message.type === "error");
31
  </script>
32
 
33
  <details
34
  class="flex w-fit rounded-xl border border-gray-200 bg-white shadow-sm dark:border-gray-800 dark:bg-gray-900 {classNames} max-w-full"
35
- on:toggle={() => {
36
- if (webSearchMessages.length === 0 && webSearchId) {
37
- fetch(`${base}/search/${webSearchId}`)
38
- .then((res) => res.json())
39
- .then((res) => {
40
- webSearchMessages = [...res.messages, { type: "result", id: webSearchId }];
41
- })
42
- .catch((err) => console.log(err));
43
- }
44
- }}
45
  bind:open={detailsOpen}
46
  >
47
  <summary
 
7
 
8
  import EosIconsLoading from "~icons/eos-icons/loading";
9
 
 
 
 
10
  export let loading = false;
11
  export let classNames = "";
 
12
  export let webSearchMessages: WebSearchMessage[] = [];
13
 
14
  let detailsOpen: boolean;
15
  let error: boolean;
 
 
 
 
 
 
 
 
 
 
16
  $: error = webSearchMessages.some((message) => message.type === "error");
17
  </script>
18
 
19
  <details
20
  class="flex w-fit rounded-xl border border-gray-200 bg-white shadow-sm dark:border-gray-800 dark:bg-gray-900 {classNames} max-w-full"
 
 
 
 
 
 
 
 
 
 
21
  bind:open={detailsOpen}
22
  >
23
  <summary
src/lib/components/chat/ChatMessage.svelte CHANGED
@@ -46,7 +46,6 @@
46
  export let isAuthor = true;
47
  export let readOnly = false;
48
  export let isTapped = false;
49
- export let isLast = false;
50
 
51
  export let webSearchMessages: WebSearchMessage[] = [];
52
 
@@ -99,9 +98,8 @@
99
  let webSearchIsDone = true;
100
 
101
  $: webSearchIsDone =
102
- !!message.webSearchId ||
103
- (webSearchMessages.length > 0 &&
104
- webSearchMessages[webSearchMessages.length - 1].type === "result");
105
  </script>
106
 
107
  {#if message.from === "assistant"}
@@ -118,17 +116,14 @@
118
  <div
119
  class="relative min-h-[calc(2rem+theme(spacing[3.5])*2)] min-w-[60px] break-words rounded-2xl border border-gray-100 bg-gradient-to-br from-gray-50 px-5 py-3.5 text-gray-600 prose-pre:my-2 dark:border-gray-800 dark:from-gray-800/40 dark:text-gray-300"
120
  >
121
- {#if message.webSearchId || (webSearchMessages.length > 0 && isLast)}
122
- {#key (message.webSearchId, message.score, loading)}
123
- <OpenWebSearchResults
124
- classNames={tokens.length ? "mb-3.5" : ""}
125
- webSearchId={message.webSearchId}
126
- {webSearchMessages}
127
- loading={!webSearchIsDone}
128
- />
129
- {/key}
130
  {/if}
131
- {#if !message.content && (webSearchIsDone || webSearchMessages.length === 0)}
132
  <IconLoading />
133
  {/if}
134
 
 
46
  export let isAuthor = true;
47
  export let readOnly = false;
48
  export let isTapped = false;
 
49
 
50
  export let webSearchMessages: WebSearchMessage[] = [];
51
 
 
98
  let webSearchIsDone = true;
99
 
100
  $: webSearchIsDone =
101
+ webSearchMessages.length > 0 &&
102
+ webSearchMessages[webSearchMessages.length - 1].type === "result";
 
103
  </script>
104
 
105
  {#if message.from === "assistant"}
 
116
  <div
117
  class="relative min-h-[calc(2rem+theme(spacing[3.5])*2)] min-w-[60px] break-words rounded-2xl border border-gray-100 bg-gradient-to-br from-gray-50 px-5 py-3.5 text-gray-600 prose-pre:my-2 dark:border-gray-800 dark:from-gray-800/40 dark:text-gray-300"
118
  >
119
+ {#if webSearchMessages && webSearchMessages.length > 0}
120
+ <OpenWebSearchResults
121
+ classNames={tokens.length ? "mb-3.5" : ""}
122
+ {webSearchMessages}
123
+ loading={!webSearchIsDone}
124
+ />
 
 
 
125
  {/if}
126
+ {#if !message.content && (webSearchIsDone || (webSearchMessages && webSearchMessages.length === 0))}
127
  <IconLoading />
128
  {/if}
129
 
src/lib/components/chat/ChatMessages.svelte CHANGED
@@ -9,7 +9,6 @@
9
  import ChatIntroduction from "./ChatIntroduction.svelte";
10
  import ChatMessage from "./ChatMessage.svelte";
11
  import type { WebSearchMessage } from "$lib/types/WebSearch";
12
- import { page } from "$app/stores";
13
 
14
  export let messages: Message[];
15
  export let loading: boolean;
@@ -19,7 +18,9 @@
19
  export let settings: LayoutData["settings"];
20
  export let models: Model[];
21
  export let readOnly: boolean;
 
22
 
 
23
  let chatContainer: HTMLElement;
24
 
25
  export let webSearchMessages: WebSearchMessage[] = [];
@@ -33,6 +34,17 @@
33
  $: if (messages[messages.length - 1]?.from === "user") {
34
  scrollToBottom();
35
  }
 
 
 
 
 
 
 
 
 
 
 
36
  </script>
37
 
38
  <div
@@ -42,19 +54,16 @@
42
  >
43
  <div class="mx-auto flex h-full max-w-3xl flex-col gap-6 px-5 pt-6 sm:gap-8 xl:max-w-4xl">
44
  {#each messages as message, i}
45
- {#key (message.id, $page.params.id)}
46
- <ChatMessage
47
- loading={loading && i === messages.length - 1}
48
- {message}
49
- {isAuthor}
50
- {readOnly}
51
- model={currentModel}
52
- {webSearchMessages}
53
- isLast={i === messages.length - 1}
54
- on:retry
55
- on:vote
56
- />
57
- {/key}
58
  {:else}
59
  <ChatIntroduction {settings} {models} {currentModel} on:message />
60
  {/each}
@@ -62,7 +71,6 @@
62
  <ChatMessage
63
  message={{ from: "assistant", content: "", id: randomUUID() }}
64
  model={currentModel}
65
- isLast={true}
66
  {webSearchMessages}
67
  />
68
  {/if}
 
9
  import ChatIntroduction from "./ChatIntroduction.svelte";
10
  import ChatMessage from "./ChatMessage.svelte";
11
  import type { WebSearchMessage } from "$lib/types/WebSearch";
 
12
 
13
  export let messages: Message[];
14
  export let loading: boolean;
 
18
  export let settings: LayoutData["settings"];
19
  export let models: Model[];
20
  export let readOnly: boolean;
21
+ export let searches: Record<string, WebSearchMessage[]>;
22
 
23
+ let webSearchArray: Array<WebSearchMessage[] | undefined> = [];
24
  let chatContainer: HTMLElement;
25
 
26
  export let webSearchMessages: WebSearchMessage[] = [];
 
34
  $: if (messages[messages.length - 1]?.from === "user") {
35
  scrollToBottom();
36
  }
37
+
38
+ $: messages,
39
+ (webSearchArray = messages.map((message, idx) => {
40
+ if (message.webSearchId) {
41
+ return searches[message.webSearchId] ?? [];
42
+ } else if (idx === messages.length - 1) {
43
+ return webSearchMessages;
44
+ } else {
45
+ return [];
46
+ }
47
+ }));
48
  </script>
49
 
50
  <div
 
54
  >
55
  <div class="mx-auto flex h-full max-w-3xl flex-col gap-6 px-5 pt-6 sm:gap-8 xl:max-w-4xl">
56
  {#each messages as message, i}
57
+ <ChatMessage
58
+ loading={loading && i === messages.length - 1}
59
+ {message}
60
+ {isAuthor}
61
+ {readOnly}
62
+ model={currentModel}
63
+ webSearchMessages={webSearchArray[i]}
64
+ on:retry
65
+ on:vote
66
+ />
 
 
 
67
  {:else}
68
  <ChatIntroduction {settings} {models} {currentModel} on:message />
69
  {/each}
 
71
  <ChatMessage
72
  message={{ from: "assistant", content: "", id: randomUUID() }}
73
  model={currentModel}
 
74
  {webSearchMessages}
75
  />
76
  {/if}
src/lib/components/chat/ChatWindow.svelte CHANGED
@@ -24,6 +24,7 @@
24
  export let models: Model[];
25
  export let settings: LayoutData["settings"];
26
  export let webSearchMessages: WebSearchMessage[] = [];
 
27
 
28
  export let loginRequired = false;
29
  $: isReadOnly = !models.some((model) => model.id === currentModel.id);
@@ -59,6 +60,7 @@
59
  readOnly={isReadOnly}
60
  isAuthor={!shared}
61
  {webSearchMessages}
 
62
  on:message
63
  on:vote
64
  on:retry={(ev) => {
 
24
  export let models: Model[];
25
  export let settings: LayoutData["settings"];
26
  export let webSearchMessages: WebSearchMessage[] = [];
27
+ export let searches: Record<string, WebSearchMessage[]> = {};
28
 
29
  export let loginRequired = false;
30
  $: isReadOnly = !models.some((model) => model.id === currentModel.id);
 
60
  readOnly={isReadOnly}
61
  isAuthor={!shared}
62
  {webSearchMessages}
63
+ {searches}
64
  on:message
65
  on:vote
66
  on:retry={(ev) => {
src/lib/server/websearch/generateQuery.ts ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { Message } from "$lib/types/Message";
2
+ import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
3
+ import type { BackendModel } from "../models";
4
+
5
+ export async function generateQuery(messages: Message[], model: BackendModel) {
6
+ const promptSearchQuery =
7
+ model.userMessageToken +
8
+ "The following messages were written by a user, trying to answer a question." +
9
+ model.messageEndToken +
10
+ messages
11
+ .filter((message) => message.from === "user")
12
+ .map((message) => model.userMessageToken + message.content + model.messageEndToken) +
13
+ model.userMessageToken +
14
+ "What plain-text english sentence would you input into Google to answer the last question? Answer with a short (10 words max) simple sentence." +
15
+ model.messageEndToken +
16
+ model.assistantMessageToken +
17
+ "Query: ";
18
+
19
+ const searchQuery = await generateFromDefaultEndpoint(promptSearchQuery).then((query) => {
20
+ const arr = query.split(/\r?\n/);
21
+ return arr[0].length > 0 ? arr[0] : arr[1];
22
+ });
23
+
24
+ return searchQuery;
25
+ }
src/lib/server/websearch/parseWeb.ts ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { JSDOM, VirtualConsole } from "jsdom";
2
+
3
+ function removeTags(node: Node) {
4
+ if (node.hasChildNodes()) {
5
+ node.childNodes.forEach((childNode) => {
6
+ if (node.nodeName === "SCRIPT" || node.nodeName === "STYLE") {
7
+ node.removeChild(childNode);
8
+ } else {
9
+ removeTags(childNode);
10
+ }
11
+ });
12
+ }
13
+ }
14
+ function naiveInnerText(node: Node): string {
15
+ const Node = node; // We need Node(DOM's Node) for the constants, but Node doesn't exist in the nodejs global space, and any Node instance references the constants through the prototype chain
16
+ return [...node.childNodes]
17
+ .map((childNode) => {
18
+ switch (childNode.nodeType) {
19
+ case Node.TEXT_NODE:
20
+ return node.textContent;
21
+ case Node.ELEMENT_NODE:
22
+ return naiveInnerText(childNode);
23
+ default:
24
+ return "";
25
+ }
26
+ })
27
+ .join("\n");
28
+ }
29
+
30
+ export async function parseWeb(url: string) {
31
+ const abortController = new AbortController();
32
+ setTimeout(() => abortController.abort(), 10000);
33
+ const htmlString = await fetch(url, { signal: abortController.signal })
34
+ .then((response) => response.text())
35
+ .catch((err) => console.log(err));
36
+
37
+ const virtualConsole = new VirtualConsole();
38
+ virtualConsole.on("error", () => {
39
+ // No-op to skip console errors.
40
+ });
41
+
42
+ // put the html string into a DOM
43
+ const dom = new JSDOM(htmlString ?? "", {
44
+ virtualConsole,
45
+ });
46
+
47
+ const body = dom.window.document.querySelector("body");
48
+ if (!body) throw new Error("body of the webpage is null");
49
+
50
+ removeTags(body);
51
+
52
+ // recursively extract text content from the body and then remove newlines and multiple spaces
53
+ const text = (naiveInnerText(body) ?? "").replace(/ {2}|\r\n|\n|\r/gm, "");
54
+
55
+ return text;
56
+ }
src/lib/server/{searchWeb.ts → websearch/searchWeb.ts} RENAMED
File without changes
src/lib/server/websearch/summarizeWeb.ts ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
2
+ import type { BackendModel } from "../models";
3
+
4
+ export async function summarizeWeb(content: string, query: string, model: BackendModel) {
5
+ const summaryPrompt =
6
+ model.userMessageToken +
7
+ content
8
+ .split(" ")
9
+ .slice(0, model.parameters?.truncate ?? 0)
10
+ .join(" ") +
11
+ model.messageEndToken +
12
+ model.userMessageToken +
13
+ `The text above should be summarized to best answer the query: ${query}.` +
14
+ model.messageEndToken +
15
+ model.assistantMessageToken +
16
+ "Summary: ";
17
+
18
+ const summary = await generateFromDefaultEndpoint(summaryPrompt).then((txt: string) =>
19
+ txt.trim()
20
+ );
21
+
22
+ return summary;
23
+ }
src/lib/types/UrlDependency.ts CHANGED
@@ -1,4 +1,5 @@
1
  /* eslint-disable no-shadow */
2
  export enum UrlDependency {
3
  ConversationList = "conversation:list",
 
4
  }
 
1
  /* eslint-disable no-shadow */
2
  export enum UrlDependency {
3
  ConversationList = "conversation:list",
4
+ Conversation = "conversation",
5
  }
src/routes/conversation/[id]/+page.server.ts CHANGED
@@ -2,14 +2,18 @@ import { collections } from "$lib/server/database";
2
  import { ObjectId } from "mongodb";
3
  import { error } from "@sveltejs/kit";
4
  import { authCondition } from "$lib/server/auth";
 
 
5
 
6
- export const load = async ({ params, locals }) => {
7
  // todo: add validation on params.id
8
  const conversation = await collections.conversations.findOne({
9
  _id: new ObjectId(params.id),
10
  ...authCondition(locals),
11
  });
12
 
 
 
13
  if (!conversation) {
14
  const conversationExists =
15
  (await collections.conversations.countDocuments({
@@ -26,9 +30,23 @@ export const load = async ({ params, locals }) => {
26
  throw error(404, "Conversation not found.");
27
  }
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  return {
30
  messages: conversation.messages,
31
  title: conversation.title,
32
  model: conversation.model,
 
33
  };
34
  };
 
2
  import { ObjectId } from "mongodb";
3
  import { error } from "@sveltejs/kit";
4
  import { authCondition } from "$lib/server/auth";
5
+ import type { WebSearchMessageResult } from "$lib/types/WebSearch";
6
+ import { UrlDependency } from "$lib/types/UrlDependency";
7
 
8
+ export const load = async ({ params, depends, locals }) => {
9
  // todo: add validation on params.id
10
  const conversation = await collections.conversations.findOne({
11
  _id: new ObjectId(params.id),
12
  ...authCondition(locals),
13
  });
14
 
15
+ depends(UrlDependency.Conversation);
16
+
17
  if (!conversation) {
18
  const conversationExists =
19
  (await collections.conversations.countDocuments({
 
30
  throw error(404, "Conversation not found.");
31
  }
32
 
33
+ const webSearchesId = conversation.messages
34
+ .filter((message) => message.webSearchId)
35
+ .map((message) => new ObjectId(message.webSearchId));
36
+
37
+ const results = await collections.webSearches.find({ _id: { $in: webSearchesId } }).toArray();
38
+
39
+ const searches = Object.fromEntries(
40
+ results.map((x) => [
41
+ x._id.toString(),
42
+ [...x.messages, { type: "result", id: x._id.toString() } satisfies WebSearchMessageResult],
43
+ ])
44
+ );
45
+
46
  return {
47
  messages: conversation.messages,
48
  title: conversation.title,
49
  model: conversation.model,
50
+ searches,
51
  };
52
  };
src/routes/conversation/[id]/+page.svelte CHANGED
@@ -13,8 +13,9 @@
13
  import { randomUUID } from "$lib/utils/randomUuid";
14
  import { findCurrentModel } from "$lib/utils/models";
15
  import { webSearchParameters } from "$lib/stores/webSearchParameters";
16
- import type { WebSearchMessage } from "$lib/types/WebSearch.js";
17
  import type { Message } from "$lib/types/Message";
 
18
 
19
  export let data;
20
 
@@ -194,6 +195,7 @@
194
  await getTextGenerationStream(message, messageId, isRetry, searchResponseId ?? undefined);
195
 
196
  webSearchMessages = [];
 
197
 
198
  if (messages.filter((m) => m.from === "user").length === 1) {
199
  summarizeTitle($page.params.id)
@@ -266,6 +268,7 @@
266
  {pending}
267
  {messages}
268
  bind:webSearchMessages
 
269
  on:message={(event) => writeMessage(event.detail)}
270
  on:retry={(event) => writeMessage(event.detail.content, event.detail.id)}
271
  on:vote={(event) => voteMessage(event.detail.score, event.detail.id)}
 
13
  import { randomUUID } from "$lib/utils/randomUuid";
14
  import { findCurrentModel } from "$lib/utils/models";
15
  import { webSearchParameters } from "$lib/stores/webSearchParameters";
16
+ import type { WebSearchMessage } from "$lib/types/WebSearch";
17
  import type { Message } from "$lib/types/Message";
18
+ import { browser } from "$app/environment";
19
 
20
  export let data;
21
 
 
195
  await getTextGenerationStream(message, messageId, isRetry, searchResponseId ?? undefined);
196
 
197
  webSearchMessages = [];
198
+ if (browser) invalidate(UrlDependency.Conversation);
199
 
200
  if (messages.filter((m) => m.from === "user").length === 1) {
201
  summarizeTitle($page.params.id)
 
268
  {pending}
269
  {messages}
270
  bind:webSearchMessages
271
+ searches={{ ...data.searches }}
272
  on:message={(event) => writeMessage(event.detail)}
273
  on:retry={(event) => writeMessage(event.detail.content, event.detail.id)}
274
  on:vote={(event) => voteMessage(event.detail.score, event.detail.id)}
src/routes/conversation/[id]/summarize/+server.ts CHANGED
@@ -1,7 +1,7 @@
1
  import { buildPrompt } from "$lib/buildPrompt";
2
  import { authCondition } from "$lib/server/auth";
3
  import { collections } from "$lib/server/database";
4
- import { generateFromDefaultEndpoint } from "$lib/server/generateFromDefaultEndpoint.js";
5
  import { defaultModel } from "$lib/server/models";
6
  import { error } from "@sveltejs/kit";
7
  import { ObjectId } from "mongodb";
 
1
  import { buildPrompt } from "$lib/buildPrompt";
2
  import { authCondition } from "$lib/server/auth";
3
  import { collections } from "$lib/server/database";
4
+ import { generateFromDefaultEndpoint } from "$lib/server/generateFromDefaultEndpoint";
5
  import { defaultModel } from "$lib/server/models";
6
  import { error } from "@sveltejs/kit";
7
  import { ObjectId } from "mongodb";
src/routes/conversation/[id]/web-search/+server.ts CHANGED
@@ -1,41 +1,15 @@
1
  import { authCondition } from "$lib/server/auth";
2
  import { collections } from "$lib/server/database";
3
- import { generateFromDefaultEndpoint } from "$lib/server/generateFromDefaultEndpoint.js";
4
  import { defaultModel } from "$lib/server/models";
5
- import { searchWeb } from "$lib/server/searchWeb.js";
6
- import type { Message } from "$lib/types/Message.js";
7
  import { error } from "@sveltejs/kit";
8
  import { ObjectId } from "mongodb";
9
  import { z } from "zod";
10
- import { JSDOM, VirtualConsole } from "jsdom";
11
- import type { WebSearch } from "$lib/types/WebSearch.js";
12
-
13
- function removeTags(node: Node) {
14
- if (node.hasChildNodes()) {
15
- node.childNodes.forEach((childNode) => {
16
- if (node.nodeName === "SCRIPT" || node.nodeName === "STYLE") {
17
- node.removeChild(childNode);
18
- } else {
19
- removeTags(childNode);
20
- }
21
- });
22
- }
23
- }
24
- function naiveInnerText(node: Node): string {
25
- const Node = node; // We need Node(DOM's Node) for the constants, but Node doesn't exist in the nodejs global space, and any Node instance references the constants through the prototype chain
26
- return [...node.childNodes]
27
- .map((childNode) => {
28
- switch (childNode.nodeType) {
29
- case Node.TEXT_NODE:
30
- return node.textContent;
31
- case Node.ELEMENT_NODE:
32
- return naiveInnerText(childNode);
33
- default:
34
- return "";
35
- }
36
- })
37
- .join("\n");
38
- }
39
 
40
  interface GenericObject {
41
  [key: string]: GenericObject | unknown;
@@ -82,45 +56,24 @@ export async function GET({ params, locals, url }) {
82
  createdAt: new Date(),
83
  updatedAt: new Date(),
84
  };
85
- try {
86
- webSearch.messages.push({
87
- type: "update",
88
- message: "Generating search query",
89
- });
90
- controller.enqueue(JSON.stringify({ messages: webSearch.messages }));
91
-
92
- const promptSearchQuery =
93
- model.userMessageToken +
94
- "The following messages were written by a user, trying to answer a question." +
95
- model.messageEndToken +
96
- messages
97
- .filter((message) => message.from === "user")
98
- .map((message) => model.userMessageToken + message.content + model.messageEndToken) +
99
- model.userMessageToken +
100
- "What plain-text english sentence would you input into Google to answer the last question? Answer with a short (10 words max) simple sentence." +
101
- model.messageEndToken +
102
- model.assistantMessageToken +
103
- "Query: ";
104
-
105
- webSearch.searchQuery = await generateFromDefaultEndpoint(promptSearchQuery).then(
106
- (query) => {
107
- const arr = query.split(/\r?\n/);
108
- return arr[0].length > 0 ? arr[0] : arr[1];
109
- }
110
- );
111
- // the model has a tendency to continue answering even when we tell it not to, so the split makes
112
- // sure we only get the first line of the response
113
 
 
114
  webSearch.messages.push({
115
  type: "update",
116
- message: "Searching Google",
117
- args: [webSearch.searchQuery],
118
  });
119
  controller.enqueue(JSON.stringify({ messages: webSearch.messages }));
 
120
 
 
 
 
 
 
121
  const results = await searchWeb(webSearch.searchQuery);
122
- let text = "";
123
 
 
124
  webSearch.results =
125
  (results.organic_results &&
126
  results.organic_results.map((el: { link: string }) => el.link)) ??
@@ -129,85 +82,22 @@ export async function GET({ params, locals, url }) {
129
  if (results.knowledge_graph) {
130
  // if google returns a knowledge graph, we use it
131
  webSearch.knowledgeGraph = JSON.stringify(removeLinks(results.knowledge_graph));
132
-
133
  text = webSearch.knowledgeGraph;
134
-
135
- webSearch.messages.push({
136
- type: "update",
137
- message: "Found a Google knowledge page",
138
- });
139
- controller.enqueue(JSON.stringify({ messages: webSearch.messages }));
140
  } else if (webSearch.results.length > 0) {
141
  // otherwise we use the top result from search
142
  const topUrl = webSearch.results[0];
 
143
 
144
- webSearch.messages.push({
145
- type: "update",
146
- message: "Browsing first result",
147
- args: [JSON.stringify(topUrl)],
148
- });
149
- controller.enqueue(JSON.stringify({ messages: webSearch.messages }));
150
-
151
- // fetch the webpage
152
- //10 second timeout:
153
- const abortController = new AbortController();
154
- setTimeout(() => abortController.abort(), 10000);
155
- const htmlString = await fetch(topUrl, { signal: abortController.signal })
156
- .then((response) => response.text())
157
- .catch((err) => console.log(err));
158
-
159
- const virtualConsole = new VirtualConsole();
160
- virtualConsole.on("error", () => {
161
- // No-op to skip console errors.
162
- });
163
-
164
- // put the html string into a DOM
165
- const dom = new JSDOM(htmlString ?? "", {
166
- virtualConsole,
167
- });
168
-
169
- const body = dom.window.document.querySelector("body");
170
- if (!body) throw new Error("body of the webpage is null");
171
-
172
- removeTags(body);
173
-
174
- // recursively extract text content from the body and then remove newlines and multiple spaces
175
- text = (naiveInnerText(body) ?? "").replace(/ {2}|\r\n|\n|\r/gm, "");
176
-
177
  if (!text) throw new Error("text of the webpage is null");
178
  } else {
179
  throw new Error("No results found for this search query");
180
  }
181
 
182
- webSearch.messages.push({
183
- type: "update",
184
- message: "Creating summary",
185
- });
186
- controller.enqueue(JSON.stringify({ messages: webSearch.messages }));
187
-
188
- const summaryPrompt =
189
- model.userMessageToken +
190
- text
191
- .split(" ")
192
- .slice(0, model.parameters?.truncate ?? 0)
193
- .join(" ") +
194
- model.messageEndToken +
195
- model.userMessageToken +
196
- `The text above should be summarized to best answer the query: ${webSearch.searchQuery}.` +
197
- model.messageEndToken +
198
- model.assistantMessageToken +
199
- "Summary: ";
200
-
201
- webSearch.summary = await generateFromDefaultEndpoint(summaryPrompt).then((txt: string) =>
202
- txt.trim()
203
- );
204
-
205
- webSearch.messages.push({
206
- type: "update",
207
- message: "Injecting summary",
208
- args: [JSON.stringify(webSearch.summary)],
209
- });
210
- controller.enqueue(JSON.stringify({ messages: webSearch.messages }));
211
  } catch (searchError) {
212
  if (searchError instanceof Error) {
213
  webSearch.messages.push({
@@ -219,7 +109,6 @@ export async function GET({ params, locals, url }) {
219
  }
220
 
221
  const res = await collections.webSearches.insertOne(webSearch);
222
-
223
  webSearch.messages.push({
224
  type: "result",
225
  id: res.insertedId.toString(),
 
1
  import { authCondition } from "$lib/server/auth";
2
  import { collections } from "$lib/server/database";
 
3
  import { defaultModel } from "$lib/server/models";
4
+ import { searchWeb } from "$lib/server/websearch/searchWeb";
5
+ import type { Message } from "$lib/types/Message";
6
  import { error } from "@sveltejs/kit";
7
  import { ObjectId } from "mongodb";
8
  import { z } from "zod";
9
+ import type { WebSearch } from "$lib/types/WebSearch";
10
+ import { generateQuery } from "$lib/server/websearch/generateQuery";
11
+ import { parseWeb } from "$lib/server/websearch/parseWeb";
12
+ import { summarizeWeb } from "$lib/server/websearch/summarizeWeb";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  interface GenericObject {
15
  [key: string]: GenericObject | unknown;
 
56
  createdAt: new Date(),
57
  updatedAt: new Date(),
58
  };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
+ function appendUpdate(message: string, args?: string[]) {
61
  webSearch.messages.push({
62
  type: "update",
63
+ message,
64
+ args,
65
  });
66
  controller.enqueue(JSON.stringify({ messages: webSearch.messages }));
67
+ }
68
 
69
+ try {
70
+ appendUpdate("Generating search query");
71
+ webSearch.searchQuery = await generateQuery(messages, model);
72
+
73
+ appendUpdate("Searching Google", [webSearch.searchQuery]);
74
  const results = await searchWeb(webSearch.searchQuery);
 
75
 
76
+ let text = "";
77
  webSearch.results =
78
  (results.organic_results &&
79
  results.organic_results.map((el: { link: string }) => el.link)) ??
 
82
  if (results.knowledge_graph) {
83
  // if google returns a knowledge graph, we use it
84
  webSearch.knowledgeGraph = JSON.stringify(removeLinks(results.knowledge_graph));
 
85
  text = webSearch.knowledgeGraph;
86
+ appendUpdate("Found a Google knowledge page");
 
 
 
 
 
87
  } else if (webSearch.results.length > 0) {
88
  // otherwise we use the top result from search
89
  const topUrl = webSearch.results[0];
90
+ appendUpdate("Browsing first result", [JSON.stringify(topUrl)]);
91
 
92
+ text = await parseWeb(topUrl);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  if (!text) throw new Error("text of the webpage is null");
94
  } else {
95
  throw new Error("No results found for this search query");
96
  }
97
 
98
+ appendUpdate("Creating summary");
99
+ webSearch.summary = await summarizeWeb(text, webSearch.searchQuery, model);
100
+ appendUpdate("Injecting summary", [JSON.stringify(webSearch.summary)]);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  } catch (searchError) {
102
  if (searchError instanceof Error) {
103
  webSearch.messages.push({
 
109
  }
110
 
111
  const res = await collections.webSearches.insertOne(webSearch);
 
112
  webSearch.messages.push({
113
  type: "result",
114
  id: res.insertedId.toString(),
src/routes/r/[id]/+page.server.ts CHANGED
@@ -1,6 +1,8 @@
1
  import type { PageServerLoad } from "./$types";
2
  import { collections } from "$lib/server/database";
3
  import { error } from "@sveltejs/kit";
 
 
4
 
5
  export const load: PageServerLoad = async ({ params }) => {
6
  const conversation = await collections.sharedConversations.findOne({
@@ -11,9 +13,23 @@ export const load: PageServerLoad = async ({ params }) => {
11
  throw error(404, "Conversation not found");
12
  }
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  return {
15
  messages: conversation.messages,
16
  title: conversation.title,
17
  model: conversation.model,
 
18
  };
19
  };
 
1
  import type { PageServerLoad } from "./$types";
2
  import { collections } from "$lib/server/database";
3
  import { error } from "@sveltejs/kit";
4
+ import { ObjectId } from "mongodb";
5
+ import type { WebSearchMessageResult } from "$lib/types/WebSearch";
6
 
7
  export const load: PageServerLoad = async ({ params }) => {
8
  const conversation = await collections.sharedConversations.findOne({
 
13
  throw error(404, "Conversation not found");
14
  }
15
 
16
+ const webSearchesId = conversation.messages
17
+ .filter((message) => message.webSearchId)
18
+ .map((message) => new ObjectId(message.webSearchId));
19
+
20
+ const results = await collections.webSearches.find({ _id: { $in: webSearchesId } }).toArray();
21
+
22
+ const searches = Object.fromEntries(
23
+ results.map((x) => [
24
+ x._id.toString(),
25
+ [...x.messages, { type: "result", id: x._id.toString() } satisfies WebSearchMessageResult],
26
+ ])
27
+ );
28
+
29
  return {
30
  messages: conversation.messages,
31
  title: conversation.title,
32
  model: conversation.model,
33
+ searches,
34
  };
35
  };
src/routes/r/[id]/+page.svelte CHANGED
@@ -59,6 +59,7 @@
59
  {loading}
60
  shared={true}
61
  messages={data.messages}
 
62
  on:message={(ev) =>
63
  createConversation()
64
  .then((convId) => {
 
59
  {loading}
60
  shared={true}
61
  messages={data.messages}
62
+ searches={data.searches}
63
  on:message={(ev) =>
64
  createConversation()
65
  .then((convId) => {
src/routes/search/[id]/+server.ts CHANGED
@@ -1,5 +1,5 @@
1
  import { collections } from "$lib/server/database";
2
- import { sha256 } from "$lib/utils/sha256.js";
3
  import { error } from "@sveltejs/kit";
4
  import { ObjectId } from "mongodb";
5
 
 
1
  import { collections } from "$lib/server/database";
2
+ import { sha256 } from "$lib/utils/sha256";
3
  import { error } from "@sveltejs/kit";
4
  import { ObjectId } from "mongodb";
5