Spaces:

huggingchat
/

chat-ui

Running

App Files Files Community

610

nsarrazin HF staff committed on Mar 6

Commit

537b6f5

•

1 Parent(s): 21c9b41

Add limits on API endpoints (#886)

Browse files

* Add limits on messages, conversations, assistants and messages/minute

* Add max message length limit

* remove rate limits from public config

* add `RATE_LIMITS` to secrets

* Add `MESSAGES_BEFORE_LOGIN` to secrets

* replace `RATE_LIMITS` by `USAGE_LIMITS`

* replace `RateLimits` by `usageLimits` and only get nEvents if needed

* rename schema too

* replace \r\n by \n

Files changed (10) hide show

.env +5 -2
.env.template +0 -3
.github/workflows/deploy-release.yml +2 -0
scripts/updateProdEnv.ts +4 -0
src/lib/server/usageLimits.ts +23 -0
src/routes/+page.svelte +4 -3
src/routes/conversation/+server.ts +11 -0
src/routes/conversation/[id]/+page.svelte +1 -1
src/routes/conversation/[id]/+server.ts +27 -9
src/routes/settings/assistants/new/+page.server.ts +13 -0

.env CHANGED Viewed

@@ -113,7 +113,7 @@ ADMIN_API_SECRET=# secret to admin API calls, like computing usage stats or expo
 PARQUET_EXPORT_SECRET=#DEPRECATED, use ADMIN_API_SECRET instead
-RATE_LIMIT= # requests per minute
 MESSAGES_BEFORE_LOGIN=# how many messages a user can send in a conversation before having to login. set to 0 to force login right away
 APP_BASE="" # base path of the app, e.g. /chat, left blank as default
@@ -140,4 +140,7 @@ ALTERNATIVE_REDIRECT_URLS=`[]` #valide alternative redirect URL for OAuth
 WEBHOOK_URL_REPORT_ASSISTANT=#provide webhook url to get notified when an assistant gets reported
-ALLOWED_USER_EMAILS=`[]` # if it's defined, only these emails will be allowed to use the app

 PARQUET_EXPORT_SECRET=#DEPRECATED, use ADMIN_API_SECRET instead
+RATE_LIMIT= # /!\ Legacy definition of messages per minute. Use USAGE_LIMITS.messagesPerMinute instead
 MESSAGES_BEFORE_LOGIN=# how many messages a user can send in a conversation before having to login. set to 0 to force login right away
 APP_BASE="" # base path of the app, e.g. /chat, left blank as default
 WEBHOOK_URL_REPORT_ASSISTANT=#provide webhook url to get notified when an assistant gets reported
+ALLOWED_USER_EMAILS=`[]` # if it's defined, only these emails will be allowed to use the app
+USAGE_LIMITS=`{}`

.env.template CHANGED Viewed

@@ -269,9 +269,6 @@ PUBLIC_APP_DISCLAIMER_MESSAGE="Disclaimer: AI is an area of active research with
 PUBLIC_APP_DATA_SHARING=1
 PUBLIC_APP_DISCLAIMER=1
-RATE_LIMIT=16
-MESSAGES_BEFORE_LOGIN=5# how many messages a user can send in a conversation before having to login. set to 0 to force login right away
 PUBLIC_GOOGLE_ANALYTICS_ID=G-8Q63TH4CSL
 PUBLIC_PLAUSIBLE_SCRIPT_URL="/js/script.js"

 PUBLIC_APP_DATA_SHARING=1
 PUBLIC_APP_DISCLAIMER=1
 PUBLIC_GOOGLE_ANALYTICS_ID=G-8Q63TH4CSL
 PUBLIC_PLAUSIBLE_SCRIPT_URL="/js/script.js"

.github/workflows/deploy-release.yml CHANGED Viewed

@@ -27,6 +27,8 @@ jobs:
           HF_DEPLOYMENT_TOKEN: ${{ secrets.HF_DEPLOYMENT_TOKEN }}
           WEBHOOK_URL_REPORT_ASSISTANT: ${{ secrets.WEBHOOK_URL_REPORT_ASSISTANT }}
           ADMIN_API_SECRET: ${{ secrets.ADMIN_API_SECRET }}
         run: npm run updateProdEnv
   sync-to-hub:
     runs-on: ubuntu-latest

           HF_DEPLOYMENT_TOKEN: ${{ secrets.HF_DEPLOYMENT_TOKEN }}
           WEBHOOK_URL_REPORT_ASSISTANT: ${{ secrets.WEBHOOK_URL_REPORT_ASSISTANT }}
           ADMIN_API_SECRET: ${{ secrets.ADMIN_API_SECRET }}
+          USAGE_LIMITS: ${{ secrets.USAGE_LIMITS }}
+          MESSAGES_BEFORE_LOGIN: ${{ secrets.MESSAGES_BEFORE_LOGIN }}
         run: npm run updateProdEnv
   sync-to-hub:
     runs-on: ubuntu-latest

scripts/updateProdEnv.ts CHANGED Viewed

@@ -8,6 +8,8 @@ const MONGODB_URL = process.env.MONGODB_URL;
 const HF_TOKEN = process.env.HF_TOKEN ?? process.env.HF_ACCESS_TOKEN; // token used for API requests in prod
 const WEBHOOK_URL_REPORT_ASSISTANT = process.env.WEBHOOK_URL_REPORT_ASSISTANT; // slack webhook url used to get "report assistant" events
 const ADMIN_API_SECRET = process.env.ADMIN_API_SECRET;
 // Read the content of the file .env.template
 const PUBLIC_CONFIG = fs.readFileSync(".env.template", "utf8");
@@ -20,6 +22,8 @@ SERPER_API_KEY=${SERPER_API_KEY}
 HF_TOKEN=${HF_TOKEN}
 WEBHOOK_URL_REPORT_ASSISTANT=${WEBHOOK_URL_REPORT_ASSISTANT}
 ADMIN_API_SECRET=${ADMIN_API_SECRET}
 `;
 // Make an HTTP POST request to add the space secrets

 const HF_TOKEN = process.env.HF_TOKEN ?? process.env.HF_ACCESS_TOKEN; // token used for API requests in prod
 const WEBHOOK_URL_REPORT_ASSISTANT = process.env.WEBHOOK_URL_REPORT_ASSISTANT; // slack webhook url used to get "report assistant" events
 const ADMIN_API_SECRET = process.env.ADMIN_API_SECRET;
+const USAGE_LIMITS = process.env.USAGE_LIMITS;
+const MESSAGES_BEFORE_LOGIN = process.env.MESSAGES_BEFORE_LOGIN;
 // Read the content of the file .env.template
 const PUBLIC_CONFIG = fs.readFileSync(".env.template", "utf8");
 HF_TOKEN=${HF_TOKEN}
 WEBHOOK_URL_REPORT_ASSISTANT=${WEBHOOK_URL_REPORT_ASSISTANT}
 ADMIN_API_SECRET=${ADMIN_API_SECRET}
+USAGE_LIMITS=${USAGE_LIMITS}
+MESSAGES_BEFORE_LOGIN=${MESSAGES_BEFORE_LOGIN}
 `;
 // Make an HTTP POST request to add the space secrets

src/lib/server/usageLimits.ts ADDED Viewed

	@@ -0,0 +1,23 @@

+import { z } from "zod";
+import { USAGE_LIMITS, RATE_LIMIT } from "$env/static/private";
+import JSON5 from "json5";
+// RATE_LIMIT is the legacy way to define the messages-per-minute limit; it is only used below as a fallback when USAGE_LIMITS does not set messagesPerMinute
+export const usageLimitsSchema = z // schema for the value parsed from USAGE_LIMITS; every field is optional and coerced to a number
+	.object({
+		conversations: z.coerce.number().optional(), // maximum number of conversations (checked when a conversation is created)
+		messages: z.coerce.number().optional(), // maximum number of messages in a single conversation
+		assistants: z.coerce.number().optional(), // maximum number of assistants (checked when an assistant is created)
+		messageLength: z.coerce.number().optional(), // maximum length of a message; the conversation endpoint rejects longer messages (HTTP 400) rather than truncating them
+		messagesPerMinute: z
+			.preprocess((val) => { // runs before validation so the legacy value can be substituted
+				if (val === undefined) { // messagesPerMinute not set in USAGE_LIMITS
+					return RATE_LIMIT; // fall back to the legacy env variable (a string, coerced to a number by the inner schema)
+				}
+				return val; // an explicit messagesPerMinute takes precedence over RATE_LIMIT
+			}, z.coerce.number().optional())
+			.optional(), // how many messages per minute
+	})
+	.optional(); // the limits object as a whole may be undefined, hence the `usageLimits?.` checks in callers
+export const usageLimits = usageLimitsSchema.parse(JSON5.parse(USAGE_LIMITS)); // parsed once at module load; throws if USAGE_LIMITS is not valid JSON5 or does not match the schema

src/routes/+page.svelte CHANGED Viewed

@@ -47,8 +47,9 @@
 			});
 			if (!res.ok) {
-				error.set("Error while creating conversation, try again.");
-				console.error("Error while creating conversation: " + (await res.text()));
 				return;
 			}
@@ -63,7 +64,7 @@
 			// invalidateAll to update list of conversations
 			await goto(`${base}/conversation/${conversationId}`, { invalidateAll: true });
 		} catch (err) {
-			error.set(ERROR_MESSAGES.default);
 			console.error(err);
 		} finally {
 			loading = false;

 			});
 			if (!res.ok) {
+				const errorMessage = (await res.json()).message || ERROR_MESSAGES.default;
+				error.set(errorMessage);
+				console.error("Error while creating conversation: ", errorMessage);
 				return;
 			}
 			// invalidateAll to update list of conversations
 			await goto(`${base}/conversation/${conversationId}`, { invalidateAll: true });
 		} catch (err) {
+			error.set((err as Error).message || ERROR_MESSAGES.default);
 			console.error(err);
 		} finally {
 			loading = false;

src/routes/conversation/+server.ts CHANGED Viewed

@@ -8,6 +8,8 @@ import type { Message } from "$lib/types/Message";
 import { models, validateModel } from "$lib/server/models";
 import { defaultEmbeddingModel } from "$lib/server/embeddingModels";
 import { v4 } from "uuid";
 export const POST: RequestHandler = async ({ locals, request }) => {
 	const body = await request.text();
@@ -23,6 +25,15 @@ export const POST: RequestHandler = async ({ locals, request }) => {
 		})
 		.parse(JSON.parse(body));
 	let messages: Message[] = [
 		{
 			id: v4(),

 import { models, validateModel } from "$lib/server/models";
 import { defaultEmbeddingModel } from "$lib/server/embeddingModels";
 import { v4 } from "uuid";
+import { authCondition } from "$lib/server/auth";
+import { usageLimits } from "$lib/server/usageLimits";
 export const POST: RequestHandler = async ({ locals, request }) => {
 	const body = await request.text();
 		})
 		.parse(JSON.parse(body));
+	const convCount = await collections.conversations.countDocuments(authCondition(locals));
+	if (usageLimits?.conversations && convCount > usageLimits?.conversations) {
+		throw error(
+			429,
+			"You have reached the maximum number of conversations. Delete some to continue."
+		);
+	}
 	let messages: Message[] = [
 		{
 			id: v4(),

src/routes/conversation/[id]/+page.svelte CHANGED Viewed

@@ -43,7 +43,7 @@
 			});
 			if (!res.ok) {
-				error.set("Error while creating conversation, try again.");
 				console.error("Error while creating conversation: " + (await res.text()));
 				return;
 			}

 			});
 			if (!res.ok) {
+				error.set(await res.text());
 				console.error("Error while creating conversation: " + (await res.text()));
 				return;
 			}

src/routes/conversation/[id]/+server.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { MESSAGES_BEFORE_LOGIN, RATE_LIMIT } from "$env/static/private";
 import { authCondition, requiresUser } from "$lib/server/auth";
 import { collections } from "$lib/server/database";
 import { models } from "$lib/server/models";
@@ -19,6 +19,7 @@ import { buildSubtree } from "$lib/utils/tree/buildSubtree.js";
 import { addChildren } from "$lib/utils/tree/addChildren.js";
 import { addSibling } from "$lib/utils/tree/addSibling.js";
 import { preprocessMessages } from "$lib/server/preprocessMessages.js";
 export async function POST({ request, locals, params, getClientAddress }) {
 	const id = z.string().parse(params.id);
@@ -95,14 +96,22 @@ export async function POST({ request, locals, params, getClientAddress }) {
 		}
 	}
-	// check if the user is rate limited
-	const nEvents = Math.max(
-		await collections.messageEvents.countDocuments({ userId }),
-		await collections.messageEvents.countDocuments({ ip: getClientAddress() })
-	);
-	if (RATE_LIMIT != "" && nEvents > parseInt(RATE_LIMIT)) {
-		throw error(429, ERROR_MESSAGES.rateLimited);
 	}
 	// fetch the model
@@ -125,7 +134,13 @@ export async function POST({ request, locals, params, getClientAddress }) {
 	} = z
 		.object({
 			id: z.string().uuid().refine(isMessageId).optional(), // parent message id to append to for a normal message, or the message id for a retry/continue
-			inputs: z.optional(z.string().trim().min(1)),
 			is_retry: z.optional(z.boolean()),
 			is_continue: z.optional(z.boolean()),
 			web_search: z.optional(z.boolean()),
@@ -133,6 +148,9 @@ export async function POST({ request, locals, params, getClientAddress }) {
 		})
 		.parse(json);
 	// files is an array of base64 strings encoding Blob objects
 	// we need to convert this array to an array of File objects

+import { MESSAGES_BEFORE_LOGIN } from "$env/static/private";
 import { authCondition, requiresUser } from "$lib/server/auth";
 import { collections } from "$lib/server/database";
 import { models } from "$lib/server/models";
 import { addChildren } from "$lib/utils/tree/addChildren.js";
 import { addSibling } from "$lib/utils/tree/addSibling.js";
 import { preprocessMessages } from "$lib/server/preprocessMessages.js";
+import { usageLimits } from "$lib/server/usageLimits";
 export async function POST({ request, locals, params, getClientAddress }) {
 	const id = z.string().parse(params.id);
 		}
 	}
+	if (usageLimits?.messagesPerMinute) {
+		// check if the user is rate limited
+		const nEvents = Math.max(
+			await collections.messageEvents.countDocuments({ userId }),
+			await collections.messageEvents.countDocuments({ ip: getClientAddress() })
+		);
+		if (nEvents > usageLimits.messagesPerMinute) {
+			throw error(429, ERROR_MESSAGES.rateLimited);
+		}
+	}
+	if (usageLimits?.messages && conv.messages.length > usageLimits.messages) {
+		throw error(
+			429,
+			`This conversation has more than ${usageLimits.messages} messages. Start a new one to continue`
+		);
 	}
 	// fetch the model
 	} = z
 		.object({
 			id: z.string().uuid().refine(isMessageId).optional(), // parent message id to append to for a normal message, or the message id for a retry/continue
+			inputs: z.optional(
+				z
+					.string()
+					.trim()
+					.min(1)
+					.transform((s) => s.replace(/\r\n/g, "\n"))
+			),
 			is_retry: z.optional(z.boolean()),
 			is_continue: z.optional(z.boolean()),
 			web_search: z.optional(z.boolean()),
 		})
 		.parse(json);
+	if (usageLimits?.messageLength && (newPrompt?.length ?? 0) > usageLimits.messageLength) {
+		throw error(400, "Message too long.");
+	}
 	// files is an array of base64 strings encoding Blob objects
 	// we need to convert this array to an array of File objects

src/routes/settings/assistants/new/+page.server.ts CHANGED Viewed

@@ -7,6 +7,7 @@ import { ObjectId } from "mongodb";
 import { z } from "zod";
 import { sha256 } from "$lib/utils/sha256";
 import sharp from "sharp";
 import { generateSearchTokens } from "$lib/utils/searchTokens";
 const newAsssistantSchema = z.object({
@@ -62,6 +63,18 @@ export const actions: Actions = {
 			return fail(400, { error: true, errors });
 		}
 		const createdById = locals.user?._id ?? locals.sessionId;
 		const newAssistantId = new ObjectId();

 import { z } from "zod";
 import { sha256 } from "$lib/utils/sha256";
 import sharp from "sharp";
+import { usageLimits } from "$lib/server/usageLimits";
 import { generateSearchTokens } from "$lib/utils/searchTokens";
 const newAsssistantSchema = z.object({
 			return fail(400, { error: true, errors });
 		}
+		const assistantsCount = await collections.assistants.countDocuments(authCondition(locals));
+		if (usageLimits?.assistants && assistantsCount > usageLimits.assistants) {
+			const errors = [
+				{
+					field: "preprompt",
+					message: "You have reached the maximum number of assistants. Delete some to continue.",
+				},
+			];
+			return fail(400, { error: true, errors });
+		}
 		const createdById = locals.user?._id ?? locals.sessionId;
 		const newAssistantId = new ObjectId();