Spaces:

Illumotion
/

Koboldcpp

Build error

App Files Files Community

Koboldcpp / examples / server / public / completion.js

Illumotion's picture

Upload folder using huggingface_hub

edc20ac over 1 year ago

history blame contribute delete

5.1 kB

	// Default request parameters for llama(). They are spread first into the
	// request body, so any key supplied in `params` overrides the value here.
	// NOTE(review): the meaning of each key is defined by the server, not by this
	// file — presumably `n_predict` caps the number of generated tokens and `stop`
	// lists stop strings; confirm against the server documentation.
	const paramDefaults = {
	stream: true,
	n_predict: 500,
	temperature: 0.2,
	stop: ["</s>"]
	};

	// Module-level cache of generation settings. It is assigned by llama() when a
	// stop message carries `generation_settings`, and by llamaModelInfo() from
	// the JSON returned by "/model.json" when nothing has been cached yet.
	let generation_settings = null;


	// Completes the prompt as a generator. Recommended for most use cases.
	//
	// POSTs `{ ...paramDefaults, ...params, prompt }` to "/completion" and parses
	// the response body as a stream of "field: value" lines. Every `data` line is
	// JSON-decoded and yielded as its own object `{ ...readResult, data }`, where
	// other fields seen earlier in the same read (e.g. `event`) are included as
	// plain strings.
	//
	// Parameters:
	//   prompt - the prompt text; always overrides any `prompt` key in `params`.
	//   params - request parameters merged over `paramDefaults`.
	//   config - optional `{ controller }`; an AbortController used for the
	//            request. One is created when none is supplied.
	//
	// Returns (as the generator's return value) the concatenation of every
	// yielded `data.content`.
	//
	// Throws whatever fetch, the stream reader or JSON.parse throw. Errors other
	// than AbortError are logged with console.error before being rethrown.
	// The controller is always aborted when the generator finishes, including
	// when the consumer stops iterating early.
	//
	// Example:
	//
	// import { llama } from '/completion.js'
	//
	// const request = llama("Tell me a joke", {n_predict: 800})
	// for await (const chunk of request) {
	// document.write(chunk.data.content)
	// }
	//
	export async function* llama(prompt, params = {}, config = {}) {
		const controller = config.controller || new AbortController();

		const completionParams = { ...paramDefaults, ...params, prompt };

		const response = await fetch("/completion", {
			method: 'POST',
			body: JSON.stringify(completionParams),
			headers: {
				'Connection': 'keep-alive',
				'Content-Type': 'application/json',
				'Accept': 'text/event-stream'
			},
			signal: controller.signal,
		});

		const reader = response.body.getReader();
		const decoder = new TextDecoder();

		// Deliberately NOT global: a /g regex keeps `lastIndex` between exec() calls,
		// which made every field line that directly followed another one fail to match.
		const fieldPattern = /^(\S+):\s(.*)$/m;

		let content = "";
		let leftover = ""; // Buffer for a partially received line

		try {
			let cont = true;

			while (cont) {
				const result = await reader.read();
				if (result.done) {
					break;
				}

				// `stream: true` keeps an incomplete multi-byte UTF-8 sequence buffered in
				// the decoder until the rest of the character arrives in the next chunk.
				const text = leftover + decoder.decode(result.value, { stream: true });

				// The element after the last '\n' is either "" (text ended on a line
				// break) or an incomplete line; in both cases it is carried over.
				const lines = text.split('\n');
				leftover = lines.pop();

				for (const line of lines) {
					const match = fieldPattern.exec(line);
					if (!match) {
						continue;
					}

					const field = match[1];
					const value = match[2];

					if (field !== 'data') {
						// Remember other fields so they are attached to later events of this read.
						result[field] = value;
						continue;
					}

					// since we know this is llama.cpp, let's just decode the json in data
					const data = JSON.parse(value);
					content += data.content;

					// Yield a fresh object per event so consumers that keep references do
					// not see earlier chunks overwritten by later ones.
					yield { ...result, data };

					// if we got a stop token from server, we will break here
					if (data.stop) {
						if (data.generation_settings) {
							generation_settings = data.generation_settings;
						}
						cont = false;
						break;
					}
				}
			}
		} catch (e) {
			if (e.name !== 'AbortError') {
				console.error("llama error: ", e);
			}
			throw e;
		}
		finally {
			controller.abort();
		}

		return content;
	}

	// Call llama, return an event target that you can subscribe to.
	//
	// The completion runs in the background and is reported through these events
	// (all are CustomEvent instances, payload in `event.detail`):
	//   "message"             - the `data` object of every chunk that has one
	//   "generation_settings" - `data.generation_settings`, when present
	//   "timings"             - `data.timings`, when present
	//   "done"                - `{ content }` with the concatenated text, after
	//                           the stream finished successfully
	//   "error"               - the error thrown by llama() (including an
	//                           AbortError); "done" is not dispatched in that case
	//
	// Parameters are forwarded unchanged to llama(prompt, params, config).
	//
	// Example:
	//
	// import { llamaEventTarget } from '/completion.js'
	//
	// const conn = llamaEventTarget(prompt)
	// conn.addEventListener("message", (chunk) => {
	// document.write(chunk.detail.content)
	// })
	//
	export const llamaEventTarget = (prompt, params = {}, config = {}) => {
		const eventTarget = new EventTarget();
		(async () => {
			let content = "";
			try {
				for await (const chunk of llama(prompt, params, config)) {
					const data = chunk.data;
					// Chunks without a payload carry nothing to report; previously they
					// crashed on the unguarded `chunk.data.generation_settings` access.
					if (!data) {
						continue;
					}
					content += data.content;
					eventTarget.dispatchEvent(new CustomEvent("message", { detail: data }));
					if (data.generation_settings) {
						eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: data.generation_settings }));
					}
					if (data.timings) {
						eventTarget.dispatchEvent(new CustomEvent("timings", { detail: data.timings }));
					}
				}
			} catch (error) {
				// Nobody awaits this background task, so report the failure as an event
				// instead of leaving an unhandled promise rejection.
				eventTarget.dispatchEvent(new CustomEvent("error", { detail: error }));
				return;
			}
			eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
		})();
		return eventTarget;
	};

	// Call llama, return a promise that resolves to the completed text. This does not support streaming
	//
	// Parameters are forwarded unchanged to llama(prompt, params, config). The
	// promise resolves to the concatenation of every chunk's `data.content` and
	// rejects with whatever error llama() throws.
	//
	// Example:
	//
	// llamaPromise(prompt).then((content) => {
	// document.write(content)
	// })
	//
	// or
	//
	// const content = await llamaPromise(prompt)
	// document.write(content)
	//
	export const llamaPromise = async (prompt, params = {}, config = {}) => {
		// An async function already returns a promise and turns a thrown error into
		// a rejection, so no explicit `new Promise` wrapper is needed.
		let content = "";
		for await (const chunk of llama(prompt, params, config)) {
			content += chunk.data.content;
		}
		return content;
	};

	/**
	* (deprecated)
	*
	* Callback-style wrapper around llama(): runs the completion for
	* `params.prompt`, passing `params` as the request parameters and
	* `controller` as the abort controller, and invokes `callback` once per
	* yielded chunk. The returned promise resolves to undefined when the
	* stream ends.
	*/
	export const llamaComplete = async (params, controller, callback) => {
		const { prompt } = params;
		const stream = llama(prompt, params, { controller });
		for await (const piece of stream) {
			callback(piece);
		}
	};

	// Get the model info from the server. This is useful for getting the context window and so on.
	//
	// Returns the cached `generation_settings` when there is one; otherwise
	// fetches "/model.json", stores the parsed JSON in the cache and returns it.
	export const llamaModelInfo = async () => {
		if (generation_settings) {
			return generation_settings;
		}

		const response = await fetch("/model.json");
		generation_settings = await response.json();
		return generation_settings;
	};