Spaces:

oceansweep
/

tldw

Running

App Files Files Community

tldw / App_Function_Libraries /Benchmarks_Evaluations /Confabulation_check.py

oceansweep

Upload 155 files

43cd37c verified about 2 months ago

raw

history blame contribute delete

2.77 kB

	# Confabulation_check.py
	#
	# This file contains functions that check a summary for confabulation by asking an LLM to evaluate it against the original transcript.
	#
	#
	# Imports
	#
	# External Imports
	#
	# Local Imports
	#
	#
	####################################################################################################
	#
	# Functions:
	from App_Function_Libraries.Chat import chat_api_call
	from App_Function_Libraries.Benchmarks_Evaluations.ms_g_eval import validate_inputs, detailed_api_error


	def simplified_geval(transcript: str, summary: str, api_name: str, api_key: str, temp: float = 0.7) -> str:
	    """
	    Score a summary against its source transcript with one LLM request.

	    A single prompt asks the model to rate the summary on coherence,
	    consistency, fluency and relevance, and to give an overall assessment.

	    Args:
	        transcript (str): Source text the summary was produced from.
	        summary (str): Summary text to be judged.
	        api_name (str): Identifier of the LLM API, forwarded to chat_api_call.
	        api_key (str): Credential for that API, forwarded to chat_api_call.
	        temp (float, optional): Sampling temperature for the request. Defaults to 0.7.

	    Returns:
	        str: The model's reply under a "Confabulation Check Results:" heading.
	        If validate_inputs raises ValueError, the text of that error is
	        returned instead; if the API call raises, the value produced by
	        detailed_api_error(api_name, error) is returned.
	    """
	    # Guard clause: invalid inputs are reported to the caller as plain text, not raised.
	    try:
	        validate_inputs(transcript, summary, api_name, api_key)
	    except ValueError as validation_error:
	        return str(validation_error)

	    # The full rubric plus both texts go into one prompt so a single request suffices.
	    evaluation_prompt = f"""You are an AI assistant tasked with evaluating the quality of a summary. You will be given an original transcript and a summary of that transcript. Your task is to evaluate the summary based on the following criteria:

	1. Coherence (1-5): How well-structured and organized is the summary?
	2. Consistency (1-5): How factually aligned is the summary with the original transcript?
	3. Fluency (1-3): How well-written is the summary in terms of grammar, spelling, and readability?
	4. Relevance (1-5): How well does the summary capture the important information from the transcript?

	Please provide a score for each criterion and a brief explanation for your scoring. Then, give an overall assessment of the summary's quality.

	Original Transcript:
	{transcript}

	Summary to Evaluate:
	{summary}

	Please provide your evaluation in the following format:
	Coherence: [score] - [brief explanation]
	Consistency: [score] - [brief explanation]
	Fluency: [score] - [brief explanation]
	Relevance: [score] - [brief explanation]

	Overall Assessment: [Your overall assessment of the summary's quality]
	"""

	    # Any failure of the remote call is turned into an error message for the caller.
	    try:
	        api_response = chat_api_call(
	            api_name,
	            api_key,
	            evaluation_prompt,
	            "",
	            temp=temp,
	            system_message="You are a helpful AI assistant tasked with evaluating summaries."
	        )
	    except Exception as api_error:
	        return detailed_api_error(api_name, api_error)

	    # Wrap the raw reply under a fixed heading.
	    return f"""
	Confabulation Check Results:

	{api_response}
	"""