Spaces:

neoai-inc-private
/

neo-llm-module-v1.3.5

Configuration error

App Files Files Community

neo-llm-module-v1.3.5 / neollm /utils /postprocess.py

Kpenciler

Upload 53 files

88435ed verified 6 months ago

raw

history blame

3.83 kB

	import json
	from typing import Any, overload


	# string ---------------------------------------
	def _extract_string(text: str, start_string: str \| None = None, end_string: str \| None = None) -> str:
	"""
	テキストから必要な文字列を抽出する

	Args:
	text (str): 抽出するテキスト

	Returns:
	str: 抽出された必要な文字列
	"""
	# 最初の文字
	if start_string is not None and start_string in text:
	idx_head = text.index(start_string)
	text = text[idx_head:]
	# 最後の文字
	if end_string is not None and end_string in text:
	idx_tail = len(text) - text[::-1].index(end_string[::-1])
	text = text[:idx_tail]
	return text


	def _delete_first_chapter_tag(text: str, first_character_tag: str \| list[str]) -> str:
	"""_summary_

	Args:
	text (str): テキスト
	first_character_tag (str \| list[str]): 最初にある余分な文字列

	Returns:
	str: 除去済みのテキスト
	"""
	# first_character_tagのlist化
	if isinstance(first_character_tag, str):
	first_character_tag = [first_character_tag]
	# 最初のチャプタータグの消去
	for first_character_i in first_character_tag:
	if text.startswith(first_character_i):
	text = text[len(first_character_i) :]
	break
	return text.strip()


	def strip_string(
	    text: str,
	    first_character: str | list[str] = ["<output>", "<outputs>"],
	    start_string: str | None = None,
	    end_string: str | None = None,
	    strip_quotes: str | list[str] = ["'", '"'],
	) -> str:
	    """Remove extra leading/trailing decoration from a model output string.

	    Processing order: drop one leading tag, strip each quote string (and
	    whitespace) from both ends, then keep only the span from the first
	    ``start_string`` to the last ``end_string``.

	    Args:
	        text (str): Raw output string (e.g. from ChatGPT).
	        first_character (str | list[str], optional): Tag(s) that may prefix
	            the output. Defaults to ["<output>", "<outputs>"].
	        start_string (str | None, optional): Marker the result should start
	            with; text before its first occurrence is dropped. Defaults to None.
	        end_string (str | None, optional): Marker the result should end
	            with; text after its last occurrence is dropped. Defaults to None.
	        strip_quotes (str | list[str], optional): Quote string(s) stripped
	            from both ends, applied one after another in the given order.
	            Each entry is passed to ``str.strip``, so it is treated as a set
	            of characters. Defaults to ["'", '"'].

	    Returns:
	        str: The cleaned string.

	    Examples:
	        >>> strip_string("<output>'''ChatGPT is smart!'''", "<output>")
	        'ChatGPT is smart!'
	        >>> strip_string('{"a": 1}', start_string="{", end_string="}")
	        '{"a": 1}'
	        >>> strip_string("<outputs> `neoAI`", strip_quotes="`")
	        'neoAI'
	    """
	    # The list defaults are only read here, never mutated, so sharing them
	    # between calls is harmless.
	    tags = [first_character] if isinstance(first_character, str) else first_character
	    # Offer longer tags first so that "<outputs>" is not mistaken for
	    # "<output>" followed by a stray "s>".
	    text = _delete_first_chapter_tag(text, sorted(tags, key=len, reverse=True))
	    # Strip the quote strings (and any whitespace they expose) from both ends.
	    quotes = [strip_quotes] if isinstance(strip_quotes, str) else strip_quotes
	    for quote in quotes:
	        text = text.strip(quote).strip()
	    return _extract_string(text, start_string, end_string).strip()


	# dict ---------------------------------------


	@overload
	def json2dict(json_string: str, error_key: None) -> dict[Any, Any] | str: ...


	@overload
	def json2dict(json_string: str, error_key: str = ...) -> dict[Any, Any]: ...


	def json2dict(json_string: str, error_key: str | None = "error") -> dict[Any, Any] | str:
	    """Convert a JSON string to a Python dict.

	    Only the span from the first "{" to the last "}" of ``json_string`` is
	    parsed, so text surrounding the JSON object is ignored.

	    Args:
	        json_string (str): String containing the JSON to convert.
	        error_key (str | None, optional): Key under which the fallback value
	            is stored when no dict can be produced. If None, the original
	            ``json_string`` is returned unchanged instead. Defaults to "error".

	    Returns:
	        dict | str: The parsed dict. If parsing fails, ``{error_key: json_string}``;
	        if the JSON parses to a non-dict value, ``{error_key: parsed_value}``.
	        When ``error_key`` is None, both fallbacks return ``json_string`` itself.
	    """
	    try:
	        # strict=False lets control characters (e.g. raw newlines) appear inside strings.
	        parsed = json.loads(_extract_string(json_string, start_string="{", end_string="}"), strict=False)
	    except ValueError:
	        # Not valid JSON (json.JSONDecodeError is a ValueError subclass).
	        return json_string if error_key is None else {error_key: json_string}
	    if isinstance(parsed, dict):
	        return parsed
	    # Valid JSON but not an object (number, list, ...). With error_key=None
	    # hand back the raw string rather than building a dict keyed by None.
	    if error_key is None:
	        return json_string
	    return {error_key: parsed}


	# calendar
	# YYYY年MM月DD日 -> YYYY-MM-DD