Kpenciler's picture
Upload 53 files
88435ed verified
raw
history blame
3.83 kB
import json
from typing import Any, overload
# string ---------------------------------------
def _extract_string(text: str, start_string: str | None = None, end_string: str | None = None) -> str:
"""
テキストから必要な文字列を抽出する
Args:
text (str): 抽出するテキスト
Returns:
str: 抽出された必要な文字列
"""
# 最初の文字
if start_string is not None and start_string in text:
idx_head = text.index(start_string)
text = text[idx_head:]
# 最後の文字
if end_string is not None and end_string in text:
idx_tail = len(text) - text[::-1].index(end_string[::-1])
text = text[:idx_tail]
return text
def _delete_first_chapter_tag(text: str, first_character_tag: str | list[str]) -> str:
"""_summary_
Args:
text (str): テキスト
first_character_tag (str | list[str]): 最初にある余分な文字列
Returns:
str: 除去済みのテキスト
"""
# first_character_tagのlist化
if isinstance(first_character_tag, str):
first_character_tag = [first_character_tag]
# 最初のチャプタータグの消去
for first_character_i in first_character_tag:
if text.startswith(first_character_i):
text = text[len(first_character_i) :]
break
return text.strip()
def strip_string(
    text: str,
    first_character: str | list[str] = ["<output>", "<outputs>"],
    start_string: str | None = None,
    end_string: str | None = None,
    strip_quotes: str | list[str] = ["'", '"'],
) -> str:
    """Remove surplus wrapping (leading tag, quotes, outer text) from a string.

    Args:
        text (str): Raw output string, e.g. a ChatGPT response.
        first_character (str | list[str], optional): Tag(s) that may prefix the
            output. Defaults to ["<output>", "<outputs>"].
        start_string (str | None, optional): If found, text before its first
            occurrence is dropped. Defaults to None.
        end_string (str | None, optional): If found, text after its last
            occurrence is dropped. Defaults to None.
        strip_quotes (str | list[str], optional): Quote characters removed from
            both ends of the text. Defaults to ["'", '"'].

    Returns:
        str: The cleaned string.

    Examples:
        >>> strip_string("<output>'''ChatGPT is smart!'''", "<output>")
        'ChatGPT is smart!'
        >>> strip_string('{"a": 1}', start_string="{", end_string="}")
        '{"a": 1}'
        >>> strip_string("<output> `neoAI`", strip_quotes="`")
        'neoAI'
    """
    # Step 1: drop the leading tag (the helper also strips whitespace).
    cleaned = _delete_first_chapter_tag(text, first_character)
    # Step 2: peel each kind of quote off both ends, trimming whitespace after
    # every pass. The list defaults are only read, never mutated.
    quote_marks = [strip_quotes] if isinstance(strip_quotes, str) else strip_quotes
    for mark in quote_marks:
        cleaned = cleaned.strip(mark).strip()
    # Step 3: narrow to the span between the optional start/end markers.
    return _extract_string(cleaned, start_string, end_string).strip()
# dict ---------------------------------------
@overload
def json2dict(json_string: str, error_key: None) -> dict[Any, Any] | str: ...
@overload
def json2dict(json_string: str, error_key: str = ...) -> dict[Any, Any]: ...
def json2dict(json_string: str, error_key: str | None = "error") -> dict[Any, Any] | str:
    """Convert a JSON string to a Python dict.

    Only the span from the first "{" to the last "}" is parsed, so text
    surrounding a JSON object is ignored.

    Args:
        json_string (str): String containing the JSON to convert.
        error_key (str | None, optional): Key under which the payload is stored
            when the input cannot be turned into a dict. When None, the
            original ``json_string`` is returned instead. Defaults to "error".

    Returns:
        dict | str: The parsed dict on success. Otherwise, with a string
        ``error_key``: ``{error_key: json_string}`` if parsing failed, or
        ``{error_key: parsed_value}`` if the JSON was valid but not an object.
        With ``error_key=None``: the original ``json_string`` in both cases.
    """
    candidate = _extract_string(json_string, start_string="{", end_string="}")
    try:
        # strict=False tolerates control characters inside JSON strings.
        parsed = json.loads(candidate, strict=False)
    except ValueError:
        # Not valid JSON (json.JSONDecodeError is a ValueError subclass).
        if error_key is None:
            return json_string
        return {error_key: json_string}
    if isinstance(parsed, dict):
        return parsed
    # Valid JSON that is not an object (list, number, string, ...). Without an
    # error key there is nothing sensible to wrap it in, so hand back the input
    # unchanged rather than building a dict keyed by None.
    if error_key is None:
        return json_string
    return {error_key: parsed}
# calendar
# YYYY年MM月DD日 -> YYYY-MM-DD