# ccv3_parser.py
# Description: Parser for Character Card V3 (CCv3) data supplied as JSON, PNG/APNG, or CHARX input.
#
# Imports
from typing import Any, Dict, List, Optional, Union | |
import re | |
# | |
# External Imports | |
# | |
# Local Imports | |
from App_Function_Libraries.Personas.models import Lorebook, Asset, CharacterCardV3, CharacterCardV3Data, Decorator, \ | |
LorebookEntry | |
from App_Function_Libraries.Personas.utils import validate_iso_639_1, extract_json_from_charx, parse_json_file, \ | |
extract_text_chunks_from_png, decode_base64 | |
# | |
############################################################################################################ | |
# | |
# Functions: | |
class CCv3ParserError(Exception):
    """Custom exception for CCv3 Parser errors.

    Raised for every validation or extraction failure encountered while
    parsing a Character Card V3, so callers only need to catch this one type.
    """
class CharacterCardV3Parser:
    """Parse Character Card V3 (CCv3) data from JSON, PNG/APNG or CHARX input.

    Typical use::

        parser = CharacterCardV3Parser(raw, 'json')
        parser.parse()
        card = parser.get_character_card()

    Every validation or extraction failure is reported as ``CCv3ParserError``.
    """

    REQUIRED_SPEC = 'chara_card_v3'
    REQUIRED_VERSION = '3.0'

    # Keys that must be present in the card's 'data' object.
    _REQUIRED_DATA_FIELDS = (
        'name', 'description', 'tags', 'creator', 'character_version',
        'mes_example', 'extensions', 'system_prompt',
        'post_history_instructions', 'first_mes',
        'alternate_greetings', 'personality', 'scenario',
        'creator_notes', 'group_only_greetings',
    )

    def __init__(self, input_data: Union[str, bytes], input_type: str):
        """
        Initialize the parser with input data.

        :param input_data: The input data as a string or bytes.
        :param input_type: The type of the input data: 'json', 'png', 'apng', 'charx'
            (matched case-insensitively).
        """
        self.input_data = input_data
        self.input_type = input_type.lower()
        self.character_card: Optional[CharacterCardV3] = None

    def parse(self):
        """Main method to parse the input data based on its type.

        On success the result is available from :meth:`get_character_card`.

        :raises CCv3ParserError: if the input type is unsupported or parsing fails.
        """
        if self.input_type == 'json':
            self.parse_json_input()
        elif self.input_type in ['png', 'apng']:
            self.parse_png_apng_input()
        elif self.input_type == 'charx':
            self.parse_charx_input()
        else:
            raise CCv3ParserError(f"Unsupported input type: {self.input_type}")

    def parse_json_input(self):
        """Parse JSON input directly.

        String input is encoded to UTF-8 bytes before being handed to
        ``parse_json_file``; bytes are passed through unchanged.

        :raises CCv3ParserError: on any failure (the original error is chained).
        """
        try:
            raw = self.input_data
            if isinstance(raw, str):
                raw = raw.encode('utf-8')
            data = parse_json_file(raw)
            self.character_card = self._build_character_card(data)
        except Exception as e:
            # Single boundary: callers only ever see CCv3ParserError.
            raise CCv3ParserError(f"Failed to parse JSON input: {e}") from e

    def parse_png_apng_input(self):
        """Parse PNG or APNG input by extracting 'ccv3' tEXt chunk.

        The chunk value is base64-decoded and the result parsed as JSON.

        :raises CCv3ParserError: if the chunk is absent or any step fails.
        """
        try:
            text_chunks = extract_text_chunks_from_png(self.input_data)
            if 'ccv3' not in text_chunks:
                raise CCv3ParserError("PNG/APNG does not contain 'ccv3' tEXt chunk.")
            ccv3_json_bytes = decode_base64(text_chunks['ccv3'])
            data = parse_json_file(ccv3_json_bytes)
            self.character_card = self._build_character_card(data)
        except Exception as e:
            raise CCv3ParserError(f"Failed to parse PNG/APNG input: {e}") from e

    def parse_charx_input(self):
        """Parse CHARX input using ``extract_json_from_charx`` to obtain the card JSON.

        :raises CCv3ParserError: on any failure (the original error is chained).
        """
        try:
            data = extract_json_from_charx(self.input_data)
            self.character_card = self._build_character_card(data)
        except Exception as e:
            raise CCv3ParserError(f"Failed to parse CHARX input: {e}") from e

    def _build_character_card(self, data: Dict[str, Any]) -> CharacterCardV3:
        """Build the CharacterCardV3 object from parsed data.

        :param data: The decoded top-level card object ('spec', 'spec_version', 'data').
        :return: The populated CharacterCardV3.
        :raises CCv3ParserError: if the spec, version, or any required field is invalid.
        """
        if not isinstance(data, dict):
            raise CCv3ParserError("CharacterCardV3 must be a JSON object.")

        # Validate required fields
        spec = data.get('spec')
        spec_version = data.get('spec_version')
        if spec != self.REQUIRED_SPEC:
            raise CCv3ParserError(f"Invalid spec: Expected '{self.REQUIRED_SPEC}', got '{spec}'")
        if spec_version != self.REQUIRED_VERSION:
            # As per spec, should not reject but handle versions.
            # For now, proceed if version is >= 3.0.
            try:
                version_float = float(spec_version)
            except (TypeError, ValueError) as err:
                # TypeError covers a missing (None) or non-scalar spec_version.
                raise CCv3ParserError(f"Invalid spec_version format: '{spec_version}'") from err
            # Written as "not >=" so that NaN is rejected as well.
            if not version_float >= 3.0:
                raise CCv3ParserError(f"Unsupported spec_version: '{spec_version}' (must be >= '3.0')")

        data_field = data.get('data')
        if not data_field:
            raise CCv3ParserError("Missing 'data' field in CharacterCardV3 object.")
        if not isinstance(data_field, dict):
            raise CCv3ParserError("'data' field in CharacterCardV3 object must be a JSON object.")

        # Extract required fields
        for field_name in self._REQUIRED_DATA_FIELDS:
            if field_name not in data_field:
                raise CCv3ParserError(f"Missing required field in data: '{field_name}'")

        # Parse assets; an absent (or null) list falls back to the default icon.
        assets_data = data_field.get('assets')
        if assets_data is None:
            assets_data = [{
                'type': 'icon',
                'uri': 'ccdefault:',
                'name': 'main',
                'ext': 'png'
            }]
        assets = self._parse_assets(assets_data)

        # Parse creator_notes_multilingual
        creator_notes_multilingual = data_field.get('creator_notes_multilingual')
        if creator_notes_multilingual is not None:
            if not isinstance(creator_notes_multilingual, dict):
                raise CCv3ParserError("'creator_notes_multilingual' must be a dictionary.")
            # Validate ISO 639-1 codes
            for lang_code in creator_notes_multilingual:
                if not validate_iso_639_1(lang_code):
                    raise CCv3ParserError(
                        f"Invalid language code in 'creator_notes_multilingual': '{lang_code}'")

        # Parse character_book (optional; empty or missing yields None)
        character_book_data = data_field.get('character_book')
        character_book = self._parse_lorebook(character_book_data) if character_book_data else None

        # Build CharacterCardV3Data
        character_card_data = CharacterCardV3Data(
            name=data_field['name'],
            description=data_field['description'],
            tags=data_field['tags'],
            creator=data_field['creator'],
            character_version=data_field['character_version'],
            mes_example=data_field['mes_example'],
            extensions=data_field['extensions'],
            system_prompt=data_field['system_prompt'],
            post_history_instructions=data_field['post_history_instructions'],
            first_mes=data_field['first_mes'],
            alternate_greetings=data_field['alternate_greetings'],
            personality=data_field['personality'],
            scenario=data_field['scenario'],
            creator_notes=data_field['creator_notes'],
            character_book=character_book,
            assets=assets,
            nickname=data_field.get('nickname'),
            creator_notes_multilingual=creator_notes_multilingual,
            source=data_field.get('source'),
            group_only_greetings=data_field['group_only_greetings'],
            creation_date=data_field.get('creation_date'),
            modification_date=data_field.get('modification_date')
        )
        return CharacterCardV3(
            spec=spec,
            spec_version=spec_version,
            data=character_card_data
        )

    def _parse_assets(self, assets_data: List[Dict[str, Any]]) -> List[Asset]:
        """Parse and validate assets.

        :param assets_data: List of asset objects; each needs string 'type', 'uri'
            and 'ext' fields, plus an optional 'name' (defaults to '').
        :return: One Asset per input object, with 'ext' lower-cased.
        :raises CCv3ParserError: if the list or any asset is malformed.
        """
        if not isinstance(assets_data, list):
            raise CCv3ParserError("'assets' must be a list.")

        assets = []
        for asset_data in assets_data:
            if not isinstance(asset_data, dict):
                raise CCv3ParserError("Each asset must be a JSON object.")
            # Validate required fields
            for field in ('type', 'uri', 'ext'):
                if field not in asset_data:
                    raise CCv3ParserError(f"Asset missing required field: '{field}'")
                if not isinstance(asset_data[field], str):
                    raise CCv3ParserError(f"Asset field '{field}' must be a string.")
            # Optional 'name'
            name = asset_data.get('name', '')
            # Validate 'ext'; fullmatch so a trailing newline cannot slip past '$'.
            ext = asset_data['ext'].lower()
            if not re.fullmatch(r'[a-z0-9]+', ext):
                raise CCv3ParserError(f"Invalid file extension in asset: '{ext}'")
            assets.append(Asset(
                type=asset_data['type'],
                uri=asset_data['uri'],
                name=name,
                ext=ext
            ))
        return assets

    def _parse_lorebook(self, lorebook_data: Dict[str, Any]) -> Lorebook:
        """Parse and validate Lorebook object.

        :param lorebook_data: The decoded 'character_book' object.
        :return: A Lorebook; optional scalar fields are passed through as found
            (None when absent), 'extensions' defaults to {} and 'entries' to [].
        :raises CCv3ParserError: if the lorebook or any entry is malformed.
        """
        # Validate Lorebook fields
        if not isinstance(lorebook_data, dict):
            raise CCv3ParserError("Lorebook must be a JSON object.")

        # A null 'entries' value is treated the same as a missing one.
        entries_data = lorebook_data.get('entries')
        if entries_data is None:
            entries_data = []

        return Lorebook(
            name=lorebook_data.get('name'),
            description=lorebook_data.get('description'),
            scan_depth=lorebook_data.get('scan_depth'),
            token_budget=lorebook_data.get('token_budget'),
            recursive_scanning=lorebook_data.get('recursive_scanning'),
            extensions=lorebook_data.get('extensions', {}),
            entries=self._parse_lorebook_entries(entries_data)
        )

    def _parse_lorebook_entries(self, entries_data: List[Dict[str, Any]]) -> List[LorebookEntry]:
        """Parse and validate Lorebook entries.

        :param entries_data: List of entry objects; each needs 'keys' (list of str),
            'content' (str), 'enabled' (bool) and 'insertion_order' (number).
        :return: One LorebookEntry per input object, with decorators extracted
            from 'content' and 'insertion_order' converted to int.
        :raises CCv3ParserError: if the list or any entry is malformed.
        """
        if not isinstance(entries_data, list):
            raise CCv3ParserError("Lorebook 'entries' must be a list.")

        entries = []
        for entry_data in entries_data:
            if not isinstance(entry_data, dict):
                raise CCv3ParserError("Lorebook entry must be a JSON object.")
            # Validate required fields
            for field in ('keys', 'content', 'enabled', 'insertion_order'):
                if field not in entry_data:
                    raise CCv3ParserError(f"Lorebook entry missing required field: '{field}'")
            keys = entry_data['keys']
            if not isinstance(keys, list) or not all(isinstance(k, str) for k in keys):
                raise CCv3ParserError("'keys' field in Lorebook entry must be a list of strings.")
            if not isinstance(entry_data['content'], str):
                raise CCv3ParserError("'content' field in Lorebook entry must be a string.")
            if not isinstance(entry_data['enabled'], bool):
                raise CCv3ParserError("'enabled' field in Lorebook entry must be a boolean.")
            if not isinstance(entry_data['insertion_order'], (int, float)):
                raise CCv3ParserError("'insertion_order' field in Lorebook entry must be a number.")

            # Optional fields. Checks use "is not None" rather than truthiness so
            # that falsy values of the wrong type (0, '') are still rejected.
            selective = entry_data.get('selective')
            secondary_keys = entry_data.get('secondary_keys')
            position = entry_data.get('position')
            if selective is not None and not isinstance(selective, bool):
                raise CCv3ParserError("'selective' field in Lorebook entry must be a boolean.")
            if secondary_keys is not None:
                if (not isinstance(secondary_keys, list)
                        or not all(isinstance(k, str) for k in secondary_keys)):
                    raise CCv3ParserError(
                        "'secondary_keys' field in Lorebook entry must be a list of strings.")
            if position is not None and not isinstance(position, str):
                raise CCv3ParserError("'position' field in Lorebook entry must be a string.")

            # Parse decorators from content
            decorators = self._extract_decorators(entry_data['content'])

            # Create LorebookEntry
            entries.append(LorebookEntry(
                keys=keys,
                content=entry_data['content'],
                enabled=entry_data['enabled'],
                insertion_order=int(entry_data['insertion_order']),
                use_regex=entry_data.get('use_regex', False),
                constant=entry_data.get('constant'),
                selective=selective,
                secondary_keys=secondary_keys,
                position=position,
                decorators=decorators,
                name=entry_data.get('name'),
                priority=entry_data.get('priority'),
                id=entry_data.get('id'),
                comment=entry_data.get('comment')
            ))
        return entries

    def _extract_decorators(self, content: str) -> List[Decorator]:
        """Extract decorators from the content field.

        Only lines that begin with '@@' (no leading whitespace) are considered.

        :param content: The lorebook entry content.
        :return: Decorators in the order they appear; empty if there are none.
        """
        decorators = []
        for line in content.splitlines():
            if not line.startswith('@@'):
                continue
            decorator = self._parse_decorator_line(line)
            if decorator:
                decorators.append(decorator)
        return decorators

    def _parse_decorator_line(self, line: str) -> Optional[Decorator]:
        """
        Parses a single decorator line.

        Example:
            @@decorator_name value
            @@@fallback_decorator value

        Primary ('@@') and fallback ('@@@') lines are parsed identically; the
        returned Decorator does not record which kind it was, and fallbacks are
        not linked to the primary decorator that precedes them.

        :param line: One line of entry content.
        :return: A Decorator, or None when the line does not start with '@@'.
        """
        if not line.startswith('@@'):
            return None
        # Drop the '@' prefix, then split "name value" on the first space.
        name, separator, remainder = line.lstrip('@').strip().partition(' ')
        value = remainder if separator else None
        return Decorator(name=name, value=value)

    def get_character_card(self) -> Optional[CharacterCardV3]:
        """Returns the parsed CharacterCardV3 object (None until a parse succeeds)."""
        return self.character_card
# | |
# End of ccv3_parser.py | |
############################################################################################################ |