"""Module-level constants: prompt text, sampling values, and provider/UI option tables."""

# System prompt text; empty in this configuration.
SYS_PROMPT = ""

# User prompt text. It is assembled from adjacent string literals (one per
# sentence) so that the trailing space after every sentence and the single
# embedded newline before "Finally," are explicit and cannot be stripped by an
# editor. The resulting string ends with a trailing space.
USER_PROMPT = (
    "Create a detailed and accurate video description, starting from a specific scene and possibly transitioning through various themes and settings. "
    "Begin by describing the initial scene in detail, including the environment, key objects, any characters or creatures and their actions, and the overall atmosphere, considering specific aspects such as shot sizes (extreme close-up, close-up, medium, full, wide, etc.), camera movements (push, pull, shake, pan, tilt, rise, descend, etc.), and more. "
    "For example, if the scene involves a person like a young man sitting on a chair reading a book, describe his appearance and the surrounding environment, including basic features such as the character's gender, age, race, etc., as well as actions, emotions, dialogues, and performance content. "
    "If the scene includes animals or natural elements such as cats, the sky, or landscapes, vividly describe these elements and their behaviors or states, and consider the emotions and thematic elements introduced in this opening scene. "
    "Then, as the video progresses, describe the evolving visual effects, how they present a more vivid and rich picture through camera movements and special effects, considering aesthetics (style, tone, color palette, atmosphere, emotions, etc.). "
    "If the scene changes, explain how it transitions, what new elements are introduced, whether the atmosphere remains consistent or changes, and how this affects the overall narrative or theme of the video. "
    "If the video contains multiple scenes, describe the connections between them, whether creating a story, presenting a contrast, or highlighting different aspects of a theme, considering scenes (day, night, indoor, outdoor, etc.), props (relationship with characters and scenes, relationship with camera and scheduling), and scene scheduling (single character, multiple characters with camera and narrative association, and how they relate to scene props). \n"
    "Finally, conclude with a summary that encapsulates the essence of the video, combining all the described elements into a cohesive narrative or message, emphasizing the sensory and emotional experience provided by the video, and speculating on the impact or message intended for the audience, allowing viewers to engage in profound reflection and insight during the viewing process, thus achieving a deeper impact. "
    "The generated description should adhere to English grammar and be no less than 120 words in length. "
)

# Scalar values. SKIP, TEMP and TOP are reused below as the 'default' entries
# of GENERAL_CONFIG so each value is written in exactly one place.
SKIP = 2
TEMP = 0.3
TOP = 0.75
MAX_TOKEN = 512

# Provider name -> class name, stored as a string.
# The keys are the same four provider names used in PROVIDERS_CONFIG.
API_CLASSES = {
    'Azure': 'AzureAPI',
    'Google': 'GoogleAPI',
    'Anthropic': 'AnthropicAPI',
    'OpenAI': 'OpenAIAPI',
}

# Per-provider model names and the label strings for the key/endpoint fields.
# Azure and OpenAI list the same model names; the lists are deliberately kept
# as separate objects so mutating one cannot affect the other.
PROVIDERS_CONFIG = {
    'Azure': {
        'model': ['GPT-4o', 'GPT-4v'],
        'key_label': 'Azure API Key',
        'endpoint_label': 'Azure Endpoint',
    },
    'Google': {
        'model': ['Gemini-1.5-Flash', 'Gemini-1.5-Pro'],
        'key_label': 'Google API Key',
        'endpoint_label': 'Google API Endpoint',
    },
    'Anthropic': {
        'model': ['Claude-3-Opus', 'Claude-3-Sonnet'],
        'key_label': 'Anthropic API Key',
        'endpoint_label': 'Anthropic Endpoint',
    },
    'OpenAI': {
        'model': ['GPT-4o', 'GPT-4v'],
        'key_label': 'OpenAI API Key',
        'endpoint_label': 'OpenAI Endpoint',
    },
}

# Label, default and bounds (or choices) for each general option.
GENERAL_CONFIG = {
    'temp': {
        'label': 'Temperature',
        'default': TEMP,
        'min': 0,
        'max': 1,
        'step': 0.1,
    },
    'top_p': {
        'label': 'Top-P',
        'default': TOP,
        'min': 0,
        'max': 1,
        'step': 0.1,
    },
    'max_tokens': {
        'label': 'Max Tokens',
        # NOTE(review): MAX_TOKEN above is 512 (equal to 'min' here) while this
        # default is 4096. Both values are kept exactly as they were; confirm
        # which one is intended before linking them.
        'default': 4096,
        'min': 512,
        'max': 4096,
        'step': 1,
    },
    'frame_format': {
        'label': 'Frame Format',
        'default': 'JPEG',
        'choices': ['JPEG', 'PNG'],
    },
    'frame_skip': {
        'label': 'Frame Skip',
        'default': SKIP,
        'min': 2,
        'max': 100,
        'step': 1,
    },
    'group_size': {
        'label': 'Group Size',
        'default': 10,
        'min': 1,
        'max': 100,
        'step': 1,
    },
}