import gradio as gr
import logging
import json
import os
from typing import Dict, Any, List
from itertools import groupby
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
video_folder = 'video/'
metadata_folder = 'metadata/'
def load_video_list() -> List[Dict[str, str]]:
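    """Scan the video folder for .mp4 files that have matching metadata and return
    a list of {'video_id', 'title'} dicts, with a few featured videos pinned to the top."""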
video_list = []
for filename in os.listdir(video_folder):
if filename.endswith('.mp4'):
video_id = os.path.splitext(filename)[0]
metadata_path = os.path.join(metadata_folder, f"{video_id}.json")
if os.path.exists(metadata_path):
with open(metadata_path, 'r') as f:
metadata = json.load(f)
metadata = metadata['content_metadata']
title = metadata.get('title', 'Untitled')
video_list.append({"video_id": video_id, "title": title})
# Define the custom order for the first five videos
custom_order = ['7BhJmDPB7RU', 'PrAwsi3Ldzo', '3rhsSPxQ39c', 'P7WnJZ55sgc', 'g9GtUQs7XUM']
# Custom sorting function
def custom_sort(item):
try:
return custom_order.index(item['video_id'])
except ValueError:
return len(custom_order) + 1 # Place non-specified videos after the custom ordered ones
# Sort the video list
video_list.sort(key=lambda x: (custom_sort(x), x['title']))
return video_list
def score_to_emoji(score):
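    """Map a 0-1 score to one of five emoji buckets for compact display."""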
if score < 0.2:
return "😴"
elif score < 0.4:
return "🙂"
elif score < 0.6:
return "😊"
elif score < 0.8:
return "😃"
else:
return "🤩"
def load_metadata(video_id: str) -> Dict[str, Any]:
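    """Load and return the 'content_metadata' block from metadata/<video_id>.json.
    Logs and re-raises if the file is missing or contains invalid JSON."""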
metadata_path = os.path.join(metadata_folder, f"{video_id}.json")
try:
with open(metadata_path, 'r') as f:
            metadata = json.load(f)
            return metadata['content_metadata']
except FileNotFoundError:
logger.error(f"Metadata file not found for video ID: {video_id}")
raise
except json.JSONDecodeError:
logger.error(f"Invalid JSON in metadata file for video ID: {video_id}")
raise
def timestamp_to_seconds(timestamp: str) -> float:
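    """Convert an 'H:MM:SS(.fff)' timestamp string to seconds; returns 0.0 on malformed input."""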
try:
h, m, s = timestamp.split(':')
return int(h) * 3600 + int(m) * 60 + float(s)
except ValueError:
logger.error(f"Invalid timestamp format: {timestamp}")
return 0.0
def format_timestamp(timestamp: str) -> str:
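    """Render an 'H:MM:SS' timestamp as an 'MM:SS' display string (the hour component
    is not shown). Returns an empty string on malformed input."""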
try:
h, m, s = timestamp.split(':')
return f"{int(m):02d}:{int(float(s)):02d}"
except Exception as e:
logger.error(f"Invalid timestamp format: {timestamp}")
return ""
def create_scene_table(scene: Dict[str, Any]) -> str:
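    """Build the HTML block for a single scene: a header with emoji dynamism and
    audio-visual scores plus cast, followed by a chronologically sorted table of
    activities, props, mood, narrative progression, editing details, thematic
    elements, contextual relevance and character interactions, each row carrying
    clickable timestamp links."""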
dynamism_score = scene.get('dynamismScore', 0)
av_correlation = scene.get('audioVisualCorrelation', 0)
cast = ", ".join([cast_member for cast_member in scene.get('cast', []) if cast_member and cast_member != 'None'])
output = f"""
<div class="scene-container">
<h3>Scene {scene.get('sceneId', 'Unknown')}: {scene.get('title', '')}</h3>
<p>Dynamism: {score_to_emoji(dynamism_score)} Audio-visual correlation: {score_to_emoji(av_correlation)} Cast: {cast}</p>
<table class="metadata-table">
<tr>
<th>Timestamp</th>
<th>Type</th>
<th>Description</th>
</tr>
"""
scene_events = []
# Collect all scene data
data_types = [
('Activities', scene.get('activities', [])),
('Props', scene.get('props', [])),
('Mood', [scene.get('mood', {})]),
('Narrative Progression', scene.get('narrativeProgression', [])),
('Video Editing Details', scene.get('videoEditingDetails', [])),
('Thematic Elements', [{'description': scene.get('thematicElements', '')}]),
('Contextual Relevance', [{'description': scene.get('contextualRelevance', '')}]),
('Character Interaction', scene.get('characterInteraction', []))
]
for data_type, data_list in data_types:
for item in data_list:
if isinstance(item, dict):
start_time = ''
end_time = ''
description = ''
if data_type == 'Activities':
start_time = item.get('timestamp', {}).get('start_timestamp', '')
end_time = item.get('timestamp', {}).get('end_timestamp', '')
description = item.get('description', '')
elif data_type == 'Props':
start_time = item.get('timestamp', {}).get('start_timestamp', '')
end_time = item.get('timestamp', {}).get('end_timestamp', '')
description = item.get('name', '')
elif data_type == 'Video Editing Details':
start_time = item.get('timestamps', {}).get('start_timestamp', '')
end_time = item.get('timestamps', {}).get('end_timestamp', '')
description = item.get('description', '')
elif data_type == 'Mood':
description = item.get('description', '')
# Handle mood changes
for mood_change in item.get('keyMoments', []):
if isinstance(mood_change, dict):
mood_change_description = mood_change.get('changeDescription', '')
if mood_change_description and mood_change_description != 'None':
scene_events.append({
'timestamp_start': mood_change.get('timestamp', ''),
'timestamp_end': '',
'type': 'Mood Change',
'description': mood_change_description
})
elif data_type == 'Character Interaction':
characters = ', '.join([char for char in item.get('characters', []) if char and char != 'None'])
description = f"{characters}: {item.get('description', '')}"
else:
start_time = item.get('timestamp', '')
description = item.get('description', '')
if description and description != 'None': # Only add the event if there's a valid description
scene_events.append({
'timestamp_start': start_time,
'timestamp_end': end_time,
'type': data_type,
'description': description
})
elif isinstance(item, str) and item and item != 'None': # Only add non-empty and non-'None' string items
scene_events.append({
'timestamp_start': '',
'timestamp_end': '',
'type': data_type,
'description': item
})
# Sort events by timestamp
    scene_events.sort(key=lambda x: timestamp_to_seconds(x['timestamp_start']) if x['timestamp_start'] else 0.0)
for event in scene_events:
start_time = format_timestamp(event['timestamp_start'])
end_time = format_timestamp(event['timestamp_end'])
start_link = f'<a href="#" class="timestamp-link" data-timestamp="{event["timestamp_start"]}">{start_time}</a>' if start_time else ''
end_link = f' - <a href="#" class="timestamp-link" data-timestamp="{event["timestamp_end"]}">{end_time}</a>' if end_time else ''
output += f"""
<tr>
<td>{start_link}{end_link}</td>
<td>{event['type']}</td>
<td>{event['description']}</td>
</tr>
"""
output += """
</table>
</div>
"""
return output
def create_storylines_table(storylines: Dict[str, Any]) -> str:
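    """Build the HTML table summarising the video's storylines and the scenes they involve."""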
output = """
<div class="storylines-container">
<h3>Storylines</h3>
<table class="metadata-table">
<tr>
<th>Storyline</th>
<th>Scenes Involved</th>
</tr>
"""
output += f"""
<tr>
<td>{storylines.get('description', 'No description available')}</td>
<td>{', '.join(map(str, storylines.get('scenes', [])))}</td>
</tr>
"""
output += """
</table>
</div>
"""
return output
def create_qa_section(qa_list: List[Dict[str, str]]) -> str:
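    """Build the HTML Q&A block as a simple question/answer chat-style list."""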
output = """
<div class="qa-container">
<h3>Q&A</h3>
<div class="chat-discussion">
"""
for qa in qa_list:
output += f"""
<div class="question">{qa.get('question', '')}</div>
<div class="answer">{qa.get('answer', '')}</div>
"""
output += """
</div>
</div>
"""
return output
def create_trimming_suggestions(suggestions: List[Dict[str, Any]]) -> str:
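    """Build the HTML table of trimming suggestions with clickable start/end timestamp links."""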
output = """
<div class="trimming-suggestions-container">
<h3>Trimming Suggestions</h3>
<table class="metadata-table">
<tr>
<th>Timestamp</th>
<th>Description</th>
</tr>
"""
for suggestion in suggestions:
start_time = suggestion.get('timestamps', {}).get('start_timestamp', '')
end_time = suggestion.get('timestamps', {}).get('end_timestamp', '')
start_formatted = format_timestamp(start_time)
end_formatted = format_timestamp(end_time)
output += f"""
<tr>
<td>
<a href="#" class="timestamp-link" data-timestamp="{start_time}">{start_formatted}</a>
{f' - <a href="#" class="timestamp-link" data-timestamp="{end_time}">{end_formatted}</a>' if end_time else ''}
</td>
<td>{suggestion.get('description', '')}</td>
</tr>
"""
output += """
</table>
</div>
"""
return output
def create_filmstrip(scenes: List[Dict[str, Any]], video_duration: float) -> str:
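    """Build the filmstrip HTML: one marker per scene, positioned and sized as a
    percentage of the total video duration, plus a scrubbing needle that the
    page's JavaScript moves as the video plays."""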
    video_duration = video_duration or 1.0  # guard against divide-by-zero when the duration could not be determined
    filmstrip_html = f"""
<div id="filmstrip-inner" style="position: relative; width: 100%; height: 100%;" data-duration="{video_duration}">
"""
for scene in scenes:
start_time = timestamp_to_seconds(scene['timestamps'].get('start_timestamp', '0:00:00'))
end_time = timestamp_to_seconds(scene['timestamps'].get('end_timestamp', str(video_duration)))
left_pos = (start_time / video_duration) * 100
width = ((end_time - start_time) / video_duration) * 100
title = scene.get('title', '')
filmstrip_html += f'''
<div class="scene-marker" style="position: absolute; left: {left_pos}%; width: {width}%; height: 100%; background-color: rgba(0, 0, 255, 0.2); border-right: 1px solid blue; overflow: hidden;">
<div class="scene-title" style="font-size: 10px; word-wrap: break-word; padding: 2px;">{title}</div>
</div>
'''
filmstrip_html += """
<div id="scrubbing-needle" style="position: absolute; width: 2px; height: 100%; background-color: red; top: 0; left: 0; pointer-events: none;"></div>
</div>
"""
return filmstrip_html
def process_video(video_id: str):
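    """Load metadata for the given video ID and return (video_path, filmstrip_html,
    metadata_html) for the Gradio outputs; on failure return (None, '', error message)."""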
try:
#logger.info(f"Processing video with ID: {video_id}")
metadata = load_metadata(video_id)
video_path = os.path.join(video_folder, f"{video_id}.mp4")
if not os.path.exists(video_path):
logger.error(f"Video file not found: {video_path}")
return None, "", f"Error: Video file not found for ID {video_id}"
# Character List Table
character_table = """
<h3>Characters</h3>
<table class="metadata-table">
<tr>
<th>Character</th>
<th>Description</th>
</tr>
"""
for character in metadata.get('characterList', []):
character_table += f"""
<tr>
<td>{character.get('name', '')}</td>
<td>{character.get('description', '')}</td>
</tr>
"""
character_table += "</table>"
additional_data = f"""
<div class="video-info">
<h2>{metadata.get('title', 'Untitled')}</h2>
<p><strong>Description:</strong> {metadata.get('description', 'No description available')}</p>
</div>
{character_table}
"""
scenes_output = ""
for scene in metadata.get('scenes', []):
scenes_output += create_scene_table(scene)
storylines_output = create_storylines_table(metadata.get('storylines', {}))
qa_output = create_qa_section(metadata.get('qAndA', []))
trimming_suggestions_output = create_trimming_suggestions(metadata.get('trimmingSuggestions', []))
# Generate filmstrip HTML
        scenes = metadata.get('scenes', [])
        last_scene = scenes[-1] if scenes else {}
        video_duration = timestamp_to_seconds(last_scene.get('timestamps', {}).get('end_timestamp', '0:00:00'))
        filmstrip_html = create_filmstrip(scenes, video_duration)
logger.info("Video processing completed successfully")
return video_path, filmstrip_html, additional_data + scenes_output + storylines_output + qa_output + trimming_suggestions_output
except Exception as e:
logger.exception(f"Error processing video: {str(e)}")
return None, "", f"Error processing video: {str(e)}"
css = """
body {
margin: 0;
padding: 0;
font-family: Arial, sans-serif;
overflow: hidden;
}
.container {
display: flex;
flex-direction: column;
height: 100vh;
}
#header {
display: flex;
align-items: center;
padding: 10px;
background-color: white;
}
#logo {
width: auto;
height: 150px;
margin-right: 20px;
box-shadow: none !important;
border: none !important;
background: none !important;
object-fit: contain;
}
#header-content {
flex-grow: 1;
display: flex;
justify-content: space-between;
align-items: center;
}
#header-content h1 {
margin: 0;
font-size: 45px;
font-weight: bold;
}
#header-content a {
font-size: 18px;
color: #0066cc;
text-decoration: none;
}
#header-content a:hover {
text-decoration: underline;
}
#top-panel {
position: sticky;
top: 10vh;
background-color: white;
z-index: 100;
padding: 20px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
display: flex;
height: 35vh;
overflow: hidden;
}
#video-list-column {
display: flex;
flex-direction: column;
max-height: 100%;
width: 30%;
overflow-y: auto;
}
#video-list label {
display: block;
width: 100%;
}
#video-column {
display: flex;
flex-direction: column;
max-height: 100%;
overflow: hidden;
width: 70%;
}
#video-column > div:first-child {
display: flex;
flex-direction: column;
height: calc(100% - 100px);
}
#video-column video {
max-height: 100%;
object-fit: contain;
width: 100%;
margin: 0;
}
#filmstrip-container {
width: 100%;
height: 80px !important;
background-color: #f0f0f0;
position: relative;
overflow: hidden;
cursor: pointer;
margin-top: 0;
}
#filmstrip-container > div,
#filmstrip-container > div > div,
#filmstrip-container > div > div > div {
height: 100% !important;
}
#scrollable-content {
flex-grow: 1;
overflow-y: auto;
padding: 20px;
height: calc(55vh - 40px);
}
#metadata-container {
margin-top: 20px;
}
.content-samples {
display: flex;
flex-direction: column;
overflow-y: auto;
max-height: 100%;
}
.content-samples > .wrap {
display: flex;
flex-direction: column;
}
.content-samples .hidden {
display: none !important;
}
.content-samples > .wrap > .wrap {
display: flex !important;
flex-direction: column !important;
}
.content-samples label {
padding: 10px;
cursor: pointer;
border-bottom: 1px solid #ddd;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.content-samples label:hover {
background-color: #f0f0f0;
}
.video-info {
margin-bottom: 20px;
}
.scene-container {
margin-bottom: 30px;
}
.metadata-table {
width: 100%;
border-collapse: collapse;
margin-bottom: 20px;
}
.metadata-table th, .metadata-table td {
border: 1px solid #ddd;
padding: 8px;
text-align: left;
}
.metadata-table th {
background-color: #f2f2f2;
}
.metadata-table tr:nth-child(even) {
background-color: #f9f9f9;
}
.timestamp-link {
color: #0066cc;
text-decoration: none;
cursor: pointer;
}
.timestamp-link:hover {
text-decoration: underline;
}
.chat-discussion {
background-color: #f0f0f0;
border-radius: 10px;
padding: 15px;
margin-bottom: 20px;
}
.question {
font-weight: bold;
margin-bottom: 5px;
}
.answer {
margin-bottom: 15px;
padding-left: 15px;
}
.correlation-scores {
font-size: 18px;
margin-bottom: 20px;
}
#reinitialization-overlay {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background-color: rgba(0, 0, 0, 0.5);
display: flex;
justify-content: center;
align-items: center;
z-index: 9999;
color: white;
font-size: 24px;
font-weight: bold;
}
@media (max-width: 768px) {
#header {
flex-direction: column;
align-items: flex-start;
}
#header-content h1 {
font-size: 24px;
}
#header-content p {
font-size: 14px;
}
#logo {
align-self: flex-end;
margin-top: 10px;
}
#top-panel {
flex-direction: column;
}
#video-list-column, #video-column {
width: 100%;
}
}
.icon-buttons button {
display: none !important;
}
"""
js = """
<script>
(function() {
let isReinitializing = false;
let lastVideoSrc = null;
function showOverlay() {
let overlay = document.getElementById('reinitialization-overlay');
if (!overlay) {
overlay = document.createElement('div');
overlay.id = 'reinitialization-overlay';
overlay.style.position = 'fixed';
overlay.style.top = '0';
overlay.style.left = '0';
overlay.style.width = '100%';
overlay.style.height = '100%';
overlay.style.backgroundColor = 'rgba(0, 0, 0, 0.5)';
overlay.style.display = 'flex';
overlay.style.justifyContent = 'center';
overlay.style.alignItems = 'center';
overlay.style.zIndex = '9999';
const message = document.createElement('div');
message.textContent = 'Loading assets...';
message.style.color = 'white';
message.style.fontSize = '24px';
message.style.fontWeight = 'bold';
overlay.appendChild(message);
document.body.appendChild(overlay);
}
overlay.style.display = 'flex';
}
function hideOverlay() {
const overlay = document.getElementById('reinitialization-overlay');
if (overlay) {
overlay.style.display = 'none';
}
}
function initializeFilmstrip() {
var videoElement = document.querySelector('video');
var filmstripContainer = document.getElementById('filmstrip-container');
var filmstripInner = filmstripContainer ? filmstripContainer.querySelector('#filmstrip-inner') : null;
var scrubbingNeedle = document.getElementById('scrubbing-needle');
if (!videoElement || !filmstripContainer || !filmstripInner || !scrubbingNeedle) {
return;
}
var videoDuration = parseFloat(filmstripInner.getAttribute('data-duration') || videoElement.duration);
videoElement.addEventListener('timeupdate', function() {
var progress = videoElement.currentTime / videoDuration;
scrubbingNeedle.style.left = (progress * 100) + '%';
});
filmstripContainer.addEventListener('click', function(event) {
var rect = filmstripContainer.getBoundingClientRect();
var clickPosition = (event.clientX - rect.left) / rect.width;
videoElement.currentTime = clickPosition * videoDuration;
});
}
function initializeTimestampLinks() {
var videoElement = document.querySelector('video');
var links = document.querySelectorAll('.timestamp-link');
if (!videoElement || links.length === 0) {
return;
}
links.forEach(function(link) {
link.addEventListener('click', function(e) {
e.preventDefault();
var timestamp = this.getAttribute('data-timestamp');
var parts = timestamp.split(':');
var seconds = parseInt(parts[0], 10) * 3600 + parseInt(parts[1], 10) * 60 + parseFloat(parts[2]);
videoElement.currentTime = seconds;
});
});
}
function initializeEverything() {
if (isReinitializing) {
return;
}
isReinitializing = true;
showOverlay();
const videoElement = document.querySelector('video');
if (videoElement) {
const onCanPlay = function() {
videoElement.removeEventListener('canplay', onCanPlay);
initializeFilmstrip();
initializeTimestampLinks();
isReinitializing = false;
hideOverlay();
};
videoElement.addEventListener('canplay', onCanPlay);
// If the video is already loaded, trigger the event manually
if (videoElement.readyState >= 3) {
videoElement.dispatchEvent(new Event('canplay'));
}
} else {
// If there's no video element, just initialize other components
initializeFilmstrip();
initializeTimestampLinks();
isReinitializing = false;
hideOverlay();
}
}
function checkForVideoChanges() {
const videoElement = document.querySelector('video');
if (videoElement && videoElement.src !== lastVideoSrc) {
lastVideoSrc = videoElement.src;
showOverlay();
setTimeout(initializeEverything, 100);
}
}
// Set up a MutationObserver to watch for changes in the entire document
const contentObserver = new MutationObserver((mutations) => {
checkForVideoChanges();
});
contentObserver.observe(document.body, {
childList: true,
subtree: true,
attributes: true,
attributeFilter: ['src']
});
// Periodically check for video changes
setInterval(checkForVideoChanges, 1000);
// Initialize everything when the DOM is ready
document.addEventListener('DOMContentLoaded', initializeEverything);
// Also try to initialize after a short delay, in case DOMContentLoaded has already fired
setTimeout(initializeEverything, 1000);
})();
</script>
"""
with gr.Blocks(css=css, head=js) as iface:
with gr.Row(elem_id="header"):
with gr.Column(scale=1):
gr.Image("logo.png", elem_id="logo", show_label=False, interactive=False)
with gr.Column(elem_id="header-content",scale=10):
gr.Markdown("""
# Exploration space
## [🔗 Dataset](https://huggingface.co/datasets/HuggingFaceFV/finevideo)
""")
with gr.Row(elem_id="top-panel"):
with gr.Column(scale=1, elem_id="video-list-column"):
video_list_data = load_video_list()
video_list = gr.Radio(
label="Content Samples",
choices=[video["title"] for video in video_list_data],
elem_id="video-list",
value=None,
container=False
)
with gr.Column(scale=2, elem_id="video-column"):
video_output = gr.Video(label="Video", elem_id="video-player", postprocess=False)
filmstrip_output = gr.HTML(elem_id="filmstrip-container")
with gr.Row(elem_id="scrollable-content"):
metadata_output = gr.HTML(elem_id="metadata-container")
def wrapped_process_video(title: str) -> tuple:
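        """Resolve the selected title back to its video ID and delegate to process_video;
        returns empty outputs when nothing is selected."""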
if not title:
return None, "", ""
        video_id = next((video["video_id"] for video in video_list_data if video["title"] == title), None)
        if video_id is None:
            return None, "", ""
logging.info(f"Processing video with ID: {video_id}")
video_path, filmstrip_html, metadata_html = process_video(video_id)
return video_path, filmstrip_html, metadata_html
video_list.change(
fn=wrapped_process_video,
inputs=[video_list],
outputs=[video_output, filmstrip_output, metadata_output]
)
if __name__ == "__main__":
iface.launch()