File size: 2,349 Bytes
5b0a718
d9eecfa
cefa8e3
 
3814e1b
d49bcd1
657aaa4
b8a25b7
c4a5c8e
 
cefa8e3
5b0a718
 
cefa8e3
3814e1b
 
5b0a718
 
 
3814e1b
d9eecfa
 
3814e1b
 
5b0a718
3814e1b
 
 
 
 
 
 
 
 
7cb494c
3814e1b
 
 
 
 
 
 
 
 
 
 
 
 
cefa8e3
5b0a718
 
 
 
 
3814e1b
 
cefa8e3
d9eecfa
 
5b0a718
 
cefa8e3
3814e1b
d9eecfa
5b0a718
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import os
import subprocess
import sys

# Packages installed at start-up, before the third-party imports further down
# (e.g. ``vidfetch``) are executed. Each entry is the argument list passed to
# pip; the stale ``vidfetch`` is removed first so the archive install that
# follows replaces it.
_PIP_COMMANDS = (
    ('uninstall', 'vidfetch', '-y'),
    ('install', '-U', 'https://github.com/dailingx/VidFetch/archive/master.zip'),
    ('install', '--upgrade', 'google-api-python-client'),
    ('install', 'pandas'),
    ('install', 'moviepy'),
    ('install', 'psutil'),
    ('install', '-U', 'https://huggingface.co/dailingx/youtube-dl-package/resolve/main/youtube-dl-2024.04.08.tar.gz'),
)

for _pip_args in _PIP_COMMANDS:
    # ``sys.executable -m pip`` installs into the interpreter that is running
    # this script, rather than whichever ``pip`` happens to be first on PATH,
    # and the argument list avoids going through a shell.
    _pip_exit_code = subprocess.call([sys.executable, '-m', 'pip', *_pip_args])
    if _pip_exit_code != 0:
        # Stay best-effort (the remaining commands still run), but make the
        # failure visible instead of silently discarding the exit status.
        print(
            f'pip {" ".join(_pip_args)} failed with exit code {_pip_exit_code}',
            file=sys.stderr,
        )
# os.system('wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl')
# os.system('chmod a+rx /usr/local/bin/youtube-dl')

import sys
import gradio as gr
from vidfetch.website.youtube import YoutubeVideoDataset
import pandas as pd
from pandas.api.types import is_numeric_dtype


# Number of videos requested per keyword when the CSV gives no usable ``num``.
_DEFAULT_VIDEO_MAX_NUM = 50


def fetch(
    kw_file,
    dev_key: str,
    hf_token: str,
    hf_ds_repo_id: str,
    key_word: str = None
):
    """Download YouTube videos for every keyword listed in an uploaded CSV.

    The CSV must contain a ``keyword`` column and may contain a ``num`` column
    giving the maximum number of videos for that row. For each keyword a
    ``YoutubeVideoDataset`` is built and its ``download()`` method is called.

    Args:
        kw_file: The uploaded CSV, either an object exposing the file path as
            ``.name`` or the path itself as a string. ``None`` means nothing
            was uploaded.
        dev_key: Passed as ``google_cloud_developer_key``.
        hf_token: Passed as ``hf_token``.
        hf_ds_repo_id: Passed as ``hf_ds_repo_id``.
        key_word: Unused; kept so existing callers keep working.

    Returns:
        The processed keywords joined by ``', '``, or a short message when
        there is nothing to process (no file, no ``keyword`` column, or no
        non-blank keyword).
    """
    if kw_file is None:
        return 'no keyword file uploaded'

    # Accept both an upload object carrying ``.name`` and a bare path string.
    csv_path = getattr(kw_file, 'name', kw_file)
    try:
        df = pd.read_csv(csv_path)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header at all, hence no keywords.
        return 'no keyword'

    if 'keyword' not in df.columns:
        return "CSV file has no 'keyword' column"

    # Drop empty cells and whitespace-only entries; coerce the rest to text so
    # numeric-looking keywords are still usable as search strings.
    keywords = df['keyword'].dropna().astype(str).str.strip()
    keywords = keywords[keywords != '']
    if keywords.empty:
        return 'no keyword'

    # Unparseable ``num`` cells become NaN and fall back to the default below.
    limits = pd.to_numeric(df['num'], errors='coerce') if 'num' in df.columns else None

    fetched = []
    for index, value in keywords.items():
        video_max_num = _DEFAULT_VIDEO_MAX_NUM
        if limits is not None and not pd.isna(limits.loc[index]):
            # Hand over a plain Python int rather than a numpy scalar.
            video_max_num = int(limits.loc[index])

        youtube_video_dataset = YoutubeVideoDataset(
            root_dir="./",
            google_cloud_developer_key=dev_key,
            search_keyword=value,
            video_max_num=video_max_num,
            hf_token=hf_token,
            hf_ds_repo_id=hf_ds_repo_id
        )
        youtube_video_dataset.download()

        fetched.append(value)
    return ', '.join(fetched)


# UI definition: a column of inputs (CSV upload, credentials, target dataset
# repo, and the trigger button) next to a text box that shows what ``fetch``
# returns.
with gr.Blocks() as demo:
    gr.Markdown('''OpenVideo Youtube fetch demo''')
    with gr.Row():
        with gr.Column():
            # kw_input_text = gr.Text(label='Keyword')
            kw_input_file = gr.File(label="Upload CSV File, Include Columns: keyword, num, ...")
            # The developer key and the HF token are secrets, so their boxes
            # mask the typed characters instead of showing them in clear text.
            dev_key_input_text = gr.Text(label='Google Cloud Developer Key', type='password')
            hf_token_input_text = gr.Text(label='HF Token', type='password')
            hf_ds_repo_id_text = gr.Text(label='HF Dataset Repo ID, like: OpenVideo/YouTube-Commons-5G-Raw')
            fetch_btn = gr.Button("Fetch")
        result = gr.Text()

    # Inputs are passed to ``fetch`` positionally in this order; its return
    # value is written to ``result``.
    fetch_btn.click(fn=fetch, inputs=[kw_input_file, dev_key_input_text, hf_token_input_text, hf_ds_repo_id_text],
                    outputs=[result])


if __name__ == "__main__":
    # Only when run as a script: enable the request queue (max_size=1), then
    # start the app without a share link and with max_threads=1.
    demo.queue(max_size=1).launch(share=False, max_threads=1)