Spaces:
Sleeping
Sleeping
Jeremy Watt
committed on
Commit
•
29482c4
1
Parent(s):
ec0ea44
first push
Browse files- .DS_Store +0 -0
- bleep_that_sht/.DS_Store +0 -0
- bleep_that_sht/__init__.py +9 -0
- bleep_that_sht/__pycache__/__init__.cpython-310.pyc +0 -0
- bleep_that_sht/__pycache__/audio_extractor.cpython-310.pyc +0 -0
- bleep_that_sht/__pycache__/create.cpython-310.pyc +0 -0
- bleep_that_sht/__pycache__/gradio_app_url_download.cpython-310.pyc +0 -0
- bleep_that_sht/__pycache__/transcribe.cpython-310.pyc +0 -0
- bleep_that_sht/__pycache__/yt_download.cpython-310.pyc +0 -0
- bleep_that_sht/audio_extractor.py +13 -0
- bleep_that_sht/bleep.mp3 +0 -0
- bleep_that_sht/create.py +89 -0
- bleep_that_sht/gradio_app_url_download.py +187 -0
- bleep_that_sht/transcribe.py +13 -0
- bleep_that_sht/yt_download.py +32 -0
- requirements.gradio +4 -0
.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
bleep_that_sht/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
bleep_that_sht/__init__.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

import whisper_timestamped as whisper

# Absolute path of the package directory, and of the directory that contains it.
base_dir = os.path.split(os.path.abspath(__file__))[0]
main_dir = os.path.split(base_dir)[0]

# Load the "tiny" and then the "base" Whisper checkpoints on CPU at import time.
# Only the "base" model stays bound to `model`.
# NOTE(review): presumably the "tiny" load only warms the local weights cache - confirm.
whisper.load_model("tiny", device="cpu")
model = whisper.load_model("base", device="cpu")
|
bleep_that_sht/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (399 Bytes). View file
|
|
bleep_that_sht/__pycache__/audio_extractor.cpython-310.pyc
ADDED
Binary file (683 Bytes). View file
|
|
bleep_that_sht/__pycache__/create.cpython-310.pyc
ADDED
Binary file (2.57 kB). View file
|
|
bleep_that_sht/__pycache__/gradio_app_url_download.cpython-310.pyc
ADDED
Binary file (5.53 kB). View file
|
|
bleep_that_sht/__pycache__/transcribe.cpython-310.pyc
ADDED
Binary file (798 Bytes). View file
|
|
bleep_that_sht/__pycache__/yt_download.cpython-310.pyc
ADDED
Binary file (1.18 kB). View file
|
|
bleep_that_sht/audio_extractor.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from moviepy.editor import VideoFileClip
|
2 |
+
|
3 |
+
|
4 |
+
def extract_audio(local_file_path: str, audio_filepath: str) -> None:
    """Write the audio track of a video file to ``audio_filepath``.

    Nothing is written when the video has no audio track.

    Args:
        local_file_path: path of the video file to read.
        audio_filepath: destination path for the extracted audio.

    Raises:
        ValueError: if the video cannot be opened or the audio cannot be
            written; the underlying exception is chained as ``__cause__``.
    """
    video = None
    audio = None
    try:
        video = VideoFileClip(local_file_path)
        audio = video.audio
        if audio is not None:
            audio.write_audiofile(audio_filepath, verbose=False, logger=None)
    except Exception as e:
        raise ValueError(f"error extracting audio from video {local_file_path}, exception: {e}") from e
    finally:
        # Release the clip handles on the failure path as well as on success.
        if audio is not None:
            audio.close()
        if video is not None:
            video.close()
|
bleep_that_sht/bleep.mp3
ADDED
Binary file (49.2 kB). View file
|
|
bleep_that_sht/create.py
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip
|
2 |
+
from pydub import AudioSegment
|
3 |
+
from bleep_that_sht import base_dir
|
4 |
+
from bleep_that_sht.audio_extractor import extract_audio
|
5 |
+
|
6 |
+
# Bleep tone shipped next to this module. Only the slice from 1000 ms to
# 2000 ms of it is kept as the overlay sound.
bleep_sound = AudioSegment.from_mp3(f"{base_dir}/bleep.mp3")
bleep_first_sec = bleep_sound[1000:2000]
|
8 |
+
|
9 |
+
|
10 |
+
def word_cleaner(word: str) -> str:
    """Return ``word`` lower-cased with every non-alphanumeric character removed."""
    kept_chars = [ch for ch in word if ch.isalnum()]
    cleaned = "".join(kept_chars)
    return cleaned.lower().strip()
|
13 |
+
|
14 |
+
|
15 |
+
def query_transcript(bleep_words: list, timestamped_transcript: list) -> list:
    """Collect every timestamped transcript word that matches a bleep word.

    Both sides are compared after ``word_cleaner`` normalization. Each
    transcript word is returned at most once, even when ``bleep_words``
    contains duplicates or several spellings that normalize to the same text;
    repeated instances would otherwise be spliced twice by the caller.

    Args:
        bleep_words: words to look for.
        timestamped_transcript: list of segments, each holding a ``"words"``
            list of dicts with at least ``"text"`` and ``"start"`` keys.

    Returns:
        The matching word dicts, sorted by their ``"start"`` value.
    """
    targets = {word_cleaner(bleep_word) for bleep_word in bleep_words}
    # A bleep word made only of punctuation cleans to "" and would match every
    # punctuation-only transcript token; ignore it.
    targets.discard("")
    if not targets:
        return []

    detected_bleep_words = []
    for segment in timestamped_transcript:
        for word in segment["words"]:
            if word_cleaner(word["text"]) in targets:
                detected_bleep_words.append(word)

    detected_bleep_words.sort(key=lambda d: d["start"])
    return detected_bleep_words
|
26 |
+
|
27 |
+
|
28 |
+
def bleep_replace(
    og_video_path: str,
    og_audio_path: str,
    final_video_path: str,
    final_audio_path: str,
    bleep_words: list,
    timestamped_transcript: list,
) -> None:
    """Replace every occurrence of ``bleep_words`` in a video's audio with a bleep.

    The audio at ``og_audio_path`` must already exist (it is read as mp3). The
    bleeped audio is exported to ``final_audio_path`` and then muxed with the
    picture of ``og_video_path`` into ``final_video_path``.

    Args:
        og_video_path: path of the original video.
        og_audio_path: path of the original video's audio, as mp3.
        final_video_path: destination of the bleeped video.
        final_audio_path: destination of the bleeped audio (mp3).
        bleep_words: words to bleep out.
        timestamped_transcript: transcript segments passed to ``query_transcript``.
    """
    # input og audio file for splicing
    test_sound = AudioSegment.from_mp3(og_audio_path)
    total_ms = len(test_sound)
    bleep_ms = len(bleep_first_sec)

    # find bleep_words in timestamped transcript
    bleep_word_instances = query_transcript(bleep_words, timestamped_transcript)

    # start the bleeped track with the first millisecond of the original
    test_sound_clips = [test_sound[:1]]

    # loop over instances, thread in clips of bleep
    prev_end_time = 1
    for instance in bleep_word_instances:
        # bleep window in milliseconds, padded by 50 ms on each side
        start_time = int(instance["start"] * 1000) - 50
        end_time = int(instance["end"] * 1000) + 50

        # Keep the window inside the audio and after the previous window.
        # A negative start would be read by pydub as "from the end", and an
        # overlapping or overrunning window would change the total duration
        # and desynchronize audio and video.
        start_time = min(max(start_time, prev_end_time), total_ms)
        end_time = min(end_time, total_ms)
        if end_time <= start_time:
            continue
        duration = end_time - start_time

        # original audio between the previous bleep and this one
        test_sound_clips.append(test_sound[prev_end_time:start_time])

        # bleep clip of exactly `duration` ms; repeat the tone when the word
        # lasts longer than the tone itself so the track length is preserved
        if bleep_ms and duration > bleep_ms:
            repeats = -(-duration // bleep_ms)  # ceiling division
            bleep_clip = (bleep_first_sec * repeats)[:duration]
        else:
            bleep_clip = bleep_first_sec[:duration]
        test_sound_clips.append(bleep_clip)

        prev_end_time = end_time

    # remainder of the original audio after the last bleep
    test_sound_clips.append(test_sound[prev_end_time:])

    # save bleeped audio
    bleeped_test_clip = sum(test_sound_clips)
    bleeped_test_clip.export(final_audio_path, format="mp3")

    # load in og video, overlay with bleeped audio; always release the clips
    og_video = VideoFileClip(og_video_path)
    try:
        bleep_audio = AudioFileClip(final_audio_path)
        try:
            og_video.audio = CompositeAudioClip([bleep_audio])
            og_video.write_videofile(
                final_video_path,
                codec="libx264",
                audio_codec="aac",
                # Per-output temp file: a fixed name in the working directory
                # would be shared by concurrent runs.
                temp_audiofile=final_video_path + ".temp-audio.m4a",
                remove_temp=True,
            )
        finally:
            bleep_audio.close()
    finally:
        og_video.close()
|
bleep_that_sht/gradio_app_url_download.py
ADDED
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from bleep_that_sht import main_dir
|
2 |
+
from bleep_that_sht.transcribe import avaliable_models
|
3 |
+
from bleep_that_sht.transcribe import transcribe
|
4 |
+
from bleep_that_sht.audio_extractor import extract_audio
|
5 |
+
from bleep_that_sht.create import bleep_replace
|
6 |
+
from bleep_that_sht.yt_download import download_video
|
7 |
+
import tempfile
|
8 |
+
import uuid
|
9 |
+
import os
|
10 |
+
import io
|
11 |
+
import gradio as gr
|
12 |
+
|
13 |
+
|
14 |
+
# Hugging Face access token taken from the environment; None when HF_TOKEN is
# not set. `os.environ.get` does not raise for a missing key, so no
# try/except is needed.
HF_TOKEN = os.environ.get("HF_TOKEN")
|
20 |
+
|
21 |
+
|
22 |
+
print("Setting up Gradio interface...")

# Scratch directory for downloaded and generated media. The object is kept
# alive at module level so the directory exists for as long as the click
# handlers may run. A `with TemporaryDirectory()` block around the layout
# would delete it as soon as the interface is built, before any handler fires.
_scratch_dir = tempfile.TemporaryDirectory()
tmpdirname = _scratch_dir.name


def _video_player(label: str, value=None, visible: bool = False):
    """Build a gr.Video with the settings shared by every player in this app."""
    return gr.Video(
        value=value,
        visible=visible,
        show_download_button=True,
        show_label=True,
        label=label,
        format="mp4",
        width="50vw",
        height="50vw",
    )


def _media_paths(prefix: str, run_id: str):
    """Return the (mp4 path, mp3 path) pair for one run inside the scratch directory."""
    stem = os.path.join(tmpdirname, f"{prefix}_{run_id}")
    return stem + ".mp4", stem + ".mp3"


def _fetch_and_transcribe(url: str, model_name: str, run_id: str):
    """Download the video at `url`, extract its audio and transcribe it.

    Returns (video path, audio path, transcript text, timestamped transcript).
    """
    video_path, audio_path = _media_paths("original", run_id)
    download_video(url, video_path)
    extract_audio(video_path, audio_path)
    transcript, timestamped_transcript = transcribe(local_file_path=audio_path, model=model_name)
    return video_path, audio_path, transcript, timestamped_transcript


with gr.Blocks(theme=gr.themes.Soft(), title="🎬 Bleep That Sh*t 🙊") as demo:
    with gr.Tabs():
        with gr.TabItem("🎬 Bleep That Sh*t 🙊"):
            with gr.Row():
                with gr.Column(scale=4):
                    url_input = gr.Textbox(
                        value="https://www.youtube.com/shorts/43BhDHYBG0o",
                        label="🔗 Paste YouTube / Shorts URL here",
                        placeholder="e.g., https://www.youtube.com/watch?v=.",
                        max_lines=1,
                    )

            with gr.Row():
                with gr.Column(scale=8):
                    bleep_words = gr.Textbox(
                        placeholder="bleep keywords go here separated by commas",
                        label="bleep-word list",
                        value="treetz, ice, cream, chocolate, syrup, cookie, hooked, threats, treats, trees",
                    )
                with gr.Column(scale=3):
                    model_selection = gr.Dropdown(
                        choices=avaliable_models,
                        value="base",
                        label="whisper model (base only in HF space)",
                        info="whisper model selection",
                        interactive=False,
                    )
                with gr.Column(scale=4):
                    just_transcribe_button = gr.Button("Just Transcribe", variant="primary")
                    transcribe_and_bleep_button = gr.Button("Transcribe & Bleep", variant="primary")

            with gr.Row():
                transcript_output = gr.Textbox(label="Video Transcript", placeholder="", max_lines=5, show_copy_button=True)

            with gr.Row():
                og_video = _video_player("original video")
                bleep_video = _video_player("bleeped video")

            @just_transcribe_button.click(inputs=[url_input, model_selection], outputs=[og_video, bleep_video, transcript_output])
            def just_transcribe(url_input, model_selection):
                """Download and transcribe the video; show the original and hide the bleeped player."""
                video_path, _, transcript, _ = _fetch_and_transcribe(url_input, model_selection, str(uuid.uuid4()))
                return (
                    _video_player("original video", value=video_path, visible=True),
                    _video_player("bleeped video"),
                    transcript,
                )

            @transcribe_and_bleep_button.click(inputs=[url_input, model_selection, bleep_words], outputs=[og_video, bleep_video, transcript_output])
            def transcribe_and_bleep(url_input, model_selection, bleep_words):
                """Download, transcribe and bleep the video; show both players and the transcript."""
                # Validate the parsed list, so input such as " , ," also counts as empty.
                bleep_word_list = [w.strip() for w in (bleep_words or "").split(",") if w.strip()]
                if not bleep_word_list:
                    gr.Warning("bleep words empty!", duration=3)
                    return None, None, None

                run_id = str(uuid.uuid4())
                video_path, audio_path, transcript, timestamped_transcript = _fetch_and_transcribe(
                    url_input, model_selection, run_id
                )

                bleep_video_output, bleep_audio_output = _media_paths("bleep", run_id)
                bleep_replace(
                    video_path,
                    audio_path,
                    bleep_video_output,
                    bleep_audio_output,
                    bleep_word_list,
                    timestamped_transcript,
                )

                return (
                    _video_player("original video", value=video_path, visible=True),
                    _video_player("bleeped video", value=bleep_video_output, visible=True),
                    transcript,
                )

        with gr.TabItem("💡 About"):
            with gr.Blocks() as about:
                gr.Markdown(
                    (
                        "### Bleep out words of your choice from an input video. \n"
                        "How it works: \n\n"
                        "1. Provided a youtube / shorts url \n"
                        "2. Choose your your desired bleep keywords \n"
                        "3. (if running locally) Choose a model from the Whisper family to transcribe the audio (defaults to base only for HF space) \n"
                        "4. (optional) Press 'Just Transcribe' to examine / download just the transcription of the video (can help in choosing bleep words) \n"
                        "5. Press 'Transcribe and bleep' to transcribe and replace all instances of your keywords with *beep* sounds \n\n"
                        "If you want to select your Whisper model / run longer videos pull and run the app locally. \n\n"
                        "Notice: baseline (not fine tuned) Whisper models are used here - you may need to be creative to bleep out all the versions of an input word you want depending on its transcription. \n\n"
                        "You do *not* need a GPU to run this locally. Larger models take more time to process locally, but its doable. \n"
                    )
                )

if __name__ == "__main__":
    print("Launching Gradio interface...")
    demo.launch()
|
bleep_that_sht/transcribe.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import whisper_timestamped as whisper
|
2 |
+
from typing import Tuple
|
3 |
+
|
4 |
+
# Whisper checkpoint names accepted by `transcribe`.
avaliable_models = ["tiny", "base", "small", "medium", "large-v3"]


def transcribe(local_file_path: str, model: str = "tiny", device: str = "cpu") -> Tuple[str, list]:
    """Transcribe an audio file with a Whisper model.

    Args:
        local_file_path: path of the audio file to transcribe.
        model: name of the checkpoint to load; must be in ``avaliable_models``.
        device: device the model is loaded on.

    Returns:
        A ``(transcript, timestamped_transcript)`` pair: the ``"text"`` and the
        ``"segments"`` entries of the transcription output.

    Raises:
        AssertionError: if ``model`` is not in ``avaliable_models``.
    """
    # Raised explicitly rather than via `assert`, so the check still runs under
    # `python -O`; the exception type is kept for backward compatibility.
    if model not in avaliable_models:
        raise AssertionError(f"input model '{model}' not a member of available models = {avaliable_models}")

    # Honor the `device` argument instead of always loading on "cpu".
    loaded_model = whisper.load_model(model, device=device)
    process_output = whisper.transcribe(loaded_model, local_file_path, verbose=False)
    return process_output["text"], process_output["segments"]
|
bleep_that_sht/yt_download.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# from pytube import YouTube
|
2 |
+
|
3 |
+
import yt_dlp
|
4 |
+
import re
|
5 |
+
|
6 |
+
|
7 |
+
def is_valid_youtube_url(url: str) -> bool:
    """Return True when ``url`` is a canonical YouTube watch or shorts URL.

    Accepted forms are ``https://www.youtube.com/watch?v=<id>`` and
    ``https://www.youtube.com/shorts/<id>``, where ``<id>`` is exactly 11
    characters from ``[A-Za-z0-9_-]``. Any non-string input is rejected.
    """
    if not isinstance(url, str):
        return False
    # youtube video ids are always 11 chars long
    pattern = r"https://www\.youtube\.com/(?:watch\?v=|shorts/)[A-Za-z0-9_-]{11}"
    # fullmatch: `$` with re.match would also accept a trailing newline. One
    # pattern for both forms, so a watch id containing "shorts" stays valid.
    return re.fullmatch(pattern, url) is not None
|
14 |
+
|
15 |
+
|
16 |
+
def download_video(url: str, savepath: str, my_proxies: dict = None) -> None:
    """Download a YouTube video as mp4 to ``savepath``.

    The format selector asks for the best video stream of at most 720p plus
    the best audio stream, merged into an mp4 container.

    Args:
        url: YouTube watch or shorts URL; checked with ``is_valid_youtube_url``.
        savepath: output path handed to yt-dlp as ``outtmpl``.
        my_proxies: currently unused; kept for backward compatibility. The
            default is ``None`` rather than a shared mutable ``{}``.

    Raises:
        ValueError: if the URL is invalid or the download fails; the
            underlying exception is chained as ``__cause__``.
    """
    try:
        print("Downloading video from youtube...")
        if not is_valid_youtube_url(url):
            raise ValueError(f"invalid input url: {url}")

        ydl_opts = {
            'format': 'bestvideo[height<=720]+bestaudio/best',
            'merge_output_format': 'mp4',
            'outtmpl': savepath,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        print("...done!")
    except Exception as e:
        raise ValueError(f"yt_download failed with exception {e}") from e
|
requirements.gradio
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
whisper-timestamped
|
2 |
+
moviepy
|
3 |
+
yt-dlp
|
4 |
+
gradio
|