Spaces:
Running
Running
Wrap lines in SRT and VTT files that exceed 47 characters
Browse files
utils.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import unicodedata
|
2 |
import re
|
3 |
|
@@ -55,9 +56,11 @@ def write_txt(transcript: Iterator[dict], file: TextIO):
|
|
55 |
def write_vtt(transcript: Iterator[dict], file: TextIO):
|
56 |
print("WEBVTT\n", file=file)
|
57 |
for segment in transcript:
|
|
|
|
|
58 |
print(
|
59 |
f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
|
60 |
-
f"{
|
61 |
file=file,
|
62 |
flush=True,
|
63 |
)
|
@@ -76,16 +79,22 @@ def write_srt(transcript: Iterator[dict], file: TextIO):
|
|
76 |
write_srt(result["segments"], file=srt)
|
77 |
"""
|
78 |
for i, segment in enumerate(transcript, start=1):
|
|
|
|
|
79 |
# write srt lines
|
80 |
print(
|
81 |
f"{i}\n"
|
82 |
f"{format_timestamp(segment['start'], always_include_hours=True, fractionalSeperator=',')} --> "
|
83 |
f"{format_timestamp(segment['end'], always_include_hours=True, fractionalSeperator=',')}\n"
|
84 |
-
f"{
|
85 |
file=file,
|
86 |
flush=True,
|
87 |
)
|
88 |
|
|
|
|
|
|
|
|
|
89 |
def slugify(value, allow_unicode=False):
|
90 |
"""
|
91 |
Taken from https://github.com/django/django/blob/master/django/utils/text.py
|
|
|
1 |
+
import textwrap
|
2 |
import unicodedata
|
3 |
import re
|
4 |
|
|
|
56 |
def write_vtt(transcript: Iterator[dict], file: TextIO):
    """
    Write transcript segments to ``file`` as a WebVTT document.

    A ``WEBVTT`` header is printed first, followed by one cue per segment:
    a ``start --> end`` timing line and the wrapped cue text, each cue
    terminated by a blank line.

    Parameters
    ----------
    transcript: Iterator[dict]
        Segments to write; each one is read for its ``'start'``, ``'end'``
        and ``'text'`` keys.
    file: TextIO
        Destination text stream. It is flushed after every cue.
    """
    print("WEBVTT\n", file=file)
    for segment in transcript:
        # Strip before wrapping (as write_srt does): textwrap keeps whitespace
        # at the very start of the text, so an unstripped leading space would
        # be emitted in the cue and count against the line width.
        # '-->' is replaced so the cue text cannot be mistaken for a timing line.
        text = processText(segment['text'].strip()).replace('-->', '->')

        print(
            f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
            f"{text}\n",
            file=file,
            flush=True,
        )
|
|
|
79 |
write_srt(result["segments"], file=srt)
|
80 |
"""
|
81 |
for i, segment in enumerate(transcript, start=1):
|
82 |
+
text = processText(segment['text'].strip()).replace('-->', '->')
|
83 |
+
|
84 |
# write srt lines
|
85 |
print(
|
86 |
f"{i}\n"
|
87 |
f"{format_timestamp(segment['start'], always_include_hours=True, fractionalSeperator=',')} --> "
|
88 |
f"{format_timestamp(segment['end'], always_include_hours=True, fractionalSeperator=',')}\n"
|
89 |
+
f"{text}\n",
|
90 |
file=file,
|
91 |
flush=True,
|
92 |
)
|
93 |
|
94 |
+
def processText(text: str, max_line_width: int = 47) -> str:
    """
    Wrap ``text`` so that no line is longer than ``max_line_width`` characters.

    Wrapping is delegated to ``textwrap.wrap`` (tabs expanded to 4 columns),
    and the resulting lines are joined with ``'\\n'``. Empty input yields an
    empty string. Whitespace at the very start of ``text`` is kept by
    ``textwrap``, so callers that do not want it should strip first.

    Parameters
    ----------
    text: str
        The text to wrap.
    max_line_width: int
        Maximum line length; defaults to 47. ``None`` or a non-positive value
        disables wrapping and returns ``text`` unchanged.

    Returns
    -------
    str
        The wrapped text, with lines separated by newline characters.
    """
    # textwrap raises ValueError for width <= 0; treat that as "no wrapping".
    if max_line_width is None or max_line_width <= 0:
        return text

    lines = textwrap.wrap(text, width=max_line_width, tabsize=4)
    return '\n'.join(lines)
|
97 |
+
|
98 |
def slugify(value, allow_unicode=False):
|
99 |
"""
|
100 |
Taken from https://github.com/django/django/blob/master/django/utils/text.py
|