Greencapabara committed on
Commit
17bf9f2
1 Parent(s): baccd4f

Upload utils.py

Browse files
Files changed (1) hide show
  1. utils.py +54 -0
utils.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import zlib
2
+ from typing import Iterator, TextIO
3
+
4
+
5
def exact_div(x, y):
    """Return ``x // y``, asserting that ``y`` divides ``x`` with no remainder."""
    remainder = x % y
    assert remainder == 0
    return x // y
8
+
9
+
10
def str2bool(string):
    """Convert the exact string ``"True"`` or ``"False"`` to the matching bool.

    Raises:
        ValueError: if ``string`` is not one of the two accepted spellings.
    """
    choices = {"True": True, "False": False}
    # Guard clause: reject anything that is not an exact key match.
    if string not in choices:
        raise ValueError(f"Expected one of {set(choices.keys())}, got {string}")
    return choices[string]
16
+
17
+
18
def optional_int(string):
    """Parse ``string`` as an int, mapping the literal ``"None"`` to ``None``."""
    if string == "None":
        return None
    return int(string)
20
+
21
+
22
def optional_float(string):
    """Parse ``string`` as a float, mapping the literal ``"None"`` to ``None``."""
    if string == "None":
        return None
    return float(string)
24
+
25
+
26
def compression_ratio(text) -> float:
    """Return how many times smaller ``text`` becomes under zlib compression.

    The ratio is computed as UTF-8 byte length divided by zlib-compressed
    byte length, so both sides are measured in the same unit. Counting
    characters in the numerator instead would understate the ratio for any
    text containing multi-byte (non-ASCII) characters.

    Args:
        text: the string to measure.

    Returns:
        The uncompressed-to-compressed size ratio; ``0.0`` for an empty
        string (the compressed stream is never empty, so no division by zero).
    """
    # Encode once and reuse the bytes for both the numerator and the compressor.
    text_bytes = text.encode("utf-8")
    return len(text_bytes) / len(zlib.compress(text_bytes))
28
+
29
+
30
def format_timestamp(seconds: float):
    """Format a non-negative number of seconds as ``[H:]MM:SS.mmm``.

    The value is rounded to whole milliseconds first. The hours field (and
    its trailing colon) is emitted only when it is greater than zero; minutes
    and seconds are zero-padded to two digits, milliseconds to three.
    """
    assert seconds >= 0, "non-negative timestamp expected"
    total_ms = round(seconds * 1000.0)

    # Peel off each unit from largest to smallest, carrying the remainder down.
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1_000)

    hour_prefix = f"{hours}:" if hours > 0 else ""
    return f"{hour_prefix}{minutes:02d}:{secs:02d}.{millis:03d}"
44
+
45
+
46
def write_vtt(transcript: Iterator[dict], file: TextIO):
    """Write ``transcript`` to ``file`` as a WEBVTT document.

    Emits the ``WEBVTT`` header followed by a blank line, then one cue per
    segment. Each segment is read through its ``'start'``, ``'end'`` and
    ``'text'`` keys; the output is flushed after every cue.
    """
    print("WEBVTT\n", file=file)
    for segment in transcript:
        cue_start = format_timestamp(segment['start'])
        cue_end = format_timestamp(segment['end'])
        # "-->" separates the two timestamps on the timing line, so any
        # occurrence inside the cue text is rewritten to "->".
        cue_text = segment['text'].replace('-->', '->')
        print(f"{cue_start} --> {cue_end}\n{cue_text}\n", file=file, flush=True)