Chris Bracegirdle committed on
Commit
ac5e313
1 Parent(s): e01114b

Try to save tokenizer

Browse files
Files changed (1) hide show
  1. app.py +7 -0
app.py CHANGED
@@ -12,6 +12,13 @@ BATCH_SIZE = 8
12
  FILE_LIMIT_MB = 1000
13
  YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
14
 
 
 
 
 
 
 
 
15
  device = 0 if torch.cuda.is_available() else "cpu"
16
 
17
  pipe = pipeline(
 
12
  FILE_LIMIT_MB = 1000
13
  YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
14
 
15
+ from transformers import AutoTokenizer
16
+
17
+ tokenizer = AutoTokenizer.from_pretrained("openai/whisper-large-v3")
18
+ assert tokenizer.is_fast
19
+ tokenizer.save_pretrained("...")
20
+
21
+
22
  device = 0 if torch.cuda.is_available() else "cpu"
23
 
24
  pipe = pipeline(