Files changed (1)
  1. app.py +94 -137
app.py CHANGED
@@ -1,147 +1,104 @@
- import spaces
- import torch
-
  import gradio as gr
- import yt_dlp as youtube_dl
- from transformers import pipeline
- from transformers.pipelines.audio_utils import ffmpeg_read
-
- import tempfile
- import os
-
- MODEL_NAME = "ylacombe/whisper-large-v3-turbo"
- BATCH_SIZE = 8
- FILE_LIMIT_MB = 1000
- YT_LENGTH_LIMIT_S = 3600  # limit to 1 hour YouTube files
-
- device = 0 if torch.cuda.is_available() else "cpu"
-
- pipe = pipeline(
-     task="automatic-speech-recognition",
-     model=MODEL_NAME,
-     chunk_length_s=30,
-     device=device,
- )
-
-
- @spaces.GPU
- def transcribe(inputs, task):
-     if inputs is None:
-         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
-
-     text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
-     return text
-
-
- def _return_yt_html_embed(yt_url):
-     video_id = yt_url.split("?v=")[-1]
-     HTML_str = (
-         f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
-         " </center>"
-     )
-     return HTML_str
-
-
- def download_yt_audio(yt_url, filename):
-     info_loader = youtube_dl.YoutubeDL()
-
-     try:
-         info = info_loader.extract_info(yt_url, download=False)
-     except youtube_dl.utils.DownloadError as err:
-         raise gr.Error(str(err))
-
-     file_length = info["duration_string"]
-     file_h_m_s = file_length.split(":")
-     file_h_m_s = [int(sub_length) for sub_length in file_h_m_s]
-
-     if len(file_h_m_s) == 1:
-         file_h_m_s.insert(0, 0)
-     if len(file_h_m_s) == 2:
-         file_h_m_s.insert(0, 0)
-     file_length_s = file_h_m_s[0] * 3600 + file_h_m_s[1] * 60 + file_h_m_s[2]
-
-     if file_length_s > YT_LENGTH_LIMIT_S:
-         yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
-         file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
-         raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")
-
-     ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}
-
-     with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-         try:
-             ydl.download([yt_url])
-         except youtube_dl.utils.ExtractorError as err:
-             raise gr.Error(str(err))
-
-
- @spaces.GPU
- def yt_transcribe(yt_url, task, max_filesize=75.0):
-     html_embed_str = _return_yt_html_embed(yt_url)
-
-     with tempfile.TemporaryDirectory() as tmpdirname:
-         filepath = os.path.join(tmpdirname, "video.mp4")
-         download_yt_audio(yt_url, filepath)
-         with open(filepath, "rb") as f:
-             inputs = f.read()
-
-     inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
-     inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
-
-     text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
-
-     return html_embed_str, text
-
-
- demo = gr.Blocks()
-
- mf_transcribe = gr.Interface(
-     fn=transcribe,
-     inputs=[
-         gr.Audio(sources="microphone", type="filepath"),
-         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-     ],
-     outputs="text",
-     title="Whisper Large V3 Turbo: Transcribe Audio",
-     description=(
-         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
-         f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
-         " of arbitrary length."
-     ),
-     allow_flagging="never",
- )
-
- file_transcribe = gr.Interface(
-     fn=transcribe,
-     inputs=[
-         gr.Audio(sources="upload", type="filepath", label="Audio file"),
-         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-     ],
-     outputs="text",
-     title="Whisper Large V3: Transcribe Audio",
-     description=(
-         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
-         f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
-         " of arbitrary length."
-     ),
-     allow_flagging="never",
- )
-
- yt_transcribe = gr.Interface(
-     fn=yt_transcribe,
-     inputs=[
-         gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
-         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-     ],
-     outputs=["html", "text"],
-     title="Whisper Large V3: Transcribe YouTube",
-     description=(
-         "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
-         f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe video files of"
-         " arbitrary length."
-     ),
-     allow_flagging="never",
- )
-
- with demo:
-     gr.TabbedInterface([mf_transcribe, file_transcribe, yt_transcribe], ["Microphone", "Audio file", "YouTube"])
-
- demo.queue().launch()
+ from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList, pipeline
+ from langchain.chains import ConversationChain
+ from langchain.chains.conversation.memory import ConversationBufferWindowMemory
+ from langchain.llms import HuggingFacePipeline
+ from langchain import PromptTemplate
+ from typing import List
+ import torch
+
+ # Load the model and tokenizer
+ tokenizer = AutoTokenizer.from_pretrained("gpt2")
+ model = AutoModelForCausalLM.from_pretrained("gpt2")
+
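+ # NOTE: "gpt2" is the 124M-parameter base checkpoint and is not
+ # instruction-tuned; the prompt template and stop tokens below
+ # provide the conversational framing.
+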
+ generation_config = model.generation_config
+ generation_config.temperature = 0
+ generation_config.num_return_sequences = 1
+ generation_config.max_new_tokens = 256
+ generation_config.use_cache = False
+ generation_config.repetition_penalty = 1.7
+ generation_config.pad_token_id = tokenizer.eos_token_id
+ generation_config.eos_token_id = tokenizer.eos_token_id
+ stop_tokens = [["Human", ":"], ["AI", ":"]]
+
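+ # Custom stopping criterion: halt generation as soon as the model begins a new
+ # "Human:" or "AI:" turn, i.e. when the most recently generated token IDs match
+ # one of the stop sequences above.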
+ class StopGenerationCriteria(StoppingCriteria):
+     def __init__(
+         self, tokens: List[List[str]], tokenizer: AutoTokenizer, device: torch.device
+     ):
+         stop_token_ids = [tokenizer.convert_tokens_to_ids(t) for t in tokens]
+         self.stop_token_ids = [
+             torch.tensor(x, dtype=torch.long, device=device) for x in stop_token_ids
+         ]
+
+     def __call__(
+         self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
+     ) -> bool:
+         for stop_ids in self.stop_token_ids:
+             if torch.eq(input_ids[0][-len(stop_ids) :], stop_ids).all():
+                 return True
+         return False
+
+
+ stopping_criteria = StoppingCriteriaList(
+     [StopGenerationCriteria(stop_tokens, tokenizer, model.device)]
+ )
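+ # Assumption: "Human" and ":" must each exist as single tokens in the GPT-2
+ # vocabulary for convert_tokens_to_ids to map them faithfully; multi-token
+ # words would need tokenizer(...).input_ids instead.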
+
+ generation_pipeline = pipeline(
+     model=model,
+     tokenizer=tokenizer,
+     return_full_text=True,
+     task="text-generation",
+     stopping_criteria=stopping_criteria,
+     generation_config=generation_config,
+ )
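+ # return_full_text=True keeps the prompt in the pipeline output; for
+ # text-generation tasks LangChain's HuggingFacePipeline wrapper slices the
+ # prompt prefix back off, so the chain sees only the newly generated text.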
+
+ llm = HuggingFacePipeline(pipeline=generation_pipeline)
+ template = """
+ The following is a friendly conversation between a human and an AI.
+ Current conversation:
+
+ {history}
+
+ Human: {input}
+ AI:""".strip()
+ prompt = PromptTemplate(input_variables=["history", "input"], template=template)
+
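+ # Sliding-window memory: keep only the last k=6 exchanges in {history} so the
+ # prompt stays well within GPT-2's 1024-token context limit.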
+ memory = ConversationBufferWindowMemory(
+     memory_key="history", k=6, return_only_outputs=True
+ )
+
+ chain = ConversationChain(
+     llm=llm,
+     prompt=prompt,
+     memory=memory,
+     verbose=True,
+ )
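+
+ # Gradio handler: each call runs one conversational turn through the chain;
+ # the window memory carries recent history across calls.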
+ def generate_response(input_text):
+     res = chain.invoke(input_text)
+     # chain.invoke returns a dict ({"input": ..., "history": ..., "response": ...});
+     # return only the generated reply, not the whole dict.
+     return res["response"]
+
+ iface = gr.Interface(fn=generate_response, inputs="text", outputs="text")
+ iface.launch()