Spaces:
Build error
Build error
camparchimedes
committed on
Commit
•
d262ec1
1
Parent(s):
f7e87b9
Update app.py
Browse files
app.py
CHANGED
@@ -27,7 +27,7 @@ def transcribe_audio(audio_file):
|
|
27 |
start_time = time.time()
|
28 |
|
29 |
with torch.no_grad():
|
30 |
-
output = asr(audio_file, chunk_length_s=30, generate_kwargs={"num_beams": 8, "batch_size": 10}) # "task": "transcribe", "language": "no"
|
31 |
|
32 |
transcription = output["text"]
|
33 |
end_time = time.time()
|
@@ -54,7 +54,11 @@ nltk.download('stopwords')
|
|
54 |
|
55 |
WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
|
56 |
|
57 |
-
|
|
|
|
|
|
|
|
|
58 |
|
59 |
def clean_text(text):
|
60 |
text = re.sub(r'https?:\/\/.*[\r\n]*', '', str(text), flags=re.MULTILINE)
|
@@ -118,6 +122,7 @@ def graph_based_summary(text, num_paragraphs=3):
|
|
118 |
stop_words = set(stopwords.words('norwegian'))
|
119 |
filtered_tokens = [[word for word in tokens if word.lower() not in stop_words] for tokens in sentence_tokens]
|
120 |
similarity_matrix = build_similarity_matrix(filtered_tokens, stop_words)
|
|
|
121 |
scores = nx.pagerank(similarity_matrix)
|
122 |
ranked_sentences = sorted(((scores[i], sent) for i, sent in enumerate(sentences)), reverse=True)
|
123 |
summary = [sent for _, sent in ranked_sentences[:num_paragraphs]]
|
@@ -160,7 +165,6 @@ def text_rank_summary(text, num_paragraphs=3):
|
|
160 |
ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True) # rank by PageRank scores
|
161 |
|
162 |
summary = [ranked_sentences[i][1] for i in range(num_paragraphs)] # top sentences for summary
|
163 |
-
|
164 |
return ' '.join(summary)
|
165 |
|
166 |
|
|
|
27 |
start_time = time.time()
|
28 |
|
29 |
with torch.no_grad():
|
30 |
+
output = asr(audio_file, chunk_length_s=30, generate_kwargs={"num_beams": 8, "batch_size": 10}) # "task": "transcribe", "language": "no"
|
31 |
|
32 |
transcription = output["text"]
|
33 |
end_time = time.time()
|
|
|
54 |
|
55 |
WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
|
56 |
|
57 |
+
def transcribe(audio_file):
|
58 |
+
transcription, result = transcribe_audio(audio_file)
|
59 |
+
text = transcription
|
60 |
+
return text, result
|
61 |
+
|
62 |
|
63 |
def clean_text(text):
|
64 |
text = re.sub(r'https?:\/\/.*[\r\n]*', '', str(text), flags=re.MULTILINE)
|
|
|
122 |
stop_words = set(stopwords.words('norwegian'))
|
123 |
filtered_tokens = [[word for word in tokens if word.lower() not in stop_words] for tokens in sentence_tokens]
|
124 |
similarity_matrix = build_similarity_matrix(filtered_tokens, stop_words)
|
125 |
+
|
126 |
scores = nx.pagerank(similarity_matrix)
|
127 |
ranked_sentences = sorted(((scores[i], sent) for i, sent in enumerate(sentences)), reverse=True)
|
128 |
summary = [sent for _, sent in ranked_sentences[:num_paragraphs]]
|
|
|
165 |
ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True) # rank by PageRank scores
|
166 |
|
167 |
summary = [ranked_sentences[i][1] for i in range(num_paragraphs)] # top sentences for summary
|
|
|
168 |
return ' '.join(summary)
|
169 |
|
170 |
|