Spaces:

camparchimedes
/

nb

Build error

camparchimedes committed on Aug 11

Commit

d262ec1

•

1 Parent(s): f7e87b9

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -27,7 +27,7 @@ def transcribe_audio(audio_file):
     start_time = time.time()
     with torch.no_grad():
-        output = asr(audio_file, chunk_length_s=30, generate_kwargs={"num_beams": 8, "batch_size": 10}) # "task": "transcribe", "language": "no" + ..batch_size=10 ?
     transcription = output["text"]
     end_time = time.time()
@@ -54,7 +54,11 @@ nltk.download('stopwords')
 WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
-text = transcription
 def clean_text(text):
     text = re.sub(r'https?:\/\/.*[\r\n]*', '', str(text), flags=re.MULTILINE)
@@ -118,6 +122,7 @@ def graph_based_summary(text, num_paragraphs=3):
     stop_words = set(stopwords.words('norwegian'))
     filtered_tokens = [[word for word in tokens if word.lower() not in stop_words] for tokens in sentence_tokens]
     similarity_matrix = build_similarity_matrix(filtered_tokens, stop_words)
     scores = nx.pagerank(similarity_matrix)
     ranked_sentences = sorted(((scores[i], sent) for i, sent in enumerate(sentences)), reverse=True)
     summary = [sent for _, sent in ranked_sentences[:num_paragraphs]]
@@ -160,7 +165,6 @@ def text_rank_summary(text, num_paragraphs=3):
     ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True) # rank by PageRank scores
     summary = [ranked_sentences[i][1] for i in range(num_paragraphs)] # top sentences for summary
     return ' '.join(summary)

     start_time = time.time()
     with torch.no_grad():
+        output = asr(audio_file, chunk_length_s=30, generate_kwargs={"num_beams": 8, "batch_size": 10}) # "task": "transcribe", "language": "no"
     transcription = output["text"]
     end_time = time.time()
 WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
+def transcribe(audio_file):  # thin wrapper: forwards audio_file unchanged to transcribe_audio
+    transcription, result = transcribe_audio(audio_file)  # unpacks a 2-item return; transcribe_audio's return statement is not visible in this diff -- TODO confirm its shape
+    text = transcription  # function-local alias only; this does not assign any module-level `text`
+    return text, result  # hands back the same pair, with the transcription under the name `text`
 def clean_text(text):
     text = re.sub(r'https?:\/\/.*[\r\n]*', '', str(text), flags=re.MULTILINE)
     stop_words = set(stopwords.words('norwegian'))
     filtered_tokens = [[word for word in tokens if word.lower() not in stop_words] for tokens in sentence_tokens]
     similarity_matrix = build_similarity_matrix(filtered_tokens, stop_words)
     scores = nx.pagerank(similarity_matrix)
     ranked_sentences = sorted(((scores[i], sent) for i, sent in enumerate(sentences)), reverse=True)
     summary = [sent for _, sent in ranked_sentences[:num_paragraphs]]
     ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True) # rank by PageRank scores
     summary = [ranked_sentences[i][1] for i in range(num_paragraphs)] # top sentences for summary
     return ' '.join(summary)