camparchimedes committed on
Commit
d262ec1
1 Parent(s): f7e87b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -27,7 +27,7 @@ def transcribe_audio(audio_file):
27
  start_time = time.time()
28
 
29
  with torch.no_grad():
30
- output = asr(audio_file, chunk_length_s=30, generate_kwargs={"num_beams": 8, "batch_size": 10}) # "task": "transcribe", "language": "no" + ..batch_size=10 ?
31
 
32
  transcription = output["text"]
33
  end_time = time.time()
@@ -54,7 +54,11 @@ nltk.download('stopwords')
54
 
55
  WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
56
 
57
- text = transcription
 
 
 
 
58
 
59
  def clean_text(text):
60
  text = re.sub(r'https?:\/\/.*[\r\n]*', '', str(text), flags=re.MULTILINE)
@@ -118,6 +122,7 @@ def graph_based_summary(text, num_paragraphs=3):
118
  stop_words = set(stopwords.words('norwegian'))
119
  filtered_tokens = [[word for word in tokens if word.lower() not in stop_words] for tokens in sentence_tokens]
120
  similarity_matrix = build_similarity_matrix(filtered_tokens, stop_words)
 
121
  scores = nx.pagerank(similarity_matrix)
122
  ranked_sentences = sorted(((scores[i], sent) for i, sent in enumerate(sentences)), reverse=True)
123
  summary = [sent for _, sent in ranked_sentences[:num_paragraphs]]
@@ -160,7 +165,6 @@ def text_rank_summary(text, num_paragraphs=3):
160
  ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True) # rank by PageRank scores
161
 
162
  summary = [ranked_sentences[i][1] for i in range(num_paragraphs)] # top sentences for summary
163
-
164
  return ' '.join(summary)
165
 
166
 
 
27
  start_time = time.time()
28
 
29
  with torch.no_grad():
30
+ output = asr(audio_file, chunk_length_s=30, generate_kwargs={"num_beams": 8, "batch_size": 10}) # "task": "transcribe", "language": "no"
31
 
32
  transcription = output["text"]
33
  end_time = time.time()
 
54
 
55
  WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
56
 
57
+ def transcribe(audio_file):
58
+ transcription, result = transcribe_audio(audio_file)
59
+ text = transcription
60
+ return text, result
61
+
62
 
63
  def clean_text(text):
64
  text = re.sub(r'https?:\/\/.*[\r\n]*', '', str(text), flags=re.MULTILINE)
 
122
  stop_words = set(stopwords.words('norwegian'))
123
  filtered_tokens = [[word for word in tokens if word.lower() not in stop_words] for tokens in sentence_tokens]
124
  similarity_matrix = build_similarity_matrix(filtered_tokens, stop_words)
125
+
126
  scores = nx.pagerank(similarity_matrix)
127
  ranked_sentences = sorted(((scores[i], sent) for i, sent in enumerate(sentences)), reverse=True)
128
  summary = [sent for _, sent in ranked_sentences[:num_paragraphs]]
 
165
  ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True) # rank by PageRank scores
166
 
167
  summary = [ranked_sentences[i][1] for i in range(num_paragraphs)] # top sentences for summary
 
168
  return ' '.join(summary)
169
 
170