Helw150 committed on
Commit
a74d5f9
1 Parent(s): e2607b6

Buffer TTS and Fix Selector

Browse files
Files changed (1) hide show
  1. app.py +13 -4
app.py CHANGED
@@ -104,6 +104,7 @@ def response(state: AppState, audio: tuple):
104
  prev_outs = causal_outs
105
  stream = orca.stream_open()
106
 
 
107
  for resp, outs in diva_audio(
108
  (audio[0], audio[1]),
109
  prev_outs=(prev_outs if prev_outs is not None else None),
@@ -115,12 +116,18 @@ def response(state: AppState, audio: tuple):
115
  pcm = stream.synthesize(resp[len(prev_resp) :])
116
  audio_chunk = None
117
  if pcm is not None:
 
 
118
  mp3_io = io.BytesIO()
119
  sf.write(
120
- mp3_io, np.asarray(pcm).astype(np.int16), orca.sample_rate, format="mp3"
 
 
 
121
  )
122
  audio_chunk = mp3_io.getvalue()
123
  mp3_io.close()
 
124
  yield state, state.conversation, audio_chunk
125
 
126
  del outs.logits
@@ -132,10 +139,12 @@ def response(state: AppState, audio: tuple):
132
  audio_chunk = None
133
  pcm = stream.flush()
134
  if pcm is not None:
135
- audio_chunk = np.asarray(pcm).tobytes()
136
  mp3_io = io.BytesIO()
137
  sf.write(
138
- mp3_io, np.asarray(pcm).astype(np.int16), orca.sample_rate, format="mp3"
 
 
 
139
  )
140
  audio_chunk = mp3_io.getvalue()
141
  mp3_io.close()
@@ -183,7 +192,7 @@ async function main() {
183
  const myvad = await vad.MicVAD.new({
184
  onSpeechStart: () => {
185
  var record = document.querySelector('.record-button');
186
- var player = document.querySelector('#streaming-out')
187
  if (record != null && (player == null || player.paused)) {
188
  console.log(record);
189
  record.click();
 
104
  prev_outs = causal_outs
105
  stream = orca.stream_open()
106
 
107
+ buff = []
108
  for resp, outs in diva_audio(
109
  (audio[0], audio[1]),
110
  prev_outs=(prev_outs if prev_outs is not None else None),
 
116
  pcm = stream.synthesize(resp[len(prev_resp) :])
117
  audio_chunk = None
118
  if pcm is not None:
119
+ buff.extend(pcm)
120
+ if len(buff) > (orca.sample_rate * 2):
121
  mp3_io = io.BytesIO()
122
  sf.write(
123
+ mp3_io,
124
+ np.asarray(buff[: orca.sample_rate]).astype(np.int16),
125
+ orca.sample_rate,
126
+ format="mp3",
127
  )
128
  audio_chunk = mp3_io.getvalue()
129
  mp3_io.close()
130
+ buff = buff[orca.sample_rate :]
131
  yield state, state.conversation, audio_chunk
132
 
133
  del outs.logits
 
139
  audio_chunk = None
140
  pcm = stream.flush()
141
  if pcm is not None:
 
142
  mp3_io = io.BytesIO()
143
  sf.write(
144
+ mp3_io,
145
+ np.asarray(buff + pcm).astype(np.int16),
146
+ orca.sample_rate,
147
+ format="mp3",
148
  )
149
  audio_chunk = mp3_io.getvalue()
150
  mp3_io.close()
 
192
  const myvad = await vad.MicVAD.new({
193
  onSpeechStart: () => {
194
  var record = document.querySelector('.record-button');
195
+ var player = document.getElementById("streaming_out").querySelector(".standard-player")
196
  if (record != null && (player == null || player.paused)) {
197
  console.log(record);
198
  record.click();