禾息 committed on
Commit
6e7f5b9
1 Parent(s): 53a3c1b
cosyvoice/bin/export_trt.py CHANGED
@@ -66,13 +66,13 @@ def main():
66
  opset_version=18,
67
  do_constant_folding=True,
68
  input_names=['x', 'mask', 'mu', 't', 'spks', 'cond'],
69
- output_names=['output'],
70
  dynamic_axes={
71
  'x': {2: 'seq_len'},
72
  'mask': {2: 'seq_len'},
73
  'mu': {2: 'seq_len'},
74
  'cond': {2: 'seq_len'},
75
- 'output': {2: 'seq_len'},
76
  }
77
  )
78
 
@@ -95,7 +95,7 @@ def main():
95
  "--minShapes=x:1x80x1,mask:1x1x1,mu:1x80x1,t:1,spks:1x80,cond:1x80x1 " \
96
  "--maxShapes=x:1x80x4096,mask:1x1x4096,mu:1x80x4096,t:1,spks:1x80,cond:1x80x4096 --verbose " + \
97
  ("--fp16" if args.export_half else "")
98
- # /ossfs/workspace/TensorRT-10.2.0.19/bin/trtexec --onnx=estimator_fp32.onnx --saveEngine=estimator_fp32.plan --minShapes=x:1x80x1,mask:1x1x1,mu:1x80x1,t:1,spks:1x80,cond:1x80x1 --maxShapes=x:1x80x4096,mask:1x1x4096,mu:1x80x4096,t:1,spks:1x80,cond:1x80x4096 --verbose
99
  print("execute ", trtexec_cmd)
100
 
101
  os.system(trtexec_cmd)
 
66
  opset_version=18,
67
  do_constant_folding=True,
68
  input_names=['x', 'mask', 'mu', 't', 'spks', 'cond'],
69
+ output_names=['estimator_out'],
70
  dynamic_axes={
71
  'x': {2: 'seq_len'},
72
  'mask': {2: 'seq_len'},
73
  'mu': {2: 'seq_len'},
74
  'cond': {2: 'seq_len'},
75
+ 'estimator_out': {2: 'seq_len'},
76
  }
77
  )
78
 
 
95
  "--minShapes=x:1x80x1,mask:1x1x1,mu:1x80x1,t:1,spks:1x80,cond:1x80x1 " \
96
  "--maxShapes=x:1x80x4096,mask:1x1x4096,mu:1x80x4096,t:1,spks:1x80,cond:1x80x4096 --verbose " + \
97
  ("--fp16" if args.export_half else "")
98
+
99
  print("execute ", trtexec_cmd)
100
 
101
  os.system(trtexec_cmd)
cosyvoice/cli/model.py CHANGED
@@ -83,8 +83,7 @@ class CosyVoiceModel:
83
  with open(trt_file_path, 'rb') as f:
84
  serialized_engine = f.read()
85
  engine = runtime.deserialize_cuda_engine(serialized_engine)
86
- self.flow.decoder.estimator_context = engine.create_execution_context()
87
- self.flow.decoder.estimator_engine = engine
88
 
89
  def llm_job(self, text, prompt_text, llm_prompt_speech_token, llm_embedding, uuid):
90
  with self.llm_context:
 
83
  with open(trt_file_path, 'rb') as f:
84
  serialized_engine = f.read()
85
  engine = runtime.deserialize_cuda_engine(serialized_engine)
86
+ self.flow.decoder.estimator = engine.create_execution_context()
 
87
 
88
  def llm_job(self, text, prompt_text, llm_prompt_speech_token, llm_embedding, uuid):
89
  with self.llm_context:
cosyvoice/flow/flow_matching.py CHANGED
@@ -30,10 +30,6 @@ class ConditionalCFM(BASECFM):
30
  # Just change the architecture of the estimator here
31
  self.estimator = estimator
32
 
33
- self.estimator_context = None
34
- self.estimator_engine = None
35
- self.is_saved = None
36
-
37
  @torch.inference_mode()
38
  def forward(self, mu, mask, n_timesteps, temperature=1.0, spks=None, cond=None):
39
  """Forward diffusion
@@ -102,7 +98,11 @@ class ConditionalCFM(BASECFM):
102
  return sol[-1]
103
 
104
  def forward_estimator(self, x, mask, mu, t, spks, cond):
105
- if self.estimator_context is not None:
 
 
 
 
106
  assert self.training is False, 'tensorrt cannot be used in training'
107
  bs = x.shape[0]
108
  hs = x.shape[1]
@@ -116,50 +116,14 @@ class ConditionalCFM(BASECFM):
116
  self.estimator_context.set_input_shape("spks", spks.shape)
117
  self.estimator_context.set_input_shape("cond", cond.shape)
118
  bindings = [x.data_ptr(), mask.data_ptr(), mu.data_ptr(), t.data_ptr(), spks.data_ptr(), cond.data_ptr(), ret.data_ptr()]
 
119
 
120
  for i in range(len(bindings)):
121
- self.estimator_context.set_tensor_address(self.estimator_engine.get_tensor_name(i), bindings[i])
122
 
123
  handle = torch.cuda.current_stream().cuda_stream
124
- self.estimator_context.execute_async_v3(stream_handle=handle)
125
  return ret
126
- else:
127
-
128
- if self.is_saved == None:
129
- self.is_saved = True
130
- output = self.estimator.forward(x, mask, mu, t, spks, cond)
131
- torch.save(x, "x.pt")
132
- torch.save(mask, "mask.pt")
133
- torch.save(mu, "mu.pt")
134
- torch.save(t, "t.pt")
135
- torch.save(spks, "spks.pt")
136
- torch.save(cond, "cond.pt")
137
- torch.save(output, "output.pt")
138
- dummy_input = (x, mask, mu, t, spks, cond)
139
- torch.onnx.export(
140
- self.estimator,
141
- dummy_input,
142
- "estimator_fp32.onnx",
143
- export_params=True,
144
- opset_version=17,
145
- do_constant_folding=True,
146
- input_names=['x', 'mask', 'mu', 't', 'spks', 'cond'],
147
- output_names=['output'],
148
- dynamic_axes={
149
- 'x': {2: 'seq_len'},
150
- 'mask': {2: 'seq_len'},
151
- 'mu': {2: 'seq_len'},
152
- 'cond': {2: 'seq_len'},
153
- 'output': {2: 'seq_len'},
154
- }
155
- )
156
- # print("x, x.shape", x, x.shape)
157
- # print("mask, mask.shape", mask, mask.shape)
158
- # print("mu, mu.shape", mu, mu.shape)
159
- # print("t, t.shape", t, t.shape)
160
- # print("spks, spks.shape", spks, spks.shape)
161
- # print("cond, cond.shape", cond, cond.shape)
162
- return self.estimator.forward(x, mask, mu, t, spks, cond)
163
 
164
  def compute_loss(self, x1, mask, mu, spks=None, cond=None):
165
  """Computes diffusion loss
 
30
  # Just change the architecture of the estimator here
31
  self.estimator = estimator
32
 
 
 
 
 
33
  @torch.inference_mode()
34
  def forward(self, mu, mask, n_timesteps, temperature=1.0, spks=None, cond=None):
35
  """Forward diffusion
 
98
  return sol[-1]
99
 
100
  def forward_estimator(self, x, mask, mu, t, spks, cond):
101
+
102
+ if not isinstance(self.estimator, torch.nn.Module):
103
+ return self.estimator.forward(x, mask, mu, t, spks, cond)
104
+
105
+ else:
106
  assert self.training is False, 'tensorrt cannot be used in training'
107
  bs = x.shape[0]
108
  hs = x.shape[1]
 
116
  self.estimator_context.set_input_shape("spks", spks.shape)
117
  self.estimator_context.set_input_shape("cond", cond.shape)
118
  bindings = [x.data_ptr(), mask.data_ptr(), mu.data_ptr(), t.data_ptr(), spks.data_ptr(), cond.data_ptr(), ret.data_ptr()]
119
+ names = ['x', 'mask', 'mu', 't', 'spks', 'cond', 'estimator_out']
120
 
121
  for i in range(len(bindings)):
122
+ self.estimator.set_tensor_address(names[i], bindings[i])
123
 
124
  handle = torch.cuda.current_stream().cuda_stream
125
+ self.estimator.execute_async_v3(stream_handle=handle)
126
  return ret
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
  def compute_loss(self, x1, mask, mu, spks=None, cond=None):
129
  """Computes diffusion loss