VarunGumma committed on
Commit
59feb3e
1 Parent(s): 62d68cb

Upload configuration_rotary_indictrans.py with huggingface_hub

Files changed (1)
  1. configuration_rotary_indictrans.py +2 -175
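The commit message above describes pushing this file with the huggingface_hub client. For context, a minimal sketch of how such an upload is typically done with HfApi.upload_file; the repo id and local path below are placeholders, not values taken from this commit:

from huggingface_hub import HfApi

api = HfApi()  # picks up the token from `huggingface-cli login` or the HF_TOKEN environment variable
api.upload_file(
    path_or_fileobj="configuration_rotary_indictrans.py",  # local file to push (placeholder path)
    path_in_repo="configuration_rotary_indictrans.py",     # destination path inside the repo
    repo_id="<namespace>/<model-repo>",                     # placeholder repo id
    commit_message="Upload configuration_rotary_indictrans.py with huggingface_hub",
)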
configuration_rotary_indictrans.py CHANGED
@@ -1,28 +1,4 @@
-# coding=utf-8
-# Copyright 2023 The IndicTrans2 Authors and AI4Bharat team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-""" PyTorch IndicTrans config."""
-
-import json
-from collections import OrderedDict
-from typing import Any, Mapping, Optional
-
-from transformers import PreTrainedTokenizer
 from transformers.configuration_utils import PretrainedConfig
-from transformers.onnx import OnnxConfig, OnnxSeq2SeqConfigWithPast
-from transformers.onnx.utils import compute_effective_axis_dimension
-from transformers.utils import TensorType, is_torch_available
 
 
 # Copied from transformers.models.m2m_100.configuration_m2m_100.M2M100Config->IndicTrans
@@ -79,6 +55,7 @@ class RotaryIndicTransConfig(PretrainedConfig):
         use_cache (`bool`, *optional*, defaults to `True`):
             Whether or not the model should return the last key/values attentions (not used by all models).
     ```"""
+
     model_type = "RotaryIndicTrans"
     keys_to_ignore_at_inference = ["past_key_values"]
     attribute_map = {
@@ -146,7 +123,7 @@ class RotaryIndicTransConfig(PretrainedConfig):
         self.scale_embedding = scale_embedding
         self.share_decoder_input_output_embed = share_decoder_input_output_embed
         self.attn_implementation = attn_implementation
-
+
         super().__init__(
             pad_token_id=pad_token_id,
             bos_token_id=bos_token_id,
@@ -155,153 +132,3 @@ class RotaryIndicTransConfig(PretrainedConfig):
             decoder_start_token_id=decoder_start_token_id,
             **kwargs,
         )
-
-
-class RotaryIndicTransOnnxConfig(OnnxSeq2SeqConfigWithPast):
-    @property
-    def inputs(self) -> Mapping[str, Mapping[int, str]]:
-        common_inputs = OrderedDict(
-            [
-                ("input_ids", {0: "batch", 1: "encoder_sequence"}),
-                ("attention_mask", {0: "batch", 1: "encoder_sequence"}),
-            ]
-        )
-
-        if self.use_past:
-            common_inputs["decoder_input_ids"] = {0: "batch"}
-            common_inputs["decoder_attention_mask"] = {
-                0: "batch",
-                1: "past_decoder_sequence + sequence",
-            }
-        else:
-            common_inputs["decoder_input_ids"] = {0: "batch", 1: "decoder_sequence"}
-            common_inputs["decoder_attention_mask"] = {
-                0: "batch",
-                1: "decoder_sequence",
-            }
-
-        if self.use_past:
-            self.fill_with_past_key_values_(common_inputs, direction="inputs")
-        return common_inputs
-
-    # Copied from BartOnnxConfig._generate_dummy_inputs_for_sequence_classification_and_question_answering
-    # A better name would be _generate_dummy_inputs_for_encoder_and_decoder because sequence classification and question
-    # answering are not supported for IT2, but this name is preserved to be able to check that the copy matches what
-    # was done for BART so that it can be updated if need be.
-    def _generate_dummy_inputs_for_sequence_classification_and_question_answering(
-        self,
-        tokenizer: PreTrainedTokenizer,
-        batch_size: int = -1,
-        seq_length: int = -1,
-        is_pair: bool = False,
-        framework: Optional[TensorType] = None,
-    ) -> Mapping[str, Any]:
-        # Copied from OnnxConfig.generate_dummy_inputs
-        # Did not use super(OnnxConfigWithPast, self).generate_dummy_inputs for code clarity.
-        # If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX
-        batch_size = compute_effective_axis_dimension(
-            batch_size,
-            fixed_dimension=OnnxConfig.default_fixed_batch,
-            num_token_to_add=0,
-        )
-
-        # If dynamic axis (-1) we forward with a fixed dimension of 8 tokens to avoid optimizations made by ONNX
-        token_to_add = tokenizer.num_special_tokens_to_add(is_pair)
-        seq_length = compute_effective_axis_dimension(
-            seq_length,
-            fixed_dimension=OnnxConfig.default_fixed_sequence,
-            num_token_to_add=token_to_add,
-        )
-
-        # Generate dummy inputs according to compute batch and sequence
-        dummy_input = [" ".join([tokenizer.unk_token]) * seq_length] * batch_size
-        common_inputs = dict(tokenizer(dummy_input, return_tensors=framework))
-        return common_inputs
-
-    # Copied from transformers.models.bart.configuration_bart.BartOnnxConfig._generate_dummy_inputs_for_default_and_seq2seq_lm
-    def _generate_dummy_inputs_for_default_and_seq2seq_lm(
-        self,
-        tokenizer: PreTrainedTokenizer,
-        batch_size: int = -1,
-        seq_length: int = -1,
-        is_pair: bool = False,
-        framework: Optional[TensorType] = None,
-    ) -> Mapping[str, Any]:
-        encoder_inputs = self._generate_dummy_inputs_for_sequence_classification_and_question_answering(
-            tokenizer, batch_size, seq_length, is_pair, framework
-        )
-
-        # Generate decoder inputs
-        decoder_seq_length = seq_length if not self.use_past else 1
-        decoder_inputs = self._generate_dummy_inputs_for_sequence_classification_and_question_answering(
-            tokenizer, batch_size, decoder_seq_length, is_pair, framework
-        )
-        decoder_inputs = {
-            f"decoder_{name}": tensor for name, tensor in decoder_inputs.items()
-        }
-        common_inputs = dict(**encoder_inputs, **decoder_inputs)
-
-        if self.use_past:
-            if not is_torch_available():
-                raise ValueError(
-                    "Cannot generate dummy past_keys inputs without PyTorch installed."
-                )
-            else:
-                import torch
-            batch, encoder_seq_length = common_inputs["input_ids"].shape
-            decoder_seq_length = common_inputs["decoder_input_ids"].shape[1]
-            (
-                num_encoder_attention_heads,
-                num_decoder_attention_heads,
-            ) = self.num_attention_heads
-            encoder_shape = (
-                batch,
-                num_encoder_attention_heads,
-                encoder_seq_length,
-                self._config.hidden_size // num_encoder_attention_heads,
-            )
-            decoder_past_length = decoder_seq_length + 3
-            decoder_shape = (
-                batch,
-                num_decoder_attention_heads,
-                decoder_past_length,
-                self._config.hidden_size // num_decoder_attention_heads,
-            )
-
-            common_inputs["decoder_attention_mask"] = torch.cat(
-                [
-                    common_inputs["decoder_attention_mask"],
-                    torch.ones(batch, decoder_past_length),
-                ],
-                dim=1,
-            )
-
-            common_inputs["past_key_values"] = []
-            # If the number of encoder and decoder layers are present in the model configuration, both are considered
-            num_encoder_layers, num_decoder_layers = self.num_layers
-            min_num_layers = min(num_encoder_layers, num_decoder_layers)
-            max_num_layers = (
-                max(num_encoder_layers, num_decoder_layers) - min_num_layers
-            )
-            remaining_side_name = (
-                "encoder" if num_encoder_layers > num_decoder_layers else "decoder"
-            )
-
-            for _ in range(min_num_layers):
-                common_inputs["past_key_values"].append(
-                    (
-                        torch.zeros(decoder_shape),
-                        torch.zeros(decoder_shape),
-                        torch.zeros(encoder_shape),
-                        torch.zeros(encoder_shape),
-                    )
-                )
-            # TODO: test this.
-            shape = encoder_shape if remaining_side_name == "encoder" else decoder_shape
-            for _ in range(min_num_layers, max_num_layers):
-                common_inputs["past_key_values"].append(
-                    (torch.zeros(shape), torch.zeros(shape))
-                )
-        return common_inputs
-
-    generate_dummy_inputs = _generate_dummy_inputs_for_default_and_seq2seq_lm
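With the ONNX export helpers removed, configuration_rotary_indictrans.py depends only on transformers' PretrainedConfig. A minimal sketch of loading the trimmed configuration from a hub repo; the repo id is a placeholder, and trust_remote_code=True is assumed because RotaryIndicTransConfig is defined inside the model repository rather than in the transformers library:

from transformers import AutoConfig

# Placeholder repo id; substitute the actual RotaryIndicTrans repository.
config = AutoConfig.from_pretrained(
    "<namespace>/<rotary-indictrans-repo>",
    trust_remote_code=True,  # needed so the hub-hosted configuration class is imported
)
print(config.model_type)  # expected to print "RotaryIndicTrans"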