Jackmin108 committed
Commit
70e22f5
2 Parent(s): 814cbbb 027219a

merge changes


Signed-off-by: Meow <ongjackm@gmail.com>

Files changed (3):
  1. embedding.py +0 -2
  2. mha.py +1 -0
  3. modeling_xlm_roberta.py +0 -1
embedding.py CHANGED
@@ -59,7 +59,6 @@ class XLMRobertaEmbeddings(nn.Module):
             embeddings[task_indices] = task_embeddings
         else:
             embeddings = self.word_embeddings(input_ids)
-
         if self.max_position_embeddings > 0:
             if position_ids is None:
                 position_ids = create_position_ids_from_input_ids(input_ids, padding_idx=self.word_embeddings.padding_idx).to(input_ids.device)
@@ -79,5 +78,4 @@ class XLMRobertaEmbeddings(nn.Module):
         else:
             token_type_embeddings = self.token_type_embeddings(token_type_ids)
             embeddings = embeddings + token_type_embeddings
-
         return embeddings
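For context on the surviving lines: create_position_ids_from_input_ids follows the RoBERTa convention, where padding tokens keep padding_idx as their position and real tokens are numbered consecutively from padding_idx + 1. A minimal sketch of that helper, mirroring the upstream transformers implementation (not part of this commit):

import torch

def create_position_ids_from_input_ids(input_ids: torch.Tensor, padding_idx: int) -> torch.Tensor:
    # 1 where the token is real, 0 where it is padding
    mask = input_ids.ne(padding_idx).int()
    # running count of real tokens, zeroed out again at padding positions
    incremental_indices = torch.cumsum(mask, dim=1) * mask
    return incremental_indices.long() + padding_idx

# Example: with padding_idx=1, real tokens get positions 2, 3, ...
ids = torch.tensor([[5, 7, 1, 1]])
print(create_position_ids_from_input_ids(ids, padding_idx=1))  # tensor([[2, 3, 1, 1]])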
mha.py CHANGED
@@ -643,6 +643,7 @@ class MHA(nn.Module):
             inference_params.max_sequence_len if inference_params is not None else max_seqlen
         )
         batch, seqlen = x.shape[:2]
+        lora_kwargs = {}
         if not self.cross_attn and self.num_heads_kv == self.num_heads:
             assert x_kv is None and mixer_subset is None
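The only functional change in this file is seeding an empty lora_kwargs dict before the attention branches. Presumably later code fills it with adapter arguments and splats it into the projection call; a hypothetical sketch of that pattern (the adapter_mask key and the project_qkv wrapper are illustrative assumptions, not from this diff):

import torch
import torch.nn as nn

def project_qkv(wqkv: nn.Module, x: torch.Tensor, adapter_mask=None) -> torch.Tensor:
    # Extra LoRA/adapter arguments are added only when present, so a plain
    # nn.Linear still works: the splat below is then empty.
    lora_kwargs = {}
    if adapter_mask is not None:
        lora_kwargs['adapter_mask'] = adapter_mask  # assumed key, for illustration
    return wqkv(x, **lora_kwargs)

# Works unchanged with a vanilla linear projection when no mask is given:
proj = nn.Linear(16, 48)
out = project_qkv(proj, torch.randn(2, 4, 16))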
modeling_xlm_roberta.py CHANGED
@@ -213,7 +213,6 @@ class XLMRobertaEncoder(nn.Module):
             mixer_kwargs = {'adapter_mask': adapter_mask}
             if key_padding_mask is not None:
                 mixer_kwargs['key_padding_mask'] = key_padding_mask.bool()
-
             for layer in self.layers:
                 if self._grad_checkpointing:
                     hidden_states = torch.utils.checkpoint.checkpoint(
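This hunk only drops a blank line, but the context shows the encoder's gradient-checkpointing dispatch: each layer either runs directly or is re-executed during the backward pass instead of caching its activations. A minimal sketch of that pattern (the layer signature and the partial binding of mixer_kwargs are assumptions about this codebase):

from functools import partial

import torch

def run_encoder(layers, hidden_states, mixer_kwargs, grad_checkpointing=False):
    for layer in layers:
        if grad_checkpointing:
            # Trade compute for memory: recompute this layer's activations in
            # backward. partial() binds mixer_kwargs so the checkpointed
            # callable takes only the tensor argument.
            hidden_states = torch.utils.checkpoint.checkpoint(
                partial(layer, mixer_kwargs=mixer_kwargs),
                hidden_states,
                use_reentrant=False,
            )
        else:
            hidden_states = layer(hidden_states, mixer_kwargs=mixer_kwargs)
    return hidden_states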