Continue Pretraining
#7 by HuggySSO - opened
I would like to continue pretraining with TSDAE. I am freezing layers 0-16, but I am getting worse embeddings:
```python
import logging
import os
from datetime import datetime

from torch.utils.data import DataLoader
from sentence_transformers import SentenceTransformer, models, datasets, losses

# params and train_sentences are defined earlier in my script
model_name = params["model"]
batch_size = params["batch"]
n_frozen_layers = 17  # freeze encoder layers 0-16

################# Initialize an SBERT model #################
word_embedding_model = models.Transformer(model_name)

# Freeze the first n_frozen_layers encoder layers
if n_frozen_layers > 0:
    for param in word_embedding_model.auto_model.encoder.layer[:n_frozen_layers].parameters():
        param.requires_grad = False

pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(), 'cls')
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

################# Train and evaluate the model #################
logging.info("{} train sentences".format(len(train_sentences)))

# Wrapping the dataset to add deletion noise on the fly
train_dataset = datasets.DenoisingAutoEncoderDataset(train_sentences)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
train_loss = losses.DenoisingAutoEncoderLoss(model, decoder_name_or_path=model_name, tie_encoder_decoder=True)

logging.info("Start training")
for epoch in range(params["epochs"]):
    model.fit(
        train_objectives=[(train_dataloader, train_loss)],
        epochs=1,
        weight_decay=0,
        scheduler='constantlr',
        optimizer_params={'lr': params["lr"]},
        show_progress_bar=True,
        use_amp=True  # Set to True if your GPU supports FP16 cores
    )
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    output_dir = os.path.join(params["save_path"], timestamp)
    model.save(output_dir)  # save a checkpoint after each epoch
```
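For what it's worth, here is a minimal sketch of how the freeze can be verified after building the model; it only relies on `word_embedding_model` from the script above and assumes standard BERT-style parameter names (`encoder.layer.<idx>...`):

```python
# Sanity check: which encoder layers still have trainable parameters?
trainable_layers = set()
for name, param in word_embedding_model.auto_model.named_parameters():
    if param.requires_grad and name.startswith("encoder.layer."):
        trainable_layers.add(int(name.split(".")[2]))  # layer index
print("Trainable encoder layers:", sorted(trainable_layers))
# With n_frozen_layers = 17, only layers 17 and above should appear here.
```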
Any advice on what I am doing wrong, or how I can make the embeddings more domain-specific, would be greatly appreciated. (Labeled data is non-existent, which is why I am trying TSDAE.)
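For reference, this is roughly how I compare the base and fine-tuned embeddings without labels; the sentences below are placeholders for a held-out sample from my corpus, and `output_dir` is the checkpoint path from the script above:

```python
from sentence_transformers import SentenceTransformer, util

# Placeholder domain sentences; in practice these come from a held-out sample
sentences = ["placeholder domain sentence A", "placeholder domain sentence B"]

base = SentenceTransformer(params["model"])
finetuned = SentenceTransformer(output_dir)

# Compare the pairwise cosine similarity produced by the two models
for name, m in [("base", base), ("finetuned", finetuned)]:
    emb = m.encode(sentences, convert_to_tensor=True, normalize_embeddings=True)
    sim = util.cos_sim(emb[0], emb[1]).item()
    print(f"{name}: cos_sim = {sim:.3f}")
```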