from m_conf import *
from keras.preprocessing.text import Tokenizer
from gensim.models import Word2Vec

# Read the raw training corpus, one sentence per line.
with open('train.txt', 'r') as file:
    lines = file.readlines()

# Fit a Keras tokenizer on the corpus and map each line to a sequence of integer token ids.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(lines)
sequences = tokenizer.texts_to_sequences(lines)

# Word2Vec expects string tokens, so cast each id to a string; the embedding model's
# vocabulary therefore mirrors the tokenizer's integer indices.
tokens = [[str(i) for i in seq] for seq in sequences]

# Train a Word2Vec model (vector size taken from emb_o_dim in m_conf) and save it to disk.
model = Word2Vec(tokens, window=3, min_count=1, vector_size=emb_o_dim)
model.save("w2v.model")
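
# A minimal usage sketch, not part of the original pipeline: it assumes the script above has
# already written "w2v.model", that `tokenizer` is still in scope, and that emb_o_dim comes
# from m_conf as before. It loads the saved model and builds an embedding matrix indexed by
# the tokenizer's integer ids, one common way to initialise a downstream Keras Embedding layer.
import numpy as np

w2v = Word2Vec.load("w2v.model")

# Row i holds the vector learned for token id i; index 0 is the Keras padding id and stays zero.
embedding_matrix = np.zeros((len(tokenizer.word_index) + 1, emb_o_dim))
for token_id in range(1, len(tokenizer.word_index) + 1):
    key = str(token_id)
    if key in w2v.wv:  # with min_count=1 every id should be present, but guard anyway
        embedding_matrix[token_id] = w2v.wv[key]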