Spaces:
Runtime error
Runtime error
import codecs | |
from SmilesPE.learner import * | |
import pandas as pd | |
import argparse | |
# Command-line interface: two required positional paths.
parser = argparse.ArgumentParser(description='Train SmilesPE Tokenizer.')
parser.add_argument('dataset_file_path', type=str, help='Path to the dataset file')
parser.add_argument('output_file_path', type=str, help='Path to file containing trained tokenizer weights')

# Parse the arguments
args = parser.parse_args()

# Load the dataset first so that a bad input path fails before the output
# file is created/truncated. The CSV must contain a 'canonical_smiles' column.
df = pd.read_csv(args.dataset_file_path)
# NOTE: to train on a subset only, slice the frame here (e.g. the first 30000 rows).

# Use a context manager so the output handle is flushed and closed even if
# training raises; previously the file was opened and never closed.
with codecs.open(args.output_file_path, 'w') as output:
    # NOTE(review): the positional 30000 is presumably the number of symbols
    # to learn -- confirm against the SmilesPE `learn_SPE` signature.
    learn_SPE(
        df['canonical_smiles'].tolist(),
        output,
        30000,
        min_frequency=2000,
        augmentation=1,
        verbose=True,
        total_symbols=True,
    )