Spaces:
Runtime error
Runtime error
saicharan2804
committed on
Commit
•
f23bcf0
1
Parent(s):
1fc0c38
Added token IDs
Browse files
- SmilesPeTokenizer.py +12 -8
- app.py +1 -1
- chembl_smiles_tokenizer.txt +0 -0
SmilesPeTokenizer.py
CHANGED
@@ -1,12 +1,16 @@
|
|
1 |
-
import
|
2 |
-
from SmilesPE.tokenizer import *
|
3 |
|
4 |
-
def
|
|
|
|
|
5 |
|
6 |
-
|
7 |
-
|
8 |
|
9 |
-
tokenized
|
10 |
-
|
11 |
-
return tokenized
|
12 |
|
|
|
|
|
|
|
|
|
|
1 |
+
from tokenizers import Tokenizer
|
|
|
2 |
|
3 |
+
def bpe_tokenizer(smiles_string):
|
4 |
+
# Load the tokenizer from the saved file
|
5 |
+
tokenizer = Tokenizer.from_file("chembl_bpe_tokenizer.json")
|
6 |
|
7 |
+
# Tokenize the SMILES string
|
8 |
+
encoded_output = tokenizer.encode(smiles_string)
|
9 |
|
10 |
+
# To get the tokenized output as text
|
11 |
+
tokens_text = encoded_output.tokens
|
|
|
12 |
|
13 |
+
# To get the corresponding token IDs
|
14 |
+
token_ids = encoded_output.ids
|
15 |
+
|
16 |
+
return tokens_text, token_ids
|
app.py
CHANGED
@@ -6,7 +6,7 @@ iface = gr.Interface(
|
|
6 |
inputs=[
|
7 |
gr.Textbox(label="SMILES"),
|
8 |
],
|
9 |
-
outputs="text"
|
10 |
)
|
11 |
|
12 |
iface.launch()
|
|
|
6 |
inputs=[
|
7 |
gr.Textbox(label="SMILES"),
|
8 |
],
|
9 |
+
outputs=["text", "text"]
|
10 |
)
|
11 |
|
12 |
iface.launch()
|
chembl_smiles_tokenizer.txt
ADDED
File without changes
|