miscjose's picture
Updated app with more anime + filter adjustments
afff708
import os
import json
from sentence_transformers import SentenceTransformer
# Sentence-embedding model used by the script below; it is constructed once,
# as soon as this module is loaded.
_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
model = SentenceTransformer(_MODEL_NAME)
# Values that must never be offered as a filter option.
_EXCLUDED_FILTER_VALUES = frozenset({
    'Boys Love', 'Erotica', 'Girls Love', 'Hentai', 'Ecchi', 'Gore',
    'Crossdressing', 'Magical Sex Shift', 'Rx - Hentai', 'R+ - Mild Nudity',
})


def update_filters(filters, data):
    """Merge one record's filterable values into the running filter sets.

    Args:
        filters: Mapping of field name to a ``set`` of values collected so
            far. The sets are updated in place.
        data: Mapping for a single record. For every key of ``filters`` the
            matching entry of ``data`` is read: ``'rating'`` is taken as a
            single value, every other field as an iterable of values.

    Returns:
        The same ``filters`` mapping that was passed in.

    Missing, ``None`` and empty fields contribute nothing. Empty values and
    values listed in ``_EXCLUDED_FILTER_VALUES`` are never added.
    """
    for key, value in filters.items():
        raw = data.get(key)
        if not raw:
            # Nothing usable for this field (absent, None, '' or []).
            continue
        # A bare string under a list field is kept whole rather than being
        # iterated character by character.
        if key == 'rating' or isinstance(raw, str):
            candidates = [raw]
        else:
            candidates = raw
        for val in candidates:
            if val and val not in _EXCLUDED_FILTER_VALUES:
                value.add(val)
    return filters
def clean_filters(filters):
    """Finalize the collected filter sets so they can be written as JSON.

    Every set stored in ``filters`` first receives the catch-all option
    ``'ALL'`` (the set object itself is modified) and is then replaced in
    the mapping by a list of its members. The mapping is changed in place
    and also returned.
    """
    for field in filters:
        options = filters[field]
        options.add('ALL')
        # Re-binding an existing key does not resize the dict, so doing it
        # while iterating is safe.
        filters[field] = [*options]
    return filters
if __name__ == '__main__':
    # Build script: reads every JSON record in ./anime, embeds a textual
    # summary of its metadata plus each of its reviews with `model`, collects
    # the filter values, and writes everything to ./embeddings/data.json.
    print('Embedding Started')
    filters = {
        'genres': set(),
        'themes': set(),
        'rating': set()
    }
    embeddings = {}
    # Make sure the output folder exists before the embedding work starts,
    # so a missing directory cannot cost the whole run at the final write.
    os.makedirs('./embeddings', exist_ok=True)
    for name in os.listdir('./anime'):
        # Only JSON records are processed; other directory entries
        # (sub-folders, OS metadata files, ...) would make json.load fail.
        if not name.endswith('.json'):
            continue
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open(f"./anime/{name}", 'r', encoding='utf-8') as file:
            data = json.load(file)
        if not data:
            continue
        filters = update_filters(filters, data)
        # Strip only the extension (str.replace would remove every
        # occurrence of '.json' in the file name).
        name = os.path.splitext(name)[0]
        data['image'] = f"./images/{name}.jpg"
        # NOTE(review): 'source' is joined like the list-valued fields; if it
        # is a plain string in the data, this puts a space between each of
        # its characters -- confirm against the records.
        text = f'''
This anime has {data['episodes']} Episodes |
This anime premiered on {data['premiered']} |
This anime was broadcasted on: {data['broadcast']} |
This anime was produced by {' '.join(data['producers'])} |
This anime was licensed by Licensors: {' '.join(data['licensors'])} |
The studios in charge of this anime was {' '.join(data['studios'])} |
The source of this anime was {' '.join(data['source'])} |
The genres of this anime are {' '.join(data['genres'])} |
The themes of this anime are {' '.join(data['themes'])} |
The demographic of this anime is {data['demographic']} |
The duration of this anime is {data['duration']} |
The rating of this anime is {data['rating']} |
The description of this anime is {data['description']}'''
        record = data.copy()
        # Stored as a one-element list, whereas the review embeddings below
        # form a list with one entry per review.
        record['objective_embedding'] = [model.encode(text).tolist()]
        record['subjective_embeddings'] = [
            model.encode(review['text']).tolist()
            for review in record['reviews']
        ]
        embeddings[name] = record
    filters = clean_filters(filters)
    with open('./embeddings/data.json', 'w', encoding='utf-8') as f:
        json.dump({'embeddings': embeddings, 'filters': filters}, f)
    print('Embedding Complete')