import mojimoji
import numpy as np
from transformers import AutoTokenizer, AutoModelForTokenClassification

import iob_util  # IOB tagging helpers; provides convert_iob_to_xml used below
model_name = "gabrielandrade2/point-to-span-estimation"

# Load the point-to-span estimation model and its tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)
text = "肥大型心⧫筋症、心房⧫細動に対してWF投与が開始となった。\ |
|
治療経過中に非持続性心⧫室頻拍が認められたためアミオダロンが併用となった。" |
|
|
|
|
|
# Normalize half-width characters to full-width
text = mojimoji.han_to_zen(text)

# Tokenize (token strings are not used below; kept for inspection) and encode
tokenized = tokenizer.tokenize(text)
input_ids = tokenizer.encode(text, return_tensors="pt")
# Run the model and pick the highest-scoring label id for each token
output = model(input_ids)
logits = output[0].detach().cpu().numpy()
tags = np.argmax(logits, axis=2)[0].tolist()
# Map label ids to their IOB tag strings
id2label = model.config.id2label
tags = [id2label[t] for t in tags]
# Recover the token strings and drop special tokens (and their tags)
tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
tags = [y for x, y in zip(tokens, tags) if x not in ['[CLS]', '[SEP]', '[PAD]']]
tokens = [x for x in tokens if x not in ['[CLS]', '[SEP]', '[PAD]']]
# Render the IOB-tagged tokens as XML-style span tags and strip the ⧫ markers
xml_text = iob_util.convert_iob_to_xml(tokens, tags)
xml_text = xml_text.replace('⧫', '')
print(xml_text)
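
# Optional: the same steps wrapped into one reusable function. A minimal
# sketch, not part of the original example: the name `point_to_span` and the
# torch.no_grad() guard are additions; it reuses the tokenizer, model, and
# iob_util imported above.
import torch

def point_to_span(text: str) -> str:
    """Expand ⧫ point annotations in `text` into XML span annotations."""
    text = mojimoji.han_to_zen(text)
    input_ids = tokenizer.encode(text, return_tensors="pt")
    with torch.no_grad():  # inference only, no gradients needed
        logits = model(input_ids)[0].cpu().numpy()
    tags = [model.config.id2label[t] for t in np.argmax(logits, axis=2)[0].tolist()]
    tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
    kept = [(x, y) for x, y in zip(tokens, tags) if x not in ['[CLS]', '[SEP]', '[PAD]']]
    tokens, tags = [x for x, _ in kept], [y for _, y in kept]
    return iob_util.convert_iob_to_xml(tokens, tags).replace('⧫', '')

# Usage, e.g.: print(point_to_span("肥大型心⧫筋症に対してWF投与が開始となった。"))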