from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline
import gradio as gr
import torch

# Hub identifier of the checkpoint used for both the model and its tokenizer.
MODEL_NAME = 'uer/roberta-base-finetuned-cluener2020-chinese'

# Pass local_files_only=True to both from_pretrained() calls to load the
# checkpoint from the local cache without touching the network.
model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
ner_pipeline = pipeline('ner', model=model, tokenizer=tokenizer)

# Sample inputs offered in the Gradio UI. The second example is one long
# string assembled from adjacent literals (no separators are inserted).
examples = [
    "江苏警方通报特斯拉冲进店铺",
    (
        "李沐,深度学习专家。"
        "李沐于2004年进入上海交通大学计算机科学与工程系进行本科学习;2009年至2010年担任香港科技大学研究助理;2011年至2012年担任百度高级研究员;2012年至2017年在美国卡内基梅隆大学攻读博士学位。2019年编著的《动手学深度学习》出版。"
        "李沐专注于分布式系统和机器学习算法的研究。"
    ),
]


def _strip_tag_prefix(label):
    """Return *label* without a leading two-character ``X-`` prefix.

    Labels whose second character is not ``-`` (including labels shorter
    than two characters) are returned unchanged.
    """
    # Slicing instead of indexing keeps one-character labels from raising.
    return label[2:] if label[1:2] == '-' else label


def ner(text):
    """Run the NER pipeline on *text* and merge adjacent same-type tokens.

    Each token-level prediction has its ``X-`` tag prefix removed. A token is
    then merged into the previous span when it starts exactly where that span
    ends and carries the same entity type.

    Args:
        text: The sentence to analyse.

    Returns:
        A dict ``{"text": text, "entities": spans}`` where every span has the
        keys ``entity``, ``score``, ``index``, ``word``, ``start`` and ``end``.
        ``entities`` is an empty list when the pipeline yields no predictions.
    """
    spans = []
    for token in ner_pipeline(text):
        # Work on a copy so the pipeline's own result dicts are not mutated.
        current = dict(token, entity=_strip_tag_prefix(token['entity']))
        previous = spans[-1] if spans else None

        # NOTE: because the prefix is discarded before comparing, two
        # directly adjacent entities of the same type end up as one span.
        if (
            previous is not None
            and previous['end'] == current['start']
            and previous['entity'] == current['entity']
        ):
            spans[-1] = {
                'entity': previous['entity'],
                # A merged span is only as confident as its weakest token.
                'score': min(previous['score'], current['score']),
                'index': current['index'],
                'word': previous['word'] + current['word'],
                'start': previous['start'],
                'end': current['end'],
            }
        else:
            spans.append(current)

    return {"text": text, "entities": spans}


demo = gr.Interface(
    ner,
    gr.Textbox(placeholder="Enter sentence here..."),
    gr.HighlightedText(),
    examples=examples,
)

demo.launch()