File size: 2,135 Bytes
49af15e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
Fine-tuned [Bert-Base-Chinese](https://huggingface.co/bert-base-chinese) for NER task on [Adapting/chinese_biomedical_NER_dataset](https://huggingface.co/datasets/Adapting/chinese_biomedical_NER_dataset)

# Usage
```python
from transformers import AutoTokenizer, AutoModelForTokenClassification
tokenizer = AutoTokenizer.from_pretrained("Adapting/bert-base-chinese-finetuned-NER-biomedical")
model = AutoModelForTokenClassification.from_pretrained("Adapting/bert-base-chinese-finetuned-NER-biomedical",revision='7f63e3d18b1dc3cc23041a89e77be21860704d2e')

from transformers import pipeline
nlp = pipeline('ner',model=model,tokenizer = tokenizer)

tag_set = [
 'B_手术',
 'I_疾病和诊断',
 'B_症状',
 'I_解剖部位',
 'I_药物',
 'B_影像检查',
 'B_药物',
 'B_疾病和诊断',
 'I_影像检查',
 'I_手术',
 'B_解剖部位',
 'O',
 'B_实验室检验',
 'I_症状',
 'I_实验室检验'
 ]
 
tag2id = lambda tag: tag_set.index(tag)
id2tag = lambda id: tag_set[id]

def readable_result(result):

    results_in_word = []
    j = 0
    while j < len(result):   
        i = result[j]
        entity = id2tag(int(i['entity'][i['entity'].index('_')+1:]))
        token = i['word']
        if entity.startswith('B'):
            entity_name = entity[entity.index('_')+1:]

            word = token
            j = j+1
            while j<len(result):
                next = result[j]
                next_ent = id2tag(int(next['entity'][next['entity'].index('_')+1:]))
                next_token = next['word']

                if next_ent.startswith('I') and next_ent[next_ent.index('_')+1:] == entity_name:
                    word += next_token
                    j += 1

                    if j >= len(result):
                        results_in_word.append((entity_name,word))
                else:
                    results_in_word.append((entity_name,word))
                    break

        else:
            j += 1
    
    return results_in_word


        
print(readable_result(nlp('淋球菌性尿道炎会引起头痛')))

'''
[('疾病和诊断', '淋球菌性尿道炎'), ('症状', '头痛')]
'''
```