Adapting committed on
Commit
49af15e
1 Parent(s): cf142fd

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +73 -0
README.md ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Fine-tuned [Bert-Base-Chinese](https://huggingface.co/bert-base-chinese) for the NER task on the [Adapting/chinese_biomedical_NER_dataset](https://huggingface.co/datasets/Adapting/chinese_biomedical_NER_dataset) dataset.
2
+
3
+ # Usage
4
+ ```python
5
# Build the NER pipeline from the fine-tuned checkpoint on the Hugging Face Hub.
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

tokenizer = AutoTokenizer.from_pretrained(
    "Adapting/bert-base-chinese-finetuned-NER-biomedical"
)
# The model weights are pinned to an explicit revision; the tokenizer is not.
model = AutoModelForTokenClassification.from_pretrained(
    "Adapting/bert-base-chinese-finetuned-NER-biomedical",
    revision='7f63e3d18b1dc3cc23041a89e77be21860704d2e',
)

# Token-level NER pipeline; its raw output is post-processed by readable_result.
nlp = pipeline('ner', model=model, tokenizer=tokenizer)
11
+
12
# Label vocabulary. The ORDER matters: the position of a tag in this list is
# the numeric id that appears in the pipeline's 'LABEL_<id>' entity strings.
tag_set = [
    'B_手术',
    'I_疾病和诊断',
    'B_症状',
    'I_解剖部位',
    'I_药物',
    'B_影像检查',
    'B_药物',
    'B_疾病和诊断',
    'I_影像检查',
    'I_手术',
    'B_解剖部位',
    'O',
    'B_实验室检验',
    'I_症状',
    'I_实验室检验',
]


def tag2id(tag):
    """Return the numeric id of *tag* (its position in ``tag_set``).

    Raises ValueError if the tag is not in ``tag_set``.
    """
    return tag_set.index(tag)


def id2tag(id):  # parameter name kept as in the original for compatibility
    """Return the tag stored at position *id* of ``tag_set``."""
    return tag_set[id]


def _decode_tag(label):
    """Turn a label such as ``'LABEL_7'`` into its tag (``'B_疾病和诊断'``).

    Everything after the first underscore is parsed as the numeric tag id.
    """
    return id2tag(int(label.partition('_')[2]))


def readable_result(result):
    """Merge token-level predictions into ``(entity_name, text)`` tuples.

    Args:
        result: sequence of dicts, each with an ``'entity'`` key holding a
            label of the form ``'<prefix>_<id>'`` and a ``'word'`` key holding
            the token text.

    Returns:
        A list of ``(entity_name, text)`` tuples in order of appearance. An
        entity starts at a ``B_<name>`` token and extends over the directly
        following ``I_<name>`` tokens of the same name. Tokens outside such a
        span (``O`` tags or stray ``I_`` tags) are ignored.
    """
    entities = []
    total = len(result)
    pos = 0
    while pos < total:
        tag = _decode_tag(result[pos]['entity'])
        if not tag.startswith('B'):
            pos += 1
            continue

        entity_name = tag.partition('_')[2]
        pieces = [result[pos]['word']]
        pos += 1

        # Absorb every directly following continuation token of this entity.
        continuation = 'I_' + entity_name
        while pos < total and _decode_tag(result[pos]['entity']) == continuation:
            pieces.append(result[pos]['word'])
            pos += 1

        # Always emit the entity here. The previous version only appended
        # from inside the continuation loop, so an entity whose B_ token was
        # the very last prediction was silently dropped.
        entities.append((entity_name, ''.join(pieces)))

    return entities
65
+
66
+
67
+
68
# Run the pipeline on a sample sentence and print the merged entities.
print(
    readable_result(
        nlp('淋球菌性尿道炎会引起头痛')
    )
)

# Output listed by the README for the call above:
'''
[('疾病和诊断', '淋球菌性尿道炎'), ('症状', '头痛')]
'''
73
+ ```