change method names
Browse files
README.md
CHANGED
@@ -35,22 +35,89 @@ nlp.add_pipe(entityLinker, last=True, name="entityLinker")
|
|
35 |
|
36 |
doc = nlp("I watched the Pirates of the Carribean last silvester")
|
37 |
|
38 |
-
|
39 |
#returns all entities in the whole document
|
40 |
all_linked_entities=doc._.linkedEntities
|
41 |
#iterates over sentences and prints linked entities
|
42 |
for sent in doc.sents:
|
43 |
sent._.linkedEntities.pretty_print()
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
48 |
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
## Example
|
53 |
-
In the following example we will use SpacyEntityLinker to
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
|
56 |
### Entity Linking Policy
|
|
|
35 |
|
36 |
doc = nlp("I watched the Pirates of the Carribean last silvester")
|
37 |
|
|
|
38 |
#returns all entities in the whole document
|
39 |
all_linked_entities=doc._.linkedEntities
|
40 |
#iterates over sentences and prints linked entities
|
41 |
for sent in doc.sents:
|
42 |
sent._.linkedEntities.pretty_print()
|
43 |
+
|
44 |
+
#OUTPUT:
|
45 |
+
#https://www.wikidata.org/wiki/Q194318 194318 Pirates of the Caribbean Series of fantasy adventure films
|
46 |
+
#https://www.wikidata.org/wiki/Q12525597 12525597 Silvester the day celebrated on 31 December (Roman Catholic Church) or 2 January (Eastern Orthodox Churches)
|
47 |
+
|
48 |
+
```
|
49 |
|
50 |
+
### EntityCollection
|
51 |
+
contains an array of entity elements. It can be accessed like an array but also implements the following
|
52 |
+
helper functions:
|
53 |
+
- <code>pretty_print()</code> prints out information about all contained entities
|
54 |
+
- <code>print_super_entities()</code> groups and prints all entities by their super class
|
55 |
|
56 |
+
```python
|
57 |
+
doc = nlp("Elon Musk was born in South Africa. Bill Gates and Steve Jobs come from the United States")
|
58 |
+
doc._.linkedEntities.print_super_entities()
|
59 |
+
#OUTPUT:
|
60 |
+
#human (3) : Elon Musk,Bill Gates,Steve Jobs
|
61 |
+
#country (2) : South Africa,United States of America
|
62 |
+
#sovereign state (2) : South Africa,United States of America
|
63 |
+
#federal state (1) : United States of America
|
64 |
+
#constitutional republic (1) : United States of America
|
65 |
+
#democratic republic (1) : United States of America
|
66 |
```
|
67 |
+
### EntityElement
|
68 |
+
each linked Entity is an object of type <code>EntityElement</code>. Each entity contains the methods
|
69 |
+
|
70 |
+
- <code>get_description()</code> returns description from Wikidata
|
71 |
+
- <code>get_id()</code> returns Wikidata ID
|
72 |
+
- <code>get_label()</code> returns Wikidata label
|
73 |
+
- <code>get_span()</code> returns the span from the spacy document that contains the linked entity
|
74 |
+
- <code>get_url()</code> returns the url to the corresponding Wikidata item
|
75 |
+
- <code>pretty_print()</code> prints out information about the entity element
|
76 |
+
- <code>get_sub_entities(limit=10)</code> returns EntityCollection of all entities that derive from the current entityElement (e.g. fruit -> apple, banana, etc.)
|
77 |
+
- <code>get_super_entities(limit=10)</code> returns EntityCollection of all entities that the current entityElement derives from (e.g. New England Patriots -> Football Team)
|
78 |
|
79 |
## Example
|
80 |
+
In the following example we will use SpacyEntityLinker to find the mentioned Football Team in our text
|
81 |
+
and explore other football teams of the same type
|
82 |
+
|
83 |
+
```python
|
84 |
+
|
85 |
+
doc = nlp("I follow the New England Patriots")
|
86 |
+
|
87 |
+
patriots_entity=doc._.linkedEntities[0]
|
88 |
+
patriots_entity.pretty_print()
|
89 |
+
#OUTPUT:
|
90 |
+
#https://www.wikidata.org/wiki/Q193390
|
91 |
+
#193390
|
92 |
+
#New England Patriots
|
93 |
+
#National Football League franchise in Foxborough, Massachusetts
|
94 |
+
|
95 |
+
football_team_entity=patriots_entity.get_super_entities()[0]
|
96 |
+
football_team_entity.pretty_print()
|
97 |
+
#OUTPUT:
|
98 |
+
#https://www.wikidata.org/wiki/Q17156793
|
99 |
+
#17156793
|
100 |
+
#American football team
|
101 |
+
#organization, in which a group of players are organized to compete as a team in American football
|
102 |
+
|
103 |
+
|
104 |
+
for child in football_team_entity.get_sub_entities(limit=32):
|
105 |
+
print(child)
|
106 |
+
#OUTPUT:
|
107 |
+
#New Orleans Saints
|
108 |
+
#New York Giants
|
109 |
+
#Pittsburgh Steelers
|
110 |
+
#New England Patriots
|
111 |
+
#Indianapolis Colts
|
112 |
+
#Miami Seahawks
|
113 |
+
#Dallas Cowboys
|
114 |
+
#Chicago Bears
|
115 |
+
#Washington Redskins
|
116 |
+
#Green Bay Packers
|
117 |
+
#...
|
118 |
+
```
|
119 |
+
|
120 |
+
</pre>
|
121 |
|
122 |
|
123 |
### Entity Linking Policy
|
spacyEntityLinker/DatabaseConnection.py
CHANGED
@@ -11,7 +11,7 @@ conn = None
|
|
11 |
entity_cache = {}
|
12 |
chain_cache = {}
|
13 |
|
14 |
-
DB_DEFAULT_PATH = os.path.abspath('
|
15 |
|
16 |
wikidata_instance = None
|
17 |
|
@@ -93,11 +93,11 @@ class WikidataQueryController:
|
|
93 |
|
94 |
if res and len(res):
|
95 |
if res[0] == None:
|
96 |
-
self.
|
97 |
else:
|
98 |
-
self.
|
99 |
else:
|
100 |
-
self.
|
101 |
|
102 |
return self._get_cached_value("name", item_id)
|
103 |
|
|
|
11 |
entity_cache = {}
|
12 |
chain_cache = {}
|
13 |
|
14 |
+
DB_DEFAULT_PATH = os.path.abspath(__file__ + '/../../data_spacy_entity_linker/wikidb_filtered.db')
|
15 |
|
16 |
wikidata_instance = None
|
17 |
|
|
|
93 |
|
94 |
if res and len(res):
|
95 |
if res[0] == None:
|
96 |
+
self._add_to_cache("name", item_id, 'no label')
|
97 |
else:
|
98 |
+
self._add_to_cache("name", item_id, res[0])
|
99 |
else:
|
100 |
+
self._add_to_cache("name", item_id, '<none>')
|
101 |
|
102 |
return self._get_cached_value("name", item_id)
|
103 |
|
spacyEntityLinker/EntityCollection.py
CHANGED
@@ -27,7 +27,7 @@ class EntityCollection:
|
|
27 |
|
28 |
return categories
|
29 |
|
30 |
-
def
|
31 |
wikidataInstance = get_wikidata_instance()
|
32 |
|
33 |
all_categories = []
|
@@ -49,7 +49,7 @@ class EntityCollection:
|
|
49 |
for entity in self.entities:
|
50 |
entity.pretty_print()
|
51 |
|
52 |
-
def
|
53 |
counter = Counter()
|
54 |
counter.update(self.get_categories(max_depth))
|
55 |
|
@@ -57,9 +57,3 @@ class EntityCollection:
|
|
57 |
|
58 |
def get_distinct_categories(self, max_depth=1):
|
59 |
return list(set(self.get_categories(max_depth)))
|
60 |
-
|
61 |
-
def most_frequent_categories(self):
|
62 |
-
pass
|
63 |
-
|
64 |
-
def get_most_significant_categories(self, priors):
|
65 |
-
pass
|
|
|
27 |
|
28 |
return categories
|
29 |
|
30 |
+
def print_super_entities(self, max_depth=1, limit=10):
|
31 |
wikidataInstance = get_wikidata_instance()
|
32 |
|
33 |
all_categories = []
|
|
|
49 |
for entity in self.entities:
|
50 |
entity.pretty_print()
|
51 |
|
52 |
+
def grouped_by_super_entities(self, max_depth=1):
|
53 |
counter = Counter()
|
54 |
counter.update(self.get_categories(max_depth))
|
55 |
|
|
|
57 |
|
58 |
def get_distinct_categories(self, max_depth=1):
|
59 |
return list(set(self.get_categories(max_depth)))
|
|
|
|
|
|
|
|
|
|
|
|
spacyEntityLinker/EntityElement.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
from spacyEntityLinker.DatabaseConnection import get_wikidata_instance
|
|
|
2 |
|
3 |
|
4 |
class EntityElement:
|
@@ -61,11 +62,13 @@ class EntityElement:
|
|
61 |
def get_categories(self, max_depth=10):
|
62 |
return self.wikidata_instance.get_categories(self.identifier, max_depth=max_depth)
|
63 |
|
64 |
-
def
|
65 |
-
return
|
|
|
66 |
|
67 |
-
def
|
68 |
-
return
|
|
|
69 |
|
70 |
def get_subclass_hierarchy(self):
|
71 |
chain = self.wikidata_instance.get_chain(self.identifier, max_depth=5, property=279)
|
@@ -122,3 +125,6 @@ class EntityElement:
|
|
122 |
return label
|
123 |
else:
|
124 |
return ""
|
|
|
|
|
|
|
|
1 |
from spacyEntityLinker.DatabaseConnection import get_wikidata_instance
|
2 |
+
from spacyEntityLinker.EntityCollection import EntityCollection
|
3 |
|
4 |
|
5 |
class EntityElement:
|
|
|
62 |
def get_categories(self, max_depth=10):
|
63 |
return self.wikidata_instance.get_categories(self.identifier, max_depth=max_depth)
|
64 |
|
65 |
+
def get_sub_entities(self, limit=10):
|
66 |
+
return EntityCollection(
|
67 |
+
[EntityElement(row, None) for row in self.wikidata_instance.get_children(self.get_id(), limit)])
|
68 |
|
69 |
+
def get_super_entities(self, limit=10):
|
70 |
+
return EntityCollection(
|
71 |
+
[EntityElement(row, None) for row in self.wikidata_instance.get_parents(self.get_id(), limit)])
|
72 |
|
73 |
def get_subclass_hierarchy(self):
|
74 |
chain = self.wikidata_instance.get_chain(self.identifier, max_depth=5, property=279)
|
|
|
125 |
return label
|
126 |
else:
|
127 |
return ""
|
128 |
+
|
129 |
+
def __eq__(self, other):
|
130 |
+
return isinstance(other, EntityElement) and other.get_id() == self.get_id()
|
spacyEntityLinker/__init__.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
from .EntityLinker import EntityLinker
|
2 |
|
3 |
-
__version__ = '0.0.
|
4 |
__all__ = [EntityLinker]
|
|
|
1 |
from .EntityLinker import EntityLinker
|
2 |
|
3 |
+
__version__ = '0.0.5'
|
4 |
__all__ = [EntityLinker]
|
tests/test_EntityLinker.py
CHANGED
@@ -14,10 +14,11 @@ class TestEntityLinker(unittest.TestCase):
|
|
14 |
|
15 |
self.nlp.add_pipe(entityLinker, last=True, name="entityLinker")
|
16 |
|
17 |
-
doc = self.nlp(
|
|
|
18 |
|
19 |
doc._.linkedEntities.pretty_print()
|
20 |
-
|
21 |
for sent in doc.sents:
|
22 |
sent._.linkedEntities.pretty_print()
|
23 |
|
|
|
14 |
|
15 |
self.nlp.add_pipe(entityLinker, last=True, name="entityLinker")
|
16 |
|
17 |
+
doc = self.nlp(
|
18 |
+
"Elon Musk was born in South Africa. Bill Gates and Steve Jobs come from in the United States")
|
19 |
|
20 |
doc._.linkedEntities.pretty_print()
|
21 |
+
doc._.linkedEntities.print_super_entities()
|
22 |
for sent in doc.sents:
|
23 |
sent._.linkedEntities.pretty_print()
|
24 |
|