egerber1 committed on
Commit
24a93ab
1 Parent(s): 8b513d0

change method names

Browse files
README.md CHANGED
@@ -35,22 +35,89 @@ nlp.add_pipe(entityLinker, last=True, name="entityLinker")
35
 
36
  doc = nlp("I watched the Pirates of the Carribean last silvester")
37
 
38
-
39
  #returns all entities in the whole document
40
  all_linked_entities=doc._.linkedEntities
41
  #iterates over sentences and prints linked entities
42
  for sent in doc.sents:
43
  sent._.linkedEntities.pretty_print()
 
 
 
 
 
 
44
 
45
- '''
46
- https://www.wikidata.org/wiki/Q194318 194318 Pirates of the Caribbean Series of fantasy adventure films
47
- https://www.wikidata.org/wiki/Q12525597 12525597 Silvester the day celebrated on 31 December (Roman Catholic Church) or 2 January (Eastern Orthodox Churches)
 
 
48
 
49
- '''
 
 
 
 
 
 
 
 
 
50
  ```
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  ## Example
53
- In the following example we will use SpacyEntityLinker to extract all
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
 
56
  ### Entity Linking Policy
 
35
 
36
  doc = nlp("I watched the Pirates of the Carribean last silvester")
37
 
 
38
  #returns all entities in the whole document
39
  all_linked_entities=doc._.linkedEntities
40
  #iterates over sentences and prints linked entities
41
  for sent in doc.sents:
42
  sent._.linkedEntities.pretty_print()
43
+
44
+ #OUTPUT:
45
+ #https://www.wikidata.org/wiki/Q194318 194318 Pirates of the Caribbean Series of fantasy adventure films
46
+ #https://www.wikidata.org/wiki/Q12525597 12525597 Silvester the day celebrated on 31 December (Roman Catholic Church) or 2 January (Eastern Orthodox Churches)
47
+
48
+ ```
49
 
50
+ ### EntityCollection
51
+ contains an array of entity elements. It can be accessed like an array but also implements the following
52
+ helper functions:
53
+ - <code>pretty_print()</code> prints out information about all contained entities
54
+ - <code>print_super_entities()</code> groups and prints all entities by their super class
55
 
56
+ ```python
57
+ doc = nlp("Elon Musk was born in South Africa. Bill Gates and Steve Jobs come from the United States")
58
+ doc._.linkedEntities.print_super_entities()
59
+ #OUTPUT:
60
+ #human (3) : Elon Musk,Bill Gates,Steve Jobs
61
+ #country (2) : South Africa,United States of America
62
+ #sovereign state (2) : South Africa,United States of America
63
+ #federal state (1) : United States of America
64
+ #constitutional republic (1) : United States of America
65
+ #democratic republic (1) : United States of America
66
  ```
67
+ ### EntityElement
68
+ each linked Entity is an object of type <code>EntityElement</code>. Each entity contains the methods
69
+
70
+ - <code>get_description()</code> returns description from Wikidata
71
+ - <code>get_id()</code> returns Wikidata ID
72
+ - <code>get_label()</code> returns Wikidata label
73
+ - <code>get_span()</code> returns the span from the spacy document that contains the linked entity
74
+ - <code>get_url()</code> returns the url to the corresponding Wikidata item
75
+ - <code>pretty_print()</code> prints out information about the entity element
76
+ - <code>get_sub_entities(limit=10)</code> returns EntityCollection of all entities that derive from the current entityElement (e.g. fruit -> apple, banana, etc.)
77
+ - <code>get_super_entities(limit=10)</code> returns EntityCollection of all entities that the current entityElement derives from (e.g. New England Patriots -> Football Team)
78
 
79
  ## Example
80
+ In the following example we will use SpacyEntityLinker to find the mentioned Football Team in our text
81
+ and explore other football teams of the same type
82
+
83
+ ```python
84
+
85
+ doc = nlp("I follow the New England Patriots")
86
+
87
+ patriots_entity=doc._.linkedEntities[0]
88
+ patriots_entity.pretty_print()
89
+ #OUTPUT:
90
+ #https://www.wikidata.org/wiki/Q193390
91
+ #193390
92
+ #New England Patriots
93
+ #National Football League franchise in Foxborough, Massachusetts
94
+
95
+ football_team_entity=patriots_entity.get_super_entities()[0]
96
+ football_team_entity.pretty_print()
97
+ #OUTPUT:
98
+ #https://www.wikidata.org/wiki/Q17156793
99
+ #17156793
100
+ #American football team
101
+ #organization, in which a group of players are organized to compete as a team in American football
102
+
103
+
104
+ for child in football_team_entity.get_sub_entities(limit=32):
105
+ print(child)
106
+ #OUTPUT:
107
+ #New Orleans Saints
108
+ #New York Giants
109
+ #Pittsburgh Steelers
110
+ #New England Patriots
111
+ #Indianapolis Colts
112
+ #Miami Seahawks
113
+ #Dallas Cowboys
114
+ #Chicago Bears
115
+ #Washington Redskins
116
+ #Green Bay Packers
117
+ #...
118
+ ```
119
+
120
+ </pre>
121
 
122
 
123
  ### Entity Linking Policy
spacyEntityLinker/DatabaseConnection.py CHANGED
@@ -11,7 +11,7 @@ conn = None
11
  entity_cache = {}
12
  chain_cache = {}
13
 
14
- DB_DEFAULT_PATH = os.path.abspath('../data_spacy_entity_linker/wikidb_filtered.db')
15
 
16
  wikidata_instance = None
17
 
@@ -93,11 +93,11 @@ class WikidataQueryController:
93
 
94
  if res and len(res):
95
  if res[0] == None:
96
- self._append_chain_elements("name", item_id, 'no label')
97
  else:
98
- self._append_chain_elements("name", item_id, res[0])
99
  else:
100
- self._append_chain_elements("name", item_id, '<none>')
101
 
102
  return self._get_cached_value("name", item_id)
103
 
 
11
  entity_cache = {}
12
  chain_cache = {}
13
 
14
+ DB_DEFAULT_PATH = os.path.abspath(__file__ + '/../../data_spacy_entity_linker/wikidb_filtered.db')
15
 
16
  wikidata_instance = None
17
 
 
93
 
94
  if res and len(res):
95
  if res[0] == None:
96
+ self._add_to_cache("name", item_id, 'no label')
97
  else:
98
+ self._add_to_cache("name", item_id, res[0])
99
  else:
100
+ self._add_to_cache("name", item_id, '<none>')
101
 
102
  return self._get_cached_value("name", item_id)
103
 
spacyEntityLinker/EntityCollection.py CHANGED
@@ -27,7 +27,7 @@ class EntityCollection:
27
 
28
  return categories
29
 
30
- def print_categories(self, max_depth=1, limit=10):
31
  wikidataInstance = get_wikidata_instance()
32
 
33
  all_categories = []
@@ -49,7 +49,7 @@ class EntityCollection:
49
  for entity in self.entities:
50
  entity.pretty_print()
51
 
52
- def grouped_by_category(self, max_depth=1):
53
  counter = Counter()
54
  counter.update(self.get_categories(max_depth))
55
 
@@ -57,9 +57,3 @@ class EntityCollection:
57
 
58
  def get_distinct_categories(self, max_depth=1):
59
  return list(set(self.get_categories(max_depth)))
60
-
61
- def most_frequent_categories(self):
62
- pass
63
-
64
- def get_most_significant_categories(self, priors):
65
- pass
 
27
 
28
  return categories
29
 
30
+ def print_super_entities(self, max_depth=1, limit=10):
31
  wikidataInstance = get_wikidata_instance()
32
 
33
  all_categories = []
 
49
  for entity in self.entities:
50
  entity.pretty_print()
51
 
52
+ def grouped_by_super_entities(self, max_depth=1):
53
  counter = Counter()
54
  counter.update(self.get_categories(max_depth))
55
 
 
57
 
58
  def get_distinct_categories(self, max_depth=1):
59
  return list(set(self.get_categories(max_depth)))
 
 
 
 
 
 
spacyEntityLinker/EntityElement.py CHANGED
@@ -1,4 +1,5 @@
1
  from spacyEntityLinker.DatabaseConnection import get_wikidata_instance
 
2
 
3
 
4
  class EntityElement:
@@ -61,11 +62,13 @@ class EntityElement:
61
  def get_categories(self, max_depth=10):
62
  return self.wikidata_instance.get_categories(self.identifier, max_depth=max_depth)
63
 
64
- def get_children(self, limit=10):
65
- return [EntityElement(row, None) for row in self.wikidata_instance.get_children(self.get_id(), limit)]
 
66
 
67
- def get_parents(self, limit=10):
68
- return [EntityElement(row, None) for row in self.wikidata_instance.get_parents(self.get_id(), limit)]
 
69
 
70
  def get_subclass_hierarchy(self):
71
  chain = self.wikidata_instance.get_chain(self.identifier, max_depth=5, property=279)
@@ -122,3 +125,6 @@ class EntityElement:
122
  return label
123
  else:
124
  return ""
 
 
 
 
1
  from spacyEntityLinker.DatabaseConnection import get_wikidata_instance
2
+ from spacyEntityLinker.EntityCollection import EntityCollection
3
 
4
 
5
  class EntityElement:
 
62
  def get_categories(self, max_depth=10):
63
  return self.wikidata_instance.get_categories(self.identifier, max_depth=max_depth)
64
 
65
+ def get_sub_entities(self, limit=10):
66
+ return EntityCollection(
67
+ [EntityElement(row, None) for row in self.wikidata_instance.get_children(self.get_id(), limit)])
68
 
69
+ def get_super_entities(self, limit=10):
70
+ return EntityCollection(
71
+ [EntityElement(row, None) for row in self.wikidata_instance.get_parents(self.get_id(), limit)])
72
 
73
  def get_subclass_hierarchy(self):
74
  chain = self.wikidata_instance.get_chain(self.identifier, max_depth=5, property=279)
 
125
  return label
126
  else:
127
  return ""
128
+
129
+ def __eq__(self, other):
130
+ return isinstance(other, EntityElement) and other.get_id() == self.get_id()
spacyEntityLinker/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
  from .EntityLinker import EntityLinker
2
 
3
- __version__ = '0.0.2'
4
  __all__ = [EntityLinker]
 
1
  from .EntityLinker import EntityLinker
2
 
3
+ __version__ = '0.0.5'
4
  __all__ = [EntityLinker]
tests/test_EntityLinker.py CHANGED
@@ -14,10 +14,11 @@ class TestEntityLinker(unittest.TestCase):
14
 
15
  self.nlp.add_pipe(entityLinker, last=True, name="entityLinker")
16
 
17
- doc = self.nlp("I watched the Pirates of the Caribbean last silvester. Then I saw a snake. It was great.")
 
18
 
19
  doc._.linkedEntities.pretty_print()
20
-
21
  for sent in doc.sents:
22
  sent._.linkedEntities.pretty_print()
23
 
 
14
 
15
  self.nlp.add_pipe(entityLinker, last=True, name="entityLinker")
16
 
17
+ doc = self.nlp(
18
+ "Elon Musk was born in South Africa. Bill Gates and Steve Jobs come from in the United States")
19
 
20
  doc._.linkedEntities.pretty_print()
21
+ doc._.linkedEntities.print_super_entities()
22
  for sent in doc.sents:
23
  sent._.linkedEntities.pretty_print()
24