Spaces:
Running
Running
jianghuyihei
committed on
Commit
•
789383a
1
Parent(s):
863d8a3
fix
Browse files
Files changed: searcher/sementic_search.py (+13 -3)
searcher/sementic_search.py
CHANGED
@@ -132,7 +132,11 @@ class SementicSearcher:
|
|
132 |
return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
|
133 |
|
134 |
def read_arxiv_from_path(self, pdf_path):
|
135 |
-
|
|
|
|
|
|
|
|
|
136 |
return article_dict
|
137 |
|
138 |
async def get_paper_embbeding_and_score_async(self,query_embedding, paper,llm):
|
@@ -281,7 +285,10 @@ Abstract: {paper['abstract']}
|
|
281 |
abstract = result['abstract']
|
282 |
citationCount = result['citationCount']
|
283 |
year = result['year']
|
284 |
-
|
|
|
|
|
|
|
285 |
if not article:
|
286 |
continue
|
287 |
final_results.append(Result(title,abstract,article,citationCount,year))
|
@@ -350,7 +357,10 @@ Abstract: {paper['abstract']}
|
|
350 |
url = paper[2]
|
351 |
content = await self.download_pdf_async(url)
|
352 |
if content:
|
353 |
-
|
|
|
|
|
|
|
354 |
if not article:
|
355 |
continue
|
356 |
result = Result(paper[0],paper[1],article,paper[3],paper[4])
|
|
|
132 |
return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
|
133 |
|
134 |
def read_arxiv_from_path(self, pdf_path):
    """Parse the PDF at ``pdf_path`` into an article dictionary.

    Args:
        pdf_path: Passed unchanged to ``scipdf.parse_pdf_to_dict``.

    Returns:
        The value returned by ``scipdf.parse_pdf_to_dict``, or ``None``
        when parsing raises any ``Exception``.
    """
    try:
        article_dict = scipdf.parse_pdf_to_dict(pdf_path)
    except Exception as e:
        # Deliberate best-effort boundary: a single unparsable PDF must not
        # abort the caller, so the failure is reported and None is returned.
        # The cause is included so different failure modes can be told apart
        # from the output (previously the exception was silently dropped).
        print(f"Failed to parse the PDF file: {pdf_path} ({e!r})")
        return None
    return article_dict
|
141 |
|
142 |
async def get_paper_embbeding_and_score_async(self,query_embedding, paper,llm):
|
|
|
285 |
abstract = result['abstract']
|
286 |
citationCount = result['citationCount']
|
287 |
year = result['year']
|
288 |
+
try:
|
289 |
+
article = scipdf.parse_pdf_to_dict(content)
|
290 |
+
except Exception as e:
|
291 |
+
article = None
|
292 |
if not article:
|
293 |
continue
|
294 |
final_results.append(Result(title,abstract,article,citationCount,year))
|
|
|
357 |
url = paper[2]
|
358 |
content = await self.download_pdf_async(url)
|
359 |
if content:
|
360 |
+
try:
|
361 |
+
article = scipdf.parse_pdf_to_dict(content)
|
362 |
+
except Exception as e:
|
363 |
+
article = None
|
364 |
if not article:
|
365 |
continue
|
366 |
result = Result(paper[0],paper[1],article,paper[3],paper[4])
|