Spaces:
Running
Running
File size: 5,572 Bytes
c5b0bb7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 |
# Semantic_Scholar.py
# Description: This file contains the functions to interact with the Semantic Scholar API
#
# Imports
from typing import List, Dict, Any
import requests
#
####################################################################################################
#
# Functions
# Constants
# Field-of-study names; presumably the choices offered for the
# `fields_of_study` filter of search_papers() -- confirm against the caller/UI.
FIELDS_OF_STUDY = [
    "Computer Science",
    "Medicine",
    "Chemistry",
    "Biology",
    "Materials Science",
    "Physics",
    "Geology",
    "Psychology",
    "Art",
    "History",
    "Geography",
    "Sociology",
    "Business",
    "Political Science",
    "Economics",
    "Philosophy",
    "Mathematics",
    "Engineering",
    "Environmental Science",
    "Agricultural and Food Sciences",
    "Education",
    "Law",
    "Linguistics",
]

# Publication-type names; presumably the choices offered for the
# `publication_types` filter of search_papers() -- confirm against the caller/UI.
PUBLICATION_TYPES = [
    "Review",
    "JournalArticle",
    "CaseReport",
    "ClinicalTrial",
    "Conference",
    "Dataset",
    "Editorial",
    "LettersAndComments",
    "MetaAnalysis",
    "News",
    "Study",
    "Book",
    "BookSection",
]
def search_papers(
    query: str,
    page: int,
    fields_of_study: List[str],
    publication_types: List[str],
    year_range: str,
    venue: str,
    min_citations: int,
    open_access_only: bool,
    limit: int = 10
) -> Dict[str, Any]:
    """Search for papers using the Semantic Scholar API with all available filters.

    Args:
        query: Free-text search query. Empty, whitespace-only or ``None``
            queries short-circuit to an empty result without a network call.
        page: Zero-based page index; the request offset is ``page * limit``.
            Negative or missing values are treated as page 0.
        fields_of_study: Values joined with commas into ``fieldsOfStudy``.
        publication_types: Values joined with commas into ``publicationTypes``.
        year_range: Either a single year (``"2020"``) or a dash-separated
            range (``"2016-2020"``, ``"2016-"``, ``"-2020"``). Anything else
            is ignored and no ``year`` filter is sent.
        venue: Sent as the ``venue`` filter when non-empty.
        min_citations: Sent as ``minCitationCount`` when non-zero.
        open_access_only: When true, ``openAccessPdf`` is sent with an empty
            value.
        limit: Page size (number of results requested).

    Returns:
        The decoded JSON response on success. On any request or decoding
        failure, a dict with an ``"error"`` message plus empty
        ``total``/``offset``/``data`` fields.
    """
    if not query or not query.strip():
        return {"total": 0, "offset": 0, "next": 0, "data": []}

    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    # int() guards against float page numbers (e.g. from numeric UI widgets);
    # max() guards against a negative offset.
    page_index = max(int(page or 0), 0)
    params = {
        "query": query,
        "offset": page_index * limit,
        "limit": limit,
        "fields": "title,abstract,year,citationCount,authors,venue,openAccessPdf,url,publicationTypes,publicationDate"
    }

    # Add optional filters
    if fields_of_study:
        params["fieldsOfStudy"] = ",".join(fields_of_study)
    if publication_types:
        params["publicationTypes"] = ",".join(publication_types)
    if venue:
        params["venue"] = venue
    if min_citations:
        params["minCitationCount"] = str(int(min_citations))
    if open_access_only:
        params["openAccessPdf"] = ""

    year_filter = (year_range or "").strip()
    if year_filter:
        if "-" in year_filter:
            bounds = [part.strip() for part in year_filter.split("-")]
            # Only "start-end" shapes with at least one bound are forwarded;
            # malformed ranges are deliberately dropped rather than raising.
            if len(bounds) == 2 and any(bounds):
                params["year"] = f"{bounds[0]}-{bounds[1]}"
        else:
            params["year"] = year_filter

    try:
        # A timeout keeps an unresponsive server from blocking the caller forever.
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSONDecodeError is not a RequestException subclass.
        return {"error": f"API Error: {str(e)}", "total": 0, "offset": 0, "data": []}
def get_paper_details(paper_id: str) -> Dict[str, Any]:
    """Get detailed information about a specific paper.

    Args:
        paper_id: Identifier appended to the ``/graph/v1/paper/`` endpoint
            path. Surrounding whitespace is stripped.

    Returns:
        The decoded JSON response on success, or ``{"error": "API Error: ..."}``
        when the identifier is empty, the request fails, or the body is not
        valid JSON.
    """
    paper_key = str(paper_id).strip() if paper_id is not None else ""
    if not paper_key:
        # Without an ID the URL would degrade to the bare collection path.
        return {"error": "API Error: No paper ID provided"}

    url = f"https://api.semanticscholar.org/graph/v1/paper/{paper_key}"
    params = {
        "fields": "title,abstract,year,citationCount,authors,venue,openAccessPdf,url,references,citations"
    }
    try:
        # A timeout keeps an unresponsive server from blocking the caller forever.
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSONDecodeError is not a RequestException subclass.
        return {"error": f"API Error: {str(e)}"}
def format_paper_info(paper: Dict[str, Any]) -> str:
    """Format paper information for display.

    Fields that are missing *or* present with a ``None``/empty value fall
    back to a placeholder, because ``dict.get(key, default)`` only applies
    its default when the key is absent.

    Args:
        paper: A paper record; the keys read are ``title``, ``authors``
            (list of dicts with ``name``), ``year``, ``venue``,
            ``citationCount``, ``publicationTypes``, ``openAccessPdf``
            (dict with ``url``), ``url`` and ``abstract``.

    Returns:
        A multi-line string starting with a ``# <title>`` heading.
    """
    authors = ", ".join(
        author["name"]
        for author in (paper.get("authors") or [])
        if author and author.get("name")
    )
    title = paper.get("title") or "No Title"
    abstract = paper.get("abstract") or "No abstract available"
    year = f"Year: {paper.get('year') or 'N/A'}"
    venue = f"Venue: {paper.get('venue') or 'N/A'}"
    citations = f"Citations: {paper.get('citationCount') or 0}"
    pub_types = f"Types: {', '.join(paper.get('publicationTypes') or ['N/A'])}"

    # Only emit the PDF line when a non-empty URL is actually available.
    pdf_url = (paper.get("openAccessPdf") or {}).get("url")
    pdf_link = f"\nPDF: {pdf_url}" if pdf_url else ""
    s2_link = f"\nSemantic Scholar: {paper.get('url') or ''}"

    formatted = f"""# {title}
Authors: {authors}
{year} | {venue} | {citations}
{pub_types}
Abstract:
{abstract}
Links:{pdf_link}{s2_link}
"""
    return formatted
def search_and_display(
    query: str,
    page: int,
    fields_of_study: List[str],
    publication_types: List[str],
    year_range: str,
    venue: str,
    min_citations: int,
    open_access_only: bool
) -> tuple[str, int, int, str]:
    """Search for papers and return formatted results with pagination info.

    Args:
        query, page, fields_of_study, publication_types, year_range, venue,
        min_citations, open_access_only: Forwarded unchanged to
            ``search_papers``.

    Returns:
        A 4-tuple ``(text, current_page, last_page_index, total_as_str)``.
        ``text`` is the formatted papers joined by ``---`` separators plus a
        "Showing results" footer, or the error / "No results found." message
        (in which case the remaining items are ``0, 0, "0"``).
    """
    # Single source of truth for the page size: used both for the request
    # and for the page-count arithmetic below.
    page_size = 10

    result = search_papers(
        query, page, fields_of_study, publication_types,
        year_range, venue, min_citations, open_access_only,
        limit=page_size
    )
    if "error" in result:
        return result["error"], 0, 0, "0"

    # .get(...) or <default> tolerates responses where the key is absent or null.
    papers = result.get("data") or []
    if not papers:
        return "No results found.", 0, 0, "0"

    total_results = int(result.get("total") or 0)
    offset = int(result.get("offset") or 0)
    max_pages = (total_results + page_size - 1) // page_size  # Ceiling division

    formatted_results = "\n\n---\n\n".join(format_paper_info(paper) for paper in papers)

    # Add pagination information
    pagination_info = f"\n\n---\n\nShowing results {offset + 1}-{offset + len(papers)} of {total_results}"

    # Clamp so a missing/zero total never yields a last-page index of -1.
    return formatted_results + pagination_info, page, max(max_pages - 1, 0), str(total_results)
#
# End of Semantic_Scholar.py
####################################################################################################
|