Spaces:
Sleeping
Sleeping
File size: 1,211 Bytes
96f4037 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
from fastapi import FastAPI, HTTPException
import requests
from bs4 import BeautifulSoup
# Application instance: the @app.get route handlers in this file register on
# it, and it is the object handed to uvicorn.run() when run as a script.
app = FastAPI()
def scrape_web_page(url, timeout=10):
    """Fetch *url* over HTTP and return the visible text of the page.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the remote server before giving up.
            Passed straight to ``requests.get``; without it a stalled host
            would block the caller indefinitely.

    Returns:
        The page text with tags removed, text nodes joined by newlines and
        leading/trailing whitespace stripped (may be an empty string), or
        ``None`` when the request fails (connection error, timeout, or a
        non-success HTTP status).
    """
    try:
        # Only the network call can raise RequestException, so keep the
        # try body limited to it.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx responses into exceptions
    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL: {e}")
        return None

    # Hand BeautifulSoup the raw bytes so it can work out the encoding itself.
    soup = BeautifulSoup(response.content, 'html.parser')
    return soup.get_text(separator='\n').strip()
@app.get("/")
def read_root():
    """Serve the welcome payload for GET requests to the root path."""
    greeting = {"message": "Welcome to the FastAPI server!"}
    return greeting
@app.get("/scrape/")
def scrape(url: str):
    """Scrape the page at *url* and return its text content.

    Args:
        url: Address of the page to scrape, taken from the request's
            ``url`` query parameter.

    Returns:
        A dict with the requested ``url`` and the extracted
        ``scraped_content`` (which may be an empty string when the page
        has no text).

    Raises:
        HTTPException: With status 500 when ``scrape_web_page`` reports a
            failure by returning ``None``.
    """
    # NOTE(review): `url` is caller-supplied and is handed to
    # scrape_web_page without validation; consider restricting schemes/hosts
    # if this endpoint is reachable by untrusted clients.
    scraped_content = scrape_web_page(url)

    # Only None signals a failed fetch. A truthiness check would wrongly
    # report a successfully fetched but text-less page as a server error.
    if scraped_content is None:
        raise HTTPException(status_code=500, detail="Failed to scrape content")

    return {"url": url, "scraped_content": scraped_content}
if __name__ == "__main__":
    # Script entry point: uvicorn is imported only here, so importing this
    # module elsewhere does not require it.
    import uvicorn

    # Host 0.0.0.0 makes the server listen on every network interface.
    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)
|