File size: 1,211 Bytes
96f4037
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from fastapi import FastAPI, HTTPException
import requests
from bs4 import BeautifulSoup

# Application instance; the @app.get route decorators in this file register their handlers on it.
app = FastAPI()

def scrape_web_page(url, timeout=10):
    """Download *url* and return the text content of the page.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; defaults to 10.

    Returns:
        The text extracted from the HTML, with text nodes joined by newlines
        and leading/trailing whitespace stripped, or ``None`` when the
        request fails (connection error, timeout, or an error status code).
    """
    # NOTE(review): the /scrape/ endpoint passes a caller-supplied URL here
    # unchanged; consider restricting schemes/hosts if this is exposed publicly.
    try:
        # requests has no default timeout: without one an unresponsive host
        # would block this call (and the worker serving it) indefinitely.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx responses into exceptions
        html = response.content
    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL: {e}")
        return None

    # Parse the HTML and flatten it to plain text, one text node per line
    soup = BeautifulSoup(html, 'html.parser')
    return soup.get_text(separator='\n').strip()

@app.get("/")
def read_root():
    """Return the static welcome payload served at the root path."""
    greeting = "Welcome to the FastAPI server!"
    return {"message": greeting}

@app.get("/scrape/")
def scrape(url: str):
    """Scrape the page at *url* and return its text content.

    Args:
        url: Query parameter holding the address of the page to scrape.

    Returns:
        A dict with the requested ``url`` and the extracted
        ``scraped_content`` string (which may be empty if the page has no text).

    Raises:
        HTTPException: With status 500 when the page could not be fetched.
    """
    scraped_content = scrape_web_page(url)
    # scrape_web_page signals failure with None only. An empty string means
    # the fetch succeeded but the page had no text, so it is not an error.
    if scraped_content is None:
        raise HTTPException(status_code=500, detail="Failed to scrape content")
    return {"url": url, "scraped_content": scraped_content}

if __name__ == "__main__":
    # Run the app directly with uvicorn when this file is executed as a script,
    # listening on all interfaces at port 8000.
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=8000)