Spaces:

hprasath
/

SCRAP_NODE_SERVER

Sleeping

SCRAP_NODE_SERVER / main.py

Upload 8 files

96f4037 verified 4 months ago

1.21 kB

	from fastapi import FastAPI, HTTPException
	import requests
	from bs4 import BeautifulSoup

	# Application instance; the @app.get(...) decorators below register their
	# handlers on it, and it is the object handed to uvicorn in the __main__ guard.
	app = FastAPI()

	def scrape_web_page(url, timeout=10):
	    """Download *url* and return the text content of the page.

	    Args:
	        url: Address to retrieve with an HTTP GET request.
	        timeout: Seconds to wait for the connection and for each read
	            before giving up; forwarded unchanged to ``requests.get``.

	    Returns:
	        The document's text with markup removed, text nodes joined by
	        newlines and surrounding whitespace stripped. ``None`` when the
	        request fails (connection error, timeout, invalid URL) or the
	        server answers with an error status.
	    """
	    try:
	        # Without a timeout, requests waits forever on a stalled host,
	        # which would pin the worker handling this call.
	        response = requests.get(url, timeout=timeout)
	        response.raise_for_status()  # Turn 4xx/5xx answers into exceptions
	    except requests.exceptions.RequestException as e:
	        print(f"Error fetching URL: {e}")
	        return None

	    # Hand BeautifulSoup the raw bytes so it can work out the encoding itself.
	    soup = BeautifulSoup(response.content, 'html.parser')

	    # Collapse the whole document to text, one text node per line.
	    return soup.get_text(separator='\n').strip()

	@app.get("/")
	def read_root():
	    """Serve the fixed welcome payload at the API root."""
	    greeting = {"message": "Welcome to the FastAPI server!"}
	    return greeting

	@app.get("/scrape/")
	def scrape(url: str):
	    """Scrape the page at *url* and return its text.

	    Args:
	        url: Address of the page to scrape, taken from the ``url`` query
	            parameter.

	    Returns:
	        A dict with the requested ``url`` and its ``scraped_content``.

	    Raises:
	        HTTPException: Status 500 when ``scrape_web_page`` reports a failure
	            by returning ``None``.
	    """
	    # NOTE(review): `url` is fetched server-side exactly as supplied by the
	    # client, with no scheme/host restriction, so this endpoint can be pointed
	    # at internal addresses (SSRF). Consider an allow-list before exposing it.
	    scraped_content = scrape_web_page(url)

	    # Only None signals failure. A page that downloads fine but contains no
	    # text yields "", which must not be reported as a server error.
	    if scraped_content is None:
	        raise HTTPException(status_code=500, detail="Failed to scrape content")

	    return {"url": url, "scraped_content": scraped_content}

	if __name__ == "__main__":
	    # Run directly as a script: start uvicorn with the app defined above.
	    import uvicorn

	    server_options = {"host": "0.0.0.0", "port": 8000}
	    uvicorn.run(app, **server_options)