"""Gradio front end for prompt-driven web scraping with ScrapeGraphAI.

At import time the module installs the Playwright browsers, loads the
Hugging Face API token from the environment (optionally via a ``.env``
file), builds the LLM and embedding clients, and assembles the ``demo``
Blocks UI. Running the file as a script launches the UI.
"""
import os
import subprocess

import gradio as gr
from dotenv import load_dotenv
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_community.llms import HuggingFaceEndpoint
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

# Ensure Playwright installs required browsers and dependencies.
# check=False keeps the original behaviour: a non-zero exit status from the
# installer does not raise, so the app still starts.
subprocess.run(["playwright", "install"], check=False)
# subprocess.run(["playwright", "install-deps"])

# Load environment variables
load_dotenv()
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')

# Initialize the model instances
repo_id = "mistralai/Mistral-7B-Instruct-v0.2"

llm_model_instance = HuggingFaceEndpoint(
    repo_id=repo_id,
    max_length=128,
    temperature=0.5,
    # The endpoint's declared credential field is `huggingfacehub_api_token`;
    # a bare `token=` keyword is not a declared field of the class.
    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
)

embedder_model_instance = HuggingFaceInferenceAPIEmbeddings(
    api_key=HUGGINGFACEHUB_API_TOKEN,
    model_name="sentence-transformers/all-MiniLM-l6-v2",
)

# Shared configuration handed to every SmartScraperGraph created below.
graph_config = {
    "llm": {"model_instance": llm_model_instance},
    "embeddings": {"model_instance": embedder_model_instance},
}


def scrape_and_summarize(prompt: str, source: str) -> tuple:
    """Run a SmartScraperGraph for ``prompt`` against ``source``.

    Args:
        prompt: Instruction describing what to extract from the page.
        source: Source passed to the scraper (the UI supplies a URL).

    Returns:
        A 2-tuple ``(result, exec_info)`` where ``result`` is whatever
        ``SmartScraperGraph.run()`` returns and ``exec_info`` is the graph's
        execution info passed through ``prettify_exec_info``.

    Raises:
        gr.Error: If ``prompt`` or ``source`` is empty or whitespace only.
    """
    prompt = (prompt or "").strip()
    source = (source or "").strip()

    # Fail early with a readable message instead of letting a blank value
    # reach the scraping graph.
    if not prompt:
        raise gr.Error("Please enter a prompt.")
    if not source:
        raise gr.Error("Please enter a source URL.")

    smart_scraper_graph = SmartScraperGraph(
        prompt=prompt,
        source=source,
        config=graph_config,
    )
    result = smart_scraper_graph.run()
    exec_info = smart_scraper_graph.get_execution_info()
    return result, prettify_exec_info(exec_info)


# Gradio interface
with gr.Blocks(theme="Nymbo_Alyx_Theme") as demo:
    gr.Markdown("# Scrape with Instructions")

    with gr.Row():
        with gr.Column():
            # Display-only field: it is not among the click handler's inputs,
            # so editing it does not change which model is used.
            model_dropdown = gr.Textbox(label="Model", value="Mistral-7B-Instruct-v0.2")
            prompt_input = gr.Textbox(
                label="Prompt",
                value="List me all the press releases with their headlines and urls.",
            )
            source_input = gr.Textbox(label="Source URL", value="https://www.whitehouse.gov/")
            scrape_button = gr.Button("Scrape and Summarize")

        with gr.Column():
            result_output = gr.JSON(label="Result")
            exec_info_output = gr.Textbox(label="Execution Info")

    # The handler's two return values map, in order, onto the two outputs.
    scrape_button.click(
        scrape_and_summarize,
        inputs=[prompt_input, source_input],
        outputs=[result_output, exec_info_output],
    )

# Launch the Gradio app
if __name__ == "__main__":
    demo.launch()