Spaces:

onurnsfw
/

brochures_generator

Sleeping

App Files Files Community

onurnsfw committed 9 days ago

Commit

32063f6

•

1 Parent(s): b4ecccf

Create app.py

Browse files

Files changed (1) hide show

app.py +121 -0

app.py ADDED Viewed

	@@ -0,0 +1,121 @@

+## Brochure app: takes a company website, collects its links, scrapes the relevant pages, and generates a brochure from the combined content.
+from bs4 import BeautifulSoup
+import requests
+import json
+import gradio as gr
+from langchain_groq import ChatGroq
+import os
+# A class to represent a Webpage
+class Website:
+    """
+    A scraped web page: its title, visible body text and outgoing links.
+
+    Attributes set by the constructor:
+        url: the address that was requested.
+        body: raw response bytes as returned by ``requests``.
+        title: text of the ``<title>`` tag, or "No title found".
+        text: newline-separated text of ``<body>`` with script, style, img
+            and input elements removed ("" when the page has no body).
+        links: every non-empty ``href`` found on ``<a>`` tags, unmodified
+            (relative links stay relative).
+    """
+    # Seconds to wait for the server. Without a timeout requests.get() can
+    # block forever on an unresponsive host.
+    REQUEST_TIMEOUT = 15
+    def __init__(self, url):
+        """
+        Fetch ``url`` and parse the response as HTML.
+
+        Raises:
+            requests.RequestException: if the request fails or times out.
+        """
+        self.url = url
+        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
+        self.body = response.content
+        soup = BeautifulSoup(self.body, 'html.parser')
+        # soup.title.string is None when <title> is empty or contains nested
+        # tags, so test the string itself and not only the tag.
+        title_tag = soup.title
+        if title_tag is not None and title_tag.string:
+            self.title = title_tag.string
+        else:
+            self.title = "No title found"
+        if soup.body:
+            # Drop elements that carry no readable brochure content.
+            for irrelevant in soup.body(["script", "style", "img", "input"]):
+                irrelevant.decompose()
+            self.text = soup.body.get_text(separator="\n", strip=True)
+        else:
+            self.text = ""
+        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
+        self.links = [href for href in hrefs if href]
+    def get_contents(self):
+        """Return the title and body text formatted as one prompt section."""
+        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"
+## Check whether the links are relevant or not
+def get_links_system_prompt(website):
+    """
+    Build the system prompt that asks the model to pick brochure-worthy links.
+
+    Args:
+        website: not used by this function.
+
+    Returns:
+        The instruction text followed by an example of the JSON reply format.
+    """
+    link_system_prompt = "You are provided with a list of links found on a webpage. \
+    You are able to decide which of the links would be most relevant to include in a brochure about the company, \
+    such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
+    link_system_prompt += "You should respond in JSON as in this example, do not say anything else:"
+    # The example has to be valid JSON itself (key/value pairs separated by
+    # commas); the reply is parsed with json.loads, so showing the model a
+    # malformed sample invites malformed output.
+    link_system_prompt += """
+    {
+        "links": [
+            {"type": "about page", "url": "https://full.url/goes/here/about"},
+            {"type": "careers page", "url": "https://another.full.url/careers"}
+        ]
+    }
+    """
+    return link_system_prompt
+def get_links_user_prompt(website):
+    """
+    Build the user prompt listing every link found on ``website``.
+
+    Args:
+        website: object exposing ``url`` (str) and ``links`` (list of str).
+
+    Returns:
+        The instructions followed by the links, one per line.
+    """
+    intro = f"Here is the list of links on the website of {website.url} - "
+    instructions = (
+        "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. "
+        "Do not include Terms of Service, Privacy, email links.\n"
+    )
+    heading = "Links (some might be relative links):\n"
+    link_lines = "\n".join(website.links)
+    return "".join((intro, instructions, heading, link_lines))
+def get_relevant_links(url, llm):
+    """
+    Ask the model which links on ``url`` belong in a company brochure.
+
+    Args:
+        url: address of the page whose links are classified.
+        llm: chat model exposing ``invoke(messages)``; the reply's ``content``
+            is assumed to be a string holding the JSON answer.
+
+    Returns:
+        The decoded JSON reply. When it is a dict with a "links" list, each
+        entry's "url" is resolved against ``url`` so it is absolute.
+
+    Raises:
+        json.JSONDecodeError: if no valid JSON can be decoded from the reply.
+    """
+    from urllib.parse import urljoin
+    website = Website(url)
+    messages = [
+        ("system", get_links_system_prompt(website)),
+        ("human", get_links_user_prompt(website))
+    ]
+    raw = llm.invoke(messages).content
+    # Models frequently wrap the JSON in a Markdown code fence or add a
+    # sentence around it; keep only the outermost {...} span when one exists.
+    start, end = raw.find("{"), raw.rfind("}")
+    payload = raw[start:end + 1] if 0 <= start < end else raw
+    parsed = json.loads(payload)
+    # The page may list relative hrefs and the model can echo them back;
+    # requests cannot fetch those, so make every URL absolute here.
+    if isinstance(parsed, dict):
+        for link in parsed.get("links", []):
+            if isinstance(link, dict) and link.get("url"):
+                link["url"] = urljoin(url, link["url"])
+    return parsed
+def get_all_details(url, llm):
+    """
+    Collect the text of the landing page and of every relevant linked page.
+
+    Args:
+        url: landing page of the company website.
+        llm: chat model passed through to ``get_relevant_links``.
+
+    Returns:
+        One string: the landing-page section followed by a section per
+        relevant link. Links that cannot be fetched are skipped.
+    """
+    sections = ["Landing page:\n", Website(url).get_contents()]
+    links = get_relevant_links(url, llm)
+    print("Found links:", links)
+    entries = links.get("links", []) if isinstance(links, dict) else []
+    for link in entries:
+        # One dead or malformed link should not abort the whole brochure.
+        try:
+            page = Website(link["url"])
+        except (KeyError, TypeError, requests.RequestException) as err:
+            print(f"Skipping link {link!r}: {err}")
+            continue
+        sections.append(f"\n\n{link.get('type', 'page')}\n")
+        sections.append(page.get_contents())
+    return "".join(sections)
+def get_brochure_system_prompt():
+    """Return the system prompt that tells the model how to write the brochure."""
+    # The runs of spaces between sentences are part of the prompt text and
+    # are kept exactly as they are.
+    return (
+        "You are an assistant that analyzes the contents of several relevant pages from a company website "
+        "    and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown."
+        "    Include details of company culture, customers and careers/jobs if you have the information."
+    )
+def get_brochure_user_prompt(company_name, url, llm):
+    """
+    Build the user prompt holding the scraped content for the brochure.
+
+    Args:
+        company_name: name inserted into the first line of the prompt.
+        url: landing page handed to ``get_all_details``.
+        llm: chat model handed to ``get_all_details``.
+
+    Returns:
+        The prompt, cut to at most 20,000 characters.
+    """
+    max_chars = 20_000
+    prompt = "".join([
+        f"You are looking at a company called: {company_name}\n",
+        "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n",
+        get_all_details(url, llm),
+    ])
+    # Truncate so an oversized scrape does not produce an oversized prompt.
+    return prompt[:max_chars]
+def create_brochure(company_name, url, llm_choice, groq_api_key):
+    """
+    Gradio callback: generate a Markdown brochure for a company website.
+
+    Args:
+        company_name: company name used in the prompt.
+        url: landing page of the company website.
+        llm_choice: a key of ``llm_options`` chosen in the dropdown.
+        groq_api_key: Groq API key entered by the user.
+
+    Returns:
+        The model's reply content (the brochure).
+
+    Raises:
+        gr.Error: if no model is selected or the URL or API key is missing.
+    """
+    if llm_choice not in llm_options:
+        raise gr.Error("Please select an LLM.")
+    if not url:
+        raise gr.Error("Please enter the company website URL.")
+    if not groq_api_key:
+        raise gr.Error("Please enter your Groq API key.")
+    # Give the key to this client only. Writing it to os.environ would share
+    # it process-wide, so concurrent users could overwrite each other's key.
+    llm = ChatGroq(model=llm_options[llm_choice], groq_api_key=groq_api_key)
+    messages = [
+        ("system", get_brochure_system_prompt()),
+        ("human", get_brochure_user_prompt(company_name, url, llm))
+    ]
+    return llm.invoke(messages).content
+# Dropdown label -> Groq model identifier passed to ChatGroq.
+llm_options = dict(
+    Gemma2="gemma2-9b-it",
+    LLama="llama-3.2-3b-preview",
+    Mixtral="mixtral-8x7b-32768",
+)
+# Gradio interface: the input order must match create_brochure's parameters.
+_inputs = [
+    "text",  # company name
+    "text",  # website URL
+    gr.Dropdown(choices=list(llm_options), label="Select LLM"),
+    gr.Textbox(type="password", label="Enter Groq API Key"),
+]
+demo = gr.Interface(
+    fn=create_brochure,
+    inputs=_inputs,
+    outputs="markdown",  # the brochure is rendered as Markdown
+    title="Brochure Generator",
+    description="Generate brochures by selecting a company name, URL, and LLM. Provide your Groq API Key.",
+)
+demo.launch()