# Page-header residue from the hosting site (not part of the program):
# arssite's picture
# Update app.py
# d3452aa verified
import streamlit as st
from langchain_community.document_loaders import WebBaseLoader
#from chains import Chain
#from portfolio import Portfolio
#from utils import clean_text
import re
import pandas as pd
import chromadb
import uuid
import os
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.exceptions import OutputParserException
#from dotenv import load_dotenv
#from google.colab import userdata
#load_dotenv()
class Chain:
    """LLM pipeline that extracts job postings from text and drafts cold emails.

    Wraps a ``ChatGroq`` chat model and two prompt templates. The prompt
    bodies are stored as class constants and kept flush-left so the text
    sent to the model carries no accidental indentation.
    """

    _EXTRACT_TEMPLATE = """
### SCRAPED TEXT FROM WEBSITE:
{page_data}
### INSTRUCTION:
The scraped text is from the career's page of a website.
Your job is to extract the job postings and return them in JSON format containing the following keys: `role`, `experience`, `skills` and `description`.
Only return the valid JSON.
### VALID JSON (NO PREAMBLE):
"""

    _EMAIL_TEMPLATE = """
### JOB DESCRIPTION:
{job_description}
### INSTRUCTION:
You are Anmol R Srivastava, a student pursuing a bachelor's degree in Computer Science Engineering with a specialization in Artificial Intelligence and Machine Learning, graduating in 2025. You have experience in cloud computing, AI, and software development, focusing on building AI-driven systems for various applications. Your task is to write a cold email to a potential client regarding a project that involves creating a predictive analytics tool for supply chain management. Highlight your expertise in AI and machine learning, particularly in predictive models and scalable solutions. Mention your ability to deliver customized and efficient systems tailored to client needs. Also, include your portfolio links to showcase your work:
GitHub: https://github.com/arssite
LinkedIn: https://www.linkedin.com/in/anmol-r-srivastava/
Hugging Face: https://huggingface.co/arssite
Contact email: arssite2020@gmail.com
Also add the most relevant ones from the following links to showcase My Resume: {link_list}
.
Do not provide a preamble.
### EMAIL (NO PREAMBLE):
"""

    def __init__(self, model_name="llama-3.1-70b-versatile", temperature=0):
        """Create the chat model client.

        Args:
            model_name: Groq model identifier passed to ``ChatGroq``.
                NOTE(review): hosted model names get retired over time;
                confirm this default is still served.
            temperature: Sampling temperature passed to ``ChatGroq``.

        Raises:
            ValueError: If the ``GROQ_API_KEY`` environment variable is
                missing or empty.
        """
        # Get the API key from environment variables
        groq_api_key = os.getenv("GROQ_API_KEY")
        if not groq_api_key:
            # Fail early with an actionable message instead of handing None
            # to the client.
            raise ValueError("GROQ_API_KEY environment variable is not set.")
        self.llm = ChatGroq(
            temperature=temperature,
            groq_api_key=groq_api_key,
            model_name=model_name,
        )

    def extract_jobs(self, cleaned_text):
        """Ask the model to turn scraped page text into job-posting records.

        Args:
            cleaned_text: Text substituted for ``{page_data}`` in the prompt.

        Returns:
            A list of parsed JSON values; a single non-list result is
            wrapped in a one-element list.

        Raises:
            OutputParserException: If the model reply cannot be parsed as
                JSON. The original parser error is attached as the cause.
        """
        prompt_extract = PromptTemplate.from_template(self._EXTRACT_TEMPLATE)
        chain_extract = prompt_extract | self.llm
        response = chain_extract.invoke(input={"page_data": cleaned_text})
        try:
            parsed = JsonOutputParser().parse(response.content)
        except OutputParserException as err:
            raise OutputParserException(
                "Context too big. Unable to parse jobs."
            ) from err
        return parsed if isinstance(parsed, list) else [parsed]

    def write_mail(self, job, links):
        """Generate a cold email for one job posting.

        Args:
            job: Job record; it is stringified into the prompt.
            links: Portfolio link data interpolated into the prompt as-is.

        Returns:
            The text content of the model's reply.
        """
        prompt_email = PromptTemplate.from_template(self._EMAIL_TEMPLATE)
        chain_email = prompt_email | self.llm
        response = chain_email.invoke(
            {"job_description": str(job), "link_list": links}
        )
        return response.content
class Portfolio:
    """Portfolio links stored in a persistent Chroma collection.

    Rows are read from a CSV file with ``Techstack`` and ``Links`` columns;
    the tech-stack text is the indexed document and the link is stored as
    its metadata.
    """

    def __init__(self, file_path="links.csv"):
        """Read the CSV and open (or create) the ``portfolio`` collection.

        Args:
            file_path: Path of the CSV file to read.
        """
        self.file_path = file_path
        self.data = pd.read_csv(file_path)
        self.chroma_client = chromadb.PersistentClient('vectorstore')
        self.collection = self.chroma_client.get_or_create_collection(name="portfolio")

    def load_portfolio(self):
        """Add every CSV row to the collection, but only if it is empty."""
        if self.collection.count():
            # Already populated; adding again would duplicate every row
            # under fresh random ids.
            return
        for techstack, link in zip(self.data["Techstack"], self.data["Links"]):
            self.collection.add(
                documents=techstack,
                metadatas={"links": link},
                ids=[str(uuid.uuid4())],
            )

    def query_links(self, skills):
        """Return link metadata for the entries closest to ``skills``.

        Args:
            skills: Query text or list of query texts.

        Returns:
            The ``metadatas`` entry of the query result (up to two matches
            per query text), or an empty list when ``skills`` is empty or
            ``None``.
        """
        if not skills:
            # A posting without a skills field reaches here as []; there is
            # nothing to search for, so skip the vector query entirely.
            return []
        result = self.collection.query(query_texts=skills, n_results=2)
        return result.get('metadatas', [])
# Compiled once at import time; clean_text runs on every submitted page.
_HTML_TAG_RE = re.compile(r'<[^>]*?>')
_URL_RE = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
# Whitespace is deliberately kept here so that words separated only by a
# newline or tab are not fused together when punctuation is stripped.
_NON_ALNUM_RE = re.compile(r'[^a-zA-Z0-9\s]')


def clean_text(text):
    """Reduce scraped text to ASCII letters and digits separated by single spaces.

    Steps, in order: HTML tags become a space, URLs are removed, every
    character that is neither an ASCII letter/digit nor whitespace is
    dropped, and finally all whitespace runs collapse to one space with the
    ends trimmed.

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned string; empty if nothing alphanumeric remains.
    """
    # Remove HTML tags (replace with a space so neighbours stay separate)
    text = _HTML_TAG_RE.sub(' ', text)
    # Remove URLs
    text = _URL_RE.sub('', text)
    # Remove special characters
    text = _NON_ALNUM_RE.sub('', text)
    # Collapse whitespace runs and trim both ends in a single pass
    return ' '.join(text.split())
def create_streamlit_app(llm, portfolio, clean_text):
    """Render the cold-email generator page and handle a submission.

    On submit, the page at the entered URL is loaded and cleaned, job
    postings are extracted, and one generated email per posting is shown
    in a code box.

    Args:
        llm: Object providing ``extract_jobs(text)`` and
            ``write_mail(job, links)``.
        portfolio: Object providing ``load_portfolio()`` and
            ``query_links(skills)``.
        clean_text: Callable applied to the loaded page text before
            extraction.
    """
    st.title("📧 Cold eMail Generator")
    # The hint is shown as a placeholder so it is never submitted as if it
    # were a URL the user typed.
    url_input = st.text_input("Enter a URL:", placeholder="write Website or JD url")
    submit_button = st.button("Submit")
    if not submit_button:
        return

    url = (url_input or "").strip()
    if not url:
        st.error("An Error Occurred: please enter a URL.")
        return

    # UI boundary: any failure while loading, extracting or generating is
    # reported on the page instead of crashing the script run.
    try:
        documents = WebBaseLoader([url]).load()
        if not documents:
            st.error("An Error Occurred: no content could be loaded from the URL.")
            return
        data = clean_text(documents[-1].page_content)
        portfolio.load_portfolio()
        jobs = llm.extract_jobs(data)
        for job in jobs:
            skills = job.get('skills', [])
            links = portfolio.query_links(skills)
            email = llm.write_mail(job, links)
            st.code(email, language='markdown')
    except Exception as e:
        st.error(f"An Error Occurred: {e}")
if __name__ == "__main__":
    # Configure the page before constructing anything that can fail, so the
    # title and icon are set even when start-up raises.
    st.set_page_config(layout="wide", page_title="Cold Email Generator by ARS", page_icon="📧")
    chain = Chain()
    portfolio = Portfolio()
    create_streamlit_app(chain, portfolio, clean_text)