"""Gradio app that turns a stored textbook chapter PDF into generated study notes.

Pipeline: read the chapter PDF, condense it with a word-frequency extractive
summary, ask a Gemini model to write notes from that summary, and convert the
model's answer to a PDF with the ``mdpdf`` command-line tool.
"""

import os
import shutil
import subprocess
import tempfile
from collections import Counter
from typing import Optional

import google.generativeai as genai
import gradio as gr
import nltk
import PyPDF2
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize

# Fetch the NLTK resources used by summarize_text().
nltk.download('stopwords')
nltk.download('punkt')


# Prompt used when the selected subject is "English". Placeholder: {text}.
_CHAPTER_PROMPT_TEMPLATE = """
Create detailed study material from the given given Chapter from a book by considering the following points in mind
1.Ensure a thorough understanding of the chapter's main ideas and themes.
2.Identify and emphasise essential concepts and arguments.
3.Create a well-organised outline reflecting the chapter's structure.
4.Provide concise, to-the-point summaries for each section.
5.Define and clarify key terms and concepts introduced in the chapter.
6.Pose engaging questions for self-assessment and discussion.
7.Ensure accurate citations for quotes and references.
8.use given text only for reference you can add as much as detail you can.
9.The response should be complete.
10.Generate response as a html page with all type of formatting.
11.Each point you take in the study material please describe it in detail.
Use the following HTML template to structure your notes:
Chapter Context: {text}"""

# Prompt used for every other subject. Placeholders: {topic}, {text}.
_TOPIC_PROMPT_TEMPLATE = """
Create detailed study material from the given given topic from a book chapter by considering the following points in mind
1.Notes will be based on topic specific.
2.Ensure that explanations are clear and concise, using simple language to make complex concepts understandable.
3.Highlight key concepts, theories, and important formulas related to each topic in detail.
4.For mathematical components like physics and chemistry, provide step-by-step solutions for numerical problems.
5.The notes should be in detail cover all the concepts related to the topic.
6.use given text only for reference you can add as much as detail you can.
7.The response should be complete.
8.Ignore unneccessory informstion in the context.
9.Generate response as a valid 'mardown formate' page with all type of formatting.
Topics: {topic}
Chapter Reference:- {text}"""


def summarize_text(text: str) -> str:
    """Return an extractive summary of *text* based on word frequencies.

    Every non-stopword token is counted (case-insensitively). A sentence's
    score is the sum of the counts of all counted words that occur in it as a
    substring. Sentences scoring more than 1.2 times the truncated average
    score are kept, in their original order, each prefixed with one space.

    Returns an empty string when no sentence receives a score (for example
    when *text* is empty) instead of dividing by zero.
    """
    stop_words = set(stopwords.words("english"))
    word_freq = Counter(
        token
        for token in (raw.lower() for raw in word_tokenize(text))
        if token not in stop_words
    )

    sentences = sent_tokenize(text)
    sentence_scores: dict = {}
    for sentence in sentences:
        lowered = sentence.lower()  # lower-case once per sentence, not once per word
        score = sum(freq for word, freq in word_freq.items() if word in lowered)
        if score:
            # A sentence that occurs several times accumulates its score.
            sentence_scores[sentence] = sentence_scores.get(sentence, 0) + score

    if not sentence_scores:
        return ""

    average = int(sum(sentence_scores.values()) / len(sentence_scores))
    threshold = 1.2 * average
    return "".join(
        " " + sentence
        for sentence in sentences
        if sentence in sentence_scores and sentence_scores[sentence] > threshold
    )


def Notes_data(API: str, prompt: str) -> str:
    """Send *prompt* to the 'gemini-pro' model and return the response text.

    Args:
        API: Gemini API key passed to ``genai.configure``.
        prompt: Full prompt text to generate from.
    """
    genai.configure(api_key=API)
    model = genai.GenerativeModel('gemini-pro')
    response = model.generate_content(prompt)
    return response.text


def markdown_to_pdf(markdown_content: str) -> str:
    """Convert *markdown_content* to a PDF with ``mdpdf`` and return the PDF path.

    The content is written to a temporary ``.md`` file, which is always removed
    afterwards. The returned temporary ``.pdf`` file is left for the caller.

    Raises:
        FileNotFoundError: if the ``mdpdf`` executable cannot be found.
        subprocess.CalledProcessError: if ``mdpdf`` exits with a non-zero status.
    """
    with tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=".md", encoding="utf-8"
    ) as temp_md_file:
        temp_md_file.write(markdown_content)
        temp_md_filename = temp_md_file.name

    try:
        # Reserve the output name and close the handle straight away so that
        # mdpdf can write to the path and no descriptor is leaked.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf_file:
            output_pdf_filename = temp_pdf_file.name

        # Argument list instead of a shell string: no quoting problems with
        # paths that contain spaces. Resolving the executable first lets
        # launcher scripts on PATH be started without a shell.
        mdpdf_executable = shutil.which("mdpdf") or "mdpdf"
        subprocess.run(
            [mdpdf_executable, "-o", output_pdf_filename, temp_md_filename],
            check=True,  # a failed conversion must not be returned as a PDF
        )
    finally:
        os.remove(temp_md_filename)

    return output_pdf_filename


def extract_text_from_pdf(pdf_file: str) -> str:
    """Return the concatenated extracted text of every page of *pdf_file*."""
    with open(pdf_file, "rb") as file:
        pdf_reader = PyPDF2.PdfReader(file)
        # NOTE(review): `or ""` guards against extract_text() returning None,
        # which some PyPDF2 versions may do for pages without text - confirm.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def get_prompt(text: str, subject: str, topic: Optional[str]) -> str:
    """Build the model prompt for *subject*.

    "English" gets the chapter-oriented prompt, which embeds only *text*.
    Every other subject gets the topic-oriented prompt, which embeds both
    *topic* and *text*.
    """
    if subject == "English":
        return _CHAPTER_PROMPT_TEMPLATE.format(text=text)
    return _TOPIC_PROMPT_TEMPLATE.format(topic=topic, text=text)


def generate_study_material(
    API: str, pdf_text: str, subject: str, topic: Optional[str]
) -> str:
    """Return the model-generated notes for *pdf_text*."""
    return Notes_data(API, get_prompt(pdf_text, subject, topic))


def pdf_generator(
    API: str,
    class_name: str,
    subject: str,
    chapter: str,
    query: Optional[str] = None,
) -> str:
    """Generate study notes for one stored chapter and return the PDF path.

    The chapter is read from ``Books/<class_name>/<subject>/<chapter>.pdf``,
    summarized, sent to the model together with *query* (the requested
    topics), and the model's answer is converted to a PDF.
    """
    pdf_file_path = f"Books/{class_name}/{subject}/{chapter}.pdf"
    chapter_text = extract_text_from_pdf(pdf_file_path)
    # The summarizer returns "" when no sentence stands out (short or uniform
    # chapters); fall back to the full text so the model still gets context.
    pdf_text = summarize_text(chapter_text) or chapter_text
    study_material = generate_study_material(API, pdf_text, subject, query)
    return markdown_to_pdf(study_material)


# Gradio input and output components; the order matches pdf_generator's parameters.
input_components = [
    # type="password" masks the API key while it is typed.
    gr.Textbox(label="Enter your Gemini API", type="password"),
    gr.Dropdown(["Class 10", "Class 11", "Class 12"], label="Select Class"),
    gr.Dropdown(["Math", "Science", "English", "Chemistry"], label="Select Subject"),
    gr.Dropdown(["Chapter 1", "Chapter 2", "Chapter 3"], label="Select Chapters"),
    gr.Textbox(label="Write the topics name saperated by ','"),
]

output_component = gr.File(label="Notes")

# Gradio interface wiring the form to pdf_generator.
iface = gr.Interface(
    fn=pdf_generator,
    inputs=input_components,
    outputs=output_component,
    title="PDF Generator",
    description="Generate PDFs based on class, subject, and chapter selection.",
)

# Launch the app only when run as a script, so importing the module does not
# start a server.
if __name__ == "__main__":
    iface.launch()