# Spaces: ShebMichel / GeoScience_Exam_Marker (Running)
# File size: 3,438 Bytes

#!pip install python-docx
#!pip install PyPDF2 --upgrade


import os
import json
from PyPDF2 import PdfReader
from docx import Document

def extract_from_pdf(pdf_path):
    """Extract the text of every page of a PDF file as one string.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages concatenated in page order, with no separator
        inserted between pages.
    """
    with open(pdf_path, "rb") as pdf_file:
        reader = PdfReader(pdf_file)
        # Iterate the pages directly and join once instead of indexing and
        # growing a string with `+=`. The `or ""` is a defensive guard: if a
        # page yields a falsy result (e.g. None for a page without a text
        # layer) it contributes nothing instead of breaking the
        # concatenation.
        return "".join(page.extract_text() or "" for page in reader.pages)

def extract_from_json(json_path):
    """Load and return the parsed content of a JSON file.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The decoded JSON value (dict, list, str, number, bool or None).

    Raises:
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 so non-ASCII text is read the same way on
    # every platform rather than depending on the locale's default encoding.
    with open(json_path, "r", encoding="utf-8") as json_file:
        return json.load(json_file)

def extract_from_word(word_path):
    """Return the text of a Word (.docx) document, one paragraph per line.

    Every paragraph's text is followed by a newline, so a non-empty result
    always ends with a newline character.
    """
    paragraphs = Document(word_path).paragraphs
    return "".join(f"{paragraph.text}\n" for paragraph in paragraphs)

def extract_data(file_path):
    """Extract data from a file, choosing the reader by file extension.

    The extension is matched case-insensitively, so ``exam.PDF`` is handled
    the same way as ``exam.pdf``.

    Args:
        file_path: Path of the file to read.

    Returns:
        The extracted text for ``.pdf`` and ``.docx`` files, or the parsed
        JSON value for ``.json`` files.

    Raises:
        ValueError: If the extension is not ``.pdf``, ``.json`` or ``.docx``.
    """
    _, file_extension = os.path.splitext(file_path)
    # Compare on a lower-cased copy; the original spelling is kept for the
    # error message so the caller sees exactly what was rejected.
    normalized_extension = file_extension.lower()

    if normalized_extension == ".pdf":
        return extract_from_pdf(file_path)
    if normalized_extension == ".json":
        return extract_from_json(file_path)
    if normalized_extension == ".docx":
        return extract_from_word(file_path)
    raise ValueError("Unsupported file extension: " + file_extension)

def create_data_dictionary(files):
    """Create a dictionary mapping each file path to its extracted data.

    Args:
        files: An iterable of file paths. A single path given as a string
            or ``os.PathLike`` is also accepted: if it names a directory,
            the regular files directly inside it are processed in sorted
            order; otherwise it is treated as one file.

    Returns:
        A dict keyed by file path. Files for which ``extract_data`` raises
        ``ValueError`` (for example an unsupported extension) are reported
        with ``print`` and left out of the result.
    """
    if isinstance(files, (str, os.PathLike)):
        # A bare string is iterable character by character, which would
        # silently turn 'data' into the "paths" 'd', 'a', 't', 'a'.
        if os.path.isdir(files):
            candidates = (
                os.path.join(files, name) for name in os.listdir(files)
            )
            files = sorted(p for p in candidates if os.path.isfile(p))
        else:
            files = [files]

    data_dict = {}
    for file_path in files:
        try:
            data_dict[file_path] = extract_data(file_path)
        except ValueError as e:
            # Best effort: report the problem and carry on with the rest.
            print(e)
    return data_dict

# Usage example
path      = ''
# Directory expected to hold the exam files.
exam_dir = 'data'
# create_data_dictionary needs an iterable of file paths, not a directory
# name, so list the directory content explicitly (empty if it is missing).
if os.path.isdir(exam_dir):
    exam_files = sorted(os.path.join(exam_dir, name) for name in os.listdir(exam_dir))
else:
    exam_files = []
# The report below reads structured sections, which only a JSON exam
# provides; keep the first JSON file as the exam to display.
exam_data = [p for p in exam_files if os.path.splitext(p)[1] == '.json'][:1]
print(exam_files)
data_dict = create_data_dictionary(exam_data)

##
# Keys read from the exam's 'header' section.
school_data   = ['university','department','course_code','course_title','date','duration','instructor']
# Keys of one multiple-choice entry and of one short-answer entry.
qcm_data      = ['question','options', 'answer']
short_data    = ['question','answer']

if not exam_data or str(exam_data[0]) not in data_dict:
    # Nothing to report: no JSON exam was found or it could not be loaded.
    print(f"No JSON exam file could be loaded from '{exam_dir}'")
else:
    exam = data_dict[str(exam_data[0])]
    multiple_choice_questions = exam['multiple_choice_questions']
    short_answer_questions    = exam['short_answer_questions']
    long_answer_questions     = exam['long_answer_questions']

    for s_data in school_data:
        print(f" {s_data}: {exam['header'][str(s_data)]}")
    print(f"***************'school data'************************")

    for idx,qcm in enumerate(multiple_choice_questions):
        print(f" Index is: {idx} and 'Question': {qcm['question']}")
        print(f" Index is: {idx} and 'Options': {qcm['options']}")
        print(f" Index is: {idx} and 'Answer': {qcm['answer']}")
    print(f"***************'multiple_choice_questions'************************")
    for idx,qcm in enumerate(short_answer_questions):
        print(f" Index is: {idx} and 'Question': {qcm['question']}")
        print(f" Index is: {idx} and 'Answer': {qcm['answer']}")
    print(f"***************' END short_answer_questions'************************")
    print(f"***************' START long_answer_questions'************************")
    for idx,qcm in enumerate(long_answer_questions):
        print(f" Index is: {idx} and 'Question': {qcm['question']}")
        print(f" Index is: {idx} and 'Answer': {qcm['answer']}")
    print(f"***************' END long_answer_questions'************************")