import os
import re
import json
import shutil
import requests
from bs4 import BeautifulSoup
from typing import List

rubric = None
message = None
rubric_file = 'docs/rubric_data.json'
discussion_entries_file = 'docs/discussion_entries.json'

class DiscussionEntry:
    def __init__(self, id: int, parent_id: int, name: str, message: str, replies: List):
        self.id = id
        self.parent_id = parent_id
        self.name = name
        self.message = message
        self.replies = replies

    def to_json(self):
        return {
            'id': self.id,
            'parent_id': self.parent_id,
            'name': self.name,
            'message': self.message,
            'replies': [reply.to_json() for reply in self.replies]
        }

    def dump_json(self, filename):
        with open(filename, 'w') as f:
            json.dump(self.to_json(), f)

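# For reference, a single entry serializes via to_json into nested JSON like
# the sketch below (values are hypothetical; the structure mirrors to_json):
#
#   {"id": 1, "parent_id": null, "name": "Alice", "message": "<p>Hi</p>",
#    "replies": [{"id": 2, "parent_id": 1, "name": "Bob", "message": "...",
#                 "replies": []}]}
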
def extract_entries(entries, participants):
    result = []
    for entry in entries:
        # Skip placeholder entries: deleted posts carry a 'deleted' key and no usable message
        if 'message' in entry and 'deleted' not in entry:
            id = entry['id']
            parent_id = entry['parent_id']
            user_id = entry['user_id']
            name = next((p['display_name'] for p in participants if p['id'] == user_id), None)
            message = entry['message']
            replies = []
            if 'replies' in entry:
                replies = extract_entries(entry['replies'], participants)
            result.append(DiscussionEntry(id, parent_id, name, message, replies))
    return result

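# extract_entries assumes the shape of the 'view' payload returned by Canvas's
# .../discussion_topics/<id>/view endpoint: a list of entry dicts carrying the
# fields read above. A minimal sketch of one entry, with hypothetical values:
#
#   {"id": 10, "parent_id": null, "user_id": 42,
#    "message": "<p>Post body</p>", "replies": [...]}
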
def save_messages(entries, group_id=None):
    for entry in entries:
        # One HTML file per author; group discussions get a group-id prefix
        filename = f'docs/{entry.name}.html'
        if group_id is not None:
            filename = f'docs/group_{group_id}_{entry.name}.html'
        # Append so all of an author's posts and replies land in the same file
        with open(filename, 'a+') as f:
            if entry.parent_id is None:
                f.write(f'<h1><b>Student Post: {entry.name}</b></h1>')
                f.write(entry.message)
                f.write('<hr>')
            else:
                f.write(f'<h2><b>Reply to: {entry.parent_id}</b></h2>')
                f.write(entry.message)
                f.write('<hr>')
        save_messages(entry.replies, group_id)

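# After a run, docs/ holds one HTML file per participant (names here are
# hypothetical): docs/Alice.html for an individual discussion, or
# docs/group_7_Alice.html when a group_id was passed in.
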
def extract_group_discussions(group_topic_children, headers):
    group_entries = []
    for group_topic in group_topic_children:
        group_id = group_topic['group_id']
        topic_id = group_topic['id']
        group_discussion_url = f'{base_url}/api/v1/groups/{group_id}/discussion_topics/{topic_id}/view'
        group_discussion_response = requests.get(group_discussion_url, headers=headers)
        if group_discussion_response.ok:
            group_discussion_data = group_discussion_response.json()
            entries = extract_entries(group_discussion_data['view'], group_discussion_data['participants'])
            group_entries.append({
                'group_id': group_id,
                'entries': entries
            })
    # Dump JSON data for all group-based discussions once; writing inside the
    # loop would overwrite the file on every iteration, keeping only the last group
    with open(discussion_entries_file, 'w') as f:
        json.dump([entry.to_json() for group in group_entries for entry in group['entries']], f)
    return group_entries

def extract_individual_discussion(discussion_url, headers):
    individual_entries = []
    discussion_response = requests.get(discussion_url, headers=headers)
    if discussion_response.ok:
        discussion_data = discussion_response.json()
        entries = extract_entries(discussion_data['view'], discussion_data['participants'])
        # Dump JSON data for the individual discussion
        with open(discussion_entries_file, 'w') as f:
            json.dump([entry.to_json() for entry in entries], f)
        individual_entries.extend(entries)
    return individual_entries

def ingest_canvas_discussions(input_url, access_token):
    global base_url, rubric, message
    match = re.match(r'https://canvas\.illinois\.edu/courses/(\d+)/discussion_topics/(\d+)', input_url)
    if match:
        course_id, discussion_topic_id = match.groups()
    else:
        raise ValueError("Invalid URL")
    base_url = 'https://canvas.illinois.edu'
    headers = {
        'Authorization': f'Bearer {access_token}'
    }
    discussion_url = f'{base_url}/api/v1/courses/{course_id}/discussion_topics/{discussion_topic_id}/view'
    instruction_url = f'{base_url}/api/v1/courses/{course_id}/discussion_topics/{discussion_topic_id}'
    instruction_response = requests.get(instruction_url, headers=headers)
    if instruction_response.ok:
        instruction_data = instruction_response.json()
        rubric = []
        # Extract title if it exists
        if 'title' in instruction_data:
            title = instruction_data['title']
            rubric = [{'title': title}]
        # Ungraded discussions have no 'assignment' key, so guard before indexing into it
        assignment = instruction_data.get('assignment') or {}
        if 'description' in assignment:
            message_html = assignment['description']
            soup = BeautifulSoup(message_html, 'html.parser')
            message = soup.get_text()
            rubric.append({'instruction': message})
        if 'rubric' in assignment:
            rubric.extend(assignment['rubric'])
        if 'points_possible' in assignment:
            points_possible = assignment['points_possible']
            rubric.append({'points_possible': points_possible})
        # Recreate the docs folder so every run starts from a clean slate
        if os.path.exists('docs'):
            shutil.rmtree('docs')
        os.makedirs('docs')
        with open(rubric_file, 'w') as f:
            json.dump(rubric, f)
        print("Extracted instructions and rubric")
        # Check if the discussion is an individual discussion with associated group-based discussions
        if 'group_topic_children' in instruction_data:
            # Extract and save group-based discussions
            group_entries = extract_group_discussions(instruction_data['group_topic_children'], headers)
            print("Extracted group discussion entries")
            for group_entry in group_entries:
                save_messages(group_entry['entries'], group_entry['group_id'])
        else:
            # Extract and save a standalone individual or group-based discussion
            individual_entries = extract_individual_discussion(discussion_url, headers)
            print("Extracted individual discussion entries")
            save_messages(individual_entries)
    else:
        print(f'Error: {instruction_response.text}')

def create_vector_store():
    # Placeholder: vector store creation is not implemented yet
    return None
