LLM4SciLit / src /utils.py
tommymarto's picture
first attempt to hf spaces
7f7b773
raw
history blame
1.1 kB
from pathlib import Path
from langchain.schema import Document
import json
from typing import Iterable
def save_docs_to_jsonl(array:Iterable[Document], path: str)->None:
    """
    Save an array of Document objects to a JSONL file.

    Each document is serialized with its own ``json()`` method and written
    as one line of ``<path>/data.jsonl``. An existing file is overwritten.

    Args:
        array: An iterable of Document objects.
        path: The path to the folder where the output file should be.
            The folder (and any missing parents) is created if needed.

    Returns:
        None
    """
    # Derive both the directory and the file from the same Path object so
    # they can never disagree (string formatting turned an empty ``path``
    # into "/data.jsonl", i.e. the filesystem root).
    folder = Path(path)
    folder.mkdir(parents=True, exist_ok=True)

    with (folder / "data.jsonl").open('w', encoding='utf-8') as jsonl_file:
        # One serialized document per line; the generator keeps this
        # streaming, so ``array`` may be any (even lazy) iterable.
        jsonl_file.writelines(doc.json() + '\n' for doc in array)
def load_docs_from_jsonl(path: str) -> Iterable[Document]:
    """
    Load an array of Document objects from a JSONL file.

    Reads ``<path>/data.jsonl``, parses every non-blank line as a JSON
    object and passes its fields as keyword arguments to ``Document``.

    Args:
        path: The path to the folder where the input file is.

    Returns:
        A list of Document objects, in file order.

    Raises:
        FileNotFoundError: If ``<path>/data.jsonl`` does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Build the location with pathlib so it matches how the folder itself
    # is interpreted (an empty ``path`` means the current directory, not
    # the filesystem root).
    source = Path(path) / "data.jsonl"

    with source.open('r', encoding='utf-8') as jsonl_file:
        # Blank or whitespace-only lines (e.g. a trailing newline added by
        # an editor) carry no record; skip them instead of letting
        # json.loads fail on them.
        return [
            Document(**json.loads(line))
            for line in jsonl_file
            if line.strip()
        ]