# Extract the text of every page of a PDF and keep it in memory as UTF-8 bytes.
import pymupdf

# Use the document as a context manager so the underlying file is closed as
# soon as extraction finishes, even if reading a page raises.
with pymupdf.open("data/State Machines.pdf") as doc:
    # One UTF-8 encoded bytes object per page yielded by the document.
    texts = [page.get_text().encode("utf-8") for page in doc]

print("Done")