Spaces:
Sleeping
Sleeping
Update lib/read_pdf.py
Browse files
- lib/read_pdf.py +3 -3
lib/read_pdf.py
CHANGED
@@ -105,10 +105,10 @@ def extract_and_format_paragraphs(pdf_path):
|
|
105 |
height = page.height
|
106 |
|
107 |
header_height = height * 0.075 # Adjust this value based on your PDF
|
108 |
-
|
109 |
|
110 |
-
left_bbox = (0, header_height, width / 2, height) # Left column
|
111 |
-
right_bbox = (width / 2, header_height, width, height)
|
112 |
# Extract text from the left column
|
113 |
left_column_text = page.within_bbox(left_bbox).extract_text() or ""
|
114 |
# Clean the left column text
|
|
|
105 |
height = page.height
|
106 |
|
107 |
header_height = height * 0.075 # Adjust this value based on your PDF
|
108 |
+
footer_height = height * 0.1 # Adjust this value based on your PDF
|
109 |
|
110 |
+
left_bbox = (0, header_height, width / 2, height - footer_height) # Left column
|
111 |
+
right_bbox = (width / 2, header_height, width, height - footer_height)
|
112 |
# Extract text from the left column
|
113 |
left_column_text = page.within_bbox(left_bbox).extract_text() or ""
|
114 |
# Clean the left column text
|