Cachoups committed on
Commit
5f421ac
1 Parent(s): 04fa692

Update lib/read_pdf.py

Browse files
Files changed (1) hide show
  1. lib/read_pdf.py +3 -3
lib/read_pdf.py CHANGED
@@ -105,10 +105,10 @@ def extract_and_format_paragraphs(pdf_path):
105
  height = page.height
106
 
107
  header_height = height * 0.075 # Adjust this value based on your PDF
108
- #footer_height = height * 0.1 # Adjust this value based on your PDF
109
 
110
- left_bbox = (0, header_height, width / 2, height) # Left column
111
- right_bbox = (width / 2, header_height, width, height)
112
  # Extract text from the left column
113
  left_column_text = page.within_bbox(left_bbox).extract_text() or ""
114
  # Clean the left column text
 
105
  height = page.height
106
 
107
  header_height = height * 0.075 # Adjust this value based on your PDF
108
+ footer_height = height * 0.1 # Adjust this value based on your PDF
109
 
110
+ left_bbox = (0, header_height, width / 2, height - footer_height) # Left column
111
+ right_bbox = (width / 2, header_height, width, height - footer_height)
112
  # Extract text from the left column
113
  left_column_text = page.within_bbox(left_bbox).extract_text() or ""
114
  # Clean the left column text