milekpl committed on
Commit
e83cf61
1 Parent(s): 6e0aa88

Fix run-on lines

Browse files

Fix an OCR post-processing issue in which multiple lines are concatenated without any whitespace between them, which garbles the output (you get run-on text). Instead of simply removing the </s> token, replace it with a newline.

Files changed (1) hide show
  1. processing_florence2.py +1 -1
processing_florence2.py CHANGED
@@ -324,7 +324,7 @@ class Florence2Processor(ProcessorMixin):
324
  if task_answer_post_processing_type == 'pure_text':
325
  final_answer = task_answer
326
  # remove the special tokens
327
- final_answer = final_answer.replace('<s>', '').replace('</s>', '')
328
  elif task_answer_post_processing_type in ['od', 'description_with_bboxes', 'bboxes']:
329
  od_instances = task_answer
330
  bboxes_od = [_od_instance['bbox'] for _od_instance in od_instances]
 
324
  if task_answer_post_processing_type == 'pure_text':
325
  final_answer = task_answer
326
  # remove the special tokens
327
+ final_answer = final_answer.replace('<s>', '').replace('</s>', '\n')
328
  elif task_answer_post_processing_type in ['od', 'description_with_bboxes', 'bboxes']:
329
  od_instances = task_answer
330
  bboxes_od = [_od_instance['bbox'] for _od_instance in od_instances]