Fix run-on lines
Browse files — Fix the OCR issue where multiple lines get concatenated without any whitespace between them, which garbles the text (you get run-on text). Instead of deleting each </s> token, replace it with a newline.
- processing_florence2.py +1 -1
processing_florence2.py
CHANGED
@@ -324,7 +324,7 @@ class Florence2Processor(ProcessorMixin):
|
|
324 |
if task_answer_post_processing_type == 'pure_text':
|
325 |
final_answer = task_answer
|
326 |
# remove the special tokens
|
327 |
-
final_answer = final_answer.replace('<s>', '').replace('</s>', '')
|
328 |
elif task_answer_post_processing_type in ['od', 'description_with_bboxes', 'bboxes']:
|
329 |
od_instances = task_answer
|
330 |
bboxes_od = [_od_instance['bbox'] for _od_instance in od_instances]
|
|
|
324 |
if task_answer_post_processing_type == 'pure_text':
|
325 |
final_answer = task_answer
|
326 |
# remove the special tokens
|
327 |
+
final_answer = final_answer.replace('<s>', '').replace('</s>', '\n')
|
328 |
elif task_answer_post_processing_type in ['od', 'description_with_bboxes', 'bboxes']:
|
329 |
od_instances = task_answer
|
330 |
bboxes_od = [_od_instance['bbox'] for _od_instance in od_instances]
|