feat: more robust generation script
Browse files- font_dataset/font.py +18 -1
- font_dataset/layout.py +7 -2
- font_ds_generate_script.py +31 -11
font_dataset/font.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
import yaml
|
2 |
import os
|
|
|
|
|
3 |
|
4 |
|
5 |
from .utils import get_files
|
@@ -37,7 +39,7 @@ def load_fonts(config_path="configs/font.yml"):
|
|
37 |
if rule is not None and not rule(file):
|
38 |
print("skip: " + file)
|
39 |
continue
|
40 |
-
font_list.append(DSFont(file, spec["language"]))
|
41 |
|
42 |
font_list.sort(key=lambda x: x.path)
|
43 |
|
@@ -51,3 +53,18 @@ def load_fonts(config_path="configs/font.yml"):
|
|
51 |
return False
|
52 |
|
53 |
return font_list, exclusion_rule
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import yaml
|
2 |
import os
|
3 |
+
from typing import Dict
|
4 |
+
import pickle
|
5 |
|
6 |
|
7 |
from .utils import get_files
|
|
|
39 |
if rule is not None and not rule(file):
|
40 |
print("skip: " + file)
|
41 |
continue
|
42 |
+
font_list.append(DSFont(str(file).replace("\\", "/"), spec["language"]))
|
43 |
|
44 |
font_list.sort(key=lambda x: x.path)
|
45 |
|
|
|
53 |
return False
|
54 |
|
55 |
return font_list, exclusion_rule
|
56 |
+
|
57 |
+
|
58 |
+
def load_font_with_exclusion(
    config_path="configs/font.yml", cache_path="font_list_cache.bin"
) -> Dict:
    """Build (or load from cache) the mapping from font path to font index.

    If ``cache_path`` exists, its pickled content is returned as-is and the
    font configuration is not consulted. Otherwise the fonts are loaded via
    ``load_fonts``, every font matched by the exclusion rule is dropped, the
    remainder is sorted by path, and each font path is mapped to its position
    in that sorted list. The result is then pickled to ``cache_path``.

    Args:
        config_path: Path of the font configuration file handed to
            ``load_fonts``.
        cache_path: Path of the pickle cache file to read from / write to.

    Returns:
        A dict mapping ``font.path`` to its integer index.
    """
    if os.path.exists(cache_path):
        # NOTE(review): pickle must only be used on trusted files; the cache
        # is expected to be a file this function wrote itself. The cache is
        # never invalidated here, so delete it after changing the font config.
        with open(cache_path, "rb") as f:
            return pickle.load(f)

    font_list, exclusion_rule = load_fonts(config_path)
    font_list = [font for font in font_list if not exclusion_rule(font)]
    font_list.sort(key=lambda x: x.path)
    print("font count: " + str(len(font_list)))

    ret = {font.path: index for index, font in enumerate(font_list)}
    # Write to the caller-supplied cache path (previously hard-coded, which
    # made a non-default cache_path read-only and never populated).
    with open(cache_path, "wb") as f:
        pickle.dump(ret, f)
    return ret
|
font_dataset/layout.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
from typing import Tuple
|
2 |
|
3 |
-
__all__ = ["generate_font_image"]
|
4 |
|
5 |
|
6 |
epislon = 1e-6
|
@@ -237,6 +237,11 @@ def RGB2RGBA(color):
|
|
237 |
return color + (255,)
|
238 |
|
239 |
|
|
|
|
|
|
|
|
|
|
|
240 |
def generate_font_image(
|
241 |
img_path: str, font: DSFont, corpus_manager: CorpusGeneratorManager
|
242 |
) -> Tuple[Image.Image, FontLabel]:
|
@@ -368,7 +373,7 @@ def generate_font_image(
|
|
368 |
text_size = int(render_calculation_size * render_height / render_calculation_height)
|
369 |
|
370 |
if text_size < text_size_min:
|
371 |
-
raise
|
372 |
|
373 |
render_width_no_rotation = int(
|
374 |
render_calculation_width_no_rotation / render_calculation_height * render_height
|
|
|
1 |
from typing import Tuple
|
2 |
|
3 |
+
__all__ = ["generate_font_image", "TextSizeTooSmallException"]
|
4 |
|
5 |
|
6 |
epislon = 1e-6
|
|
|
237 |
return color + (255,)
|
238 |
|
239 |
|
240 |
+
class TextSizeTooSmallException(Exception):
    """Signals that the text size computed for rendering is too small."""

    def __init__(self):
        # Fixed message; the exception carries no extra context.
        message = "Text Size Too Small"
        super().__init__(message)
|
243 |
+
|
244 |
+
|
245 |
def generate_font_image(
|
246 |
img_path: str, font: DSFont, corpus_manager: CorpusGeneratorManager
|
247 |
) -> Tuple[Image.Image, FontLabel]:
|
|
|
373 |
text_size = int(render_calculation_size * render_height / render_calculation_height)
|
374 |
|
375 |
if text_size < text_size_min:
|
376 |
+
raise TextSizeTooSmallException()
|
377 |
|
378 |
render_width_no_rotation = int(
|
379 |
render_calculation_width_no_rotation / render_calculation_height * render_height
|
font_ds_generate_script.py
CHANGED
@@ -5,8 +5,8 @@ import os
|
|
5 |
import concurrent.futures
|
6 |
from tqdm import tqdm
|
7 |
import time
|
8 |
-
from font_dataset.font import load_fonts
|
9 |
-
from font_dataset.layout import generate_font_image
|
10 |
from font_dataset.text import CorpusGeneratorManager, UnqualifiedFontException
|
11 |
from font_dataset.background import background_image_generator
|
12 |
|
@@ -39,9 +39,27 @@ corpus_manager = CorpusGeneratorManager()
|
|
39 |
images = background_image_generator()
|
40 |
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
def generate_dataset(dataset_type: str, cnt: int):
|
43 |
-
|
44 |
-
os.makedirs(
|
45 |
|
46 |
def _generate_single(args):
|
47 |
i, j, font = args
|
@@ -61,8 +79,8 @@ def generate_dataset(dataset_type: str, cnt: int):
|
|
61 |
image_file_name = f"font_{i}_img_{j}.jpg"
|
62 |
label_file_name = f"font_{i}_img_{j}.bin"
|
63 |
|
64 |
-
image_file_path = os.path.join(
|
65 |
-
label_file_path = os.path.join(
|
66 |
|
67 |
# detect cache
|
68 |
if os.path.exists(image_file_path) and os.path.exists(label_file_path):
|
@@ -79,14 +97,16 @@ def generate_dataset(dataset_type: str, cnt: int):
|
|
79 |
pickle.dump(label, open(label_file_path, "wb"))
|
80 |
return
|
81 |
except UnqualifiedFontException as e:
|
82 |
-
|
83 |
-
|
84 |
-
with open(unqualified_log_file_name, "a+") as f:
|
85 |
-
f.write(f"{e.font.path}\n")
|
86 |
return
|
87 |
-
except
|
88 |
traceback.print_exc()
|
89 |
continue
|
|
|
|
|
|
|
|
|
90 |
|
91 |
work_list = []
|
92 |
|
|
|
5 |
import concurrent.futures
|
6 |
from tqdm import tqdm
|
7 |
import time
|
8 |
+
from font_dataset.font import load_fonts, DSFont
|
9 |
+
from font_dataset.layout import generate_font_image, TextSizeTooSmallException
|
10 |
from font_dataset.text import CorpusGeneratorManager, UnqualifiedFontException
|
11 |
from font_dataset.background import background_image_generator
|
12 |
|
|
|
39 |
images = background_image_generator()
|
40 |
|
41 |
|
42 |
+
def add_exclusion(font: DSFont, reason: str, dataset_base_dir: str, i: int, j: int):
    """Exclude a font at runtime and remove the samples already written for it.

    The font path is appended to ``runtime_exclusion_list`` and logged (with
    the reason) to ``unqualified_log_file_name``. Then the image/label files
    ``font_{i}_img_{k}.jpg`` / ``.bin`` for ``k`` in ``0..j`` are deleted from
    ``dataset_base_dir``.

    Args:
        font: The font being excluded.
        reason: Human-readable reason, written to stdout and the log file.
        dataset_base_dir: Directory that holds the generated samples.
        i: Index of the font (first number in the sample file names).
        j: Index of the image being generated when the failure occurred;
            samples ``0..j`` of this font are removed.
    """
    print(f"Excluded font: {font.path}, reason: {reason}")
    runtime_exclusion_list.append(font.path)
    with open(unqualified_log_file_name, "a+") as f:
        f.write(f"{font.path} # {reason}\n")

    # Iterate over the image index with its own variable. The previous loop
    # reused ``i`` and kept ``j`` fixed, which deleted image ``j`` of fonts
    # ``0..j`` instead of images ``0..j`` of font ``i``.
    for k in range(j + 1):
        image_file_name = f"font_{i}_img_{k}.jpg"
        label_file_name = f"font_{i}_img_{k}.bin"

        image_file_path = os.path.join(dataset_base_dir, image_file_name)
        label_file_path = os.path.join(dataset_base_dir, label_file_name)

        for stale_path in (image_file_path, label_file_path):
            # Remove-and-ignore-missing avoids the check-then-delete race
            # when several workers clean up at the same time.
            try:
                os.remove(stale_path)
            except FileNotFoundError:
                pass
|
58 |
+
|
59 |
+
|
60 |
def generate_dataset(dataset_type: str, cnt: int):
|
61 |
+
dataset_base_dir = os.path.join(dataset_path, dataset_type)
|
62 |
+
os.makedirs(dataset_base_dir, exist_ok=True)
|
63 |
|
64 |
def _generate_single(args):
|
65 |
i, j, font = args
|
|
|
79 |
image_file_name = f"font_{i}_img_{j}.jpg"
|
80 |
label_file_name = f"font_{i}_img_{j}.bin"
|
81 |
|
82 |
+
image_file_path = os.path.join(dataset_base_dir, image_file_name)
|
83 |
+
label_file_path = os.path.join(dataset_base_dir, label_file_name)
|
84 |
|
85 |
# detect cache
|
86 |
if os.path.exists(image_file_path) and os.path.exists(label_file_path):
|
|
|
97 |
pickle.dump(label, open(label_file_path, "wb"))
|
98 |
return
|
99 |
except UnqualifiedFontException as e:
|
100 |
+
traceback.print_exc()
|
101 |
+
add_exclusion(font, "unqualified font", dataset_base_dir, i, j)
|
|
|
|
|
102 |
return
|
103 |
+
except TextSizeTooSmallException as e:
|
104 |
traceback.print_exc()
|
105 |
continue
|
106 |
+
except Exception as e:
|
107 |
+
traceback.print_exc()
|
108 |
+
add_exclusion(font, f"other: {repr(e)}", dataset_base_dir, i, j)
|
109 |
+
return
|
110 |
|
111 |
work_list = []
|
112 |
|