|
__all__ = ["generate"] |
|
|
|
|
|
epislon = 1e-6 |
|
render_calculation_size = 128 |
|
|
|
|
|
ltr_ratio = 0.5 |
|
ttb_ratio = 0.5 |
|
|
|
assert ltr_ratio + ttb_ratio - 1 < epislon |
|
|
|
|
|
short_ratio = 0.1 |
|
median_ratio = 0.6 |
|
long_ratio = 0.3 |
|
|
|
from .text import CorpusGenerationConfig |
|
|
|
short_condition = CorpusGenerationConfig( |
|
min_num_line=1, max_num_line=1, min_num_char_per_line=2, max_num_char_per_line=5 |
|
) |
|
|
|
median_condition = CorpusGenerationConfig( |
|
min_num_line=1, max_num_line=4, min_num_char_per_line=1, max_num_char_per_line=20 |
|
) |
|
|
|
long_condition = CorpusGenerationConfig( |
|
min_num_line=5, max_num_line=10, min_num_char_per_line=1, max_num_char_per_line=20 |
|
) |
|
|
|
assert short_ratio + median_ratio + long_ratio - 1 < epislon |
|
|
|
|
|
gray_ratio = 0.3 |
|
color_ratio = 0.7 |
|
|
|
|
|
pure_color_ratio = 0.5 |
|
stroke_color_ratio = 0.5 |
|
|
|
assert pure_color_ratio + stroke_color_ratio - 1 < epislon |
|
|
|
|
|
stroke_width_max_ratio = 0.25 |
|
|
|
assert gray_ratio + color_ratio - 1 < epislon |
|
|
|
|
|
clip_width_max_ratio = 0.7 |
|
clip_width_min_ratio = 0.1 |
|
clip_width_height_min_ratio = 0.75 |
|
clip_width_height_max_ratio = 1.25 |
|
|
|
|
|
text_longer_max_ratio = 1.0 |
|
text_longer_min_ratio = 0.6 |
|
|
|
|
|
line_spacing_max_ratio = 1.5 |
|
line_spacing_min_ratio = 0.0 |
|
|
|
|
|
no_rotation_ratio = 0.3 |
|
rotation_ratio = 0.7 |
|
|
|
assert no_rotation_ratio + rotation_ratio - 1 < epislon |
|
|
|
|
|
rotation_max_angle = 30 |
|
|
|
|
|
cjk_ratio = 3 |
|
|
|
cjk_distribution = { |
|
"ja": 0.3, |
|
"ko": 0.2, |
|
"zh-Hans": 0.3, |
|
"zh-Hant": 0.07, |
|
"zh-Hant-HK": 0.06, |
|
"zh-Hant-TW": 0.06, |
|
} |
|
|
|
assert sum(cjk_distribution.values()) - 1 < epislon |
|
|
|
train_cnt = 100 |
|
val_cnt = 10 |
|
test_cnt = 30 |
|
|
|
train_cnt_cjk = int(train_cnt * cjk_ratio) |
|
val_cnt_cjk = int(val_cnt * cjk_ratio) |
|
test_cnt_cjk = int(test_cnt * cjk_ratio) |
|
|
|
|
|
import math |
|
import random |
|
from PIL import Image, ImageDraw, ImageFont |
|
from .fontlabel import FontLabel |
|
from .font import DSFont |
|
|
|
|
|
def random_color(): |
|
return random.randint(0, 255), random.randint(0, 255), random.randint(0, 255) |
|
|
|
|
|
def render_bbox( |
|
draw, |
|
xy, |
|
text: str, |
|
font=None, |
|
anchor=None, |
|
spacing=4, |
|
align="left", |
|
direction=None, |
|
features=None, |
|
language=None, |
|
stroke_width=0, |
|
embedded_color=False, |
|
): |
|
if ("\n" in text or "\r" in text) and direction == "ttb": |
|
lines = text.splitlines(keepends=False) |
|
height = 0 |
|
width = 0 |
|
x = 0 |
|
y = 0 |
|
for i, line in enumerate(lines): |
|
bbox = draw.textbbox( |
|
(0, 0), |
|
line, |
|
font, |
|
anchor, |
|
spacing, |
|
align, |
|
direction, |
|
features, |
|
language, |
|
stroke_width, |
|
embedded_color, |
|
) |
|
height = max(height, bbox[3] - bbox[1]) |
|
width += bbox[2] - bbox[0] |
|
if i > 0: |
|
width += spacing |
|
else: |
|
x = bbox[0] |
|
y = bbox[1] |
|
return x, y, x + width, y + height |
|
else: |
|
return draw.textbbox( |
|
xy, |
|
text, |
|
font, |
|
anchor, |
|
spacing, |
|
align, |
|
direction, |
|
features, |
|
language, |
|
stroke_width, |
|
embedded_color, |
|
) |
|
|
|
|
|
def render_text( |
|
draw, |
|
xy, |
|
text, |
|
fill=None, |
|
font=None, |
|
anchor=None, |
|
spacing=4, |
|
align="left", |
|
direction=None, |
|
features=None, |
|
language=None, |
|
stroke_width=0, |
|
stroke_fill=None, |
|
embedded_color=False, |
|
*args, |
|
**kwargs, |
|
): |
|
if ("\n" in text or "\r" in text) and direction == "ttb": |
|
lines = text.splitlines(keepends=False) |
|
margin_x = 0 |
|
x, y = xy |
|
for i, line in enumerate(lines): |
|
bbox = draw.textbbox( |
|
(0, 0), |
|
line, |
|
font, |
|
anchor, |
|
spacing, |
|
align, |
|
direction, |
|
features, |
|
language, |
|
stroke_width, |
|
embedded_color, |
|
) |
|
draw.text( |
|
(x + margin_x, y), |
|
line, |
|
fill, |
|
font, |
|
anchor, |
|
spacing, |
|
align, |
|
direction, |
|
features, |
|
language, |
|
stroke_width, |
|
stroke_fill, |
|
embedded_color, |
|
*args, |
|
**kwargs, |
|
) |
|
margin_x += bbox[2] - bbox[0] |
|
margin_x += spacing |
|
else: |
|
draw.text( |
|
xy, |
|
text, |
|
fill, |
|
font, |
|
anchor, |
|
spacing, |
|
align, |
|
direction, |
|
features, |
|
language, |
|
stroke_width, |
|
stroke_fill, |
|
embedded_color, |
|
*args, |
|
**kwargs, |
|
) |
|
|
|
|
|
def RGB2RGBA(color): |
|
if color is None: |
|
return None |
|
return color + (255,) |
|
|
|
|
|
def generate(img_path: str, font: DSFont) -> tuple[Image.Image, FontLabel]: |
|
while True: |
|
try: |
|
im = Image.open(img_path) |
|
|
|
width, height = im.size |
|
clip_width = random.randint( |
|
int(width * clip_width_min_ratio), int(width * clip_width_max_ratio) |
|
) |
|
clip_height = random.randint( |
|
int(clip_width * clip_width_height_min_ratio), |
|
int(clip_width * clip_width_height_max_ratio), |
|
) |
|
if clip_height > height: |
|
clip_height = height |
|
clip_x = random.randint(0, width - clip_width) |
|
clip_y = random.randint(0, height - clip_height) |
|
im = im.crop((clip_x, clip_y, clip_x + clip_width, clip_y + clip_height)) |
|
|
|
|
|
render_language = font.language |
|
if render_language == "CJK": |
|
render_language = random.choices( |
|
list(cjk_distribution.keys()), list(cjk_distribution.values()) |
|
)[0] |
|
|
|
|
|
if random.random() < ltr_ratio: |
|
text_direction = "ltr" |
|
else: |
|
text_direction = "ttb" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
text = "测试文本\n第二行" |
|
|
|
|
|
if random.random() < gray_ratio: |
|
text_color = random.randint(0, 255) |
|
text_color = (text_color, text_color, text_color) |
|
|
|
stroke_ratio = 0 |
|
stroke_color = None |
|
im = im.convert("L") |
|
else: |
|
text_color = random_color() |
|
|
|
if random.random() < pure_color_ratio: |
|
stroke_ratio = 0 |
|
stroke_color = None |
|
else: |
|
stroke_ratio = random.random() * stroke_width_max_ratio |
|
stroke_color = random_color() |
|
|
|
|
|
line_spacing_ratio = ( |
|
random.random() * (line_spacing_max_ratio - line_spacing_min_ratio) |
|
+ line_spacing_min_ratio |
|
) |
|
|
|
|
|
render_calculation_stroke_width = int( |
|
stroke_ratio * render_calculation_size |
|
) |
|
render_calculation_line_spacing = int( |
|
line_spacing_ratio * render_calculation_size |
|
) |
|
|
|
pil_font = ImageFont.truetype(font.path, size=render_calculation_size) |
|
text_bbox = render_bbox( |
|
ImageDraw.Draw(im), |
|
(0, 0), |
|
text, |
|
font=pil_font, |
|
direction=text_direction, |
|
spacing=render_calculation_line_spacing, |
|
stroke_width=render_calculation_stroke_width, |
|
language=render_language, |
|
) |
|
( |
|
render_calculation_width_no_rotation, |
|
render_calculation_height_no_rotation, |
|
) = (text_bbox[2] - text_bbox[0], text_bbox[3] - text_bbox[1]) |
|
render_calculation_font_x_no_rotation = text_bbox[0] |
|
render_calculation_font_y_no_rotation = text_bbox[1] |
|
|
|
if random.random() < no_rotation_ratio: |
|
render_angle = 0 |
|
|
|
render_calculation_width = render_calculation_width_no_rotation |
|
render_calculation_height = render_calculation_height_no_rotation |
|
else: |
|
render_angle = random.randint(-rotation_max_angle, rotation_max_angle) |
|
|
|
render_calculation_width = int( |
|
render_calculation_width_no_rotation |
|
* math.cos(math.radians(abs(render_angle))) |
|
+ render_calculation_height_no_rotation |
|
* math.sin(math.radians(abs(render_angle))) |
|
) |
|
render_calculation_height = int( |
|
render_calculation_width_no_rotation |
|
* math.sin(math.radians(abs(render_angle))) |
|
+ render_calculation_height_no_rotation |
|
* math.cos(math.radians(abs(render_angle))) |
|
) |
|
|
|
|
|
render_ratio = ( |
|
random.random() * (text_longer_max_ratio - text_longer_min_ratio) |
|
+ text_longer_min_ratio |
|
) |
|
if ( |
|
render_calculation_width / render_calculation_height |
|
< clip_width / clip_height |
|
): |
|
|
|
render_height = int(clip_height * render_ratio) |
|
render_width = int( |
|
render_calculation_width / render_calculation_height * render_height |
|
) |
|
else: |
|
|
|
render_width = int(clip_width * render_ratio) |
|
render_height = int( |
|
render_calculation_height / render_calculation_width * render_width |
|
) |
|
|
|
|
|
text_size = int( |
|
render_calculation_size * render_height / render_calculation_height |
|
) |
|
render_width_no_rotation = int( |
|
render_calculation_width_no_rotation |
|
/ render_calculation_height |
|
* render_height |
|
) |
|
render_height_no_rotation = int( |
|
render_calculation_height_no_rotation |
|
/ render_calculation_height |
|
* render_height |
|
) |
|
render_font_x_no_rotation = int( |
|
render_calculation_font_x_no_rotation |
|
/ render_calculation_height |
|
* render_height |
|
) |
|
render_font_y_no_rotation = int( |
|
render_calculation_font_y_no_rotation |
|
/ render_calculation_height |
|
* render_height |
|
) |
|
stroke_width = int(text_size * stroke_ratio) |
|
line_spacing = int(text_size * line_spacing_ratio) |
|
|
|
|
|
render_x = random.randint(0, clip_width - render_width) |
|
render_y = random.randint(0, clip_height - render_height) |
|
|
|
font_image = Image.new( |
|
"RGBA", |
|
(render_width_no_rotation, render_height_no_rotation), |
|
(0, 0, 0, 0), |
|
) |
|
pil_font = ImageFont.truetype(font.path, size=text_size) |
|
render_text( |
|
ImageDraw.Draw(font_image), |
|
(-render_font_x_no_rotation, -render_font_y_no_rotation), |
|
text, |
|
font=pil_font, |
|
fill=RGB2RGBA(text_color), |
|
direction=text_direction, |
|
spacing=line_spacing, |
|
stroke_width=stroke_width, |
|
stroke_fill=RGB2RGBA(stroke_color), |
|
language=render_language, |
|
) |
|
if rotation_max_angle != 0: |
|
font_image = font_image.rotate( |
|
render_angle, expand=True, fillcolor=(0, 0, 0, 0) |
|
) |
|
|
|
im.paste(font_image, (render_x, render_y), font_image) |
|
return im, FontLabel( |
|
clip_width, |
|
clip_height, |
|
text, |
|
font, |
|
text_color, |
|
text_size, |
|
text_direction, |
|
stroke_width, |
|
stroke_color, |
|
line_spacing, |
|
render_language, |
|
(render_x, render_y, render_width, render_height), |
|
render_angle, |
|
) |
|
except Exception as e: |
|
print(e) |
|
|