/
from_strings.py
154 lines (142 loc) · 5.06 KB
/
from_strings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import os
from typing import List, Tuple
from trdg.data_generator import FakeTextDataGenerator
from trdg.utils import load_dict, load_fonts
# support RTL
from arabic_reshaper import ArabicReshaper
from bidi.algorithm import get_display
class GeneratorFromStrings:
    """Iterator that renders samples from a caller-supplied list of strings.

    Every call to ``next()`` passes one string and one font to
    ``FakeTextDataGenerator.generate`` and returns a ``(generated, label)``
    tuple. Strings and fonts are both consumed round-robin, so more samples
    than there are strings can be produced.

    Args:
        strings: Texts to render.
        count: Number of samples to produce before ``StopIteration`` is
            raised. The stop test is an equality check, so a negative value
            (the default ``-1``) never stops.
        fonts: Fonts to cycle through. When empty (or ``None``) the fonts
            are obtained from ``load_fonts(language)``. The default list is
            never stored or mutated.
        language: Passed to ``load_fonts``; the value ``"ckb"`` additionally
            selects the Kurdish reshaper configuration when ``rtl`` is set.
        rtl: When true, the strings are reshaped and reordered for display
            once at construction time; the untouched originals are kept in
            ``orig_strings`` and returned as labels.

    All remaining arguments are stored as attributes and forwarded unchanged
    to ``FakeTextDataGenerator.generate``; see that function for their
    meaning.
    """

    def __init__(
        self,
        strings: List[str],
        count: int = -1,
        fonts: List[str] = [],
        language: str = "en",
        size: int = 32,
        skewing_angle: int = 0,
        random_skew: bool = False,
        blur: int = 0,
        random_blur: bool = False,
        background_type: int = 0,
        distorsion_type: int = 0,
        distorsion_orientation: int = 0,
        is_handwritten: bool = False,
        width: int = -1,
        alignment: int = 1,
        text_color: str = "#282828",
        orientation: int = 0,
        space_width: float = 1.0,
        character_spacing: int = 0,
        margins: Tuple[int, int, int, int] = (5, 5, 5, 5),
        fit: bool = False,
        output_mask: bool = False,
        word_split: bool = False,
        image_dir: str = os.path.join(
            "..", os.path.split(os.path.realpath(__file__))[0], "images"
        ),
        stroke_width: int = 0,
        stroke_fill: str = "#282828",
        image_mode: str = "RGB",
        output_bboxes: int = 0,
        rtl: bool = False,
    ):
        self.count = count
        self.strings = strings

        # Fall back to the language's bundled fonts when none were supplied.
        # len() is used instead of truthiness so any sized sequence works.
        if fonts is None or len(fonts) == 0:
            fonts = load_fonts(language)
        self.fonts = fonts

        self.rtl = rtl
        self.orig_strings = []
        if self.rtl:
            if language == "ckb":
                ar_reshaper_config = {"delete_harakat": True, "language": "Kurdish"}
            else:
                ar_reshaper_config = {"delete_harakat": False}
            self.rtl_shaper = ArabicReshaper(configuration=ar_reshaper_config)
            # Keep the original strings: they are what next() returns as the
            # label, while the reshaped versions are what gets rendered.
            self.orig_strings = self.strings
            self.strings = self.reshape_rtl(self.strings, self.rtl_shaper)

        self.language = language
        self.size = size
        self.skewing_angle = skewing_angle
        self.random_skew = random_skew
        self.blur = blur
        self.random_blur = random_blur
        self.background_type = background_type
        self.distorsion_type = distorsion_type
        self.distorsion_orientation = distorsion_orientation
        self.is_handwritten = is_handwritten
        self.width = width
        self.alignment = alignment
        self.text_color = text_color
        self.orientation = orientation
        self.space_width = space_width
        self.character_spacing = character_spacing
        self.margins = margins
        self.fit = fit
        self.output_mask = output_mask
        self.word_split = word_split
        self.image_dir = image_dir
        self.output_bboxes = output_bboxes
        self.generated_count = 0
        self.stroke_width = stroke_width
        self.stroke_fill = stroke_fill
        self.image_mode = image_mode

    def __iter__(self):
        """Return the generator itself; it is its own iterator."""
        return self

    def __next__(self):
        """Produce the next sample; delegates to ``next()``."""
        return self.next()

    def next(self):
        """Generate one sample and return it together with its label.

        Returns:
            A ``(generated, label)`` tuple where ``generated`` is whatever
            ``FakeTextDataGenerator.generate`` returns and ``label`` is the
            source string (the un-reshaped original when ``rtl`` is set).

        Raises:
            StopIteration: once ``count`` samples have been produced.
            ValueError: if there are no strings or no fonts to cycle through.
        """
        if self.generated_count == self.count:
            raise StopIteration

        # Checked here rather than in __init__ because both attributes are
        # public and may be reassigned between calls. Without these guards
        # the modulo below fails with an opaque ZeroDivisionError.
        if len(self.strings) == 0:
            raise ValueError("GeneratorFromStrings has no strings to generate from")
        if len(self.fonts) == 0:
            raise ValueError("GeneratorFromStrings has no fonts to render with")

        position = self.generated_count  # zero-based index of this sample
        self.generated_count += 1

        text = self.strings[position % len(self.strings)]
        font = self.fonts[position % len(self.fonts)]
        if self.rtl:
            label = self.orig_strings[position % len(self.orig_strings)]
        else:
            label = text

        generated = FakeTextDataGenerator.generate(
            self.generated_count,  # one-based sample index
            text,
            font,
            None,  # presumably the output directory (nothing written) -- TODO confirm
            self.size,
            None,  # presumably the file extension -- TODO confirm
            self.skewing_angle,
            self.random_skew,
            self.blur,
            self.random_blur,
            self.background_type,
            self.distorsion_type,
            self.distorsion_orientation,
            self.is_handwritten,
            0,  # presumably the file-name format -- TODO confirm
            self.width,
            self.alignment,
            self.text_color,
            self.orientation,
            self.space_width,
            self.character_spacing,
            self.margins,
            self.fit,
            self.output_mask,
            self.word_split,
            self.image_dir,
            self.stroke_width,
            self.stroke_fill,
            self.image_mode,
            self.output_bboxes,
        )
        return generated, label

    def reshape_rtl(self, strings: list, rtl_shaper: "ArabicReshaper") -> list:
        """Return reshaped, display-ordered copies of ``strings``.

        Each string is passed through ``rtl_shaper.reshape`` and then through
        ``get_display``. The input list is not modified.
        """
        return [get_display(rtl_shaper.reshape(string)) for string in strings]
if __name__ == "__main__":
    # Smoke test: build the generator defined in this module from a single
    # string and pull one sample from it.
    s = GeneratorFromStrings(["test"])
    next(s)