3 #include "testing/testing.h"
29 {
"You should see the Greek word 'kosme': \"\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5\" |",
30 "You should see the Greek word 'kosme': \"\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5\" |",
"\x00"},
36 {
"2.1.1 1 byte (U-00000000): \"\x00\" |",
37 "2.1.1 1 byte (U-00000000): \"\" |",
"\x01"},
38 {
"2.1.2 2 bytes (U-00000080): \"\xc2\x80\" |",
39 "2.1.2 2 bytes (U-00000080): \"\xc2\x80\" |",
"\x00"},
40 {
"2.1.3 3 bytes (U-00000800): \"\xe0\xa0\x80\" |",
41 "2.1.3 3 bytes (U-00000800): \"\xe0\xa0\x80\" |",
"\x00"},
42 {
"2.1.4 4 bytes (U-00010000): \"\xf0\x90\x80\x80\" |",
43 "2.1.4 4 bytes (U-00010000): \"\xf0\x90\x80\x80\" |",
"\x00"},
44 {
"2.1.5 5 bytes (U-00200000): \"\xf8\x88\x80\x80\x80\" |",
45 "2.1.5 5 bytes (U-00200000): \"\xf8\x88\x80\x80\x80\" |",
"\x00"},
46 {
"2.1.6 6 bytes (U-04000000): \"\xfc\x84\x80\x80\x80\x80\" |",
47 "2.1.6 6 bytes (U-04000000): \"\xfc\x84\x80\x80\x80\x80\" |",
"\x00"},
49 {
"2.2.1 1 byte (U-0000007F): \"\x7f\" |",
50 "2.2.1 1 byte (U-0000007F): \"\x7f\" |",
"\x00"},
51 {
"2.2.2 2 bytes (U-000007FF): \"\xdf\xbf\" |",
52 "2.2.2 2 bytes (U-000007FF): \"\xdf\xbf\" |",
"\x00"},
53 {
"2.2.3 3 bytes (U-0000FFFF): \"\xef\xbf\xbf\" |",
54 "2.2.3 3 bytes (U-0000FFFF): \"\" |",
"\x03"},
55 {
"2.2.4 4 bytes (U-001FFFFF): \"\xf7\xbf\xbf\xbf\" |",
56 "2.2.4 4 bytes (U-001FFFFF): \"\xf7\xbf\xbf\xbf\" |",
"\x00"},
57 {
"2.2.5 5 bytes (U-03FFFFFF): \"\xfb\xbf\xbf\xbf\xbf\" |",
58 "2.2.5 5 bytes (U-03FFFFFF): \"\xfb\xbf\xbf\xbf\xbf\" |",
"\x00"},
59 {
"2.2.6 6 bytes (U-7FFFFFFF): \"\xfd\xbf\xbf\xbf\xbf\xbf\" |",
60 "2.2.6 6 bytes (U-7FFFFFFF): \"\xfd\xbf\xbf\xbf\xbf\xbf\" |",
"\x00"},
62 {
"2.3.1 U-0000D7FF = ed 9f bf = \"\xed\x9f\xbf\" |",
63 "2.3.1 U-0000D7FF = ed 9f bf = \"\xed\x9f\xbf\" |",
"\x00"},
64 {
"2.3.2 U-0000E000 = ee 80 80 = \"\xee\x80\x80\" |",
65 "2.3.2 U-0000E000 = ee 80 80 = \"\xee\x80\x80\" |",
"\x00"},
66 {
"2.3.3 U-0000FFFD = ef bf bd = \"\xef\xbf\xbd\" |",
67 "2.3.3 U-0000FFFD = ef bf bd = \"\xef\xbf\xbd\" |",
"\x00"},
68 {
"2.3.4 U-0010FFFF = f4 8f bf bf = \"\xf4\x8f\xbf\xbf\" |",
69 "2.3.4 U-0010FFFF = f4 8f bf bf = \"\xf4\x8f\xbf\xbf\" |",
"\x00"},
70 {
"2.3.5 U-00110000 = f4 90 80 80 = \"\xf4\x90\x80\x80\" |",
71 "2.3.5 U-00110000 = f4 90 80 80 = \"\xf4\x90\x80\x80\" |",
"\x00"},
76 {
"3.1.1 First continuation byte 0x80: \"\x80\" |",
77 "3.1.1 First continuation byte 0x80: \"\" |",
"\x01"},
78 {
"3.1.2 Last continuation byte 0xbf: \"\xbf\" |",
79 "3.1.2 Last continuation byte 0xbf: \"\" |",
"\x01"},
80 {
"3.1.3 2 continuation bytes: \"\x80\xbf\" |",
81 "3.1.3 2 continuation bytes: \"\" |",
"\x02"},
82 {
"3.1.4 3 continuation bytes: \"\x80\xbf\x80\" |",
83 "3.1.4 3 continuation bytes: \"\" |",
"\x03"},
84 {
"3.1.5 4 continuation bytes: \"\x80\xbf\x80\xbf\" |",
85 "3.1.5 4 continuation bytes: \"\" |",
"\x04"},
86 {
"3.1.6 5 continuation bytes: \"\x80\xbf\x80\xbf\x80\" |",
87 "3.1.6 5 continuation bytes: \"\" |",
"\x05"},
88 {
"3.1.7 6 continuation bytes: \"\x80\xbf\x80\xbf\x80\xbf\" |",
89 "3.1.7 6 continuation bytes: \"\" |",
"\x06"},
90 {
"3.1.8 7 continuation bytes: \"\x80\xbf\x80\xbf\x80\xbf\x80\" |",
91 "3.1.8 7 continuation bytes: \"\" |",
"\x07"},
93 {
"3.1.9 \"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
94 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
95 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
96 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\" |",
97 "3.1.9 \"\" |",
"\x40"},
100 {
"3.2.1 \"\xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf "
101 "\xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \" |",
102 "3.2.1 \" \" |",
"\x20"},
104 {
"3.2.2 \"\xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \" |",
105 "3.2.2 \" \" |",
"\x10"},
107 {
"3.2.3 \"\xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \" |",
108 "3.2.3 \" \" |",
"\x08"},
110 {
"3.2.4 \"\xf8 \xf9 \xfa \xfb \" |",
111 "3.2.4 \" \" |",
"\x04"},
/* Table entry: the input holds the lead bytes 0xfc and 0xfd, each followed by a
 * space; the second string omits both bytes and the third string is 0x02.
 * NOTE(review): this entry carries the same 3.2.4 label as the preceding
 * 0xf8-0xfb entry. It was presumably meant to be 3.2.5; if renamed, the label
 * must change in both strings of the entry so that they still compare equal
 * after stripping. Confirm the buffer used by the test is large enough first. */
113 {
"3.2.4 \"\xfc \xfd \" |",
114 "3.2.4 \" \" |",
"\x02"},
119 {
"3.3.1 2-byte sequence with last byte missing (U+0000): \"\xc0\" |",
120 "3.3.1 2-byte sequence with last byte missing (U+0000): \"\" |",
"\x01"},
121 {
"3.3.2 3-byte sequence with last byte missing (U+0000): \"\xe0\x80\" |",
122 "3.3.2 3-byte sequence with last byte missing (U+0000): \"\" |",
"\x02"},
123 {
"3.3.3 4-byte sequence with last byte missing (U+0000): \"\xf0\x80\x80\" |",
124 "3.3.3 4-byte sequence with last byte missing (U+0000): \"\" |",
"\x03"},
125 {
"3.3.4 5-byte sequence with last byte missing (U+0000): \"\xf8\x80\x80\x80\" |",
126 "3.3.4 5-byte sequence with last byte missing (U+0000): \"\" |",
"\x04"},
127 {
"3.3.5 6-byte sequence with last byte missing (U+0000): \"\xfc\x80\x80\x80\x80\" |",
128 "3.3.5 6-byte sequence with last byte missing (U+0000): \"\" |",
"\x05"},
129 {
"3.3.6 2-byte sequence with last byte missing (U-000007FF): \"\xdf\" |",
130 "3.3.6 2-byte sequence with last byte missing (U-000007FF): \"\" |",
"\x01"},
131 {
"3.3.7 3-byte sequence with last byte missing (U-0000FFFF): \"\xef\xbf\" |",
132 "3.3.7 3-byte sequence with last byte missing (U-0000FFFF): \"\" |",
"\x02"},
133 {
"3.3.8 4-byte sequence with last byte missing (U-001FFFFF): \"\xf7\xbf\xbf\" |",
134 "3.3.8 4-byte sequence with last byte missing (U-001FFFFF): \"\" |",
"\x03"},
135 {
"3.3.9 5-byte sequence with last byte missing (U-03FFFFFF): \"\xfb\xbf\xbf\xbf\" |",
136 "3.3.9 5-byte sequence with last byte missing (U-03FFFFFF): \"\" |",
"\x04"},
137 {
"3.3.10 6-byte sequence with last byte missing (U-7FFFFFFF): \"\xfd\xbf\xbf\xbf\xbf\" |",
138 "3.3.10 6-byte sequence with last byte missing (U-7FFFFFFF): \"\" |",
"\x05"},
141 {
"3.4 \"\xc0\xe0\x80\xf0\x80\x80\xf8\x80\x80\x80\xfc\x80\x80\x80\x80"
142 "\xdf\xef\xbf\xf7\xbf\xbf\xfb\xbf\xbf\xbf\xfd\xbf\xbf\xbf\xbf\""
144 "3.4 \"\" |",
"\x1e"},
147 {
"3.5.1 fe = \"\xfe\" |",
148 "3.5.1 fe = \"\" |",
"\x01"},
149 {
"3.5.2 ff = \"\xff\" |",
150 "3.5.2 ff = \"\" |",
"\x01"},
151 {
"3.5.3 fe fe ff ff = \"\xfe\xfe\xff\xff\" |",
152 "3.5.3 fe fe ff ff = \"\" |",
"\x04"},
172 {
"4.1.1 U+002F = c0 af = \"\xc0\xaf\" |",
173 "4.1.1 U+002F = c0 af = \"\" |",
"\x02"},
174 {
"4.1.2 U+002F = e0 80 af = \"\xe0\x80\xaf\" |",
175 "4.1.2 U+002F = e0 80 af = \"\" |",
"\x03"},
176 {
"4.1.3 U+002F = f0 80 80 af = \"\xf0\x80\x80\xaf\" |",
177 "4.1.3 U+002F = f0 80 80 af = \"\" |",
"\x04"},
178 {
"4.1.4 U+002F = f8 80 80 80 af = \"\xf8\x80\x80\x80\xaf\" |",
179 "4.1.4 U+002F = f8 80 80 80 af = \"\" |",
"\x05"},
180 {
"4.1.5 U+002F = fc 80 80 80 80 af = \"\xfc\x80\x80\x80\x80\xaf\" |",
181 "4.1.5 U+002F = fc 80 80 80 80 af = \"\" |",
"\x06"},
186 {
"4.2.1 U-0000007F = c1 bf = \"\xc1\xbf\" |",
187 "4.2.1 U-0000007F = c1 bf = \"\" |",
"\x02"},
188 {
"4.2.2 U-000007FF = e0 9f bf = \"\xe0\x9f\xbf\" |",
189 "4.2.2 U-000007FF = e0 9f bf = \"\" |",
"\x03"},
190 {
"4.2.3 U-0000FFFF = f0 8f bf bf = \"\xf0\x8f\xbf\xbf\" |",
191 "4.2.3 U-0000FFFF = f0 8f bf bf = \"\" |",
"\x04"},
192 {
"4.2.4 U-001FFFFF = f8 87 bf bf bf = \"\xf8\x87\xbf\xbf\xbf\" |",
193 "4.2.4 U-001FFFFF = f8 87 bf bf bf = \"\" |",
"\x05"},
/* Table entry: six-byte sequence fc 83 bf bf bf bf; the second string omits all
 * six bytes and the third string is 0x06.
 * NOTE(review): the label says U+0000, but the payload bits of this sequence
 * decode to 0x03FFFFFF, and the sibling entries 4.2.1 to 4.2.4 are each labelled
 * with the value their bytes decode to (0x7F, 0x7FF, 0xFFFF, 0x1FFFFF). The label
 * is probably meant to read U-03FFFFFF; confirm before changing, and change both
 * strings together since the label is part of the compared text. */
194 {
"4.2.5 U+0000 = fc 83 bf bf bf bf = \"\xfc\x83\xbf\xbf\xbf\xbf\" |",
195 "4.2.5 U+0000 = fc 83 bf bf bf bf = \"\" |",
"\x06"},
199 {
"4.3.1 U+0000 = c0 80 = \"\xc0\x80\" |",
200 "4.3.1 U+0000 = c0 80 = \"\" |",
"\x02"},
201 {
"4.3.2 U+0000 = e0 80 80 = \"\xe0\x80\x80\" |",
202 "4.3.2 U+0000 = e0 80 80 = \"\" |",
"\x03"},
203 {
"4.3.3 U+0000 = f0 80 80 80 = \"\xf0\x80\x80\x80\" |",
204 "4.3.3 U+0000 = f0 80 80 80 = \"\" |",
"\x04"},
205 {
"4.3.4 U+0000 = f8 80 80 80 80 = \"\xf8\x80\x80\x80\x80\" |",
206 "4.3.4 U+0000 = f8 80 80 80 80 = \"\" |",
"\x05"},
207 {
"4.3.5 U+0000 = fc 80 80 80 80 80 = \"\xfc\x80\x80\x80\x80\x80\" |",
208 "4.3.5 U+0000 = fc 80 80 80 80 80 = \"\" |",
"\x06"},
215 {
"5.1.1 U+D800 = ed a0 80 = \"\xed\xa0\x80\" |",
216 "5.1.1 U+D800 = ed a0 80 = \"\" |",
"\x03"},
217 {
"5.1.2 U+DB7F = ed ad bf = \"\xed\xad\xbf\" |",
218 "5.1.2 U+DB7F = ed ad bf = \"\" |",
"\x03"},
219 {
"5.1.3 U+DB80 = ed ae 80 = \"\xed\xae\x80\" |",
220 "5.1.3 U+DB80 = ed ae 80 = \"\" |",
"\x03"},
221 {
"5.1.4 U+DBFF = ed af bf = \"\xed\xaf\xbf\" |",
222 "5.1.4 U+DBFF = ed af bf = \"\" |",
"\x03"},
223 {
"5.1.5 U+DC00 = ed b0 80 = \"\xed\xb0\x80\" |",
224 "5.1.5 U+DC00 = ed b0 80 = \"\" |",
"\x03"},
225 {
"5.1.6 U+DF80 = ed be 80 = \"\xed\xbe\x80\" |",
226 "5.1.6 U+DF80 = ed be 80 = \"\" |",
"\x03"},
227 {
"5.1.7 U+DFFF = ed bf bf = \"\xed\xbf\xbf\" |",
228 "5.1.7 U+DFFF = ed bf bf = \"\" |",
"\x03"},
230 {
"5.2.1 U+D800 U+DC00 = ed a0 80 ed b0 80 = \"\xed\xa0\x80\xed\xb0\x80\" |",
231 "5.2.1 U+D800 U+DC00 = ed a0 80 ed b0 80 = \"\" |",
"\x06"},
232 {
"5.2.2 U+D800 U+DFFF = ed a0 80 ed bf bf = \"\xed\xa0\x80\xed\xbf\xbf\" |",
233 "5.2.2 U+D800 U+DFFF = ed a0 80 ed bf bf = \"\" |",
"\x06"},
234 {
"5.2.3 U+DB7F U+DC00 = ed ad bf ed b0 80 = \"\xed\xad\xbf\xed\xb0\x80\" |",
235 "5.2.3 U+DB7F U+DC00 = ed ad bf ed b0 80 = \"\" |",
"\x06"},
236 {
"5.2.4 U+DB7F U+DFFF = ed ad bf ed bf bf = \"\xed\xad\xbf\xed\xbf\xbf\" |",
237 "5.2.4 U+DB7F U+DFFF = ed ad bf ed bf bf = \"\" |",
"\x06"},
238 {
"5.2.5 U+DB80 U+DC00 = ed ae 80 ed b0 80 = \"\xed\xae\x80\xed\xb0\x80\" |",
239 "5.2.5 U+DB80 U+DC00 = ed ae 80 ed b0 80 = \"\" |",
"\x06"},
240 {
"5.2.6 U+DB80 U+DFFF = ed ae 80 ed bf bf = \"\xed\xae\x80\xed\xbf\xbf\" |",
241 "5.2.6 U+DB80 U+DFFF = ed ae 80 ed bf bf = \"\" |",
"\x06"},
242 {
"5.2.7 U+DBFF U+DC00 = ed af bf ed b0 80 = \"\xed\xaf\xbf\xed\xb0\x80\" |",
243 "5.2.7 U+DBFF U+DC00 = ed af bf ed b0 80 = \"\" |",
"\x06"},
244 {
"5.2.8 U+DBFF U+DFFF = ed af bf ed bf bf = \"\xed\xaf\xbf\xed\xbf\xbf\" |",
245 "5.2.8 U+DBFF U+DFFF = ed af bf ed bf bf = \"\" |",
"\x06"},
260 {
"5.3.1 U+FFFE = ef bf be = \"\xef\xbf\xbe\" |",
261 "5.3.1 U+FFFE = ef bf be = \"\" |",
"\x03"},
262 {
"5.3.2 U+FFFF = ef bf bf = \"\xef\xbf\xbf\" |",
263 "5.3.2 U+FFFF = ef bf bf = \"\" |",
"\x03"},
267 {
nullptr,
nullptr,
nullptr},
/* Copy the test string into the scratch buffer.
 * NOTE(review): the byte count is sizeof(buff), not the length of tst, so this
 * always reads sizeof(buff) bytes starting at tst. If any table string is shorter
 * than the buffer this reads past the end of that string; confirm, or bound the
 * copy by the string length (including its terminator). */
280 memcpy(buff, tst,
sizeof(buff));
/* Log errors_num and errors_found_num followed by tst and buff.
 * Presumably buff has been stripped by this point; the call that modifies it is
 * not visible in this fragment. */
284 printf(
"[%02d] -> [%02d] \"%s\" -> \"%s\"\n", errors_num, errors_found_num, tst, buff);
/* The buffer must match the expected string for this table row. */
286 EXPECT_STREQ(buff, tst_stripped);
/* Input index and number of elements written so far. */
298 size_t i = 0, result_len = 0;
/* Continue while fewer than str_len bytes were consumed and the current byte is
 * not NUL.
 * NOTE(review): the utf8_as_char32 signature listed in this file declares str_len
 * as const char while i is size_t; a size_t parameter would match the index type.
 * The lengths passed by the callers visible here stay small (at most 12 + 7), so
 * this is latent rather than failing today. Confirm before changing. */
299 while ((i < str_len) && (
str[i] !=
'\0')) {
/* Append c to the output and advance the output count (c is assigned on a line
 * that is not visible in this fragment). */
302 r_result[result_len++] =
c;
/* Size is the number of source bytes, SizeWithPadding the size of the padded copy. */
308 template<
size_t Size,
size_t SizeWithPadding>
/* Zero-initialized buffer of SizeWithPadding bytes. */
311 char utf8_src_with_pad[SizeWithPadding] = {0};
/* Only the first Size bytes come from the source; the remainder keeps its zero fill. */
313 memcpy(utf8_src_with_pad, utf8_src, Size);
/* One output array per decode, Size elements each. */
315 char32_t unicode_dst_a[Size], unicode_dst_b[Size];
/* Pre-fill the first output with 0xff bytes, presumably so that elements the
 * decoder does not write still compare equal between the two outputs. */
317 memset(unicode_dst_a, 0xff,
sizeof(unicode_dst_a));
/* Decode of the unpadded source with length Size. */
318 const size_t index_a =
utf8_as_char32(utf8_src, Size, unicode_dst_a);
/* Two passes over the padded copy: pass 0 passes SizeWithPadding as the length,
 * pass 1 passes Size. */
322 for (
int pass = 0; pass < 2; pass++) {
/* Reset the second output to the same 0xff fill before each pass. */
323 memset(unicode_dst_b, 0xff,
sizeof(unicode_dst_b));
/* Arguments of the padded decode; the call itself starts on a line that is not
 * visible in this fragment. */
325 utf8_src_with_pad, pass ? Size : SizeWithPadding, unicode_dst_b);
/* The first Size elements of both outputs must be identical. */
328 EXPECT_EQ_ARRAY(unicode_dst_a, unicode_dst_b, Size);
/* Run the padded comparison twice for the same source: once with one extra byte
 * and once with seven extra bytes in the padded buffer. */
339 utf8_as_char32_test_compare_with_pad_bytes<Size, Size + 1>(utf8_src);
340 utf8_as_char32_test_compare_with_pad_bytes<Size, Size + 7>(utf8_src);
/* Try every byte value from 0 to 0xff. */
348 for (
int i = 0; i <= 0xff; i++) {
/* Fill the whole source buffer with the current byte value and compare. */
349 memset(utf8_src, i,
sizeof(utf8_src));
350 utf8_as_char32_test_compare<Size>(utf8_src);
/* Rewrite every byte after the first as i plus a multiple of mul, converted to
 * char, then compare. The declaration of mul is not visible in this fragment;
 * presumably it is a loop variable of an enclosing loop. */
355 for (
int ofs = 1; ofs < (int)Size; ofs++) {
356 utf8_src[ofs] = (char)(i + (ofs *
mul));
358 utf8_as_char32_test_compare<Size>(utf8_src);
/* Same as above but stepping downwards from i. */
360 for (
int ofs = 1; ofs < (int)Size; ofs++) {
361 utf8_src[ofs] = (char)(i - (ofs *
mul));
363 utf8_as_char32_test_compare<Size>(utf8_src);
/* 256 further comparisons. The line that refills utf8_src on each iteration is
 * not visible here; presumably it writes random bytes. TODO confirm. */
370 for (
int i = 0; i < 256; i++) {
372 utf8_as_char32_test_compare<Size>(utf8_src);
/* Test case that instantiates the buffer-size test for every Size from 1 to 12. */
377 TEST(
string, Utf8AsUnicodeStep)
381 utf8_as_char32_test_at_buffer_size<1>();
382 utf8_as_char32_test_at_buffer_size<2>();
383 utf8_as_char32_test_at_buffer_size<3>();
384 utf8_as_char32_test_at_buffer_size<4>();
385 utf8_as_char32_test_at_buffer_size<5>();
386 utf8_as_char32_test_at_buffer_size<6>();
387 utf8_as_char32_test_at_buffer_size<7>();
388 utf8_as_char32_test_at_buffer_size<8>();
389 utf8_as_char32_test_at_buffer_size<9>();
390 utf8_as_char32_test_at_buffer_size<10>();
391 utf8_as_char32_test_at_buffer_size<11>();
392 utf8_as_char32_test_at_buffer_size<12>();
EXPECT_EQ(BLI_expr_pylike_eval(expr, nullptr, 0, &result), EXPR_PYLIKE_INVALID)
void BLI_rng_free(struct RNG *rng) ATTR_NONNULL(1)
struct RNG * BLI_rng_new(unsigned int seed)
void BLI_rng_get_char_n(RNG *rng, char *bytes, size_t bytes_len) ATTR_NONNULL(1
unsigned int BLI_str_utf8_as_unicode_step(const char *__restrict p, size_t p_len, size_t *__restrict index) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1
int BLI_str_utf8_invalid_strip(char *str, size_t length) ATTR_NONNULL(1)
void utf8_as_char32_test_at_buffer_size()
static size_t utf8_as_char32(const char *str, const char str_len, char32_t *r_result)
void utf8_as_char32_test_compare_with_pad_bytes(const char utf8_src[Size])
void utf8_as_char32_test_compare(const char utf8_src[Size])
TEST(string, Utf8InvalidBytes)
static const char * utf8_invalid_tests[][3]
static void mul(btAlignedObjectArray< T > &items, const Q &value)