Blender  V3.3
msgfmt.c
Go to the documentation of this file.
1 /* SPDX-License-Identifier: GPL-2.0-or-later
2  * Copyright 2017 Blender Foundation. All rights reserved. */
3 
4 /*
5  * Based on C++ version by Sergey Sharybin <sergey.vfx@gmail.com>.
6  * Based on Python script msgfmt.py from Python source code tree, which was written by
7  * Martin v. Löwis <loewis@informatik.hu-berlin.de>
8  *
9  * Generate binary message catalog from textual translation description.
10  *
11  * This program converts a textual Uniforum-style message catalog (.po file)
12  * into a binary GNU catalog (.mo file).
13  * This is essentially the same function as the GNU msgfmt program,
14  * however, it is a simpler implementation.
15  *
16  * Usage: msgfmt input.po output.mo
17  */
18 
19 #include <stdlib.h>
20 #include <string.h>
21 
22 #include "BLI_dynstr.h"
23 #include "BLI_fileops.h"
24 #include "BLI_ghash.h"
25 #include "BLI_linklist.h"
26 #include "BLI_memarena.h"
27 #include "BLI_utildefines.h"
28 
29 #include "MEM_guardedalloc.h"
30 
31 /* Stupid stub necessary because some BLI files include winstuff.h, which uses G a bit... */
32 #ifdef WIN32
33 typedef struct Global {
34  void *dummy;
35 } Global;
36 
37 Global G;
38 #endif
39 
40 /* We cannot use NULL char until ultimate step, would give nightmare to our C string processing...
41  * Using one of the UTF-8 invalid bytes (as per our BLI string_utf8.c) */
42 #define NULLSEP_STR "\xff"
43 #define NULLSEP_CHR '\xff'
44 
45 typedef enum {
50 } eSectionType;
51 
52 typedef struct Message {
56 
57  bool is_fuzzy;
59 
/* True for the characters `trim` strips: space, tab, carriage return and line feed. */
static bool trim_is_space(const char c)
{
  return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}

/**
 * Strip leading and trailing white-space (space, tab, CR, LF) from \a str, in place.
 *
 * Leading white-space is skipped by advancing the returned pointer, trailing white-space is
 * cut by writing a terminator into the buffer, so \a str is modified.
 *
 * \return A pointer inside \a str to the first non white-space character
 * (an empty string when \a str is empty or only contains white-space).
 */
static char *trim(char *str)
{
  /* Skip leading white-space, the terminator is not a space so this stops at the end. */
  while (trim_is_space(*str)) {
    str++;
  }

  /* Scan backward by length rather than by pointer,
   * so nothing is ever computed before the start of the buffer. */
  size_t len = strlen(str);
  while (len > 0 && trim_is_space(str[len - 1])) {
    len--;
  }
  str[len] = '\0';

  return str;
}
82 
/**
 * Resolve backslash escapes in \a str in place, then strip one pair of enclosing double quotes.
 *
 * `\\`, `\n` and `\t` become a backslash, a line feed and a tab. Any other escaped character is
 * kept without its backslash, and a lone backslash at the very end of the string is dropped.
 *
 * \return \a str, or `str + 1` when the unescaped text started and ended with a double quote
 * (the closing quote is then overwritten by the terminator).
 */
static char *unescape(char *str)
{
  const char *src = str;
  char *dst = str;

  /* The write cursor never gets ahead of the read cursor, so rewriting in place is safe. */
  while (*src != '\0') {
    char c = *src++;

    if (c == '\\') {
      const char escaped = *src;
      if (escaped == '\0') {
        /* Trailing escape char, nothing to emit. */
        break;
      }
      src++;

      if (escaped == 'n') {
        c = '\n';
      }
      else if (escaped == 't') {
        c = '\t';
      }
      else {
        /* Covers the escaped backslash as well as any useless escape. */
        c = escaped;
      }
    }

    *dst++ = c;
  }
  *dst = '\0';

  /* The first test fails on an empty result, so `dst[-1]` is only read for non-empty text. */
  if (str[0] == '"' && dst[-1] == '"') {
    dst[-1] = '\0';
    return str + 1;
  }
  return str;
}
123 
/* `qsort` callback for an array of C strings: both arguments point to a `char *` element,
 * the strings they reference are ordered with `strcmp`. */
static int qsort_str_cmp(const void *a, const void *b)
{
  const char *const *lhs = a;
  const char *const *rhs = b;

  return strcmp(*lhs, *rhs);
}
128 
129 static char **get_keys_sorted(GHash *messages, const uint32_t num_keys)
130 {
131  GHashIterator iter;
132 
133  char **keys = MEM_mallocN(sizeof(*keys) * num_keys, __func__);
134  char **k = keys;
135 
136  GHASH_ITER (iter, messages) {
137  *k = BLI_ghashIterator_getKey(&iter);
138  k++;
139  }
140 
141  qsort(keys, num_keys, sizeof(*keys), qsort_str_cmp);
142 
143  return keys;
144 }
145 
146 BLI_INLINE size_t uint32_to_bytes(const int value, char *bytes)
147 {
148  size_t i;
149  for (i = 0; i < sizeof(value); i++) {
150  bytes[i] = (char)((value >> ((int)i * 8)) & 0xff);
151  }
152  return i;
153 }
154 
155 BLI_INLINE size_t msg_to_bytes(char *msg, char *bytes, uint32_t size)
156 {
157  /* Note that we also perform replacing of our NULLSEP placeholder by real NULL char... */
158  size_t i;
159  for (i = 0; i < size; i++, msg++, bytes++) {
160  *bytes = (*msg == NULLSEP_CHR) ? '\0' : *msg;
161  }
162  return i;
163 }
164 
165 typedef struct Offset {
168 
169 /* Return the generated binary output. */
170 static char *generate(GHash *messages, size_t *r_output_size)
171 {
172  const uint32_t num_keys = BLI_ghash_len(messages);
173 
174  /* Get list of sorted keys. */
175  char **keys = get_keys_sorted(messages, num_keys);
176  char **vals = MEM_mallocN(sizeof(*vals) * num_keys, __func__);
177  uint32_t tot_keys_len = 0;
178  uint32_t tot_vals_len = 0;
179 
180  Offset *offsets = MEM_mallocN(sizeof(*offsets) * num_keys, __func__);
181 
182  for (int i = 0; i < num_keys; i++) {
183  Offset *off = &offsets[i];
184 
185  vals[i] = BLI_ghash_lookup(messages, keys[i]);
186 
187  /* For each string, we need size and file offset.
188  * Each string is NULL terminated; the NULL does not count into the size. */
189  off->key_offset = tot_keys_len;
190  off->key_len = (uint32_t)strlen(keys[i]);
191  tot_keys_len += off->key_len + 1;
192 
193  off->val_offset = tot_vals_len;
194  off->val_len = (uint32_t)strlen(vals[i]);
195  tot_vals_len += off->val_len + 1;
196  }
197 
198  /* The header is 7 32-bit unsigned integers.
199  * Then comes the keys index table, then the values index table. */
200  const uint32_t idx_keystart = 7 * 4;
201  const uint32_t idx_valstart = idx_keystart + 8 * num_keys;
202  /* We don't use hash tables, so the keys start right after the index tables. */
203  const uint32_t keystart = idx_valstart + 8 * num_keys;
204  /* and the values start after the keys */
205  const uint32_t valstart = keystart + tot_keys_len;
206 
207  /* Final buffer representing the binary MO file. */
208  *r_output_size = valstart + tot_vals_len;
209  char *output = MEM_mallocN(*r_output_size, __func__);
210  char *h = output;
211  char *ik = output + idx_keystart;
212  char *iv = output + idx_valstart;
213  char *k = output + keystart;
214  char *v = output + valstart;
215 
216  h += uint32_to_bytes(0x950412de, h); /* Magic */
217  h += uint32_to_bytes(0x0, h); /* Version */
218  h += uint32_to_bytes(num_keys, h); /* Number of entries */
219  h += uint32_to_bytes(idx_keystart, h); /* Start of key index */
220  h += uint32_to_bytes(idx_valstart, h); /* Start of value index */
221  h += uint32_to_bytes(0, h); /* Size of hash table */
222  h += uint32_to_bytes(0, h); /* Offset of hash table */
223 
224  BLI_assert(h == ik);
225 
226  for (int i = 0; i < num_keys; i++) {
227  Offset *off = &offsets[i];
228 
229  /* The index table first has the list of keys, then the list of values.
230  * Each entry has first the size of the string, then the file offset. */
231  ik += uint32_to_bytes(off->key_len, ik);
232  ik += uint32_to_bytes(off->key_offset + keystart, ik);
233  iv += uint32_to_bytes(off->val_len, iv);
234  iv += uint32_to_bytes(off->val_offset + valstart, iv);
235 
236  k += msg_to_bytes(keys[i], k, off->key_len + 1);
237  v += msg_to_bytes(vals[i], v, off->val_len + 1);
238  }
239 
240  BLI_assert(ik == output + idx_valstart);
241  BLI_assert(iv == output + keystart);
242  BLI_assert(k == output + valstart);
243 
244  MEM_freeN(keys);
245  MEM_freeN(vals);
246  MEM_freeN(offsets);
247 
248  return output;
249 }
250 
251 /* Add a non-fuzzy translation to the dictionary. */
252 static void add(GHash *messages, MemArena *memarena, const Message *msg)
253 {
254  const size_t msgctxt_len = (size_t)BLI_dynstr_get_len(msg->ctxt);
255  const size_t msgid_len = (size_t)BLI_dynstr_get_len(msg->id);
256  const size_t msgstr_len = (size_t)BLI_dynstr_get_len(msg->str);
257  const size_t msgkey_len = msgid_len + ((msgctxt_len == 0) ? 0 : msgctxt_len + 1);
258 
259  if (!msg->is_fuzzy && msgstr_len != 0) {
260  char *msgkey = BLI_memarena_alloc(memarena, sizeof(*msgkey) * (msgkey_len + 1));
261  char *msgstr = BLI_memarena_alloc(memarena, sizeof(*msgstr) * (msgstr_len + 1));
262 
263  if (msgctxt_len != 0) {
264  BLI_dynstr_get_cstring_ex(msg->ctxt, msgkey);
265  msgkey[msgctxt_len] = '\x04'; /* Context/msgid separator */
266  BLI_dynstr_get_cstring_ex(msg->id, &msgkey[msgctxt_len + 1]);
267  }
268  else {
269  BLI_dynstr_get_cstring_ex(msg->id, msgkey);
270  }
271 
272  BLI_dynstr_get_cstring_ex(msg->str, msgstr);
273 
274  BLI_ghash_insert(messages, msgkey, msgstr);
275  }
276 }
277 
278 static void clear(Message *msg)
279 {
280  BLI_dynstr_clear(msg->ctxt);
281  BLI_dynstr_clear(msg->id);
282  BLI_dynstr_clear(msg->str);
283  msg->is_fuzzy = false;
284 }
285 
286 static int make(const char *input_file_name, const char *output_file_name)
287 {
289  MemArena *msgs_memarena = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, __func__);
290 
291  const char *msgctxt_kw = "msgctxt";
292  const char *msgid_kw = "msgid";
293  const char *msgid_plural_kw = "msgid_plural";
294  const char *msgstr_kw = "msgstr";
295  const size_t msgctxt_len = strlen(msgctxt_kw);
296  const size_t msgid_len = strlen(msgid_kw);
297  const size_t msgid_plural_len = strlen(msgid_plural_kw);
298  const size_t msgstr_len = strlen(msgstr_kw);
299 
300  /* NOTE: For now, we assume file encoding is always utf-8. */
301 
302  eSectionType section = SECTION_NONE;
303  bool is_plural = false;
304 
305  Message msg = {
307  .id = BLI_dynstr_new_memarena(),
308  .str = BLI_dynstr_new_memarena(),
309  .is_fuzzy = false,
310  };
311 
312  LinkNode *input_file_lines = BLI_file_read_as_lines(input_file_name);
313  LinkNode *ifl = input_file_lines;
314 
315  /* Parse the catalog. */
316  for (int lno = 1; ifl; ifl = ifl->next, lno++) {
317  char *l = ifl->link;
318  const bool is_comment = (l[0] == '#');
319  /* If we get a comment line after a msgstr, this is a new entry. */
320  if (is_comment) {
321  if (section == SECTION_STR) {
322  add(messages, msgs_memarena, &msg);
323  clear(&msg);
324  section = SECTION_NONE;
325  }
326  /* Record a fuzzy mark. */
327  if (l[1] == ',' && strstr(l, "fuzzy") != NULL) {
328  msg.is_fuzzy = true;
329  }
330  /* Skip comments */
331  continue;
332  }
333  if (strstr(l, msgctxt_kw) == l) {
334  if (section == SECTION_STR) {
335  /* New message, output previous section. */
336  add(messages, msgs_memarena, &msg);
337  }
338  if (!ELEM(section, SECTION_NONE, SECTION_STR)) {
339  printf("msgctxt not at start of new message on %s:%d\n", input_file_name, lno);
340  return EXIT_FAILURE;
341  }
342  section = SECTION_CTX;
343  l = l + msgctxt_len;
344  clear(&msg);
345  }
346  else if (strstr(l, msgid_plural_kw) == l) {
347  /* This is a message with plural forms. */
348  if (section != SECTION_ID) {
349  printf("msgid_plural not preceded by msgid on %s:%d\n", input_file_name, lno);
350  return EXIT_FAILURE;
351  }
352  l = l + msgid_plural_len;
353  BLI_dynstr_append(msg.id, NULLSEP_STR); /* separator of singular and plural */
354  is_plural = true;
355  }
356  else if (strstr(l, msgid_kw) == l) {
357  if (section == SECTION_STR) {
358  add(messages, msgs_memarena, &msg);
359  }
360  if (section != SECTION_CTX) {
361  clear(&msg);
362  }
363  section = SECTION_ID;
364  l = l + msgid_len;
365  is_plural = false;
366  }
367  else if (strstr(l, msgstr_kw) == l) {
368  l = l + msgstr_len;
369  /* Now we are in a `msgstr` section. */
370  section = SECTION_STR;
371  if (l[0] == '[') {
372  if (!is_plural) {
373  printf("plural without msgid_plural on %s:%d\n", input_file_name, lno);
374  return EXIT_FAILURE;
375  }
376  if ((l = strchr(l, ']')) == NULL) {
377  printf("Syntax error on %s:%d\n", input_file_name, lno);
378  return EXIT_FAILURE;
379  }
380  if (BLI_dynstr_get_len(msg.str) != 0) {
381  BLI_dynstr_append(msg.str, NULLSEP_STR); /* Separator of the various plural forms. */
382  }
383  }
384  else {
385  if (is_plural) {
386  printf("indexed msgstr required for plural on %s:%d\n", input_file_name, lno);
387  return EXIT_FAILURE;
388  }
389  }
390  }
391  /* Skip empty lines. */
392  l = trim(l);
393  if (l[0] == '\0') {
394  if (section == SECTION_STR) {
395  add(messages, msgs_memarena, &msg);
396  clear(&msg);
397  }
398  section = SECTION_NONE;
399  continue;
400  }
401  l = unescape(l);
402  if (section == SECTION_CTX) {
403  BLI_dynstr_append(msg.ctxt, l);
404  }
405  else if (section == SECTION_ID) {
406  BLI_dynstr_append(msg.id, l);
407  }
408  else if (section == SECTION_STR) {
409  BLI_dynstr_append(msg.str, l);
410  }
411  else {
412  printf("Syntax error on %s:%d\n", input_file_name, lno);
413  return EXIT_FAILURE;
414  }
415  }
416  /* Add last entry */
417  if (section == SECTION_STR) {
418  add(messages, msgs_memarena, &msg);
419  }
420 
421  BLI_dynstr_free(msg.ctxt);
422  BLI_dynstr_free(msg.id);
423  BLI_dynstr_free(msg.str);
424  BLI_file_free_lines(input_file_lines);
425 
426  /* Compute output */
427  size_t output_size;
428  char *output = generate(messages, &output_size);
429 
430  FILE *fp = BLI_fopen(output_file_name, "wb");
431  fwrite(output, 1, output_size, fp);
432  fclose(fp);
433 
434  MEM_freeN(output);
435  BLI_ghash_free(messages, NULL, NULL);
436  BLI_memarena_free(msgs_memarena);
437 
438  return EXIT_SUCCESS;
439 }
440 
/* Entry point: expects exactly two arguments, the input `.po` file and the output `.mo` file.
 * Prints the usage and fails otherwise, else returns the result of `make`. */
int main(int argc, char **argv)
{
  if (argc == 3) {
    return make(argv[1], argv[2]);
  }

  printf("Usage: %s <input.po> <output.mo>\n", argv[0]);
  return EXIT_FAILURE;
}
struct Global Global
#define BLI_assert(a)
Definition: BLI_assert.h:46
#define BLI_INLINE
A dynamically sized string ADT.
int BLI_dynstr_get_len(const DynStr *ds) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL()
Definition: BLI_dynstr.c:235
void BLI_dynstr_clear(DynStr *ds) ATTR_NONNULL()
Definition: BLI_dynstr.c:263
void BLI_dynstr_free(DynStr *ds) ATTR_NONNULL()
Definition: BLI_dynstr.c:281
DynStr * BLI_dynstr_new_memarena(void) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT
Definition: BLI_dynstr.c:60
void BLI_dynstr_get_cstring_ex(const DynStr *__restrict ds, char *__restrict rets) ATTR_NONNULL()
Definition: BLI_dynstr.c:240
void BLI_dynstr_append(DynStr *__restrict ds, const char *cstr) ATTR_NONNULL()
Definition: BLI_dynstr.c:75
File and directory operations.
FILE * BLI_fopen(const char *filepath, const char *mode) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL()
Definition: fileops.c:906
void BLI_file_free_lines(struct LinkNode *lines)
Definition: storage.c:564
struct LinkNode * BLI_file_read_as_lines(const char *file) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL()
Definition: storage.c:518
bool BLI_ghashutil_strcmp(const void *a, const void *b)
BLI_INLINE void * BLI_ghashIterator_getKey(GHashIterator *ghi) ATTR_WARN_UNUSED_RESULT
Definition: BLI_ghash.h:298
#define GHASH_ITER(gh_iter_, ghash_)
Definition: BLI_ghash.h:321
GHash * BLI_ghash_new(GHashHashFP hashfp, GHashCmpFP cmpfp, const char *info) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT
Definition: BLI_ghash.c:689
void * BLI_ghash_lookup(const GHash *gh, const void *key) ATTR_WARN_UNUSED_RESULT
Definition: BLI_ghash.c:734
unsigned int BLI_ghash_len(const GHash *gh) ATTR_WARN_UNUSED_RESULT
Definition: BLI_ghash.c:705
unsigned int BLI_ghashutil_strhash_p_murmur(const void *ptr)
void BLI_ghash_insert(GHash *gh, void *key, void *val)
Definition: BLI_ghash.c:710
void BLI_ghash_free(GHash *gh, GHashKeyFreeFP keyfreefp, GHashValFreeFP valfreefp)
Definition: BLI_ghash.c:863
void BLI_memarena_free(struct MemArena *ma) ATTR_NONNULL(1)
Definition: BLI_memarena.c:94
struct MemArena * BLI_memarena_new(size_t bufsize, const char *name) ATTR_WARN_UNUSED_RESULT ATTR_RETURNS_NONNULL ATTR_NONNULL(2) ATTR_MALLOC
Definition: BLI_memarena.c:64
#define BLI_MEMARENA_STD_BUFSIZE
Definition: BLI_memarena.h:20
void * BLI_memarena_alloc(struct MemArena *ma, size_t size) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1) ATTR_MALLOC ATTR_ALLOC_SIZE(2)
Definition: BLI_memarena.c:116
#define ELEM(...)
Read Guarded memory(de)allocation.
ATTR_WARN_UNUSED_RESULT const BMLoop * l
ATTR_WARN_UNUSED_RESULT const BMVert * v
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition: btDbvt.cpp:52
int len
Definition: draw_manager.c:108
#define str(s)
ccl_global KernelShaderEvalInput ccl_global float * output
void(* MEM_freeN)(void *vmemh)
Definition: mallocn.c:27
void *(* MEM_mallocN)(size_t len, const char *str)
Definition: mallocn.c:33
static ulong * next
#define G(x, y, z)
static char * generate(GHash *messages, size_t *r_output_size)
Definition: msgfmt.c:170
struct Offset Offset
BLI_INLINE size_t uint32_to_bytes(const int value, char *bytes)
Definition: msgfmt.c:146
#define NULLSEP_STR
Definition: msgfmt.c:42
static int make(const char *input_file_name, const char *output_file_name)
Definition: msgfmt.c:286
int main(int argc, char **argv)
Definition: msgfmt.c:441
static char * unescape(char *str)
Definition: msgfmt.c:83
eSectionType
Definition: msgfmt.c:45
@ SECTION_CTX
Definition: msgfmt.c:47
@ SECTION_STR
Definition: msgfmt.c:49
@ SECTION_NONE
Definition: msgfmt.c:46
@ SECTION_ID
Definition: msgfmt.c:48
static void clear(Message *msg)
Definition: msgfmt.c:278
static char ** get_keys_sorted(GHash *messages, const uint32_t num_keys)
Definition: msgfmt.c:129
struct Message Message
static int qsort_str_cmp(const void *a, const void *b)
Definition: msgfmt.c:124
static char * trim(char *str)
Definition: msgfmt.c:60
static void add(GHash *messages, MemArena *memarena, const Message *msg)
Definition: msgfmt.c:252
#define NULLSEP_CHR
Definition: msgfmt.c:43
BLI_INLINE size_t msg_to_bytes(char *msg, char *bytes, uint32_t size)
Definition: msgfmt.c:155
static unsigned a[3]
Definition: RandGen.cpp:78
static const pxr::TfToken b("b", pxr::TfToken::Immortal)
unsigned int uint32_t
Definition: stdint.h:80
void * link
Definition: BLI_linklist.h:24
struct LinkNode * next
Definition: BLI_linklist.h:23
Definition: msgfmt.c:52
DynStr * id
Definition: msgfmt.c:54
DynStr * str
Definition: msgfmt.c:55
bool is_fuzzy
Definition: msgfmt.c:57
DynStr * ctxt
Definition: msgfmt.c:53
Definition: msgfmt.c:165
uint32_t key_len
Definition: msgfmt.c:166
uint32_t val_offset
Definition: msgfmt.c:166
uint32_t val_len
Definition: msgfmt.c:166
uint32_t key_offset
Definition: msgfmt.c:166