Blender  V3.3
Macros | Functions | Variables
string_utf8.c File Reference
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>
#include <wcwidth.h>
#include "BLI_utildefines.h"
#include "BLI_string_utf8.h"

Go to the source code of this file.

Macros

#define BLI_STR_UTF8_CPY(dst, src, maxncpy)
 
#define UTF8_COMPUTE(Char, Mask, Len, Err)
 
#define UTF8_GET(Result, Chars, Count, Mask, Len, Err)
 
#define UTF8_VARS_FROM_CHAR32(Char, First, Len)
 

Functions

ptrdiff_t BLI_str_utf8_invalid_byte (const char *str, size_t length)
 
int BLI_str_utf8_invalid_strip (char *str, size_t length)
 
char * BLI_strncpy_utf8 (char *__restrict dst, const char *__restrict src, size_t maxncpy)
 
size_t BLI_strncpy_utf8_rlen (char *__restrict dst, const char *__restrict src, size_t maxncpy)
 
size_t BLI_strncpy_wchar_as_utf8 (char *__restrict dst, const wchar_t *__restrict src, const size_t maxncpy)
 
size_t BLI_wstrlen_utf8 (const wchar_t *src)
 
size_t BLI_strlen_utf8_ex (const char *strc, size_t *r_len_bytes)
 
size_t BLI_strlen_utf8 (const char *strc)
 
size_t BLI_strnlen_utf8_ex (const char *strc, const size_t maxlen, size_t *r_len_bytes)
 
size_t BLI_strnlen_utf8 (const char *strc, const size_t maxlen)
 
size_t BLI_strncpy_wchar_from_utf8 (wchar_t *__restrict dst_w, const char *__restrict src_c, const size_t maxncpy)
 
int BLI_wcwidth (char32_t ucs)
 
int BLI_wcswidth (const char32_t *pwcs, size_t n)
 
int BLI_str_utf8_char_width (const char *p)
 
int BLI_str_utf8_char_width_safe (const char *p)
 
int BLI_str_utf8_size (const char *p)
 
int BLI_str_utf8_size_safe (const char *p)
 
uint BLI_str_utf8_as_unicode (const char *p)
 
uint BLI_str_utf8_as_unicode_step_or_error (const char *__restrict p, const size_t p_len, size_t *__restrict index)
 
uint BLI_str_utf8_as_unicode_step (const char *__restrict p, const size_t p_len, size_t *__restrict index)
 
size_t BLI_str_utf8_from_unicode_len (const uint c)
 
size_t BLI_str_utf8_from_unicode (uint c, char *outbuf, const size_t outbuf_len)
 
size_t BLI_str_utf8_as_utf32 (char32_t *__restrict dst_w, const char *__restrict src_c, const size_t maxncpy)
 
size_t BLI_str_utf32_as_utf8 (char *__restrict dst, const char32_t *__restrict src, const size_t maxncpy)
 
size_t BLI_str_utf32_as_utf8_len (const char32_t *src)
 
const char * BLI_str_find_prev_char_utf8 (const char *p, const char *str_start)
 
const char * BLI_str_find_next_char_utf8 (const char *p, const char *str_end)
 
size_t BLI_str_partition_utf8 (const char *str, const uint delim[], const char **sep, const char **suf)
 
size_t BLI_str_rpartition_utf8 (const char *str, const uint delim[], const char **sep, const char **suf)
 
size_t BLI_str_partition_ex_utf8 (const char *str, const char *end, const uint delim[], const char **sep, const char **suf, const bool from_right)
 
Offset Conversion in Strings
int BLI_str_utf8_offset_to_index (const char *str, int offset)
 
int BLI_str_utf8_offset_from_index (const char *str, int index)
 
int BLI_str_utf8_offset_to_column (const char *str, int offset)
 
int BLI_str_utf8_offset_from_column (const char *str, int column)
 

Variables

static const size_t utf8_skip_data [256]
 

Macro Definition Documentation

◆ BLI_STR_UTF8_CPY

#define BLI_STR_UTF8_CPY (   dst,
  src,
  maxncpy 
)
Value:
{ \
size_t utf8_size; \
while (*src != '\0' && (utf8_size = utf8_skip_data[*src]) < maxncpy) { \
maxncpy -= utf8_size; \
switch (utf8_size) { \
case 6: \
*dst++ = *src++; \
ATTR_FALLTHROUGH; \
case 5: \
*dst++ = *src++; \
ATTR_FALLTHROUGH; \
case 4: \
*dst++ = *src++; \
ATTR_FALLTHROUGH; \
case 3: \
*dst++ = *src++; \
ATTR_FALLTHROUGH; \
case 2: \
*dst++ = *src++; \
ATTR_FALLTHROUGH; \
case 1: \
*dst++ = *src++; \
} \
} \
*dst = '\0'; \
} \
(void)0
SyclQueue void void * src
SyclQueue void void size_t num_bytes void
static const size_t utf8_skip_data[256]
Definition: string_utf8.c:35

Compatible with BLI_strncpy, but ensures that no partial UTF-8 characters are copied.

Definition at line 207 of file string_utf8.c.

◆ UTF8_COMPUTE

#define UTF8_COMPUTE (   Char,
  Mask,
  Len,
  Err 
)
Value:
if (Char < 128) { \
Len = 1; \
Mask = 0x7f; \
} \
else if ((Char & 0xe0) == 0xc0) { \
Len = 2; \
Mask = 0x1f; \
} \
else if ((Char & 0xf0) == 0xe0) { \
Len = 3; \
Mask = 0x0f; \
} \
else if ((Char & 0xf8) == 0xf0) { \
Len = 4; \
Mask = 0x07; \
} \
else if ((Char & 0xfc) == 0xf8) { \
Len = 5; \
Mask = 0x03; \
} \
else if ((Char & 0xfe) == 0xfc) { \
Len = 6; \
Mask = 0x01; \
} \
else { \
Len = Err; /* -1 is the typical error value or 1 to skip */ \
} \
(void)0

Definition at line 409 of file string_utf8.c.

◆ UTF8_GET

#define UTF8_GET (   Result,
  Chars,
  Count,
  Mask,
  Len,
  Err 
)
Value:
(Result) = (Chars)[0] & (Mask); \
for ((Count) = 1; (Count) < (Len); ++(Count)) { \
if (((Chars)[(Count)] & 0xc0) != 0x80) { \
(Result) = Err; \
break; \
} \
(Result) <<= 6; \
(Result) |= ((Chars)[(Count)] & 0x3f); \
} \
(void)0
struct Mask Mask

Definition at line 440 of file string_utf8.c.

◆ UTF8_VARS_FROM_CHAR32

#define UTF8_VARS_FROM_CHAR32 (   Char,
  First,
  Len 
)
Value:
if (Char < 0x80) { \
First = 0; \
Len = 1; \
} \
else if (Char < 0x800) { \
First = 0xc0; \
Len = 2; \
} \
else if (Char < 0x10000) { \
First = 0xe0; \
Len = 3; \
} \
else if (Char < 0x200000) { \
First = 0xf0; \
Len = 4; \
} \
else if (Char < 0x4000000) { \
First = 0xf8; \
Len = 5; \
} \
else { \
First = 0xfc; \
Len = 6; \
} \
(void)0

Definition at line 536 of file string_utf8.c.

Function Documentation

◆ BLI_str_find_next_char_utf8()

const char* BLI_str_find_next_char_utf8 ( const char *  p,
const char *  str_end 
)

Definition at line 680 of file string_utf8.c.

References BLI_assert.

Referenced by BLI_str_utf8_as_utf32().

◆ BLI_str_find_prev_char_utf8()

const char* BLI_str_find_prev_char_utf8 ( const char *  p,
const char *  str_start 
)

Definition at line 665 of file string_utf8.c.

References BLI_assert.

Referenced by BLI_str_partition_ex_utf8().

◆ BLI_str_partition_ex_utf8()

size_t BLI_str_partition_ex_utf8 ( const char *  str,
const char *  end,
const uint  delim[],
const char **  sep,
const char **  suf,
const bool  from_right 
)

◆ BLI_str_partition_utf8()

size_t BLI_str_partition_utf8 ( const char *  str,
const uint  delim[],
const char **  sep,
const char **  suf 
)

Definition at line 693 of file string_utf8.c.

References BLI_str_partition_ex_utf8(), NULL, and str.

◆ BLI_str_rpartition_utf8()

size_t BLI_str_rpartition_utf8 ( const char *  str,
const uint  delim[],
const char **  sep,
const char **  suf 
)

Definition at line 701 of file string_utf8.c.

References BLI_str_partition_ex_utf8(), NULL, and str.

◆ BLI_str_utf32_as_utf8()

size_t BLI_str_utf32_as_utf8 ( char *__restrict  dst,
const char32_t *__restrict  src,
const size_t  maxncpy 
)

Definition at line 634 of file string_utf8.c.

References BLI_assert, BLI_str_utf8_from_unicode(), len, src, and UNLIKELY.

◆ BLI_str_utf32_as_utf8_len()

size_t BLI_str_utf32_as_utf8_len ( const char32_t *  src)
Returns
The UTF-32 len in UTF-8.

Definition at line 654 of file string_utf8.c.

References BLI_str_utf8_from_unicode_len(), len, and src.

Referenced by BKE_vfont_clipboard_set(), and ED_curve_editfont_load().

◆ BLI_str_utf8_as_unicode()

uint BLI_str_utf8_as_unicode ( const char *  p)
Parameters
p: a pointer to a Unicode character encoded as UTF-8

Converts a sequence of bytes encoded as UTF-8 to a Unicode character. If p does not point to a valid UTF-8 encoded character, results are undefined. If you are not sure that the bytes are complete valid Unicode characters, you should use g_utf8_get_char_validated() instead.

Return value: the resulting character

Definition at line 478 of file string_utf8.c.

References BLI_UTF8_ERR, Freestyle::c, len, mask(), result, UNLIKELY, UTF8_COMPUTE, and UTF8_GET.

Referenced by BLI_str_partition_ex_utf8(), BLI_str_utf8_char_width(), BLI_str_utf8_char_width_safe(), blender::string_search::extract_normalized_words(), blender::string_search::get_fuzzy_match_errors(), insert_text_invoke(), key_event_glyph_or_text(), text_autocomplete_build(), and text_insert_invoke().

◆ BLI_str_utf8_as_unicode_step()

uint BLI_str_utf8_as_unicode_step ( const char *__restrict  p,
const size_t  p_len,
size_t *__restrict  index 
)

◆ BLI_str_utf8_as_unicode_step_or_error()

uint BLI_str_utf8_as_unicode_step_or_error ( const char *__restrict  p,
const size_t  p_len,
size_t *__restrict  index 
)

◆ BLI_str_utf8_as_utf32()

size_t BLI_str_utf8_as_utf32 ( char32_t *__restrict  dst_w,
const char *__restrict  src_c,
const size_t  maxncpy 
)

◆ BLI_str_utf8_char_width()

int BLI_str_utf8_char_width ( const char *  p)
Warning
can return -1 on bad chars.

Definition at line 378 of file string_utf8.c.

References BLI_str_utf8_as_unicode(), BLI_UTF8_ERR, and BLI_wcwidth().

◆ BLI_str_utf8_char_width_safe()

int BLI_str_utf8_char_width_safe ( const char *  p)

◆ BLI_str_utf8_from_unicode()

size_t BLI_str_utf8_from_unicode ( unsigned int  c,
char *  outbuf,
size_t  outbuf_len 
)

BLI_str_utf8_from_unicode:

Parameters
c: a Unicode character code
outbuf: output buffer, must have at least outbuf_len bytes of space. If the length required by c exceeds outbuf_len, the available bytes will be zeroed and outbuf_len returned.

Converts a single character to UTF-8.

Returns
number of bytes written.

Definition at line 575 of file string_utf8.c.

References Freestyle::c, len, UNLIKELY, and UTF8_VARS_FROM_CHAR32.

Referenced by BLI_str_utf32_as_utf8(), BLI_strncpy_wchar_as_utf8(), find_family_object(), txt_add_char_intern(), txt_extended_ascii_as_utf8(), txt_replace_char(), and wm_event_add_ghostevent().

◆ BLI_str_utf8_from_unicode_len()

size_t BLI_str_utf8_from_unicode_len ( const uint  c)

Definition at line 563 of file string_utf8.c.

References Freestyle::c, len, UTF8_VARS_FROM_CHAR32, and void.

Referenced by BLI_str_utf32_as_utf8_len(), and BLI_wstrlen_utf8().

◆ BLI_str_utf8_invalid_byte()

ptrdiff_t BLI_str_utf8_invalid_byte ( const char *  str,
size_t  length 
)

Find first UTF-8 invalid byte in given str, of length bytes.

Returns
the offset of the first invalid byte.

Definition at line 46 of file string_utf8.c.

References Freestyle::c, ELEM, blender::math::length(), str, and utf8_skip_data.

Referenced by BLI_str_utf8_invalid_strip(), and txt_extended_ascii_as_utf8().

◆ BLI_str_utf8_invalid_strip()

int BLI_str_utf8_invalid_strip ( char *  str,
size_t  length 
)

Remove any invalid UTF-8 bytes (taking multi-byte sequences into account).

Returns
number of stripped bytes.

Definition at line 181 of file string_utf8.c.

References BLI_assert, BLI_str_utf8_invalid_byte(), blender::math::length(), and str.

Referenced by BKE_id_new_name_validate(), id_name_final_build(), objfnt_to_ftvfontdata(), outputNumInput(), SEQ_edit_sequence_name_set(), TEST(), ui_textedit_copypaste(), and ui_textedit_end().

◆ BLI_str_utf8_offset_from_column()

int BLI_str_utf8_offset_from_column ( const char *  str,
int  column 
)

◆ BLI_str_utf8_offset_from_index()

int BLI_str_utf8_offset_from_index ( const char *  str,
int  index 
)

◆ BLI_str_utf8_offset_to_column()

int BLI_str_utf8_offset_to_column ( const char *  str,
int  offset 
)

◆ BLI_str_utf8_offset_to_index()

int BLI_str_utf8_offset_to_index ( const char *  str,
int  offset 
)

Definition at line 761 of file string_utf8.c.

References BLI_str_utf8_size(), offset, pos, and str.

◆ BLI_str_utf8_size()

int BLI_str_utf8_size ( const char *  p)

◆ BLI_str_utf8_size_safe()

int BLI_str_utf8_size_safe ( const char *  p)

◆ BLI_strlen_utf8()

size_t BLI_strlen_utf8 ( const char *  strc)

◆ BLI_strlen_utf8_ex()

size_t BLI_strlen_utf8_ex ( const char *  strc,
size_t *  r_len_bytes 
)

Definition at line 304 of file string_utf8.c.

References BLI_str_utf8_size_safe(), and len.

Referenced by BLI_strlen_utf8().

◆ BLI_strncpy_utf8()

char* BLI_strncpy_utf8 ( char *__restrict  dst,
const char *__restrict  src,
size_t  maxncpy 
)

Definition at line 236 of file string_utf8.c.

References BLI_assert, BLI_STR_UTF8_CPY, and src.

◆ BLI_strncpy_utf8_rlen()

size_t BLI_strncpy_utf8_rlen ( char *__restrict  dst,
const char *__restrict  src,
size_t  maxncpy 
)

Definition at line 252 of file string_utf8.c.

References BLI_assert, BLI_STR_UTF8_CPY, and src.

◆ BLI_strncpy_wchar_as_utf8()

size_t BLI_strncpy_wchar_as_utf8 ( char *__restrict  dst,
const wchar_t *__restrict  src,
const size_t  maxncpy 
)

Definition at line 273 of file string_utf8.c.

References BLI_assert, BLI_str_utf8_from_unicode(), len, src, and UNLIKELY.

◆ BLI_strncpy_wchar_from_utf8()

size_t BLI_strncpy_wchar_from_utf8 ( wchar_t *__restrict  dst_w,
const char *__restrict  src_c,
const size_t  maxncpy 
)

Definition at line 348 of file string_utf8.c.

References BLI_str_utf8_as_utf32(), and conv_utf_8_to_16().

◆ BLI_strnlen_utf8()

size_t BLI_strnlen_utf8 ( const char *  strc,
size_t  maxlen 
)
Parameters
strc: the string whose length is measured.
maxlen: the string length (in bytes).
Returns
the unicode length (not in bytes!)

Definition at line 342 of file string_utf8.c.

References BLI_strnlen_utf8_ex().

Referenced by blender::string_search::count_utf8_code_points(), and ui_text_position_to_hidden().

◆ BLI_strnlen_utf8_ex()

size_t BLI_strnlen_utf8_ex ( const char *  strc,
const size_t  maxlen,
size_t *  r_len_bytes 
)

Definition at line 323 of file string_utf8.c.

References BLI_str_utf8_size_safe(), and len.

Referenced by BLI_strnlen_utf8().

◆ BLI_wcswidth()

int BLI_wcswidth ( const char32_t *  pwcs,
size_t  n 
)

Definition at line 373 of file string_utf8.c.

◆ BLI_wcwidth()

int BLI_wcwidth ( char32_t  ucs)

Count the columns that a character/string occupies (based on wcwidth.c).

Definition at line 364 of file string_utf8.c.

Referenced by blf_font_draw_mono(), blf_glyph_render(), BLI_str_utf8_char_width(), and BLI_str_utf8_char_width_safe().

◆ BLI_wstrlen_utf8()

size_t BLI_wstrlen_utf8 ( const wchar_t *  src)
Returns
the wchar_t length in UTF-8.

Definition at line 293 of file string_utf8.c.

References BLI_str_utf8_from_unicode_len(), len, and src.

Variable Documentation

◆ utf8_skip_data

const size_t utf8_skip_data[256]
static
Initial value:
= {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1,
}

Array copied from GLIB's gutf8.c.

Note
The last two values (0xfe and 0xff) are forbidden in UTF-8, so they are also treated as being 1 byte long.

Definition at line 35 of file string_utf8.c.

Referenced by BLI_str_utf8_invalid_byte().