libunibreak 5.1
Loading...
Searching...
No Matches
linebreakdef.h
Go to the documentation of this file.
1/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
2
3/*
4 * Line breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer.
6 *
7 * Copyright (C) 2008-2020 Wu Yongwei <wuyongwei at gmail dot com>
8 * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
9 *
10 * This software is provided 'as-is', without any express or implied
11 * warranty. In no event will the author be held liable for any damages
12 * arising from the use of this software.
13 *
14 * Permission is granted to anyone to use this software for any purpose,
15 * including commercial applications, and to alter it and redistribute
16 * it freely, subject to the following restrictions:
17 *
18 * 1. The origin of this software must not be misrepresented; you must
19 * not claim that you wrote the original software. If you use this
20 * software in a product, an acknowledgement in the product
21 * documentation would be appreciated but is not required.
22 * 2. Altered source versions must be plainly marked as such, and must
23 * not be misrepresented as being the original software.
24 * 3. This notice may not be removed or altered from any source
25 * distribution.
26 *
27 * The main reference is Unicode Standard Annex 14 (UAX #14):
28 * <URL:http://www.unicode.org/reports/tr14/>
29 *
30 * When this library was designed, this annex was at Revision 19, for
31 * Unicode 5.0.0:
32 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
33 *
34 * This library has been updated according to Revision 45, for
35 * Unicode 13.0.0:
36 * <URL:http://www.unicode.org/reports/tr14/tr14-45.html>
37 *
38 * The Unicode Terms of Use are available at
39 * <URL:http://www.unicode.org/copyright.html>
40 */
41
52#ifndef LINEBREAKDEF_H
53#define LINEBREAKDEF_H
54
55#include "unibreakdef.h"
56
62{
63 /* This is used to signal an error condition. */
66 /* The following break classes are treated in the pair table. */
100 /* The following break class is treated in the pair table, but it is
101 * not part of Table 2 of UAX #14-37. */
104 /* The following break classes are not treated in the pair table */
114 LBP_XX
116
118{
122
128{
132};
133
139{
140 const char *lang;
141 size_t namelen;
142 const struct LineBreakProperties *lbp;
143};
144
150{
151 const char *lang;
157 bool fLb8aZwj;
161};
162
163/* Declarations */
164extern const struct LineBreakProperties lb_prop_supplementary[];
165extern const unsigned int lb_prop_supplementary_len;
166extern const char lb_prop_bmp[];
167extern const struct LineBreakPropertiesLang lb_prop_lang_map[];
168
169/* Function Prototype */
171 struct LineBreakContext *lbpCtx,
172 utf32_t ch,
173 const char *lang);
175 struct LineBreakContext *lbpCtx,
176 utf32_t ch);
177size_t set_linebreaks(
178 const void *s,
179 size_t len,
180 const char *lang,
181 enum BreakOutputType outputType,
182 char *brks,
183 get_next_char_t get_next_char);
184
185#endif /* LINEBREAKDEF_H */
const unsigned int lb_prop_supplementary_len
Definition: linebreakdata.c:711
void lb_init_break_context(struct LineBreakContext *lbpCtx, utf32_t ch, const char *lang)
Initializes line breaking context for a given language.
Definition: linebreak.c:649
const char lb_prop_bmp[]
Line breaking properties for BMP.
Definition: linebreakdata.c:715
LineBreakClass
Line break classes.
Definition: linebreakdef.h:62
@ LBP_WJ
Word joiner.
Definition: linebreakdef.h:89
@ LBP_GL
Glue.
Definition: linebreakdef.h:71
@ LBP_CL
Closing punctuation.
Definition: linebreakdef.h:68
@ LBP_NS
Non-starters.
Definition: linebreakdef.h:72
@ LBP_RI
Regional indicator.
Definition: linebreakdef.h:95
@ LBP_ID
Ideographic.
Definition: linebreakdef.h:81
@ LBP_JL
Hangul L Jamo.
Definition: linebreakdef.h:92
@ LBP_SA
South-East Asian.
Definition: linebreakdef.h:111
@ LBP_B2
Break on either side (but not between a pair of such characters)
Definition: linebreakdef.h:86
@ LBP_SY
Symbols allowing break after.
Definition: linebreakdef.h:74
@ LBP_CB
Contingent break.
Definition: linebreakdef.h:102
@ LBP_XX
Unknown.
Definition: linebreakdef.h:114
@ LBP_EM
Emoji modifier.
Definition: linebreakdef.h:97
@ LBP_CJ
Conditional Japanese starter.
Definition: linebreakdef.h:107
@ LBP_ZWJ
Zero width joiner.
Definition: linebreakdef.h:98
@ LBP_EX
Exclamation/Interrogation.
Definition: linebreakdef.h:73
@ LBP_SP
Space.
Definition: linebreakdef.h:113
@ LBP_JV
Hangul V Jamo.
Definition: linebreakdef.h:93
@ LBP_JT
Hangul T Jamo.
Definition: linebreakdef.h:94
@ LBP_BK
Break (mandatory)
Definition: linebreakdef.h:106
@ LBP_HY
Hyphen.
Definition: linebreakdef.h:83
@ LBP_NU
Numeric.
Definition: linebreakdef.h:78
@ LBP_BA
Break after.
Definition: linebreakdef.h:84
@ LBP_H2
Hangul LV.
Definition: linebreakdef.h:90
@ LBP_EB
Emoji base.
Definition: linebreakdef.h:96
@ LBP_CR
Carriage return.
Definition: linebreakdef.h:108
@ LBP_Undefined
Undefined.
Definition: linebreakdef.h:64
@ LBP_LF
Line feed.
Definition: linebreakdef.h:109
@ LBP_PR
Prefix.
Definition: linebreakdef.h:76
@ LBP_BB
Break before.
Definition: linebreakdef.h:85
@ LBP_IS
Infix separator.
Definition: linebreakdef.h:75
@ LBP_AL
Alphabetic.
Definition: linebreakdef.h:79
@ LBP_ZW
Zero-width space.
Definition: linebreakdef.h:87
@ LBP_HL
Hebrew letter.
Definition: linebreakdef.h:80
@ LBP_QU
Ambiguous quotation.
Definition: linebreakdef.h:70
@ LBP_CP
Closing parenthesis.
Definition: linebreakdef.h:69
@ LBP_PO
Postfix.
Definition: linebreakdef.h:77
@ LBP_AI
Ambiguous (alphabetic or ideograph)
Definition: linebreakdef.h:105
@ LBP_OP
Opening punctuation.
Definition: linebreakdef.h:67
@ LBP_NL
Next line.
Definition: linebreakdef.h:110
@ LBP_SG
Surrogates.
Definition: linebreakdef.h:112
@ LBP_CM
Combining marks.
Definition: linebreakdef.h:88
@ LBP_IN
Inseparable characters.
Definition: linebreakdef.h:82
@ LBP_H3
Hangul LVT.
Definition: linebreakdef.h:91
size_t set_linebreaks(const void *s, size_t len, const char *lang, enum BreakOutputType outputType, char *brks, get_next_char_t get_next_char)
Sets the line breaking information for a generic input string.
Definition: linebreak.c:752
const struct LineBreakPropertiesLang lb_prop_lang_map[]
Association data of language-specific line breaking properties with language names.
Definition: linebreakdef.c:117
const struct LineBreakProperties lb_prop_supplementary[]
Line breaking properties for supplementary planes.
Definition: linebreakdata.c:9
int lb_process_next_char(struct LineBreakContext *lbpCtx, utf32_t ch)
Updates the LineBreakContext for the next codepoint and returns the detected break.
Definition: linebreak.c:680
BreakOutputType
Definition: linebreakdef.h:118
@ LBOT_PER_CODE_UNIT
Definition: linebreakdef.h:119
@ LBOT_PER_CODE_POINT
Definition: linebreakdef.h:120
Context representing internal state of the line breaking algorithm.
Definition: linebreakdef.h:150
bool fLb10LeadSpace
Flag for leading space (LB10)
Definition: linebreakdef.h:158
bool fLb8aZwj
Flag for ZWJ (LB8a)
Definition: linebreakdef.h:157
enum LineBreakClass lbcCur
Breaking class of current codepoint.
Definition: linebreakdef.h:154
int cLb30aRI
Count of RI characters (LB30a)
Definition: linebreakdef.h:160
enum LineBreakClass lbcNew
Breaking class of next codepoint.
Definition: linebreakdef.h:155
bool fLb21aHebrew
Flag for Hebrew letters (LB21a)
Definition: linebreakdef.h:159
enum LineBreakClass lbcLast
Breaking class of last codepoint.
Definition: linebreakdef.h:156
const struct LineBreakProperties * lbpLang
Pointer to LineBreakProperties.
Definition: linebreakdef.h:152
const char * lang
Language name.
Definition: linebreakdef.h:151
Struct for association of language-specific line breaking properties with language names.
Definition: linebreakdef.h:139
size_t namelen
Length of name to match.
Definition: linebreakdef.h:141
const struct LineBreakProperties * lbp
Pointer to associated data.
Definition: linebreakdef.h:142
const char * lang
Language name.
Definition: linebreakdef.h:140
Struct for entries of line break properties.
Definition: linebreakdef.h:128
utf32_t start
Start codepoint.
Definition: linebreakdef.h:129
enum LineBreakClass prop
The line breaking property.
Definition: linebreakdef.h:131
utf32_t end
End codepoint, inclusive.
Definition: linebreakdef.h:130
unsigned int utf32_t
Type for UTF-32 data points.
Definition: unibreakbase.h:49
Header file for private definitions in the libunibreak library.
utf32_t(* get_next_char_t)(const void *, size_t, size_t *)
Abstract function interface for ub_get_next_char_utf8, ub_get_next_char_utf16, and ub_get_next_char_utf32.
Definition: unibreakdef.h:65