Libcroco
Data Structures | Macros
cr-tknzr.c File Reference
#include "string.h"
#include "cr-tknzr.h"
#include "cr-doc-handler.h"

Go to the source code of this file.

Data Structures

struct  _CRTknzrPriv
 

Macros

#define PRIVATE(obj)   ((obj)->priv)
 
#define IS_NUM(a_char)   (((a_char) >= '0' && (a_char) <= '9')?TRUE:FALSE)
 Returns TRUE if the character is a digit ([0-9]), FALSE otherwise. More...
 
#define CHECK_PARSING_STATUS(status, is_exception)
 Checks if 'status' equals CR_OK. More...
 
#define PEEK_NEXT_CHAR(a_tknzr, a_to_char)
 Peeks the next char from the input stream of the current tokenizer. More...
 
#define READ_NEXT_CHAR(a_tknzr, to_char)
 Reads the next char from the input stream of the current parser. More...
 
#define RECORD_INITIAL_POS(a_tknzr, a_pos)
 Gets information about the current position in the input of the parser. More...
 
#define RECORD_CUR_BYTE_ADDR(a_tknzr, a_addr)
 Gets the address of the current byte inside the parser input. More...
 
#define PEEK_BYTE(a_tknzr, a_offset, a_byte_ptr)
 Peeks a byte from the topmost parser input at a given offset from the current position. More...
 
#define BYTE(a_input, a_n, a_eof)   cr_input_peek_byte2 (a_input, a_n, a_eof)
 
#define READ_NEXT_BYTE(a_tknzr, a_byte_ptr)
 Reads a byte from the topmost parser input stream. More...
 
#define SKIP_BYTES(a_tknzr, a_nb_bytes)
 Skips a given number of bytes in the topmost parser input. More...
 
#define SKIP_CHARS(a_tknzr, a_nb_chars)
 Skip utf8 encoded characters. More...
 
#define ENSURE_PARSING_COND(condition)   if (! (condition)) {status = CR_PARSING_ERROR; goto error ;}
 Tests the condition and if it is false, sets status to "CR_PARSING_ERROR" and goto the 'error' label. More...
 

Functions

CRTknzr * cr_tknzr_new (CRInput *a_input)
 
CRTknzr * cr_tknzr_new_from_buf (guchar *a_buf, gulong a_len, enum CREncoding a_enc, gboolean a_free_at_destroy)
 
CRTknzr * cr_tknzr_new_from_uri (const guchar *a_file_uri, enum CREncoding a_enc)
 
void cr_tknzr_ref (CRTknzr *a_this)
 
gboolean cr_tknzr_unref (CRTknzr *a_this)
 
enum CRStatus cr_tknzr_set_input (CRTknzr *a_this, CRInput *a_input)
 
enum CRStatus cr_tknzr_get_input (CRTknzr *a_this, CRInput **a_input)
 
enum CRStatus cr_tknzr_read_byte (CRTknzr *a_this, guchar *a_byte)
 Reads the next byte from the parser input stream. More...
 
enum CRStatus cr_tknzr_read_char (CRTknzr *a_this, guint32 *a_char)
 Reads the next char from the parser input stream. More...
 
enum CRStatus cr_tknzr_peek_char (CRTknzr *a_this, guint32 *a_char)
 Peeks a char from the parser input stream. More...
 
enum CRStatus cr_tknzr_peek_byte (CRTknzr *a_this, gulong a_offset, guchar *a_byte)
 Peeks a byte ahead at a given position in the parser input stream. More...
 
guchar cr_tknzr_peek_byte2 (CRTknzr *a_this, gulong a_offset, gboolean *a_eof)
 Same as cr_tknzr_peek_byte() but this api returns the byte peeked. More...
 
glong cr_tknzr_get_nb_bytes_left (CRTknzr *a_this)
 Gets the number of bytes left in the topmost input stream associated to this parser. More...
 
enum CRStatus cr_tknzr_get_cur_pos (CRTknzr *a_this, CRInputPos *a_pos)
 
enum CRStatus cr_tknzr_get_parsing_location (CRTknzr *a_this, CRParsingLocation *a_loc)
 
enum CRStatus cr_tknzr_get_cur_byte_addr (CRTknzr *a_this, guchar **a_addr)
 
enum CRStatus cr_tknzr_seek_index (CRTknzr *a_this, enum CRSeekPos a_origin, gint a_pos)
 
enum CRStatus cr_tknzr_consume_chars (CRTknzr *a_this, guint32 a_char, glong *a_nb_char)
 
enum CRStatus cr_tknzr_set_cur_pos (CRTknzr *a_this, CRInputPos *a_pos)
 
enum CRStatus cr_tknzr_unget_token (CRTknzr *a_this, CRToken *a_token)
 
enum CRStatus cr_tknzr_get_next_token (CRTknzr *a_this, CRToken **a_tk)
 Returns the next token of the input stream. More...
 
enum CRStatus cr_tknzr_parse_token (CRTknzr *a_this, enum CRTokenType a_type, enum CRTokenExtraType a_et, gpointer a_res, gpointer a_extra_res)
 
void cr_tknzr_destroy (CRTknzr *a_this)
 

Detailed Description

The definition of the CRTknzr (tokenizer) class.

Definition in file cr-tknzr.c.

Macro Definition Documentation

◆ BYTE

#define BYTE (   a_input,
  a_n,
  a_eof 
)    cr_input_peek_byte2 (a_input, a_n, a_eof)

Definition at line 161 of file cr-tknzr.c.

◆ CHECK_PARSING_STATUS

#define CHECK_PARSING_STATUS (   status,
  is_exception 
)
Value:
if ((status) != CR_OK) \
{ \
if (is_exception == FALSE) \
{ \
status = CR_PARSING_ERROR ; \
} \
goto error ; \
}

Checks if 'status' equals CR_OK.

If not, goto the 'error' label.

Parameters
status: the status (of type enum CRStatus) to test.
is_exception: if set to FALSE, the final status returned by the current function will be CR_PARSING_ERROR. If set to TRUE, the returned status will be the current value of the 'status' variable.

Definition at line 80 of file cr-tknzr.c.

◆ ENSURE_PARSING_COND

#define ENSURE_PARSING_COND (   condition)    if (! (condition)) {status = CR_PARSING_ERROR; goto error ;}

Tests the condition and if it is false, sets status to "CR_PARSING_ERROR" and goto the 'error' label.

Parameters
condition: the condition to test.

Definition at line 210 of file cr-tknzr.c.

◆ IS_NUM

#define IS_NUM (   a_char)    (((a_char) >= '0' && (a_char) <= '9')?TRUE:FALSE)

Returns TRUE if the character is a digit ([0-9]), FALSE otherwise.

Parameters
a_char: the char to test.

Definition at line 69 of file cr-tknzr.c.

◆ PEEK_BYTE

#define PEEK_BYTE (   a_tknzr,
  a_offset,
  a_byte_ptr 
)
Value:
status = cr_tknzr_peek_byte (a_tknzr, \
a_offset, \
a_byte_ptr) ; \
CHECK_PARSING_STATUS (status, TRUE) ;

Peeks a byte from the topmost parser input at a given offset from the current position.

If it fails, goto the "error:" label.

Parameters
a_tknzr: the current instance of CRTknzr.
a_offset: the offset of the byte to peek, the current byte having the offset '0'.
a_byte_ptr: out parameter. A pointer (guchar*) to where the peeked byte is to be stored.

Definition at line 155 of file cr-tknzr.c.

◆ PEEK_NEXT_CHAR

#define PEEK_NEXT_CHAR (   a_tknzr,
  a_to_char 
)
Value:
{\
status = cr_tknzr_peek_char (a_tknzr, a_to_char) ; \
CHECK_PARSING_STATUS (status, TRUE) \
}

Peeks the next char from the input stream of the current tokenizer.

Invokes CHECK_PARSING_STATUS on the status returned by cr_tknzr_peek_char().

Parameters
a_tknzr: the current instance of CRTknzr.
a_to_char: a pointer to the char where to store the char peeked.

Definition at line 99 of file cr-tknzr.c.

◆ PRIVATE

#define PRIVATE (   obj)    ((obj)->priv)

Definition at line 63 of file cr-tknzr.c.

◆ READ_NEXT_BYTE

#define READ_NEXT_BYTE (   a_tknzr,
  a_byte_ptr 
)
Value:
status = \
cr_input_read_byte (PRIVATE (a_tknzr)->input, a_byte_ptr) ;\
CHECK_PARSING_STATUS (status, TRUE) ;

Reads a byte from the topmost parser input stream.

If it fails, goto the "error" label.

Parameters
a_tknzr: the current instance of CRTknzr.
a_byte_ptr: the guchar * where to put the byte read.

Definition at line 171 of file cr-tknzr.c.

◆ READ_NEXT_CHAR

#define READ_NEXT_CHAR (   a_tknzr,
  to_char 
)
Value:
status = cr_tknzr_read_char (a_tknzr, to_char) ;\
CHECK_PARSING_STATUS (status, TRUE)

Reads the next char from the input stream of the current parser.

In case of error, jumps to the "error:" label located in the function where this macro is called.

Parameters
a_tknzr: the current instance of CRTknzr.
to_char: a pointer to the guint32 char where to store the character read.

Definition at line 113 of file cr-tknzr.c.

◆ RECORD_CUR_BYTE_ADDR

#define RECORD_CUR_BYTE_ADDR (   a_tknzr,
  a_addr 
)
Value:
status = cr_input_get_cur_byte_addr \
(PRIVATE (a_tknzr)->input, a_addr) ; \
CHECK_PARSING_STATUS (status, TRUE)

Gets the address of the current byte inside the parser input.

Parameters
a_tknzr: the current instance of CRTknzr.
a_addr: out parameter. A pointer (guchar*) to where the address must be put.

Definition at line 139 of file cr-tknzr.c.

◆ RECORD_INITIAL_POS

#define RECORD_INITIAL_POS (   a_tknzr,
  a_pos 
)
Value:
status = cr_input_get_cur_pos (PRIVATE \
(a_tknzr)->input, a_pos) ; \
g_return_val_if_fail (status == CR_OK, status)

Gets information about the current position in the input of the parser.

In case of failure, this macro returns from the calling function and returns a status code of type enum CRStatus.

Parameters
a_tknzr: the current instance of CRTknzr.
a_pos: out parameter. A pointer to the position inside the current parser input. (The original description is truncated here, after the word "Must".)

Definition at line 127 of file cr-tknzr.c.

◆ SKIP_BYTES

#define SKIP_BYTES (   a_tknzr,
  a_nb_bytes 
)
Value:
status = cr_input_seek_index (PRIVATE (a_tknzr)->input, \
CR_SEEK_CUR, a_nb_bytes) ; \
CHECK_PARSING_STATUS (status, TRUE) ;

Skips a given number of bytes in the topmost parser input.

Don't update line and column number. In case of error, jumps to the "error:" label of the surrounding function.

Parameters
a_tknzr: the current instance of CRTknzr.
a_nb_bytes: the number of bytes to skip.

Definition at line 184 of file cr-tknzr.c.

◆ SKIP_CHARS

#define SKIP_CHARS (   a_tknzr,
  a_nb_chars 
)
Value:
{ \
gulong nb_chars = a_nb_chars ; \
status = cr_input_consume_chars \
(PRIVATE (a_tknzr)->input,0, &nb_chars) ; \
CHECK_PARSING_STATUS (status, TRUE) ; \
}

Skip utf8 encoded characters.

Updates line and column numbers.

Parameters
a_tknzr: the current instance of CRTknzr.
a_nb_chars: the number of chars to skip. Must be of type glong.

Definition at line 196 of file cr-tknzr.c.

Function Documentation

◆ cr_tknzr_consume_chars()

enum CRStatus cr_tknzr_consume_chars ( CRTknzr *  a_this,
guint32  a_char,
glong *  a_nb_char 
)

◆ cr_tknzr_destroy()

void cr_tknzr_destroy ( CRTknzr *  a_this)

Definition at line 2740 of file cr-tknzr.c.

References cr_input_unref(), cr_token_destroy(), and PRIVATE.

Referenced by cr_tknzr_unref().

◆ cr_tknzr_get_cur_byte_addr()

enum CRStatus cr_tknzr_get_cur_byte_addr ( CRTknzr *  a_this,
guchar **  a_addr 
)

◆ cr_tknzr_get_cur_pos()

enum CRStatus cr_tknzr_get_cur_pos ( CRTknzr *  a_this,
CRInputPos *  a_pos
)

◆ cr_tknzr_get_input()

enum CRStatus cr_tknzr_get_input ( CRTknzr *  a_this,
CRInput **  a_input 
)

Definition at line 1701 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, CR_OK, and PRIVATE.

◆ cr_tknzr_get_nb_bytes_left()

glong cr_tknzr_get_nb_bytes_left ( CRTknzr *  a_this)

Gets the number of bytes left in the topmost input stream associated to this parser.

Parameters
a_this: the current instance of CRTknzr.
Returns
the number of bytes left or -1 in case of error.

Definition at line 1834 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, cr_input_get_nb_bytes_left(), cr_input_set_cur_pos(), cr_token_destroy(), and PRIVATE.

◆ cr_tknzr_get_next_token()

enum CRStatus cr_tknzr_get_next_token ( CRTknzr *  a_this,
CRToken **  a_tk 
)

Returns the next token of the input stream.

This method is really central. Each parsing method calls it.

Parameters
a_this: the current tokenizer.
a_tk: out parameter. The returned token. To avoid memory leaks, *a_tk must be NULL.
Returns
CR_OK upon successful completion, an error code otherwise.

Definition at line 1969 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, CR_END_OF_INPUT_ERROR, cr_input_get_end_of_file(), CR_OK, ENSURE_PARSING_COND, PRIVATE, and RECORD_INITIAL_POS.

Referenced by cr_parser_parse_charset(), cr_parser_parse_font_face(), cr_parser_parse_media(), cr_parser_parse_page(), cr_parser_parse_prio(), cr_parser_parse_statement_core(), cr_parser_parse_term(), cr_parser_try_to_skip_spaces_and_comments(), and cr_tknzr_parse_token().

◆ cr_tknzr_get_parsing_location()

enum CRStatus cr_tknzr_get_parsing_location ( CRTknzr *  a_this,
CRParsingLocation *  a_loc
)

Definition at line 1867 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, cr_input_get_parsing_location(), and PRIVATE.

Referenced by cr_parser_get_parsing_location().

◆ cr_tknzr_new()

CRTknzr* cr_tknzr_new ( CRInput *  a_input)

◆ cr_tknzr_new_from_buf()

CRTknzr* cr_tknzr_new_from_buf ( guchar *  a_buf,
gulong  a_len,
enum CREncoding  a_enc,
gboolean  a_free_at_destroy 
)

Definition at line 1627 of file cr-tknzr.c.

References cr_input_new_from_buf(), and cr_tknzr_new().

Referenced by cr_parser_parse_buf().

◆ cr_tknzr_new_from_uri()

CRTknzr* cr_tknzr_new_from_uri ( const guchar *  a_file_uri,
enum CREncoding  a_enc 
)

Definition at line 1645 of file cr-tknzr.c.

References cr_input_new_from_uri(), and cr_tknzr_new().

Referenced by cr_parser_new_from_file(), and cr_parser_parse_file().

◆ cr_tknzr_parse_token()

enum CRStatus cr_tknzr_parse_token ( CRTknzr *  a_this,
enum CRTokenType  a_type,
enum CRTokenExtraType  a_et,
gpointer  a_res,
gpointer  a_extra_res 
)

◆ cr_tknzr_peek_byte()

enum CRStatus cr_tknzr_peek_byte ( CRTknzr *  a_this,
gulong  a_offset,
guchar *  a_byte 
)

Peeks a byte ahead at a given position in the parser input stream.

Parameters
a_this: the current instance of CRTknzr.
a_offset: the offset of the peeked byte starting from the current byte in the parser input stream.
a_byte: out parameter. The peeked byte upon successful completion.
Returns
CR_OK upon successful completion, an error code otherwise.

Definition at line 1791 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, cr_input_peek_byte(), cr_input_set_cur_pos(), CR_SEEK_CUR, cr_token_destroy(), and PRIVATE.

Referenced by cr_parser_parse_expr().

◆ cr_tknzr_peek_byte2()

guchar cr_tknzr_peek_byte2 ( CRTknzr *  a_this,
gulong  a_offset,
gboolean *  a_eof 
)

Same as cr_tknzr_peek_byte() but this api returns the byte peeked.

Parameters
a_this: the current instance of CRTknzr.
a_offset: the offset of the peeked byte starting from the current byte in the parser input stream.
a_eof: out parameter. If not NULL, it is set to TRUE if the end of file was reached, FALSE otherwise. If the caller passes NULL, this parameter is ignored.
Returns
the peeked byte.

Definition at line 1819 of file cr-tknzr.c.

References cr_input_peek_byte2(), and PRIVATE.

◆ cr_tknzr_peek_char()

enum CRStatus cr_tknzr_peek_char ( CRTknzr *  a_this,
guint32 *  a_char 
)

Peeks a char from the parser input stream.

To "peek a char" means to read the next char without consuming it. Subsequent calls to this function return the same char.

Parameters
a_this: the current instance of CRTknzr.
a_char: out parameter. The peeked char upon successful completion.
Returns
CR_OK upon successful completion, an error code otherwise.

Definition at line 1765 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, cr_input_peek_char(), cr_input_set_cur_pos(), cr_token_destroy(), and PRIVATE.

◆ cr_tknzr_read_byte()

enum CRStatus cr_tknzr_read_byte ( CRTknzr *  a_this,
guchar *  a_byte 
)

Reads the next byte from the parser input stream.

Parameters
a_this: the "this pointer" of the current instance of CRTknzr.
a_byte: out parameter. The place where to store the byte read.
Returns
CR_OK upon successful completion, an error code otherwise.

Definition at line 1724 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, cr_input_read_byte(), and PRIVATE.

◆ cr_tknzr_read_char()

enum CRStatus cr_tknzr_read_char ( CRTknzr *  a_this,
guint32 *  a_char 
)

Reads the next char from the parser input stream.

Parameters
a_this: the current instance of CRTknzr.
a_char: out parameter. The read char.
Returns
CR_OK upon successful completion, an error code otherwise.

Definition at line 1740 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, cr_input_read_char(), cr_input_set_cur_pos(), cr_token_destroy(), and PRIVATE.

◆ cr_tknzr_ref()

void cr_tknzr_ref ( CRTknzr *  a_this)

Definition at line 1660 of file cr-tknzr.c.

References PRIVATE.

Referenced by cr_parser_set_tknzr().

◆ cr_tknzr_seek_index()

enum CRStatus cr_tknzr_seek_index ( CRTknzr *  a_this,
enum CRSeekPos  a_origin,
gint  a_pos 
)

◆ cr_tknzr_set_cur_pos()

enum CRStatus cr_tknzr_set_cur_pos ( CRTknzr *  a_this,
CRInputPos *  a_pos
)

Definition at line 1932 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, cr_input_set_cur_pos(), cr_token_destroy(), and PRIVATE.

◆ cr_tknzr_set_input()

enum CRStatus cr_tknzr_set_input ( CRTknzr *  a_this,
CRInput *  a_input
)

Definition at line 1685 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, cr_input_ref(), cr_input_unref(), CR_OK, and PRIVATE.

Referenced by cr_tknzr_new().

◆ cr_tknzr_unget_token()

enum CRStatus cr_tknzr_unget_token ( CRTknzr *  a_this,
CRToken *  a_token
)

Definition at line 1946 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, CR_OK, and PRIVATE.

Referenced by cr_parser_parse_page(), and cr_parser_parse_statement_core().

◆ cr_tknzr_unref()

gboolean cr_tknzr_unref ( CRTknzr *  a_this)

Definition at line 1668 of file cr-tknzr.c.

References cr_tknzr_destroy(), and PRIVATE.

Referenced by cr_parser_destroy(), and cr_parser_set_tknzr().

cr_tknzr_read_char
enum CRStatus cr_tknzr_read_char(CRTknzr *a_this, guint32 *a_char)
Reads the next char from the parser input stream.
Definition: cr-tknzr.c:1740
CR_SEEK_CUR
@ CR_SEEK_CUR
Definition: cr-utils.h:76
cr_input_get_cur_byte_addr
enum CRStatus cr_input_get_cur_byte_addr(CRInput *a_this, guchar **a_offset)
cr_input_get_cur_byte_addr: @a_this: the current input stream @a_offset: out parameter.
Definition: cr-input.c:908
CR_OK
@ CR_OK
Definition: cr-utils.h:44
cr_input_seek_index
enum CRStatus cr_input_seek_index(CRInput *a_this, enum CRSeekPos a_origin, gint a_pos)
cr_input_seek_index: @a_this: the current instance of CRInput.
Definition: cr-input.c:940
cr_input_consume_chars
enum CRStatus cr_input_consume_chars(CRInput *a_this, guint32 a_char, gulong *a_nb_char)
cr_input_consume_chars: @a_this: the this pointer of the current instance of CRInput.
Definition: cr-input.c:663
cr_tknzr_peek_char
enum CRStatus cr_tknzr_peek_char(CRTknzr *a_this, guint32 *a_char)
Peeks a char from the parser input stream.
Definition: cr-tknzr.c:1765
cr_input_get_cur_pos
enum CRStatus cr_input_get_cur_pos(CRInput const *a_this, CRInputPos *a_pos)
cr_input_get_cur_pos: @a_this: the current instance of CRInput.
Definition: cr-input.c:995
PRIVATE
#define PRIVATE(obj)
Definition: cr-tknzr.c:63
CR_PARSING_ERROR
@ CR_PARSING_ERROR
Definition: cr-utils.h:59
cr_tknzr_peek_byte
enum CRStatus cr_tknzr_peek_byte(CRTknzr *a_this, gulong a_offset, guchar *a_byte)
Peeks a byte ahead at a given position in the parser input stream.
Definition: cr-tknzr.c:1791