pega-texto
A Parsing Expression Grammar (PEG) runtime engine in C
Data Structures | Macros | Typedefs | Enumerations | Functions | Variables
pega-texto.h File Reference

pega-texto.h – Parsing Expression Grammar (PEG) runtime engine More...

#include <stdlib.h>
#include <stdint.h>
#include <string.h>

Go to the source code of this file.

Data Structures

union  PT_DATA
 Default data type for Actions to return. More...
 
struct  pt_expr
 Parsing Expressions. More...
 
struct  pt_match_result
 Match result: a {number of matched chars/match error code, action result} pair. More...
 
struct  pt_match_options
 Options passed to pt_match. More...
 

Macros

#define PT_DECL   extern
 
#define PT_RANGE_PACK(from, to)    (((uintptr_t) (from)) | (((uintptr_t) (to)) << (8 * sizeof(uintptr_t) / 2)))
 
#define PT_RANGE_UNPACK_FROM(r)    ((PT_ELEMENT_TYPE) ((r) & ((UINTPTR_MAX) >> (8 * sizeof(uintptr_t) / 2))))
 
#define PT_RANGE_UNPACK_TO(r)    ((PT_ELEMENT_TYPE) ((r) >> (8 * sizeof(uintptr_t) / 2)))
 
#define PT_NARG(...)    PT_NARG_(__VA_ARGS__, PT_RSEQ_N())
 
#define PT_NARG_(...)    PT_ARG_N(__VA_ARGS__)
 
#define PT_ARG_N(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, _59, _60, _61, _62, _63, N, ...)   N
 
#define PT_RSEQ_N()
 
#define PT_END()   ((pt_expr){ PT_OP_END })
 
#define PT_ELEMENT(e)   ((pt_expr){ PT_OP_ELEMENT, 0, (void *) e })
 
#define PT_LITERAL(str, size)   ((pt_expr){ PT_OP_LITERAL, size, str })
 
#define PT_LITERAL_S(str)   ((pt_expr){ PT_OP_LITERAL, sizeof(str) - 1, str })
 
#define PT_LITERAL_0(str)   ((pt_expr){ PT_OP_LITERAL, strlen(str), str })
 
#define PT_CASE(str, size)   ((pt_expr){ PT_OP_CASE_INSENSITIVE, size, str })
 
#define PT_CASE_S(str)   ((pt_expr){ PT_OP_CASE_INSENSITIVE, sizeof(str) - 1, str })
 
#define PT_CASE_0(str)   ((pt_expr){ PT_OP_CASE_INSENSITIVE, strlen(str), str })
 
#define PT_CLASS(c)   ((pt_expr){ PT_OP_CHARACTER_CLASS, c })
 
#define PT_ALNUM()   PT_CLASS(PT_CLASS_ALNUM)
 
#define PT_ALPHA()   PT_CLASS(PT_CLASS_ALPHA)
 
#define PT_CNTRL()   PT_CLASS(PT_CLASS_CNTRL)
 
#define PT_DIGIT()   PT_CLASS(PT_CLASS_DIGIT)
 
#define PT_GRAPH()   PT_CLASS(PT_CLASS_GRAPH)
 
#define PT_LOWER()   PT_CLASS(PT_CLASS_LOWER)
 
#define PT_PUNCT()   PT_CLASS(PT_CLASS_PUNCT)
 
#define PT_SPACE()   PT_CLASS(PT_CLASS_SPACE)
 
#define PT_UPPER()   PT_CLASS(PT_CLASS_UPPER)
 
#define PT_XDIGIT()   PT_CLASS(PT_CLASS_XDIGIT)
 
#define PT_SET(str, size)   ((pt_expr){ PT_OP_SET, size, str })
 
#define PT_SET_S(str)   ((pt_expr){ PT_OP_SET, sizeof(str) - 1, str })
 
#define PT_SET_0(str)   ((pt_expr){ PT_OP_SET, strlen(str), str })
 
#define PT_RANGE(from, to)   ((pt_expr){ PT_OP_RANGE, 0, (void *) PT_RANGE_PACK(from, to) })
 
#define PT_ANY()   ((pt_expr){ PT_OP_ANY, 0 })
 
#define PT_CALL(index)   ((pt_expr){ PT_OP_NON_TERMINAL, 0, (void *) index })
 
#define PT_AT_LEAST(n, ...)   ((pt_expr){ PT_OP_AT_LEAST, PT_NARG(__VA_ARGS__), (void *) n }), __VA_ARGS__
 
#define PT_AT_MOST(n, ...)   ((pt_expr){ PT_OP_AT_MOST, PT_NARG(__VA_ARGS__), (void *) n }), __VA_ARGS__
 
#define PT_AND(...)   ((pt_expr){ PT_OP_AND, PT_NARG(__VA_ARGS__) }), __VA_ARGS__
 
#define PT_NOT(...)   ((pt_expr){ PT_OP_NOT, PT_NARG(__VA_ARGS__) }), __VA_ARGS__
 
#define PT_SEQUENCE(...)   ((pt_expr){ PT_OP_SEQUENCE, PT_NARG(__VA_ARGS__) }), __VA_ARGS__
 
#define PT_CHOICE(...)   ((pt_expr){ PT_OP_CHOICE, PT_NARG(__VA_ARGS__) }), __VA_ARGS__
 
#define PT_CUSTOM_MATCHER(f)   ((pt_expr){ PT_OP_CUSTOM_MATCHER, 0, (void *) f })
 
#define PT_ACTION(action, ...)   ((pt_expr){ PT_OP_ACTION, PT_NARG(__VA_ARGS__), (void *) action }), __VA_ARGS__
 
#define PT_ERROR(error_action)   ((pt_expr){ PT_OP_ERROR, 0, (void *) error_action })
 
#define PT_ONE_OR_MORE(...)   PT_AT_LEAST(1, __VA_ARGS__)
 
#define PT_ZERO_OR_MORE(...)   PT_AT_LEAST(0, __VA_ARGS__)
 
#define PT_OPTIONAL(...)   PT_AT_MOST(1, __VA_ARGS__)
 
#define PT_ANY_BUT(...)   PT_SEQUENCE(PT_NOT(__VA_ARGS__), PT_ANY())
 
#define PT_RULE(...)   { __VA_ARGS__, PT_END() }
 
#define PT_ERROR_IF(error_action, ...)   PT_OPTIONAL(PT_AND(__VA_ARGS__), PT_ERROR(error_action))
 

Typedefs

typedef const char PT_ELEMENT_TYPE
 
typedef PT_ELEMENT_TYPE * pt_element_string
 
typedef int(* pt_custom_matcher_function) (pt_element_string, void *)
 A function that receives a string and userdata and either matches it (returning a positive number, the number of matched elements to advance) or does not.
 
typedef PT_DATA(* pt_expression_action) (pt_element_string str, size_t size, int argc, PT_DATA *argv, void *userdata)
 Action to be called for a capture after the whole match succeeds. More...
 
typedef void(* pt_error_action) (pt_element_string str, size_t where, void *userdata)
 Action to be called when an Error Expression is matched (on syntactic errors). More...
 
typedef pt_expr pt_rule[]
 Rule typedef, an array of expressions.
 
typedef pt_expr * pt_grammar[]
 Grammar typedef, a 2D array of expressions, or array of Rules.
 

Enumerations

enum  pt_operation {
  PT_OP_END = 0 , PT_OP_ELEMENT , PT_OP_LITERAL , PT_OP_CASE_INSENSITIVE ,
  PT_OP_CHARACTER_CLASS , PT_OP_SET , PT_OP_RANGE , PT_OP_ANY ,
  PT_OP_CUSTOM_MATCHER , PT_OP_NON_TERMINAL , PT_OP_AT_LEAST , PT_OP_AT_MOST ,
  PT_OP_NOT , PT_OP_AND , PT_OP_SEQUENCE , PT_OP_CHOICE ,
  PT_OP_ACTION , PT_OP_ERROR , PT_OP_OPERATION_ENUM_COUNT
}
 Operations for constructing Parsing Expressions.
 
enum  pt_character_class {
  PT_CLASS_ALNUM = 'w' , PT_CLASS_ALPHA = 'a' , PT_CLASS_CNTRL = 'c' , PT_CLASS_DIGIT = 'd' ,
  PT_CLASS_GRAPH = 'g' , PT_CLASS_LOWER = 'l' , PT_CLASS_PUNCT = 'p' , PT_CLASS_SPACE = 's' ,
  PT_CLASS_UPPER = 'u' , PT_CLASS_XDIGIT = 'x'
}
 Character classes supported by pega-texto. More...
 
enum  pt_macth_error_code { PT_NO_MATCH = -1 , PT_NO_STACK_MEM = -2 , PT_MATCHED_ERROR = -3 , PT_NULL_INPUT = -4 }
 Possible error codes returned by pt_match. More...
 

Functions

PT_DECL pt_match_result pt_match (const pt_grammar grammar, pt_element_string str, const pt_match_options *const opts)
 Try to match the string str with a PEG. More...
 

Variables

PT_DECL const char *const pt_operation_names []
 String literals of the operations.
 
PT_DECL const pt_match_options pt_default_match_options
 Default match options: all 0 or NULL.
 

Detailed Description

pega-texto.h – Parsing Expression Grammar (PEG) runtime engine

Project URL: https://github.com/gilzoide/pega-texto

Do this:

#define PEGA_TEXTO_IMPLEMENTATION

before you include this file in one C or C++ file to create the implementation.

i.e.:

#include ...
#include ...
#define PEGA_TEXTO_IMPLEMENTATION
#include "pega-texto.h"

Optionally provide the following defines with your own implementations:

Definition in file pega-texto.h.

Macro Definition Documentation

◆ PT_RSEQ_N

#define PT_RSEQ_N ( )
Value:
63,62,61,60, \
59,58,57,56,55,54,53,52,51,50, \
49,48,47,46,45,44,43,42,41,40, \
39,38,37,36,35,34,33,32,31,30, \
29,28,27,26,25,24,23,22,21,20, \
19,18,17,16,15,14,13,12,11,10, \
9,8,7,6,5,4,3,2,1,0

Definition at line 239 of file pega-texto.h.

Typedef Documentation

◆ pt_expression_action

typedef PT_DATA(* pt_expression_action) (pt_element_string str, size_t size, int argc, PT_DATA *argv, void *userdata)

Action to be called for a capture after the whole match succeeds.

Actions will be called only if the whole match succeeds, in the order the Action Expressions were matched.

Expression Actions reduce inner Actions' result into a single value.

Parameters:

  • Pointer to the start of the match/capture
  • Number of bytes contained in the match/capture
  • Number of PT_DATA arguments
  • PT_DATA arguments, processed on inner Actions. Currently, this array is reused, so you should not rely on it after your function has returned
  • User custom data from match options

Definition at line 174 of file pega-texto.h.

◆ pt_error_action

typedef void(* pt_error_action) (pt_element_string str, size_t where, void *userdata)

Action to be called when an Error Expression is matched (on syntactic errors).

Parameters:

  • The original subject string
  • Position where the error was encountered
  • User custom data from match options

Definition at line 188 of file pega-texto.h.

Enumeration Type Documentation

◆ pt_character_class

Character classes supported by pega-texto.

Each of them corresponds to one of the is* functions defined in the ctype.h header.

Definition at line 105 of file pega-texto.h.

◆ pt_macth_error_code

Possible error codes returned by pt_match.

Enumerator
PT_NO_MATCH 

Subject string didn't match the given PEG.

PT_NO_STACK_MEM 

Error while allocating memory for the Action stack.

PT_MATCHED_ERROR 

Matched an Error Expression.

PT_NULL_INPUT 

Provided string is a NULL pointer.

Definition at line 119 of file pega-texto.h.

Function Documentation

◆ pt_match()

PT_DECL pt_match_result pt_match ( const pt_grammar  grammar,
pt_element_string  str,
const pt_match_options *const  opts 
)

Try to match the string str with a PEG.

Warning
This function doesn't check for ill-formed grammars, so it's advised that you validate it before running the match algorithm.
Parameters
grammar: Expression array of arbitrary size. For a single Expression, just pass a pointer to it.
str: Subject string to match.
opts: Match options. If NULL, pega-texto will use the default value pt_default_match_options.
Returns
Number of matched characters/error code, result of Action folding.