pega-texto
A Parsing Expression Grammars (PEG) runtime engine in C
pega-texto.h
Go to the documentation of this file.
1 
42 #ifndef PEGA_TEXTO_H
43 #define PEGA_TEXTO_H
44 
45 #include <stdlib.h>
46 #include <stdint.h>
47 #include <string.h>
48 
49 #ifndef PT_DECL
50  #ifdef PT_STATIC
51  #define PT_DECL static
52  #else
53  #define PT_DECL extern
54  #endif
55 #endif
56 
57 // Define PT_ELEMENT_TYPE to the string element type, so there can be
58 // parsers for stuff other than `const char` like `const uint8_t`
59 #ifndef PT_ELEMENT_TYPE
60  typedef const char PT_ELEMENT_TYPE;
61 #endif
62 
63 typedef PT_ELEMENT_TYPE *pt_element_string;
64 
65 #ifdef __cplusplus
66 extern "C" {
67 #endif
68 
// Operations for constructing Parsing Expressions.
// The enumerator order defines the index of each name in
// `pt_operation_names`, so both must be kept in sync.
enum pt_operation {
    PT_OP_END = 0,
    // Primary
    PT_OP_ELEMENT,           // 'b'
    PT_OP_LITERAL,           // "string"
    PT_OP_CASE_INSENSITIVE,  // I"string"
    PT_OP_CHARACTER_CLASS,   // int(char)  // If return 0, match fails
                             //            // If return non-zero, match succeeds, advance 1
    PT_OP_SET,               // [chars]
    PT_OP_RANGE,             // [c1-c2]
    PT_OP_ANY,               // .
    // Custom match by function
    PT_OP_CUSTOM_MATCHER,    // int(const char *, void *)  // Return how many characters were matched
                             //                            // Return non-positive values for no match to occur
    // Unary
    PT_OP_NON_TERMINAL,      // <non-terminal>  // Recurse to non-terminal expression
    PT_OP_AT_LEAST,          // e^N   // Match N or more occurrences of next Expression
    PT_OP_AT_MOST,           // e^-N  // Match N or less occurrences of next Expression. Always succeeds
    PT_OP_NOT,               // !e
    PT_OP_AND,               // &e
    // N-ary
    PT_OP_SEQUENCE,          // e1 e2
    PT_OP_CHOICE,            // e1 / e2
    PT_OP_ACTION,            // Push an action to the stack
    PT_OP_ERROR,             // ERROR  // Represents a syntactic error

    PT_OP_OPERATION_ENUM_COUNT,
};
98 
// String literals of the operations (one name per operation enumerator).
100 PT_DECL const char* const pt_operation_names[];
101 
// Character classes supported by pega-texto.
// Each class is identified by a character code, which the matcher maps to
// the `<ctype.h>` predicate of the same name (`isalnum`, `isalpha`, ...).
enum pt_character_class {
    PT_CLASS_ALNUM = 'w',
    PT_CLASS_ALPHA = 'a',
    PT_CLASS_CNTRL = 'c',
    PT_CLASS_DIGIT = 'd',
    PT_CLASS_GRAPH = 'g',
    PT_CLASS_LOWER = 'l',
    PT_CLASS_PUNCT = 'p',
    PT_CLASS_SPACE = 's',
    PT_CLASS_UPPER = 'u',
    PT_CLASS_XDIGIT = 'x',
};
117 
119 typedef enum pt_macth_error_code {
129 
// Default data type for Actions to return: a union of the common scalar
// types plus a generic pointer. The definition is skipped when a `PT_DATA`
// macro is already defined, which lets users substitute their own type.
// NOTE(review): `ssize_t` is a POSIX type; ISO C does not guarantee that any
// of the headers included by this file declares it -- confirm on non-POSIX
// targets.
130 #ifndef PT_DATA
138  typedef union PT_DATA {
139  void *p;
140  char c;
141  unsigned char uc;
142  short s;
143  unsigned short us;
144  int i;
145  unsigned int ui;
146  long l;
147  unsigned long ul;
148  long long ll;
149  unsigned long long ull;
150  ssize_t ssz;
151  size_t sz;
152  float f;
153  double d;
154  } PT_DATA;
155 #endif
156 
// Custom matcher: receives the current position in the subject string and the
// userdata from the match options. A positive return value is the number of
// elements matched; a non-positive return value means no match.
158 typedef int (*pt_custom_matcher_function)(pt_element_string, void *);
159 
175  pt_element_string str,
176  size_t size,
177  int argc,
178  PT_DATA *argv,
179  void *userdata
180 );
181 
// Action to be called when an Error Expression is matched (on syntactic
// errors).
//
// str:      the whole subject string being matched
// where:    offset into `str` at which the Error Expression was reached
// userdata: the `userdata` pointer from the match options
188 typedef void (*pt_error_action)(
189  pt_element_string str,
190  size_t where,
191  void *userdata
192 );
193 
// A single Parsing Expression. Rules are flat arrays of these; an operator
// expression is immediately followed by its operand expressions.
195 typedef struct pt_expr {
 // Operation to be performed (one of the PT_OP_* values).
197  uint8_t op;
 // Small operand whose meaning depends on `op`: literal/set length, character
 // class code, or the number of expression slots nested under an operator.
199  uint16_t N;
 // Pointer-sized operand; which member is meaningful depends on `op`.
 // `data` comes first, so it is the member set by the positional initializers
 // of the PT_* constructor macros (they cast their argument to `void *`).
201  union {
202  const void *data;
 // Literal, case-insensitive literal and set operations read the text here.
203  const pt_element_string str;
 // Function called by PT_OP_CUSTOM_MATCHER.
204  const pt_custom_matcher_function matcher;
 // Function queued by PT_OP_ACTION.
205  const pt_expression_action action;
 // Function called by PT_OP_ERROR; may be NULL.
206  const pt_error_action error_action;
 // Element compared by PT_OP_ELEMENT.
207  uintptr_t element;
 // Index of the rule invoked by PT_OP_NON_TERMINAL.
208  uintptr_t index;
 // Range bounds as produced by PT_RANGE_PACK, read by PT_OP_RANGE.
209  uintptr_t range;
 // Repetition count used by PT_OP_AT_LEAST and PT_OP_AT_MOST.
210  uintptr_t quantifier;
211  };
212 } pt_expr;
213 
// Rule typedef, an array of expressions.
215 typedef pt_expr pt_rule[];
// Grammar typedef, a 2D array of expressions, or array of Rules.
217 typedef pt_expr* pt_grammar[];
218 
// Pack both bounds of a range into one `uintptr_t`: `from` in the lower half
// of the bits and `to` in the upper half.
// `from` is masked to the lower half: converting a negative element (e.g. a
// `char` >= 0x80 where `char` is signed) to `uintptr_t` sign-extends it, and
// without the mask those extra bits would overwrite `to`.
#define PT_RANGE_PACK(from, to) \
    ((((uintptr_t) (from)) & ((UINTPTR_MAX) >> (8 * sizeof(uintptr_t) / 2))) \
     | (((uintptr_t) (to)) << (8 * sizeof(uintptr_t) / 2)))
// Extract the lower bound of a packed range as an element.
#define PT_RANGE_UNPACK_FROM(r) \
    ((PT_ELEMENT_TYPE) ((r) & ((UINTPTR_MAX) >> (8 * sizeof(uintptr_t) / 2))))
// Extract the upper bound of a packed range as an element.
#define PT_RANGE_UNPACK_TO(r) \
    ((PT_ELEMENT_TYPE) ((r) >> (8 * sizeof(uintptr_t) / 2)))
225 
// PT_NARG(...) expands to the number of arguments it receives. It appends the
// descending sequence 63..0 to the arguments and picks the 64th item of the
// combined list, so it is only correct for 1 to 63 arguments.
226 // Ref: https://groups.google.com/g/comp.std.c/c/d-6Mj5Lko_s
227 #define PT_NARG(...) \
228  PT_NARG_(__VA_ARGS__, PT_RSEQ_N())
229 #define PT_NARG_(...) \
230  PT_ARG_N(__VA_ARGS__)
231 #define PT_ARG_N( \
232  _1, _2, _3, _4, _5, _6, _7, _8, _9,_10, \
233  _11,_12,_13,_14,_15,_16,_17,_18,_19,_20, \
234  _21,_22,_23,_24,_25,_26,_27,_28,_29,_30, \
235  _31,_32,_33,_34,_35,_36,_37,_38,_39,_40, \
236  _41,_42,_43,_44,_45,_46,_47,_48,_49,_50, \
237  _51,_52,_53,_54,_55,_56,_57,_58,_59,_60, \
238  _61,_62,_63,N,...) N
239 #define PT_RSEQ_N() \
240  63,62,61,60, \
241  59,58,57,56,55,54,53,52,51,50, \
242  49,48,47,46,45,44,43,42,41,40, \
243  39,38,37,36,35,34,33,32,31,30, \
244  29,28,27,26,25,24,23,22,21,20, \
245  19,18,17,16,15,14,13,12,11,10, \
246  9,8,7,6,5,4,3,2,1,0
247 
// Expression constructors. Each macro expands to a `pt_expr` compound literal
// initialized positionally as { op, N, operand }; operator macros are followed
// by their operand expressions, and N is the number of expression slots those
// operands occupy (counted with PT_NARG).
// Macro arguments are parenthesized so that compound arguments such as
// `a + b` or `c ? x : y` are converted as a whole.
#define PT_END() ((pt_expr){ PT_OP_END })
#define PT_ELEMENT(e) ((pt_expr){ PT_OP_ELEMENT, 0, (void *) (e) })
#define PT_LITERAL(str, size) ((pt_expr){ PT_OP_LITERAL, (size), (str) })
#define PT_LITERAL_S(str) ((pt_expr){ PT_OP_LITERAL, sizeof(str) - 1, (str) })
#define PT_LITERAL_0(str) ((pt_expr){ PT_OP_LITERAL, strlen(str), (str) })
#define PT_CASE(str, size) ((pt_expr){ PT_OP_CASE_INSENSITIVE, (size), (str) })
#define PT_CASE_S(str) ((pt_expr){ PT_OP_CASE_INSENSITIVE, sizeof(str) - 1, (str) })
#define PT_CASE_0(str) ((pt_expr){ PT_OP_CASE_INSENSITIVE, strlen(str), (str) })
#define PT_CLASS(c) ((pt_expr){ PT_OP_CHARACTER_CLASS, (c) })
#define PT_ALNUM() PT_CLASS(PT_CLASS_ALNUM)
#define PT_ALPHA() PT_CLASS(PT_CLASS_ALPHA)
#define PT_CNTRL() PT_CLASS(PT_CLASS_CNTRL)
#define PT_DIGIT() PT_CLASS(PT_CLASS_DIGIT)
#define PT_GRAPH() PT_CLASS(PT_CLASS_GRAPH)
#define PT_LOWER() PT_CLASS(PT_CLASS_LOWER)
#define PT_PUNCT() PT_CLASS(PT_CLASS_PUNCT)
#define PT_SPACE() PT_CLASS(PT_CLASS_SPACE)
#define PT_UPPER() PT_CLASS(PT_CLASS_UPPER)
#define PT_XDIGIT() PT_CLASS(PT_CLASS_XDIGIT)
#define PT_SET(str, size) ((pt_expr){ PT_OP_SET, (size), (str) })
#define PT_SET_S(str) ((pt_expr){ PT_OP_SET, sizeof(str) - 1, (str) })
#define PT_SET_0(str) ((pt_expr){ PT_OP_SET, strlen(str), (str) })
#define PT_RANGE(from, to) ((pt_expr){ PT_OP_RANGE, 0, (void *) PT_RANGE_PACK(from, to) })
#define PT_ANY() ((pt_expr){ PT_OP_ANY, 0 })
#define PT_CALL(index) ((pt_expr){ PT_OP_NON_TERMINAL, 0, (void *) (index) })
#define PT_AT_LEAST(n, ...) ((pt_expr){ PT_OP_AT_LEAST, PT_NARG(__VA_ARGS__), (void *) (n) }), __VA_ARGS__
#define PT_AT_MOST(n, ...) ((pt_expr){ PT_OP_AT_MOST, PT_NARG(__VA_ARGS__), (void *) (n) }), __VA_ARGS__
#define PT_AND(...) ((pt_expr){ PT_OP_AND, PT_NARG(__VA_ARGS__) }), __VA_ARGS__
#define PT_NOT(...) ((pt_expr){ PT_OP_NOT, PT_NARG(__VA_ARGS__) }), __VA_ARGS__
#define PT_SEQUENCE(...) ((pt_expr){ PT_OP_SEQUENCE, PT_NARG(__VA_ARGS__) }), __VA_ARGS__
#define PT_CHOICE(...) ((pt_expr){ PT_OP_CHOICE, PT_NARG(__VA_ARGS__) }), __VA_ARGS__
#define PT_CUSTOM_MATCHER(f) ((pt_expr){ PT_OP_CUSTOM_MATCHER, 0, (void *) (f) })
#define PT_ACTION(action, ...) ((pt_expr){ PT_OP_ACTION, PT_NARG(__VA_ARGS__), (void *) (action) }), __VA_ARGS__
#define PT_ERROR(error_action) ((pt_expr){ PT_OP_ERROR, 0, (void *) (error_action) })
282 
283 // Aliases
284 #define PT_ONE_OR_MORE(...) PT_AT_LEAST(1, __VA_ARGS__)
285 #define PT_ZERO_OR_MORE(...) PT_AT_LEAST(0, __VA_ARGS__)
286 #define PT_OPTIONAL(...) PT_AT_MOST(1, __VA_ARGS__)
287 #define PT_ANY_BUT(...) PT_SEQUENCE(PT_NOT(__VA_ARGS__), PT_ANY())
288 #define PT_RULE(...) { __VA_ARGS__, PT_END() }
289 #define PT_ERROR_IF(error_action, ...) PT_OPTIONAL(PT_AND(__VA_ARGS__), PT_ERROR(error_action))
290 
291 #ifdef PT_DEFINE_SHORTCUTS
292  #define ELEMENT PT_ELEMENT
293  #define B PT_ELEMENT
294  #define LITERAL PT_LITERAL_S
295  #define L PT_LITERAL_S
296  #define CASE_INSENSITIVE PT_CASE_S
297  #define I PT_CASE_S
298  #define CLASS PT_CLASS
299  #define C PT_CLASS
300  #define ALNUM PT_ALNUM
301  #define ALPHA PT_ALPHA
302  #define CNTRL PT_CNTRL
303  #define DIGIT PT_DIGIT
304  #define GRAPH PT_GRAPH
305  #define LOWER PT_LOWER
306  #define PUNCT PT_PUNCT
307  #define SPACE PT_SPACE
308  #define UPPER PT_UPPER
309  #define XDIGIT PT_XDIGIT
310  #define SET PT_SET_S
311  #define S PT_SET_S
312  #define RANGE PT_RANGE
313  #define R PT_RANGE
314  #define ANY PT_ANY
315  #define CALL PT_CALL
316  #define V PT_CALL
317  #define AT_LEAST PT_AT_LEAST
318  #define AT_MOST PT_AT_MOST
319  #define ONE_OR_MORE PT_ONE_OR_MORE
320  #define ZERO_OR_MORE PT_ZERO_OR_MORE
321  #define OPTIONAL PT_OPTIONAL
322  #define OPT PT_OPTIONAL
323  #define AND PT_AND
324  #define NOT PT_NOT
325  #define SEQ PT_SEQUENCE
326  #define EITHER PT_CHOICE
327  #define CUSTOM_MATCHER PT_CUSTOM_MATCHER
328  #define F PT_CUSTOM_MATCHER
329  #define ERROR PT_ERROR
330  #define ERROR_IF PT_ERROR_IF
331  #define E PT_ERROR
332  #define ACTION PT_ACTION
333  #define ACT PT_ACTION
334  #define ANY_BUT PT_ANY_BUT
335 #endif
336 
339 typedef struct pt_match_result {
342  int matched;
350 
352 typedef struct pt_match_options {
353  void *userdata;
357 
358 
361 
// Try to match the string `str` with a PEG.
// Matching starts at the first rule of `grammar`. `opts` may be NULL, in
// which case the default match options are used.
373 PT_DECL pt_match_result pt_match(const pt_grammar grammar, pt_element_string str, const pt_match_options *const opts);
374 
375 // TODO: grammar validation
376 
377 #ifdef __cplusplus
378 }
379 #endif
380 
381 #endif // PEGA_TEXTO_H
382 
384 
385 #ifdef PEGA_TEXTO_IMPLEMENTATION
386 
387 #include <ctype.h>
388 
389 #ifndef PT_ASSERT
390  #include <assert.h>
391  #define PT_ASSERT(cond, message, d) assert(cond && message)
392 #endif
393 
394 #ifndef PT_MALLOC
395  #define PT_MALLOC(size, d) malloc(size)
396 #endif
397 #ifndef PT_REALLOC
398  #define PT_REALLOC(p, size, d) realloc(p, size)
399 #endif
400 #ifndef PT_FREE
401  #define PT_FREE(p, d) free(p)
402 #endif
403 
// String literals of the operations, indexed by operation value.
// The order must mirror the operation enum exactly; in particular PT_OP_NOT
// comes before PT_OP_AND there.
const char * const pt_operation_names[] = {
    "PT_OP_END",
    "PT_OP_ELEMENT",
    "PT_OP_LITERAL",
    "PT_OP_CASE_INSENSITIVE",
    "PT_OP_CHARACTER_CLASS",
    "PT_OP_SET",
    "PT_OP_RANGE",
    "PT_OP_ANY",
    "PT_OP_CUSTOM_MATCHER",
    "PT_OP_NON_TERMINAL",
    "PT_OP_AT_LEAST",
    "PT_OP_AT_MOST",
    "PT_OP_NOT",
    "PT_OP_AND",
    "PT_OP_SEQUENCE",
    "PT_OP_CHOICE",
    "PT_OP_ACTION",
    "PT_OP_ERROR",
};
424 
426 typedef int(*pt__character_class_function)(int);
427 
431 static inline pt__character_class_function pt__function_for_character_class(enum pt_character_class c) {
432  switch(c) {
433  case PT_CLASS_ALNUM: return isalnum;
434  case PT_CLASS_ALPHA: return isalpha;
435  case PT_CLASS_CNTRL: return iscntrl;
436  case PT_CLASS_DIGIT: return isdigit;
437  case PT_CLASS_GRAPH: return isgraph;
438  case PT_CLASS_LOWER: return islower;
439  case PT_CLASS_PUNCT: return ispunct;
440  case PT_CLASS_SPACE: return isspace;
441  case PT_CLASS_UPPER: return isupper;
442  case PT_CLASS_XDIGIT: return isxdigit;
443  default: return NULL;
444  }
445 }
446 
448 
450 #ifndef PT_DEFAULT_INITIAL_STACK_CAPACITY
451  #define PT_DEFAULT_INITIAL_STACK_CAPACITY 64
452 #endif
453 
455 typedef struct pt__match_action {
457  pt_element_string str;
458  size_t size;
459  int argc;
460 } pt__match_action;
461 
// Growable array of queued Actions.
463 typedef struct pt__match_action_stack {
 // Heap buffer holding the queued Actions.
464  pt__match_action *actions;
 // Number of Actions the buffer has room for.
465  size_t capacity;
 // Number of Actions currently queued.
466  size_t size;
467 } pt__match_action_stack;
468 
// State shared by the matching functions during one match.
// The member order matters: `pt_match` initializes this struct positionally.
469 typedef struct pt__match_context {
 // The grammar: an array of rules, indexed by non-terminal calls.
470  const pt_expr *const *const grammar;
 // Match options; never NULL (`pt_match` substitutes the defaults).
471  const pt_match_options *opts;
 // Start of the subject string, used to compute offsets for error actions.
472  const pt_element_string str;
 // Actions queued so far.
473  pt__match_action_stack action_stack;
474 } pt__match_context;
475 
476 
477 static int pt__initialize_action_stack(pt__match_context *context) {
478  size_t initial_capacity = context->opts->initial_stack_capacity;
479  if(initial_capacity == 0) {
480  initial_capacity = PT_DEFAULT_INITIAL_STACK_CAPACITY;
481  }
482  context->action_stack.actions = (pt__match_action *) PT_MALLOC(initial_capacity * sizeof(pt__match_action), context->opts->userdata);
483  if(context->action_stack.actions) {
484  context->action_stack.size = 0;
485  context->action_stack.capacity = initial_capacity;
486  return 1;
487  }
488  else {
489  return 0;
490  }
491 }
492 
493 static void pt__destroy_action_stack(pt__match_context *context) {
494  PT_FREE(context->action_stack.actions, context->opts->userdata);
495 }
496 
497 static pt__match_action *pt__push_action(pt__match_context *context, pt_expression_action f, pt_element_string str, size_t size, int argc) {
498  pt__match_action *action;
499  // Double capacity, if reached
500  if(context->action_stack.size == context->action_stack.capacity) {
501  int new_capacity = context->action_stack.capacity * 2;
502  action = (pt__match_action *) PT_REALLOC(context->action_stack.actions, new_capacity * sizeof(pt__match_action), context->opts->userdata);
503  if(action) {
504  context->action_stack.capacity = new_capacity;
505  context->action_stack.actions = action;
506  }
507  else {
508  return NULL;
509  }
510  }
511  action = context->action_stack.actions + (context->action_stack.size)++;
512  action->f = f;
513  action->str = str;
514  action->size = size;
515  action->argc = argc;
516 
517  return action;
518 }
519 
520 static void pt__run_actions(pt__match_context *context, pt_match_result *result) {
521  PT_DATA *data_stack;
522  if(sizeof(PT_DATA) > sizeof(pt__match_action)) {
523  // Allocate the data stack
524  data_stack = (PT_DATA *) PT_MALLOC(context->action_stack.size * sizeof(PT_DATA), context->opts->userdata);
525  if(data_stack == NULL) {
526  result->matched = PT_NO_STACK_MEM;
527  return;
528  }
529  }
530  else {
531  // PT_DATA may safely overwrite popped actions memory, as the later will be read just before pushing results
532  // This avoids a malloc/free pair
533  data_stack = (PT_DATA *) context->action_stack.actions;
534  }
535 
536  // index to current Data on the stack
537  int data_index = 0;
538 
539  // Fold It, 'til there are no Actions left.
540  // Note that this only works because of how the Actions are layed out in
541  // the Action Stack.
542  pt__match_action *action;
543  for(action = context->action_stack.actions; action < context->action_stack.actions + context->action_stack.size; action++) {
544  // "pop" arguments
545  data_index -= action->argc;
546  // run action with arguments (which are still stacked in `data_stack` in the right position)
547  data_stack[data_index] = action->f(
548  action->str,
549  action->size,
550  action->argc,
551  data_stack + data_index,
552  context->opts->userdata
553  );
554  // "push" result
555  data_index++;
556  }
557  result->data = data_stack[0];
558  if(sizeof(PT_DATA) > sizeof(pt__match_action)) {
559  PT_FREE(data_stack, context->opts->userdata);
560  }
561 }
562 
// Result of matching one expression.
// The member order matters: values of this type are initialized positionally.
563 typedef struct pt__match_expr_result {
564  int success; // < 0 on exception, 0 on no match, 1 on match success
 // Number of elements to advance in the subject string.
565  int sp_advance;
 // Number of expression slots to advance to reach the next sibling expression.
566  int e_advance;
567 } pt__match_expr_result;
568 
569 static pt__match_expr_result pt__match_expr(pt__match_context *context, const pt_expr *const e, pt_element_string sp);
570 
571 static pt__match_expr_result pt__match_sequence(pt__match_context *context, const pt_expr *const e, pt_element_string sp) {
572  pt__match_expr_result result = { 1, 0, 1 + e->N }, subresult;
573  size_t current_action_count = context->action_stack.size;
574  for(int i = 0; result.success > 0 && i < e->N; i += subresult.e_advance) {
575  subresult = pt__match_expr(context, e + 1 + i, sp + result.sp_advance);
576  result.success = subresult.success;
577  result.sp_advance += subresult.sp_advance;
578  }
579  if(result.success <= 0) {
580  context->action_stack.size = current_action_count;
581  }
582  return result;
583 }
584 
585 static pt__match_expr_result pt__match_rule(pt__match_context *context, size_t index, pt_element_string sp) {
586  pt__match_expr_result result = { 1, 0, 1 }, subresult;
587  size_t current_action_count = context->action_stack.size;
588  for(const pt_expr *e = context->grammar[index]; result.success > 0 && e->op != PT_OP_END; e += subresult.e_advance) {
589  subresult = pt__match_expr(context, e, sp + result.sp_advance);
590  result.success = subresult.success;
591  result.sp_advance += subresult.sp_advance;
592  }
593  if(result.success <= 0) {
594  context->action_stack.size = current_action_count;
595  }
596  return result;
597 }
598 
599 static pt__match_expr_result pt__match_expr(pt__match_context *context, const pt_expr *const e, pt_element_string sp) {
600  pt__match_expr_result result = { 0, 0, 1 };
601  switch(e->op) {
602  case PT_OP_END: {
603  result.success = 1;
604  break;
605  }
606 
607  case PT_OP_ELEMENT: {
608  result.success = (*sp) == (PT_ELEMENT_TYPE) e->element;
609  result.sp_advance = 1;
610  break;
611  }
612 
613  case PT_OP_LITERAL: {
614  result.success = strncmp(sp, e->str, e->N) == 0;
615  result.sp_advance = e->N;
616  break;
617  }
618 
619  case PT_OP_CASE_INSENSITIVE: {
620  result.success = strncasecmp(sp, e->str, e->N) == 0;
621  result.sp_advance = e->N;
622  break;
623  }
624 
625  case PT_OP_CHARACTER_CLASS: {
626  result.success = pt__function_for_character_class((enum pt_character_class) e->N)(*sp) != 0;
627  result.sp_advance = 1;
628  break;
629  }
630 
631  case PT_OP_SET: {
632  result.success = *sp && strchr(e->str, *sp);
633  result.sp_advance = 1;
634  break;
635  }
636 
637  case PT_OP_RANGE: {
638  PT_ELEMENT_TYPE element = *sp;
639  result.success = element >= PT_RANGE_UNPACK_FROM(e->range) && element <= PT_RANGE_UNPACK_TO(e->range);
640  result.sp_advance = 1;
641  break;
642  }
643 
644  case PT_OP_ANY: {
645  result.success = (*sp) != 0;
646  result.sp_advance = 1;
647  break;
648  }
649 
650  case PT_OP_CUSTOM_MATCHER: {
651  int custom_matcher_result = e->matcher(sp, context->opts->userdata);
652  result.success = custom_matcher_result > 0;
653  result.sp_advance = custom_matcher_result;
654  break;
655  }
656 
657  case PT_OP_NON_TERMINAL:
658  return pt__match_rule(context, e->index, sp);
659 
660  case PT_OP_AT_LEAST: {
661  unsigned int counter = 0;
662  pt__match_expr_result subresult;
663  while(1) {
664  subresult = pt__match_sequence(context, e, sp + result.sp_advance);
665  if(subresult.success > 0 && subresult.sp_advance > 0) {
666  result.sp_advance += subresult.sp_advance;
667  counter++;
668  }
669  else {
670  break;
671  }
672  }
673  if(subresult.success >= 0) {
674  result.success = counter >= e->quantifier;
675  result.e_advance = 1 + e->N;
676  }
677  else {
678  result.success = subresult.success;
679  }
680  break;
681  }
682 
683  case PT_OP_AT_MOST: {
684  pt__match_expr_result subresult;
685  for(unsigned int counter = 0; counter < e->quantifier; counter++) {
686  subresult = pt__match_sequence(context, e, sp + result.sp_advance);
687  if(subresult.success > 0 && subresult.sp_advance > 0) {
688  result.sp_advance += subresult.sp_advance;
689  }
690  else {
691  break;
692  }
693  }
694  if(subresult.success >= 0) {
695  result.success = 1;
696  result.e_advance = 1 + e->N;
697  }
698  else {
699  result.success = subresult.success;
700  }
701  break;
702  }
703 
704  case PT_OP_NOT: {
705  size_t current_action_count = context->action_stack.size;
706  result = pt__match_sequence(context, e, sp);
707  if(result.success >= 0) {
708  context->action_stack.size = current_action_count; // ignore captures
709  result.success = !result.success;
710  result.sp_advance = 0;
711  }
712  break;
713  }
714 
715  case PT_OP_AND: {
716  size_t current_action_count = context->action_stack.size;
717  result = pt__match_sequence(context, e, sp);
718  context->action_stack.size = current_action_count; // ignore captures
719  result.sp_advance = 0;
720  break;
721  }
722 
723  case PT_OP_SEQUENCE:
724  return pt__match_sequence(context, e, sp);
725 
726  case PT_OP_CHOICE: {
727  for(int i = 0; result.success == 0 && i < e->N; i += result.e_advance) {
728  result = pt__match_expr(context, e + 1 + i, sp);
729  }
730  result.e_advance = 1 + e->N;
731  break;
732  }
733 
734  case PT_OP_ACTION: {
735  size_t previous_action_count = context->action_stack.size;
736  result = pt__match_sequence(context, e, sp);
737  if(result.success > 0) {
738  if(!pt__push_action(context, e->action, sp, result.sp_advance, context->action_stack.size - previous_action_count)) {
739  result.success = PT_NO_STACK_MEM;
740  }
741  }
742  break;
743  }
744 
745  case PT_OP_ERROR: {
746  if(e->error_action) {
747  e->error_action(context->str, sp - context->str, context->opts->userdata);
748  }
749  result.success = PT_MATCHED_ERROR;
750  break;
751  }
752 
753  default: {
754  PT_ASSERT(0, "Unknown operation", context->opts->userdata);
755  break;
756  }
757  }
758  return result;
759 }
760 
761 PT_DECL pt_match_result pt_match(const pt_grammar grammar, pt_element_string str, const pt_match_options *const opts) {
762  pt_match_result result = {};
763  if(str == NULL) {
764  result.matched = PT_NULL_INPUT;
765  return result;
766  }
767  pt__match_context context = {
768  (const pt_expr *const *const) grammar,
769  opts == NULL ? &pt_default_match_options : opts,
770  str,
771  };
772  if(!pt__initialize_action_stack(&context)) {
773  result.matched = PT_NO_STACK_MEM;
774  return result;
775  }
776  pt__match_expr_result subresult = pt__match_rule(&context, 0, str);
777  if(subresult.success > 0) {
778  result.matched = subresult.sp_advance;
779  if(context.action_stack.size > 0) {
780  pt__run_actions(&context, &result);
781  }
782  }
783  else if(subresult.success == 0) {
784  result.matched = PT_NO_MATCH;
785  }
786  else {
787  result.matched = subresult.success;
788  }
789  pt__destroy_action_stack(&context);
790  return result;
791 }
792 
793 #endif // PEGA_TEXTO_IMPLEMENTATION
PT_DECL pt_match_result pt_match(const pt_grammar grammar, pt_element_string str, const pt_match_options *const opts)
Try to match the string str with a PEG.
PT_DECL const char *const pt_operation_names[]
String literals of the operations.
Definition: pega-texto.h:100
pt_operation
Operations for constructing Parsing Expressions.
Definition: pega-texto.h:70
pt_expr * pt_grammar[]
Grammar typedef, a 2D array of expressions, or array of Rules.
Definition: pega-texto.h:217
PT_DECL const pt_match_options pt_default_match_options
Default match options: all 0 or NULL.
Definition: pega-texto.h:360
PT_DATA(* pt_expression_action)(pt_element_string str, size_t size, int argc, PT_DATA *argv, void *userdata)
Action to be called for a capture after the whole match succeeds.
Definition: pega-texto.h:174
pt_character_class
Character classes supported by pega-texto.
Definition: pega-texto.h:105
int(* pt_custom_matcher_function)(pt_element_string, void *)
A function that receives a string and userdata and match it (positive) or not, advancing the matched ...
Definition: pega-texto.h:158
void(* pt_error_action)(pt_element_string str, size_t where, void *userdata)
Action to be called when an Error Expression is matched (on syntactic errors).
Definition: pega-texto.h:188
pt_macth_error_code
Possible error codes returned by pt_match.
Definition: pega-texto.h:119
@ PT_NULL_INPUT
Provided string is a NULL pointer.
Definition: pega-texto.h:127
@ PT_NO_MATCH
Subject string didn't match the given PEG.
Definition: pega-texto.h:121
@ PT_NO_STACK_MEM
Error while allocating memory for the Action stack.
Definition: pega-texto.h:123
@ PT_MATCHED_ERROR
Matched an Error Expression.
Definition: pega-texto.h:125
pt_expr pt_rule[]
Rule typedef, an array of expressions.
Definition: pega-texto.h:215
Parsing Expressions.
Definition: pega-texto.h:195
uint16_t N
Range byte pair, Literal length, Character Class index.
Definition: pega-texto.h:199
uint8_t op
Operation to be performed.
Definition: pega-texto.h:197
Options passed to pt_match.
Definition: pega-texto.h:352
size_t initial_stack_capacity
The initial capacity for the stack. If 0, stack capacity will begin at a reasonable default.
Definition: pega-texto.h:355
pt_error_action on_error
The action to be performed when a syntactic error is found.
Definition: pega-texto.h:354
void * userdata
Custom user data for the actions.
Definition: pega-texto.h:353
Match result: a {number of matched chars/match error code, action result} pair.
Definition: pega-texto.h:339
int matched
If non-negative, represents the number of characters matched; otherwise, it's an error code.
Definition: pega-texto.h:342
PT_DATA data
Resulting data from the last top-level Action.
Definition: pega-texto.h:348
Default data type for Actions to return.
Definition: pega-texto.h:138