- Notifications
You must be signed in to change notification settings - Fork 255
/
Copy pathparser.h
719 lines (542 loc) · 17.2 KB
/
parser.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
/*
* Copyright (c) 2015, 2024, Oracle and/or its affiliates.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2.0, as
* published by the Free Software Foundation.
*
* This program is designed to work with certain software (including
* but not limited to OpenSSL) that is licensed under separate terms, as
* designated in a particular file or component or in included license
* documentation. The authors of MySQL hereby grant you an additional
* permission to link the program and your derivative works with the
* separately licensed software that they have either included with
* the program or referenced in the documentation.
*
* Without limiting anything contained in the foregoing, this file,
* which is part of Connector/C++, is also subject to the
* Universal FOSS Exception, version 1.0, a copy of which can be found at
* https://oss.oracle.com/licenses/universal-foss-exception.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License, version 2.0, for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef CDK_PARSER_PARSER_H
// Include guard macro, paired with the #ifndef on the previous line.
#define CDK_PARSER_PARSER_H
#include<mysql/cdk/api/expression.h>
#include"tokenizer.h"
#ifdef _MSC_VER
/*
4061 = enum constant not explicitly handled by switch() case.
We have a lot of token type constants and we commonly use default:
clause to catch all otherwise not handled token types. Thus this
warning must be disabled.
*/
DISABLE_WARNING_CDK(4061)
#endif
/*
Infrastructure for building list and document parsers from base
expression parsers.
*/
namespaceparser {
typedef Tokenizer::iterator It;
using std::string;
using cdk::throw_error;
/*
Class that implements token navigation and usage methods
*/
classToken_base
{
protected:
It *m_first;
It m_last;
Token m_last_tok;
const Token* consume_token()
{
const Token *t = peek_token();
if (!t)
returnNULL;
m_last_tok = *t;
assert(m_first);
++(*m_first);
return &m_last_tok;
}
// return null if no more tokens available
const Token* consume_token(Token::Type type)
{
if (!tokens_available() || !cur_token_type_is(type))
returnNULL;
returnconsume_token();
}
const Token& consume_token_throw(Token::Type type, const string &msg)
{
const Token *t = consume_token(type);
if (!t)
parse_error(msg);
return *t;
}
const Token* peek_token()
{
if (!tokens_available())
returnNULL;
assert(m_first);
return &(**m_first);
}
boolcur_token_type_is(Token::Type type)
{
returntokens_available() && peek_token()->get_type() == type;
}
boolcur_token_type_in(Token::Set types)
{
returntokens_available()
&& types.find(peek_token()->get_type()) != types.end();
}
It& cur_pos()
{
assert(m_first);
return *m_first;
}
const It& cur_pos() const
{
returnconst_cast<Token_base*>(this)->cur_pos();
}
const It& end_pos() const
{
return m_last;
}
booltokens_available() const
{
return m_first && cur_pos() != end_pos();
}
typedef Tokenizer::Error Error;
voidparse_error(const string&) const;
voidunsupported(const string&) const;
public:
Token_base()
: m_first(NULL)
{}
Token_base(It &first, const It &last)
{
set_tokens(first, last);
}
voidset_tokens(It &first, const It &last)
{
m_first = &first;
m_last = last;
}
};
inline
voidToken_base::parse_error(const string &msg) const
{
throwError(*m_first, msg);
}
inline
voidToken_base::unsupported(const string &what) const
{
string msg(what);
msg.append(" not supported yet");
parse_error(msg);
}
/*
Base class for parsers which parse tokens and present result as
an expression over processor PRC.
Parser objects which derive from Expr_parser<PRC> parse range of tokens
specified when the object is created, assuming that these tokens represent
an expression over PRC. The parsed expression is reported to a processor
in process() method.
Assuming that P is a class that derives from Expr_parser<PRC>, p is a
parser object of class P and prc is a processor of type PRC, a call:
p.process(prc);
will report to prc the expression represented by tokens specified when p
was created. If tokens could not be correctly parsed an error is thrown
either when this method is called or when p is created. It is up to
the implementation to decide when the parsing happens: it can be during
parser creation (and then the parser must store results of parsing) or it
can be done on-the-fly, in the process() method. Method p.parse(prc) has
the same effect as p.process(prc).
It is also possible to call:
p.consume();
which consumes tokens of the expression without reporting them to any
parser.
Since parsing consumes tokens, it is assumed that parse()/process() can
be called only once for a given parser instance. Another call will throw
an error.
Derived classes should implement the functionality by overriding do_parse()
and do_consume() methods. By default do_consume() is implemented by calling
do_parse() with NULL processor pointer.
*/
template <classPRC, classTokens = Token_base>
classExpr_parser
: public cdk::api::Expr_base<PRC>
, protected Tokens
{
protected:
using Tokens::set_tokens;
using Tokens::cur_pos;
using Tokens::end_pos;
using Tokens::tokens_available;
using Tokens::cur_token_type_is;
using Tokens::cur_token_type_in;
using Tokens::consume_token;
using Tokens::parse_error;
using Tokens::unsupported;
usingtypename Tokens::Error;
public:
Expr_parser(It &first, const It &last)
: m_consumed(false)
{
set_tokens(first, last);
}
voidprocess(PRC &prc) const
{
if (!const_cast<Expr_parser*>(this)->parse(prc))
parse_error("Failed to parse the string");
}
/*
Parse tokens specified when creating this parser instance and
report parsed expression to the given processor.
This method can be called only once and it is assumed that it
consumes the tokens that were parsed. That is, after a successful
call to parse() the first iterator passed to the constructor
is moved and points at the first token after the parsed expression.
Returns false if tokens could not be parsed as an expression and
and no tokens have been consumed (first iterator is not moved).
Returns true if complete expression has been parsed. Otherwise
(could not parse but some tokens were consumed) throws parse
error.
Note: this method is implemented in terms of do_parse() which
should be overridden by derived classes.
*/
boolparse(PRC &prc)
{
if (m_consumed)
THROW("Expr_praser: second pass");
if (!do_parse(&prc))
returnfalse;
m_consumed = true;
returntrue;
}
/*
Consume tokens that form an expression without reporting them
to any processor.
Like parse(), this method can be called only once and should
move the first iterator.
Note: this method is implemented in terms of do_consume() which
can be overridden by derived classes to provide more efficient
implementation.
*/
voidconsume()
{
if (m_consumed)
return;
do_consume();
m_consumed = true;
}
/*
Helper method which calls consume() if prc is NULL, otherwise
calls parse() reporting results to the processor pointed by prc.
*/
boolprocess_if(PRC *prc)
{
if (prc)
returnparse(*prc);
consume();
returntrue;
}
protected:
bool m_consumed;
/*
Internal method that implements parse() method - to be overridden
by derived class.
See documentation of parse() for return value specification and
information how first iterator should be updated.
*/
virtualbooldo_parse(PRC *prc) =0;
/*
Internal method that implements consume() method. By default it
calls parse() with NULL processor but derived classes can override
to provide more efficient implementation.
*/
virtualvoiddo_consume()
{
if (!do_parse(NULL))
throwError(cur_pos(), "Failed to parse the string");
}
};
} // parser
//-------------------------------------------------------------------------
namespaceparser {
using cdk::api::List_processor;
/*
Template which constructs a parser for a list of expressions given a base
parser for a single expression.
List_parser<> is implemented using on-the-fly parsing.
*/
template <classBase>
structList_parser
: public Expr_parser< List_processor<typename Base::Processor> >
, cdk::foundation::nocopy
{
typedeftypename Base::Processor PRC;
typedef List_processor<PRC> LPRC;
typedef Expr_parser<LPRC> Parser_base;
using Parser_base::set_tokens;
using Parser_base::cur_pos;
using Parser_base::end_pos;
using Parser_base::tokens_available;
using Parser_base::cur_token_type_is;
using Parser_base::cur_token_type_in;
using Parser_base::consume_token;
using Parser_base::parse_error;
using Parser_base::unsupported;
Token::Type m_list_sep;
List_parser(It &first, const It &last, Token::Type sep = Token::COMMA)
: Expr_parser<LPRC>(first, last), m_list_sep(sep)
{}
booldo_parse(LPRC *prc)
{
bool first_element = true;
do {
Base el_parser(cur_pos(), end_pos());
if (!el_parser.process_if(prc ? prc->list_el() : NULL))
{
if (first_element)
returnfalse;
else
parse_error("Expected next list element");
}
if (!consume_token(m_list_sep))
break;
first_element = false;
}
while (true);
returntrue;
}
};
} // parser
//-------------------------------------------------------------------------
namespaceparser {
using cdk::api::Expr_base;
using cdk::api::Expr_list;
using cdk::api::Any;
using cdk::api::Doc_processor;
using cdk::api::Any_processor;
/*
Extend base parser with document and array parsing.
Given type Base of the base parser, Any_parser<Base> is a parser
which can parse documents, arrays or expressions recognized by the
base parser. Document and array elements can be again any kind of
expression recognized by Any_parser. If the first token is '{' or '['
then Any_parser<> assumes that this is document/array expression.
Otherwise it must be base expression.
Any_parser<Base> reports parsed expression to a processor of type
Any_processor<SPRC>, where SPRC is a processor type for base (scalar)
values. Normally SPRC is the processor type of the base parser, but
a different SPRC type can be specified when instantiating Any_parser<>
template.
The Base class must define static method for converting processor
used by Any_parser<> to a processor used by the base parser. The expected
signature of this method is:
static Base::Processor* get_base_prc(Any_processor<SPRC>*);
where SPRC is the scalar processor type specified for Any_parser<>
template (so, it is Base::Processor by default).
*/
template <classBase,
classSPRC = Any_processor<typename Base::Processor>
>
structAny_parser
: public Expr_parser< Any_processor<SPRC> >
{
typedeftypename Base::Processor PRC;
typedeftypename Any<SPRC>::Processor APRC;
typedeftypename Any<SPRC>::Document::Processor DPRC;
typedeftypename Any<SPRC>::List::Processor LPRC;
typedef Expr_parser< Any_processor<SPRC> > Parser_base;
using Parser_base::cur_pos;
using Parser_base::end_pos;
using Parser_base::tokens_available;
using Parser_base::cur_token_type_is;
using Parser_base::cur_token_type_in;
using Parser_base::consume_token;
using Parser_base::parse_error;
using Parser_base::unsupported;
Any_parser(It &first, const It &last)
: Expr_parser<APRC>(first, last)
{}
booldo_parse(APRC *prc)
{
if (cur_token_type_is(Token::LCURLY))
{
Doc_parser doc(cur_pos(), end_pos());
doc.process_if(prc ? prc->doc() : NULL);
}
elseif (cur_token_type_is(Token::LSQBRACKET))
{
Arr_parser arr(cur_pos(), end_pos());
arr.process_if(prc ? prc->arr() : NULL);
}
else
{
Base val(cur_pos(), end_pos());
return val.process_if(prc ? Base::get_base_prc(prc) : NULL);
}
returntrue;
}
// Array parser used by Any_parser
structArr_parser : publicExpr_parser<LPRC>
{
typedef Expr_parser<LPRC> Parser_base;
using Parser_base::cur_pos;
using Parser_base::end_pos;
using Parser_base::tokens_available;
using Parser_base::cur_token_type_is;
using Parser_base::cur_token_type_in;
using Parser_base::consume_token;
using Parser_base::parse_error;
Arr_parser(It &first, const It &last)
: Expr_parser<LPRC>(first, last)
{}
booldo_parse(LPRC *prc)
{
if (!consume_token(Token::LSQBRACKET))
returnfalse;
if (prc)
prc->list_begin();
if (!cur_token_type_is(Token::RSQBRACKET))
{
List_parser<Any_parser> list(cur_pos(), end_pos());
bool ok = list.process_if(prc);
if (!ok)
parse_error("Expected array element");
}
if (!consume_token(Token::RSQBRACKET))
parse_error("Expected ']' to close array");
if (prc)
prc->list_end();
returntrue;
}
};
// Document parser used by Any_parser
structDoc_parser
: public Expr_parser<DPRC>
, cdk::foundation::nocopy
{
typedef Expr_parser<DPRC> Parser_base;
using Parser_base::cur_pos;
using Parser_base::end_pos;
using Parser_base::tokens_available;
using Parser_base::cur_token_type_is;
using Parser_base::cur_token_type_in;
using Parser_base::consume_token;
using Parser_base::parse_error;
Doc_parser(It &first, const It &last)
: Expr_parser<DPRC>(first, last)
{}
/*
Document parser treats document body as a list of
key-value pairs. KV_parser parses single key-value
pair and reports it to a document processor (using
key_val() callback).
*/
structKV_parser;
/*
LPrc instance converts a document processor into
a list processor that can process results of parsing
a list of key-value pairs. Given document processor
is returned for each pair in the list. This way a KV_parser
which parses the key-value pair will report it to the
document processor.
*/
structLPrc : publicList_processor<DPRC>
{
usingtypename List_processor<DPRC>::Element_prc;
DPRC *m_prc;
LPrc(DPRC *prc) : m_prc(prc)
{}
voidlist_begin() {}
voidlist_end() {}
Element_prc* list_el()
{
return m_prc ? m_prc : NULL;
}
};
booldo_parse(DPRC *prc)
{
if (!consume_token(Token::LCURLY))
returnfalse;
if (prc)
prc->doc_begin();
if (!cur_token_type_is(Token::RCURLY))
{
List_parser<KV_parser> kv_list(cur_pos(), end_pos());
LPrc kv_prc(prc);
bool ok = kv_list.parse(kv_prc);
if (!ok)
parse_error("Expected a key-value pair in a document");
}
if (!consume_token(Token::RCURLY))
parse_error("Expected '}' closing a document");
if (prc)
prc->doc_end();
returntrue;
}
// TODO: efficient skipping of documents
// Parser for a single key-value pair.
structKV_parser
: public Expr_parser<DPRC>
{
typedef Expr_parser<DPRC> Parser_base;
using Parser_base::cur_pos;
using Parser_base::end_pos;
using Parser_base::tokens_available;
using Parser_base::cur_token_type_is;
using Parser_base::cur_token_type_in;
using Parser_base::consume_token;
using Parser_base::parse_error;
cdk::string m_key;
KV_parser(It &first, const It &last)
: Expr_parser<DPRC>(first, last)
{}
booldo_parse(DPRC *prc)
{
// Note: official JSON specs do not allow plain WORD as key name
if (!cur_token_type_in({ Token::QQSTRING, Token::QSTRING, Token::WORD }))
returnfalse;
m_key = consume_token()->get_text();
if (!consume_token(Token::COLON))
parse_error("Expected ':' after key name in a document");
Any_parser val_parser(cur_pos(), end_pos());
bool ok = val_parser.process_if(prc ? prc->key_val(m_key) : NULL);
if (!ok)
parse_error("Expected key value after ':' in a document");
returntrue;
}
};
};
}; // Any_parser
/*
Expose document and array parsers from Any_parser<> in the
main namespace.
*/
template <classBase, classSPRC = typename Base::Processor>
structDoc_parser : publicAny_parser<Base, SPRC>::Doc_parser
{
Doc_parser(It &first, const It &last)
: Any_parser<Base,SPRC>::Doc_parser(first, last)
{}
};
template <classBase, classSPRC = typename Base::Processor>
structArr_parser : publicAny_parser<Base, SPRC>::Arr_parser
{
Arr_parser(It &first, const It &last)
: Any_parser<Base,SPRC>::Arr_parser(first, last)
{}
};
} // parser
#endif