- Notifications
You must be signed in to change notification settings - Fork 7.8k
/
Copy pathhtml5_parser.h
97 lines (89 loc) · 3.58 KB
/
html5_parser.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Niels Dossche <nielsdos@php.net> |
+----------------------------------------------------------------------+
*/
/* Include guard: the matching #endif closes the header at the end of the file. */
#ifndef HTML5_PARSER_H
#define HTML5_PARSER_H

#include "namespace_compat.h"
#include <lexbor/html/parser.h>
#include <libxml/tree.h>
#include <Zend/zend_portability.h>

/*
 * Status codes of the lexbor -> libxml2 bridge. This is the return type of
 * lexbor_libxml2_bridge_convert_document() and
 * lexbor_libxml2_bridge_convert_fragment().
 *
 * LEXBOR_LIBXML2_BRIDGE_STATUS_OK is explicitly zero; the remaining values
 * follow sequentially. The exact conditions under which each failure code is
 * produced are defined by the implementation, not by this header.
 */
typedef enum {
	LEXBOR_LIBXML2_BRIDGE_STATUS_OK = 0,
	LEXBOR_LIBXML2_BRIDGE_STATUS_CANNOT_INIT,
	LEXBOR_LIBXML2_BRIDGE_STATUS_FATAL_PARSE,
	LEXBOR_LIBXML2_BRIDGE_STATUS_OVERFLOW,
	LEXBOR_LIBXML2_BRIDGE_STATUS_OOM,
} lexbor_libxml2_bridge_status;

typedefvoid (*lexbor_libxml2_bridge_tokenizer_error_reporter)(
void*application_data,
lxb_html_tokenizer_error_t*error,
size_toffset
);
typedefvoid (*lexbor_libxml2_bridge_tree_error_reporter)(
void*application_data,
lxb_html_tree_error_t*error,
size_tline,
size_tcolumn,
size_tlen
);
typedefstructlexbor_libxml2_bridge_extracted_observations {
boolhas_explicit_html_tag;
boolhas_explicit_head_tag;
boolhas_explicit_body_tag;
php_libxml_quirks_modequirks_mode;
} lexbor_libxml2_bridge_extracted_observations;
typedefstructlexbor_libxml2_bridge_parse_context {
/* Private fields */
lexbor_libxml2_bridge_tokenizer_error_reportertokenizer_error_reporter;
lexbor_libxml2_bridge_tree_error_reportertree_error_reporter;
/* Public fields */
lexbor_libxml2_bridge_extracted_observationsobservations;
/* Application data, do what you want with this */
void*application_data;
} lexbor_libxml2_bridge_parse_context;
/*
 * Initialises a parse context.
 *
 * ctx: the context to initialise.
 *      NOTE(review): the initial values of the fields are set by the
 *      implementation and are not visible from this header.
 */
void lexbor_libxml2_bridge_parse_context_init(lexbor_libxml2_bridge_parse_context *ctx);

voidlexbor_libxml2_bridge_parse_set_error_callbacks(
lexbor_libxml2_bridge_parse_context*ctx,
lexbor_libxml2_bridge_tokenizer_error_reportertokenizer_error_reporter,
lexbor_libxml2_bridge_tree_error_reportertree_error_reporter
);
lexbor_libxml2_bridge_statuslexbor_libxml2_bridge_convert_document(
lxb_html_document_t*document,
xmlDocPtr*doc_out,
boolcompact_text_nodes,
boolcreate_default_ns,
php_dom_private_data*private_data
);
lexbor_libxml2_bridge_statuslexbor_libxml2_bridge_convert_fragment(
lxb_dom_node_t*start_node,
xmlDocPtrlxml_doc,
xmlNodePtr*fragment_out,
boolcompact_text_nodes,
boolcreate_default_ns,
php_dom_private_data*private_data
);
voidlexbor_libxml2_bridge_report_errors(
constlexbor_libxml2_bridge_parse_context*ctx,
lxb_html_parser_t*parser,
constlxb_char_t*input_html,
size_tchunk_offset,
size_t*error_index_offset_tokenizer,
size_t*error_index_offset_tree
);
/*
 * Copies observations from a lexbor HTML tree into an observations struct.
 *
 * tree:         the lexbor tree to read from.
 * observations: destination struct that receives the observations.
 */
void lexbor_libxml2_bridge_copy_observations(
	lxb_html_tree_t *tree,
	lexbor_libxml2_bridge_extracted_observations *observations
);

#endif