- Notifications
You must be signed in to change notification settings - Fork 7.8k
/
Copy pathphp_unicode.h
175 lines (152 loc) · 7.55 KB
/
php_unicode.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Author: Wez Furlong (wez@thebrainroom.com) |
+----------------------------------------------------------------------+
Based on code from ucdata-2.5, which has the following Copyright:
Copyright 2001 Computing Research Labs, New Mexico State University
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
*/
#ifndefPHP_UNICODE_H
#definePHP_UNICODE_H
/*
 * Property identifiers passed to php_unicode_is_prop() and
 * php_unicode_is_prop1(). The numeric values are consecutive, starting at 0.
 */

/* Character categories */
#define UC_MN 0  /* Mark, Non-Spacing */
#define UC_MC 1  /* Mark, Spacing Combining */
#define UC_ME 2  /* Mark, Enclosing */
#define UC_ND 3  /* Number, Decimal Digit */
#define UC_NL 4  /* Number, Letter */
#define UC_NO 5  /* Number, Other */
#define UC_ZS 6  /* Separator, Space */
#define UC_ZL 7  /* Separator, Line */
#define UC_ZP 8  /* Separator, Paragraph */
#define UC_OS 9  /* Other, Surrogate */
#define UC_CO 10 /* Other, Private Use */
#define UC_CN 11 /* Other, Not Assigned */
#define UC_LU 12 /* Letter, Uppercase */
#define UC_LL 13 /* Letter, Lowercase */
#define UC_LT 14 /* Letter, Titlecase */
#define UC_LM 15 /* Letter, Modifier */
#define UC_LO 16 /* Letter, Other */
#define UC_SM 17 /* Symbol, Math */
#define UC_SC 18 /* Symbol, Currency */
#define UC_SK 19 /* Symbol, Modifier */
#define UC_SO 20 /* Symbol, Other */

/* Directionality classes */
#define UC_L  21 /* Left-To-Right */
#define UC_R  22 /* Right-To-Left */
#define UC_EN 23 /* European Number */
#define UC_ES 24 /* European Number Separator */
#define UC_ET 25 /* European Number Terminator */
#define UC_AN 26 /* Arabic Number */
#define UC_CS 27 /* Common Number Separator */
#define UC_B  28 /* Block Separator */
#define UC_S  29 /* Segment Separator */
#define UC_WS 30 /* Whitespace */
#define UC_ON 31 /* Other Neutrals */
#define UC_AL 32 /* Arabic Letter */

/* Merged property categories */
#define UC_C 33 /* Control */
#define UC_P 34 /* Punctuation */

/* Derived properties from DerivedCoreProperties.txt */
#define UC_CASED          35
#define UC_CASE_IGNORABLE 36
MBSTRING_APIboolphp_unicode_is_prop(unsigned longcode, ...);
MBSTRING_APIboolphp_unicode_is_prop1(unsigned longcode, intprop);
/*
 * Case-conversion mode selector; this is the type of the `case_mode`
 * argument of php_unicode_convert_case().
 *
 * Each of UPPER / LOWER / TITLE / FOLD has a matching *_SIMPLE variant.
 * NOTE(review): presumably the *_SIMPLE modes use one-to-one mappings only,
 * while the plain modes use full case mappings -- confirm in the
 * implementation.
 *
 * PHP_UNICODE_CASE_MODE_MAX is the last enumerator and therefore equals the
 * number of real modes (8); it is not itself a conversion mode.
 */
typedef enum {
	PHP_UNICODE_CASE_UPPER = 0,
	PHP_UNICODE_CASE_LOWER,
	PHP_UNICODE_CASE_TITLE,
	PHP_UNICODE_CASE_FOLD,
	PHP_UNICODE_CASE_UPPER_SIMPLE,
	PHP_UNICODE_CASE_LOWER_SIMPLE,
	PHP_UNICODE_CASE_TITLE_SIMPLE,
	PHP_UNICODE_CASE_FOLD_SIMPLE,
	PHP_UNICODE_CASE_MODE_MAX
} php_case_mode;
MBSTRING_APIzend_string*php_unicode_convert_case(
php_case_modecase_mode, constchar*srcstr, size_tsrclen,
constmbfl_encoding*src_encoding, constmbfl_encoding*dst_encoding, intillegal_mode, uint32_tillegal_substchar);
/*
 * Lowercase test with a fast path for the common ASCII case.
 *
 * Code points below 0x80 are answered with a plain range check against
 * 'a'..'z' (0x61..0x7A); everything else is delegated to the UC_LL
 * ("Letter, Lowercase") property lookup.
 *
 * Returns non-zero when `code` is lowercase, 0 otherwise.
 */
static inline int php_unicode_is_lower(unsigned long code) {
	if (code >= 0x80) {
		/* Non-ASCII: consult the property lookup. */
		return php_unicode_is_prop1(code, UC_LL);
	}
	return code >= 0x61 && code <= 0x7A;
}
/*
 * Uppercase test with a fast path for the common ASCII case.
 *
 * Code points below 0x80 are answered with a plain range check against
 * 'A'..'Z' (0x41..0x5A); everything else is delegated to the UC_LU
 * ("Letter, Uppercase") property lookup.
 *
 * Returns non-zero when `code` is uppercase, 0 otherwise.
 */
static inline int php_unicode_is_upper(unsigned long code) {
	if (code >= 0x80) {
		/* Non-ASCII: consult the property lookup. */
		return php_unicode_is_prop1(code, UC_LU);
	}
	return code >= 0x41 && code <= 0x5A;
}
/*
 * ctype-style classification macros.
 *
 * Each one forwards `cc` to php_unicode_is_prop1() (single property) or to
 * php_unicode_is_prop() (list of properties terminated by -1).
 *
 * php_unicode_is_print() is php_unicode_is_graph() plus UC_ZS
 * ("Separator, Space").
 */
#define php_unicode_is_alpha(cc) php_unicode_is_prop(cc, UC_LU, UC_LL, UC_LM, UC_LO, UC_LT, -1)
#define php_unicode_is_digit(cc) php_unicode_is_prop1(cc, UC_ND)
#define php_unicode_is_alnum(cc) php_unicode_is_prop(cc, UC_LU, UC_LL, UC_LM, UC_LO, UC_LT, UC_ND, -1)
#define php_unicode_is_cntrl(cc) php_unicode_is_prop1(cc, UC_C)
#define php_unicode_is_blank(cc) php_unicode_is_prop1(cc, UC_ZS)
#define php_unicode_is_punct(cc) php_unicode_is_prop1(cc, UC_P)
/* Marks, numbers, letters, punctuation and symbols. */
#define php_unicode_is_graph(cc) php_unicode_is_prop(cc, \
	UC_MN, UC_MC, UC_ME, UC_ND, UC_NL, UC_NO, \
	UC_LU, UC_LL, UC_LT, UC_LM, UC_LO, UC_P, \
	UC_SM, UC_SC, UC_SK, UC_SO, -1)
/* Same set as php_unicode_is_graph(), plus space separators. */
#define php_unicode_is_print(cc) php_unicode_is_prop(cc, \
	UC_MN, UC_MC, UC_ME, UC_ND, UC_NL, UC_NO, \
	UC_LU, UC_LL, UC_LT, UC_LM, UC_LO, UC_P, \
	UC_SM, UC_SC, UC_SK, UC_SO, UC_ZS, -1)
#define php_unicode_is_title(cc) php_unicode_is_prop1(cc, UC_LT)
#define php_unicode_is_symbol(cc) php_unicode_is_prop(cc, UC_SM, UC_SC, UC_SO, UC_SK, -1)
#define php_unicode_is_number(cc) php_unicode_is_prop(cc, UC_ND, UC_NO, UC_NL, -1)
#define php_unicode_is_nonspacing(cc) php_unicode_is_prop1(cc, UC_MN)
/*
 * Directionality macros.
 *
 * Each macro forwards `cc` to the property lookup with the directionality
 * identifiers listed; multi-property lists are terminated by -1.
 */
#define php_unicode_is_rtl(cc) php_unicode_is_prop1(cc, UC_R)
#define php_unicode_is_ltr(cc) php_unicode_is_prop1(cc, UC_L)
#define php_unicode_is_strong(cc) php_unicode_is_prop(cc, UC_L, UC_R, -1)
#define php_unicode_is_weak(cc) php_unicode_is_prop(cc, UC_EN, UC_ES, UC_ET, UC_AN, UC_CS, -1)
#define php_unicode_is_neutral(cc) php_unicode_is_prop(cc, UC_B, UC_S, UC_WS, UC_ON, -1)
#define php_unicode_is_separator(cc) php_unicode_is_prop(cc, UC_B, UC_S, -1)
/*
 * Other macros inspired by John Cowan.
 *
 * Apart from php_unicode_is_mark(), which tests the three mark categories
 * together, each macro is a single-property lookup.
 */
#define php_unicode_is_mark(cc) php_unicode_is_prop(cc, UC_MN, UC_MC, UC_ME, -1)
#define php_unicode_is_modif(cc) php_unicode_is_prop1(cc, UC_LM)
#define php_unicode_is_letnum(cc) php_unicode_is_prop1(cc, UC_NL)
#define php_unicode_is_math(cc) php_unicode_is_prop1(cc, UC_SM)
#define php_unicode_is_currency(cc) php_unicode_is_prop1(cc, UC_SC)
#define php_unicode_is_modifsymbol(cc) php_unicode_is_prop1(cc, UC_SK)
#define php_unicode_is_nsmark(cc) php_unicode_is_prop1(cc, UC_MN)
#define php_unicode_is_spmark(cc) php_unicode_is_prop1(cc, UC_MC)
#define php_unicode_is_enclosing(cc) php_unicode_is_prop1(cc, UC_ME)
#define php_unicode_is_private(cc) php_unicode_is_prop1(cc, UC_CO)
#define php_unicode_is_surrogate(cc) php_unicode_is_prop1(cc, UC_OS)
#define php_unicode_is_lsep(cc) php_unicode_is_prop1(cc, UC_ZL)
#define php_unicode_is_psep(cc) php_unicode_is_prop1(cc, UC_ZP)
/*
 * Other miscellaneous character property macros.
 *
 * These are pure range checks; they do not call the property lookup.
 * The argument is evaluated more than once, so do not pass an expression
 * with side effects (e.g. `*p++`).
 *
 * php_unicode_is_han():    0x4E00..0x9FFF or 0xF900..0xFAFF
 * php_unicode_is_hangul(): 0xAC00..0xD7FF
 *
 * NOTE(review): only the ranges above are tested; ideographs or Hangul
 * characters encoded elsewhere are not matched -- confirm this is what
 * callers expect.
 */
#define php_unicode_is_han(cc) (((cc) >= 0x4e00 && (cc) <= 0x9fff) || \
	((cc) >= 0xf900 && (cc) <= 0xfaff))
#define php_unicode_is_hangul(cc) ((cc) >= 0xac00 && (cc) <= 0xd7ff)
/*
 * Derived core properties.
 *
 * Single-property lookups for the two identifiers taken from
 * DerivedCoreProperties.txt: UC_CASED and UC_CASE_IGNORABLE.
 */
#define php_unicode_is_cased(cc) php_unicode_is_prop1(cc, UC_CASED)
#define php_unicode_is_case_ignorable(cc) php_unicode_is_prop1(cc, UC_CASE_IGNORABLE)
#endif/* PHP_UNICODE_H */