/*
 *	PROGRAM:	JRD International support
 *	MODULE:		unicode_util.h
 *	DESCRIPTION:	Unicode functions
 *
 *  The contents of this file are subject to the Initial
 *  Developer's Public License Version 1.0 (the "License");
 *  you may not use this file except in compliance with the
 *  License. You may obtain a copy of the License at
 *  http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
 *
 *  Software distributed under the License is distributed AS IS,
 *  WITHOUT WARRANTY OF ANY KIND, either express or implied.
 *  See the License for the specific language governing rights
 *  and limitations under the License.
 *
 *  The Original Code was created by Adriano dos Santos Fernandes
 *  for the Firebird Open Source RDBMS project.
 *
 *  Copyright (c) 2004 Adriano dos Santos Fernandes <adrianosf@uol.com.br>
 *  and all contributors signed below.
 *
 *  All Rights Reserved.
 *  Contributor(s): ______________________________________.
 */
#ifndef JRD_UNICODE_UTIL_H
#define JRD_UNICODE_UTIL_H

// Project headers.
#include "intlobj_new.h"
#include "../common/IntlUtil.h"
#include "../common/os/mod_loader.h"
#include "../common/classes/array.h"
#include "../common/classes/fb_string.h"
#include "../common/classes/GenericMap.h"
#include "../common/classes/objects_array.h"

// ICU headers (converter and calendar APIs).
#include <unicode/ucnv.h>
#include <unicode/ucal.h>

// Forward declarations: these types are only named here, never defined,
// so their full definitions are not needed by this header.
struct UCollator;
struct USet;
namespaceJrd {
classUnicodeUtil
{
private:
structICU;
public:
// encapsulate ICU conversion library
structConversionICU
{
UConverter* (U_EXPORT2* ucnv_open) (constchar* converterName, UErrorCode* err);
void (U_EXPORT2* ucnv_close) (UConverter *converter);
int32_t (U_EXPORT2* ucnv_fromUChars) (UConverter *cnv,
char *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode);
UChar32 (U_EXPORT2* u_tolower) (UChar32 c);
UChar32 (U_EXPORT2* u_toupper) (UChar32 c);
int32_t (U_EXPORT2* u_strCompare) (const UChar* s1, int32_t length1,
const UChar* s2, int32_t length2, UBool codePointOrder);
int32_t (U_EXPORT2* u_countChar32) (const UChar* s, int32_t length);
UChar32 (U_EXPORT2* utf8_nextCharSafeBody) (constuint8_t* s, int32_t* pi, int32_t length, UChar32 c, UBool strict);
void (U_EXPORT2* UCNV_TO_U_CALLBACK_STOP) (
constvoid *context,
UConverterToUnicodeArgs *toUArgs,
constchar* codeUnits,
int32_t length,
UConverterCallbackReason reason,
UErrorCode * err);
void (U_EXPORT2* ucnv_setToUCallBack) (
UConverter * converter,
UConverterToUCallback newAction,
constvoid* newContext,
UConverterToUCallback *oldAction,
constvoid** oldContext,
UErrorCode * err);
void (U_EXPORT2* ucnv_setFromUCallBack) (
UConverter * converter,
UConverterFromUCallback newAction,
constvoid *newContext,
UConverterFromUCallback *oldAction,
constvoid **oldContext,
UErrorCode * err);
void (U_EXPORT2* ucnv_fromUnicode) (
UConverter * converter,
char **target,
constchar *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
UBool flush,
UErrorCode * err);
void (U_EXPORT2* ucnv_toUnicode) (
UConverter *converter,
UChar **target,
const UChar *targetLimit,
constchar **source,
constchar *sourceLimit,
int32_t *offsets,
UBool flush,
UErrorCode *err);
void (U_EXPORT2* ucnv_getInvalidChars) (
const UConverter *converter,
char *errBytes,
int8_t *len,
UErrorCode *err);
int8_t (U_EXPORT2* ucnv_getMaxCharSize) (const UConverter *converter);
int8_t (U_EXPORT2* ucnv_getMinCharSize) (const UConverter *converter);
int32_t (U_EXPORT2* ustrcmp) (const UChar* s1, const UChar* s2);
constchar* (U_EXPORT2* ucalGetTZDataVersion) (UErrorCode* status);
int32_t (U_EXPORT2* ucalGetDefaultTimeZone) (UChar* result, int32_t resultCapacity, UErrorCode* ec);
UCalendar* (U_EXPORT2* ucalOpen) (const UChar* zoneID, int32_t len, constchar* locale, UCalendarType type,
UErrorCode* err);
void (U_EXPORT2* ucalClose) (UCalendar* cal);
void (U_EXPORT2* ucalSetAttribute) (UCalendar* cal, UCalendarAttribute attr, int32_t newValue);
void (U_EXPORT2* ucalSetMillis) (UCalendar* cal, UDate dateTime, UErrorCode* err);
int32_t (U_EXPORT2* ucalGet) (UCalendar* cal, UCalendarDateFields field, UErrorCode* err);
void (U_EXPORT2* ucalSetDateTime) (UCalendar* cal, int32_t year, int32_t month, int32_t date, int32_t hour,
int32_t minute, int32_t second, UErrorCode* err);
UDate (U_EXPORT2* ucalGetNow) ();
UBool (U_EXPORT2* ucalGetTimeZoneTransitionDate) (const UCalendar* cal, UTimeZoneTransitionType type,
UDate* transition, UErrorCode* status);
int vMajor, vMinor;
};
static Firebird::string getDefaultIcuVersion();
classICUModules;
// routines semantically equivalent with intlobj_new.h
static USHORT utf16KeyLength(USHORT len); // BOCU-1
static USHORT utf16ToKey(USHORT srcLen, const USHORT* src, USHORT dstLen, UCHAR* dst); // BOCU-1
static ULONG utf16LowerCase(ULONG srcLen, const USHORT* src, ULONG dstLen, USHORT* dst,
const ULONG* exceptions);
static ULONG utf16UpperCase(ULONG srcLen, const USHORT* src, ULONG dstLen, USHORT* dst,
const ULONG* exceptions);
static ULONG utf16ToUtf8(ULONG srcLen, const USHORT* src, ULONG dstLen, UCHAR* dst,
USHORT* err_code, ULONG* err_position);
static ULONG utf8ToUtf16(ULONG srcLen, const UCHAR* src, ULONG dstLen, USHORT* dst,
USHORT* err_code, ULONG* err_position);
static ULONG utf16ToUtf32(ULONG srcLen, const USHORT* src, ULONG dstLen, ULONG* dst,
USHORT* err_code, ULONG* err_position);
static ULONG utf32ToUtf16(ULONG srcLen, const ULONG* src, ULONG dstLen, USHORT* dst,
USHORT* err_code, ULONG* err_position);
static SSHORT utf16Compare(ULONG len1, const USHORT* str1, ULONG len2, const USHORT* str2,
INTL_BOOL* error_flag);
static ULONG utf16Length(ULONG len, const USHORT* str);
static ULONG utf16Substring(ULONG srcLen, const USHORT* src, ULONG dstLen, USHORT* dst,
ULONG startPos, ULONG length);
static INTL_BOOL utf8WellFormed(ULONG len, const UCHAR* str, ULONG* offending_position);
static INTL_BOOL utf16WellFormed(ULONG len, const USHORT* str, ULONG* offending_position);
static INTL_BOOL utf32WellFormed(ULONG len, const ULONG* str, ULONG* offending_position);
staticvoidutf8Normalize(Firebird::UCharBuffer& data);
static ConversionICU& getConversionICU();
static ICU* loadICU(const Firebird::string& icuVersion, const Firebird::string& configInfo);
staticvoidgetICUVersion(ICU* icu, int& majorVersion, int& minorVersion);
static ICU* getCollVersion(const Firebird::string& icuVersion,
const Firebird::string& configInfo, Firebird::string& collVersion);
classUtf16Collation
{
public:
static Utf16Collation* create(texttype* tt, USHORT attributes,
Firebird::IntlUtil::SpecificAttributesMap& specificAttributes,
const Firebird::string& configInfo);
Utf16Collation()
: contractionsPrefix(*getDefaultMemoryPool())
{
}
~Utf16Collation();
USHORT keyLength(USHORT len) const;
USHORT stringToKey(USHORT srcLen, const USHORT* src, USHORT dstLen, UCHAR* dst,
USHORT key_type) const;
SSHORT compare(ULONG len1, const USHORT* str1, ULONG len2, const USHORT* str2,
INTL_BOOL* error_flag) const;
ULONG canonical(ULONG srcLen, const USHORT* src, ULONG dstLen, ULONG* dst, const ULONG* exceptions);
private:
template <typename T>
classArrayComparator
{
public:
staticboolgreaterThan(const Firebird::Array<T>& i1, const Firebird::Array<T>& i2)
{
FB_SIZE_T minCount = MIN(i1.getCount(), i2.getCount());
int cmp = memcmp(i1.begin(), i2.begin(), minCount * sizeof(T));
if (cmp != 0)
return cmp > 0;
return i1.getCount() > i2.getCount();
}
staticboolgreaterThan(const Firebird::Array<T>* i1, const Firebird::Array<T>* i2)
{
returngreaterThan(*i1, *i2);
}
};
typedef Firebird::SortedObjectsArray<
Firebird::Array<UCHAR>,
Firebird::InlineStorage<Firebird::Array<UCHAR>*, 3>,
Firebird::Array<UCHAR>,
Firebird::DefaultKeyValue<const Firebird::Array<UCHAR>*>,
ArrayComparator<UCHAR>
> SortKeyArray;
typedef Firebird::GenericMap<
Firebird::Pair<
Firebird::Full<
Firebird::Array<USHORT>, // UTF-16 string
SortKeyArray // sort keys
>
>,
ArrayComparator<USHORT>
> ContractionsPrefixMap;
static ICU* loadICU(const Firebird::string& icuVersion, const Firebird::string& collVersion,
const Firebird::string& locale, const Firebird::string& configInfo);
voidnormalize(ULONG* strLen, const USHORT** str, bool forNumericSort,
Firebird::HalfStaticArray<USHORT, BUFFER_SMALL / 2>& buffer) const;
ICU* icu;
texttype* tt;
USHORT attributes;
UCollator* compareCollator;
UCollator* partialCollator;
UCollator* sortCollator;
ContractionsPrefixMap contractionsPrefix;
unsigned maxContractionsPrefixLength; // number of characters
bool numericSort;
};
friendclassUtf16Collation;
};
} // namespace Jrd
#endif // JRD_UNICODE_UTIL_H