12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005 | // Copyright (C) 2016 The Qt Company Ltd.// Copyright (C) 2016 Intel Corporation.// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only#include"qurl_p.h"#include <QtCore/qstringlist.h>#include <QtCore/private/qnumeric_p.h>#include <QtCore/private/qoffsetstringarray_p.h>#include <QtCore/private/qstringiterator_p.h>#include <QtCore/private/qunicodetables_p.h>#include <algorithm> QT_BEGIN_NAMESPACE using namespaceQt::StringLiterals;// needed by the punycode encoder/decoderstatic const uint base =36;static const uint tmin =1;static const uint tmax =26;static const uint skew =38;static const uint damp =700;static const uint initial_bias =72;static const uint initial_n =128;staticconstexpr qsizetype MaxDomainLabelLength =63;staticinline uint encodeDigit(uint digit){return digit +22+75* (digit <26);}staticinline uint adapt(uint delta, uint numpoints,bool firsttime){ delta /= (firsttime ? 
damp :2); delta += (delta / numpoints); uint k =0;for(; delta > ((base - tmin) * tmax) /2; k += base) delta /= (base - tmin);return k + (((base - tmin +1) * delta) / (delta + skew));}staticinlinevoidappendEncode(QString *output, uint delta, uint bias){ uint qq; uint k; uint t;// insert the variable length delta integer.for(qq = delta, k = base;; k += base) {// stop generating digits when the threshold is// detected. t = (k <= bias) ? tmin : (k >= bias + tmax) ? tmax : k - bias;if(qq < t)break;*output +=QChar(encodeDigit(t + (qq - t) % (base - t))); qq = (qq - t) / (base - t);}*output +=QChar(encodeDigit(qq));} Q_AUTOTEST_EXPORT voidqt_punycodeEncoder(QStringView in, QString *output){ uint n = initial_n; uint delta =0; uint bias = initial_bias;// Do not try to encode strings that certainly will result in output// that is longer than allowable domain name label length. Note that// non-BMP codepoints are encoded as two QChars.if(in.size() > MaxDomainLabelLength *2)return;int outLen = output->size(); output->resize(outLen + in.size()); QChar *d = output->data() + outLen;bool skipped =false;// copy all basic code points verbatim to output.for(QChar c : in) {if(c.unicode() <0x80)*d++ = c;else skipped =true;}// if there were only basic code points, just return them// directly; don't do any encoding.if(!skipped)return; output->truncate(d - output->constData());int copied = output->size() - outLen;// h and b now contain the number of basic code points in input. uint b = copied; uint h = copied;// if basic code points were copied, add the delimiter character.if(h >0)*output += u'-';// compute the input length in Unicode code points. 
uint inputLength =0;for(QStringIterator iter(in); iter.hasNext();) { inputLength++;if(iter.next(char32_t(-1)) ==char32_t(-1)) { output->truncate(outLen);return;// invalid surrogate pair}}// while there are still unprocessed non-basic code points left in// the input string...while(h < inputLength) {// find the character in the input string with the lowest unprocessed value. uint m =std::numeric_limits<uint>::max();for(QStringIterator iter(in); iter.hasNext();) {auto c = iter.nextUnchecked();static_assert(std::numeric_limits<decltype(m)>::max()>=std::numeric_limits<decltype(c)>::max(),"Punycode uint should be able to cover all codepoints");if(c >= n && c < m) m = c;}// delta = delta + (m - n) * (h + 1), fail on overflow uint tmp;if(qMulOverflow<uint>(m - n, h +1, &tmp) || qAddOverflow<uint>(delta, tmp, &delta)) { output->truncate(outLen);return;// punycode_overflow} n = m;for(QStringIterator iter(in); iter.hasNext();) {auto c = iter.nextUnchecked();// increase delta until we reach the character processed in this iteration;// fail if delta overflows.if(c < n) {if(qAddOverflow<uint>(delta,1, &delta)) { output->truncate(outLen);return;// punycode_overflow}}if(c == n) {appendEncode(output, delta, bias); bias =adapt(delta, h +1, h == b); delta =0;++h;}}++delta;++n;}// prepend ACE prefix output->insert(outLen,"xn--"_L1);return;} Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc){ uint n = initial_n; uint i =0; uint bias = initial_bias;// Do not try to decode strings longer than allowable for a domain label.// Non-ASCII strings are not allowed here anyway, so there is no need// to account for surrogates.if(pc.size() > MaxDomainLabelLength)returnQString();// strip any ACE prefixint start = pc.startsWith("xn--"_L1) ?4:0;if(!start)return pc;// find the last delimiter character '-' in the input array. 
copy// all data before this delimiter directly to the output array.int delimiterPos = pc.lastIndexOf(u'-');auto output = delimiterPos <4?std::u32string(): pc.mid(start, delimiterPos - start).toStdU32String();// if a delimiter was found, skip to the position after it;// otherwise start at the front of the input string. everything// before the delimiter is assumed to be basic code points. uint cnt = delimiterPos +1;// loop through the rest of the input string, inserting non-basic// characters into output as we go.while(cnt < (uint) pc.size()) { uint oldi = i; uint w =1;// find the next index for inserting a non-basic character.for(uint k = base; cnt < (uint) pc.size(); k += base) {// grab a character from the punycode input and find its// delta digit (each digit code is part of the// variable-length integer delta) uint digit = pc.at(cnt++).unicode();if(digit -48<10) digit -=22;else if(digit -65<26) digit -=65;else if(digit -97<26) digit -=97;else digit = base;// Fail if the code point has no digit valueif(digit >= base)returnQString();// i = i + digit * w, fail on overflow uint tmp;if(qMulOverflow<uint>(digit, w, &tmp) || qAddOverflow<uint>(i, tmp, &i))returnQString();// detect threshold to stop reading delta digits uint t;if(k <= bias) t = tmin;else if(k >= bias + tmax) t = tmax;else t = k - bias;if(digit < t)break;// w = w * (base - t), fail on overflowif(qMulOverflow<uint>(w, base - t, &w))returnQString();}// find new bias and calculate the next non-basic code// character. 
uint outputLength =static_cast<uint>(output.length()); bias =adapt(i - oldi, outputLength +1, oldi ==0);// n = n + i div (length(output) + 1), fail on overflowif(qAddOverflow<uint>(n, i / (outputLength +1), &n))returnQString();// allow the deltas to wrap around i %= (outputLength +1);// if n is a basic code point then fail; this should not happen with// correct implementation of Punycode, but check just n case.if(n < initial_n) {// Don't use Q_ASSERT() to avoid possibility of DoSqWarning("Attempt to insert a basic codepoint. Unhandled overflow?");returnQString();}// Surrogates should normally be rejected later by other IDNA code.// But because of Qt's use of UTF-16 to represent strings the// IDNA code is not able to distinguish characters represented as pairs// of surrogates from normal code points. This is why surrogates are// not allowed here.//// Allowing surrogates would lead to non-unique (after normalization)// encoding of strings with non-BMP characters.//// Punycode that encodes characters outside the Unicode range is also// invalid and is rejected here.if(QChar::isSurrogate(n) || n >QChar::LastValidCodePoint)returnQString();// insert the character n at position i output.insert(i,1,static_cast<char32_t>(n));++i;}returnQString::fromStdU32String(output);}staticconstexpr auto idn_whitelist =qOffsetStringArray("ac","ar","asia","at","biz","br","cat","ch","cl","cn","com","de","dk","es","fi","gr","hu","il","info","io","ir","is","jp","kr","li","lt","lu","lv","museum","name","net","no","nu","nz","org","pl","pr","se","sh","tel","th","tm","tw","ua","vn","xn--fiqs8s",// China"xn--fiqz9s",// China"xn--fzc2c9e2c",// Sri Lanka"xn--j6w193g",// Hong Kong"xn--kprw13d",// Taiwan"xn--kpry57d",// Taiwan"xn--mgba3a4f16a",// Iran"xn--mgba3a4fra",// Iran"xn--mgbaam7a8h",// UAE"xn--mgbayh7gpa",// Jordan"xn--mgberp4a5d4ar",// Saudi Arabia"xn--ogbpf8fl",// Syria"xn--p1ai",// Russian Federation"xn--wgbh1c",// Egypt"xn--wgbl6a",// Qatar"xn--xkc2al3hye2a"// Sri Lanka); Q_CONSTINIT 
static QStringList *user_idn_whitelist =nullptr;static boollessThan(const QChar *a,int l,const char*c){constauto*uc =reinterpret_cast<const char16_t *>(a);const char16_t *e = uc + l;if(!c || *c ==0)return false;while(*c) {if(uc == e || *uc !=static_cast<unsigned char>(*c))break;++uc;++c;}return uc == e ? *c : (*uc <static_cast<unsigned char>(*c));}static boolequal(const QChar *a,int l,const char*b){while(l && a->unicode() && *b) {if(*a !=QLatin1Char(*b))return false;++a;++b;--l;}return l ==0;}static boolqt_is_idn_enabled(QStringView aceDomain){auto idx = aceDomain.lastIndexOf(u'.');if(idx == -1)return false;auto tldString = aceDomain.mid(idx +1);constauto len = tldString.size();const QChar *tld = tldString.constData();if(user_idn_whitelist)return user_idn_whitelist->contains(tldString);int l =0;int r = idn_whitelist.count() -1;int i = (l + r +1) /2;while(r != l) {if(lessThan(tld, len, idn_whitelist.at(i))) r = i -1;else l = i; i = (l + r +1) /2;}returnequal(tld, len, idn_whitelist.at(i));}template<typename C>staticinlineboolisValidInNormalizedAsciiLabel(C c){return c == u'-'|| c == u'_'|| (c >= u'0'&& c <= u'9') || (c >= u'a'&& c <= u'z');}template<typename C>staticinlineboolisValidInNormalizedAsciiName(C c){returnisValidInNormalizedAsciiLabel(c) || c == u'.';}/* Map domain name according to algorithm in UTS #46, 4.1 Returns empty string if there are disallowed characters in the input. 
Sets resultIsAscii if the result is known for sure to be all ASCII.*/static QString mapDomainName(const QString &in,QUrl::AceProcessingOptions options,bool*resultIsAscii){*resultIsAscii =true;// Check if the input is already normalized ASCII first and can be returned as is.int i =0;for(auto c : in) {if(c.unicode() >=0x80|| !isValidInNormalizedAsciiName(c))break; i++;}if(i == in.size())return in; QString result; result.reserve(in.size()); result.append(in.constData(), i);bool allAscii =true;for(QStringIterator iter(QStringView(in).sliced(i)); iter.hasNext();) { char32_t uc = iter.next();// Fast path for ASCII-only inputsif(Q_LIKELY(uc <0x80)) {if(uc >= U'A'&& uc <= U'Z') uc |=0x20;// lower-case itif(isValidInNormalizedAsciiName(uc)) { result.append(static_cast<char16_t>(uc));continue;}} allAscii =false;// Capital sharp S is a special case since UTR #46 revision 31 (Unicode 15.1)if(uc ==0x1E9E&& options.testFlag(QUrl::AceTransitionalProcessing)) { result.append(u"ss"_s);continue;}QUnicodeTables::IdnaStatus status =QUnicodeTables::idnaStatus(uc);if(status ==QUnicodeTables::IdnaStatus::Deviation) status = options.testFlag(QUrl::AceTransitionalProcessing)?QUnicodeTables::IdnaStatus::Mapped :QUnicodeTables::IdnaStatus::Valid;switch(status) {caseQUnicodeTables::IdnaStatus::Ignored:continue;caseQUnicodeTables::IdnaStatus::Valid:caseQUnicodeTables::IdnaStatus::Disallowed:for(auto c :QChar::fromUcs4(uc)) result.append(c);break;caseQUnicodeTables::IdnaStatus::Mapped: result.append(QUnicodeTables::idnaMapping(uc));break;default:Q_UNREACHABLE();}}*resultIsAscii = allAscii;return result;}/* Check the rules for an ASCII label. Check the size restriction and that the label does not start or end with dashes. 
The label should be nonempty.*/static boolvalidateAsciiLabel(QStringView label){if(label.size() > MaxDomainLabelLength)return false;if(label.first() == u'-'|| label.last() == u'-')return false;returnstd::all_of(label.begin(), label.end(), isValidInNormalizedAsciiLabel<QChar>);}namespace{class DomainValidityChecker {bool domainNameIsBidi =false;bool hadBidiErrors =false;bool ignoreBidiErrors;staticconstexpr char32_t ZWNJ = U'\u200C';staticconstexpr char32_t ZWJ = U'\u200D';public:DomainValidityChecker(bool ignoreBidiErrors =false) :ignoreBidiErrors(ignoreBidiErrors) { }boolcheckLabel(const QString &label,QUrl::AceProcessingOptions options);private:static boolcheckContextJRules(QStringView label);static boolcheckBidiRules(QStringView label);};}// anonymous namespace/* Check CONTEXTJ rules according to RFC 5892, appendix A.1 & A.2. Rule Set for U+200C (ZWNJ): False; If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True; If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C (Joining_Type:T)*(Joining_Type:{R,D})) Then True; Rule Set for U+200D (ZWJ): False; If Canonical_Combining_Class(Before(cp)) .eq. 
Virama Then True;*/boolDomainValidityChecker::checkContextJRules(QStringView label){constexprunsigned char CombiningClassVirama =9;enumclass State { Initial, LD_T,// L,D with possible following T* ZWNJ_T,// ZWNJ with possible following T*}; State regexpState =State::Initial;bool previousIsVirama =false;for(QStringIterator iter(label); iter.hasNext();) {auto ch = iter.next();if(ch == ZWJ) {if(!previousIsVirama)return false; regexpState =State::Initial;}else if(ch == ZWNJ) {if(!previousIsVirama && regexpState !=State::LD_T)return false; regexpState = previousIsVirama ?State::Initial :State::ZWNJ_T;}else{switch(QChar::joiningType(ch)) {caseQChar::Joining_Left:if(regexpState ==State::ZWNJ_T)return false; regexpState =State::LD_T;break;caseQChar::Joining_Right: regexpState =State::Initial;break;caseQChar::Joining_Dual: regexpState =State::LD_T;break;caseQChar::Joining_Transparent:break;default: regexpState =State::Initial;break;}} previousIsVirama =QChar::combiningClass(ch) == CombiningClassVirama;}return regexpState !=State::ZWNJ_T;}/* Check if the label conforms to BiDi rule of RFC 5893. 1. The first character must be a character with Bidi property L, R, or AL. If it has the R or AL property, it is an RTL label; if it has the L property, it is an LTR label. 2. In an RTL label, only characters with the Bidi properties R, AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed. 3. In an RTL label, the end of the label must be a character with Bidi property R, AL, EN, or AN, followed by zero or more characters with Bidi property NSM. 4. In an RTL label, if an EN is present, no AN may be present, and vice versa. 5. In an LTR label, only characters with the Bidi properties L, EN, ES, CS, ET, ON, BN, or NSM are allowed. 6. 
In an LTR label, the end of the label must be a character with
        Bidi property L or EN, followed by zero or more characters with
        Bidi property NSM.
*/
bool DomainValidityChecker::checkBidiRules(QStringView label)
{
    if (label.isEmpty())
        return true;

    QStringIterator it(label);
    Q_ASSERT(it.hasNext());

    // Rule 1: the first character decides the direction of the label.
    const auto firstDirection = QChar::direction(it.next());
    const bool rtl = firstDirection == QChar::DirR || firstDirection == QChar::DirAL;
    if (!rtl && firstDirection != QChar::DirL)
        return false;

    bool endsProperly = true; // rules 3 and 6
    bool sawEN = false; // rule 4, only tracked for RTL labels
    bool sawAN = false;

    while (it.hasNext()) {
        switch (QChar::direction(it.next())) {
        case QChar::DirL:
            if (rtl)
                return false;
            endsProperly = true;
            break;

        case QChar::DirR:
        case QChar::DirAL:
            if (!rtl)
                return false;
            endsProperly = true;
            break;

        case QChar::DirAN:
            // AN is not allowed in LTR labels, nor together with EN.
            if (!rtl || sawEN)
                return false;
            sawAN = true;
            endsProperly = true;
            break;

        case QChar::DirEN:
            if (rtl) {
                if (sawAN)
                    return false;
                sawEN = true;
            }
            endsProperly = true;
            break;

        case QChar::DirNSM:
            // does not change whether the label ends properly
            break;

        case QChar::DirES:
        case QChar::DirCS:
        case QChar::DirET:
        case QChar::DirON:
        case QChar::DirBN:
            // allowed inside the label, but not as its last non-NSM character
            endsProperly = false;
            break;

        default:
            return false;
        }
    }

    return endsProperly;
}

/*
    Check if the given label is valid according to UTS #46 validity criteria.

    NFC check can be skipped if the label was transformed to NFC before calling
    this function (as optimization).

    The domain name is considered invalid if this function returns false at least
    once.

    1. The label must be in Unicode Normalization Form NFC.
    2. If CheckHyphens, the label must not contain a U+002D HYPHEN-MINUS character
       in both the third and fourth positions.
    3. If CheckHyphens, the label must neither begin nor end with a U+002D HYPHEN-MINUS character.
    4. The label must not contain a U+002E ( . ) FULL STOP.
    5. The label must not begin with a combining mark, that is: General_Category=Mark.
    6. Each code point in the label must only have certain status values according to
       Section 5, IDNA Mapping Table:
        1. For Transitional Processing, each value must be valid.
        2. 
For Nontransitional Processing, each value must be either valid or deviation. 7. If CheckJoiners, the label must satisfy the ContextJ rules from Appendix A, in The Unicode Code Points and Internationalized Domain Names for Applications (IDNA). 8. If CheckBidi, and if the domain name is a Bidi domain name, then the label must satisfy all six of the numbered conditions in RFC 5893, Section 2. NOTE: Don't use QStringView for label, so that call to QString::normalized() can avoid memory allocation when there is nothing to normalize.*/boolDomainValidityChecker::checkLabel(const QString &label,QUrl::AceProcessingOptions options){if(label.isEmpty())return true;if(label != label.normalized(QString::NormalizationForm_C))return false;if(label.size() >=4) {// This assumes that the first two characters are in BMP, but that's ok// because non-BMP characters are unlikely to be used for specifying// future extensions.if(label[2] == u'-'&& label[3] == u'-')return ignoreBidiErrors && label.startsWith(u"xn") &&validateAsciiLabel(label);}if(label.startsWith(u'-') || label.endsWith(u'-'))return false;if(label.contains(u'.'))return false; QStringIterator iter(label);auto c = iter.next();if(QChar::isMark(c))return false;// As optimization, CONTEXTJ rules check can be skipped if no// ZWJ/ZWNJ characters were found during the first pass.bool hasJoiners =false;for(;;) { hasJoiners = hasJoiners || c == ZWNJ || c == ZWJ;if(!ignoreBidiErrors && !domainNameIsBidi) {switch(QChar::direction(c)) {caseQChar::DirR:caseQChar::DirAL:caseQChar::DirAN: domainNameIsBidi =true;if(hadBidiErrors)return false;break;default:break;}}switch(QUnicodeTables::idnaStatus(c)) {caseQUnicodeTables::IdnaStatus::Valid:break;caseQUnicodeTables::IdnaStatus::Deviation:if(options.testFlag(QUrl::AceTransitionalProcessing))return false;break;default:return false;}if(!iter.hasNext())break; c = iter.next();}if(hasJoiners && !checkContextJRules(label))return false; hadBidiErrors = hadBidiErrors || 
!checkBidiRules(label);if(domainNameIsBidi && hadBidiErrors)return false;return true;}static QString convertToAscii(QStringView normalizedDomain, AceLeadingDot dot){ qsizetype lastIdx =0; QString aceForm;// this variable is here for caching QString aceResult;while(true) { qsizetype idx = normalizedDomain.indexOf(u'.', lastIdx);if(idx == -1) idx = normalizedDomain.size();const qsizetype labelLength = idx - lastIdx;if(labelLength) {constauto label = normalizedDomain.sliced(lastIdx, labelLength); aceForm.clear();qt_punycodeEncoder(label, &aceForm);if(aceForm.isEmpty())return{}; aceResult.append(aceForm);}if(idx == normalizedDomain.size())break;if(labelLength ==0&& (dot == ForbidLeadingDot || idx >0))return{};// two delimiters in a row -- empty label not allowed lastIdx = idx +1; aceResult += u'.';}return aceResult;}static boolcheckAsciiDomainName(QStringView normalizedDomain, AceLeadingDot dot,bool*usesPunycode){ qsizetype lastIdx =0;bool hasPunycode =false;*usesPunycode =false;while(lastIdx < normalizedDomain.size()) {auto idx = normalizedDomain.indexOf(u'.', lastIdx);if(idx == -1) idx = normalizedDomain.size();constauto labelLength = idx - lastIdx;if(labelLength ==0) {if(idx == normalizedDomain.size())break;if(dot == ForbidLeadingDot || idx >0)return false;// two delimiters in a row -- empty label not allowed}else{constauto label = normalizedDomain.sliced(lastIdx, labelLength);if(!validateAsciiLabel(label))return false; hasPunycode = hasPunycode || label.startsWith("xn--"_L1);} lastIdx = idx +1;}*usesPunycode = hasPunycode;return true;}static QString convertToUnicode(const QString &asciiDomain,QUrl::AceProcessingOptions options){ QString result; result.reserve(asciiDomain.size()); qsizetype lastIdx =0; DomainValidityChecker checker;while(true) {auto idx = asciiDomain.indexOf(u'.', lastIdx);if(idx == -1) idx = asciiDomain.size();constauto labelLength = idx - lastIdx;if(labelLength ==0) {if(idx == asciiDomain.size())break;}else{constauto label = 
asciiDomain.sliced(lastIdx, labelLength);constauto unicodeLabel =qt_punycodeDecoder(label);if(unicodeLabel.isEmpty())return asciiDomain;if(!checker.checkLabel(unicodeLabel, options))return asciiDomain; result.append(unicodeLabel);}if(idx == asciiDomain.size())break; lastIdx = idx +1; result += u'.';}return result;}static boolcheckUnicodeName(const QString &domainName,QUrl::AceProcessingOptions options){ qsizetype lastIdx =0; DomainValidityChecker checker(true);while(true) { qsizetype idx = domainName.indexOf(u'.', lastIdx);if(idx == -1) idx = domainName.size();const qsizetype labelLength = idx - lastIdx;if(labelLength) {constauto label = domainName.sliced(lastIdx, labelLength);if(!checker.checkLabel(label, options))return false;}if(idx == domainName.size())break; lastIdx = idx +1;}return true;} QString qt_ACE_do(const QString &domain, AceOperation op, AceLeadingDot dot,QUrl::AceProcessingOptions options){if(domain.isEmpty())return{};bool mappedToAscii;const QString mapped =mapDomainName(domain, options, &mappedToAscii);const QString normalized = mappedToAscii ? mapped : mapped.normalized(QString::NormalizationForm_C);if(normalized.isEmpty())return{};if(!mappedToAscii && !checkUnicodeName(normalized, options))return{};bool needsConversionToUnicode;const QString aceResult = mappedToAscii ? normalized :convertToAscii(normalized, dot);if(aceResult.isEmpty() || !checkAsciiDomainName(aceResult, dot, &needsConversionToUnicode))return{};if(op == ToAceOnly || !needsConversionToUnicode || (!options.testFlag(QUrl::IgnoreIDNWhitelist) && !qt_is_idn_enabled(aceResult))) {return aceResult;}returnconvertToUnicode(aceResult, options);}/*! \since 4.2 Returns the current whitelist of top-level domains that are allowed to have non-ASCII characters in their compositions. See setIdnWhitelist() for the rationale of this list. 
\sa AceProcessingOption*/ QStringList QUrl::idnWhitelist(){if(user_idn_whitelist)return*user_idn_whitelist;static const QStringList list = [] { QStringList list; list.reserve(idn_whitelist.count());int i =0;while(i < idn_whitelist.count()) { list <<QLatin1StringView(idn_whitelist.at(i));++i;}return list;}();return list;}/*! \since 4.2 Sets the whitelist of Top-Level Domains (TLDs) that are allowed to have non-ASCII characters in domains to the value of \a list. Note that if you call this function, you need to do so \e before you start any threads that might access idnWhitelist(). Qt comes with a default list that contains the Internet top-level domains that have published support for Internationalized Domain Names (IDNs) and rules to guarantee that no deception can happen between similarly-looking characters (such as the Latin lowercase letter \c 'a' and the Cyrillic equivalent, which in most fonts are visually identical). This list is periodically maintained, as registrars publish new rules. This function is provided for those who need to manipulate the list, in order to add or remove a TLD. It is not recommended to change its value for purposes other than testing, as it may expose users to security risks.*/voidQUrl::setIdnWhitelist(const QStringList &list){if(!user_idn_whitelist) user_idn_whitelist =new QStringList;*user_idn_whitelist = list;} QT_END_NAMESPACE
|