summaryrefslogtreecommitdiffstats
path: root/src/corelib/text/qbytearraymatcher.cpp
blob: 9f27e10f3d57f6192e36a1bd05ae100a854ed6d0 (plain)
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410
// Copyright (C) 2016 The Qt Company Ltd.// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only#include"qbytearraymatcher.h"#include <qtconfiginclude.h>#ifndef QT_BOOTSTRAPPED# include <private/qtcore-config_p.h>#endif#include <limits.h> QT_BEGIN_NAMESPACE staticinlinevoidbm_init_skiptable(const uchar *cc, qsizetype len, uchar *skiptable){int l =int(qMin(len,qsizetype(255)));memset(skiptable, l,256*sizeof(uchar)); cc += len - l;while(l--) skiptable[*cc++] = l;}staticinline qsizetype bm_find(const uchar *cc, qsizetype l, qsizetype index,const uchar *puc, qsizetype pl,const uchar *skiptable){if(pl ==0)return index > l ? -1: index;const qsizetype pl_minus_one = pl -1;const uchar *current = cc + index + pl_minus_one;const uchar *end = cc + l;while(current < end) { qsizetype skip = skiptable[*current];if(!skip) {// possible matchwhile(skip < pl) {if(*(current - skip) != puc[pl_minus_one - skip])break; skip++;}if(skip > pl_minus_one)// we have a matchreturn(current - cc) - skip +1;// in case we don't have a match we are a bit inefficient as we only skip by one// when we have the non matching char in the string.if(skiptable[*(current - skip)] == pl) skip = pl - skip;else skip =1;}if(current > end - skip)break; current += skip;}return-1;// not found}/*! \class QByteArrayMatcher \inmodule QtCore \brief The QByteArrayMatcher class holds a sequence of bytes that can be quickly matched in a byte array. \ingroup tools \ingroup string-processing This class is useful when you have a sequence of bytes that you want to repeatedly match against some byte arrays (perhaps in a loop), or when you want to search for the same sequence of bytes multiple times in the same byte array. Using a matcher object and indexIn() is faster than matching a plain QByteArray with QByteArray::indexOf() if repeated matching takes place. This class offers no benefit if you are doing one-off byte array matches. Create the QByteArrayMatcher with the QByteArray you want to search for. Then call indexIn() on the QByteArray that you want to search. \sa QByteArray, QStringMatcher*//*! Constructs an empty byte array matcher that won't match anything. Call setPattern() to give it a pattern to match.*/QByteArrayMatcher::QByteArrayMatcher():d(nullptr){ p.p =nullptr; p.l =0;memset(p.q_skiptable,0,sizeof(p.q_skiptable));}/*! Constructs a byte array matcher from \a pattern. \a pattern has the given \a length. Call indexIn() to perform a search. \note the data that \a pattern is referencing must remain valid while this object is used.*/QByteArrayMatcher::QByteArrayMatcher(const char*pattern, qsizetype length) :d(nullptr){ p.p =reinterpret_cast<const uchar *>(pattern);if(length <0) p.l =qstrlen(pattern);else p.l = length;bm_init_skiptable(p.p, p.l, p.q_skiptable);}/*! Constructs a byte array matcher that will search for \a pattern. Call indexIn() to perform a search.*/QByteArrayMatcher::QByteArrayMatcher(const QByteArray &pattern):d(nullptr),q_pattern(pattern){ p.p =reinterpret_cast<const uchar *>(pattern.constData()); p.l = pattern.size();bm_init_skiptable(p.p, p.l, p.q_skiptable);}/*! \fn QByteArrayMatcher::QByteArrayMatcher(QByteArrayView pattern) \since 6.3 \overload Constructs a byte array matcher that will search for \a pattern. Call indexIn() to perform a search. \note the data that \a pattern is referencing must remain valid while this object is used.*//*! Copies the \a other byte array matcher to this byte array matcher.*/QByteArrayMatcher::QByteArrayMatcher(const QByteArrayMatcher &other):d(nullptr){operator=(other);}/*! Destroys the byte array matcher.*/QByteArrayMatcher::~QByteArrayMatcher(){Q_UNUSED(d);}/*! Assigns the \a other byte array matcher to this byte array matcher.*/ QByteArrayMatcher &QByteArrayMatcher::operator=(const QByteArrayMatcher &other){ q_pattern = other.q_pattern;memcpy(&p, &other.p,sizeof(p));return*this;}/*! Sets the byte array that this byte array matcher will search for to \a pattern. \sa pattern(), indexIn()*/voidQByteArrayMatcher::setPattern(const QByteArray &pattern){ q_pattern = pattern; p.p =reinterpret_cast<const uchar *>(pattern.constData()); p.l = pattern.size();bm_init_skiptable(p.p, p.l, p.q_skiptable);}/*! Searches the char string \a str, which has length \a len, from byte position \a from (default 0, i.e. from the first byte), for the byte array pattern() that was set in the constructor or in the most recent call to setPattern(). Returns the position where the pattern() matched in \a str, or -1 if no match was found.*/ qsizetype QByteArrayMatcher::indexIn(const char*str, qsizetype len, qsizetype from)const{if(from <0) from =0;returnbm_find(reinterpret_cast<const uchar *>(str), len, from, p.p, p.l, p.q_skiptable);}/*! \fn qsizetype QByteArrayMatcher::indexIn(QByteArrayView data, qsizetype from) const \since 6.3 \overload Searches the byte array \a data, from byte position \a from (default 0, i.e. from the first byte), for the byte array pattern() that was set in the constructor or in the most recent call to setPattern(). Returns the position where the pattern() matched in \a data, or -1 if no match was found.*/ qsizetype QByteArrayMatcher::indexIn(QByteArrayView data, qsizetype from)const{if(from <0) from =0;returnbm_find(reinterpret_cast<const uchar *>(data.data()), data.size(), from, p.p, p.l, p.q_skiptable);}/*! \fn QByteArray QByteArrayMatcher::pattern() const Returns the byte array pattern that this byte array matcher will search for. \sa setPattern()*//*! \internal */ Q_NEVER_INLINE static qsizetype qFindByteArrayBoyerMoore(const char*haystack, qsizetype haystackLen, qsizetype haystackOffset,const char*needle, qsizetype needleLen){ uchar skiptable[256];bm_init_skiptable((const uchar *)needle, needleLen, skiptable);if(haystackOffset <0) haystackOffset =0;returnbm_find((const uchar *)haystack, haystackLen, haystackOffset,(const uchar *)needle, needleLen, skiptable);}/*! \internal */static qsizetype qFindByteArray(const char*haystack0, qsizetype l, qsizetype from,const char*needle, qsizetype sl); qsizetype QtPrivate::findByteArray(QByteArrayView haystack, qsizetype from, QByteArrayView needle) noexcept {constauto haystack0 = haystack.data();constauto l = haystack.size();constauto sl = needle.size();#if !QT_CONFIG(memmem)if(sl ==1)returnfindByteArray(haystack, from, needle.front());#endifif(from <0) from += l;if(std::size_t(sl + from) >std::size_t(l))return-1;if(!sl)return from;if(!l)return-1;#if QT_CONFIG(memmem)auto where =memmem(haystack0 + from, l - from, needle.data(), sl);return where ?static_cast<const char*>(where) - haystack0 : -1;#endif/* We use the Boyer-Moore algorithm in cases where the overhead for the skip table should pay off, otherwise we use a simple hash function. */if(l >500&& sl >5)returnqFindByteArrayBoyerMoore(haystack0, l, from, needle.data(), sl);returnqFindByteArray(haystack0, l, from, needle.data(), sl);} qsizetype qFindByteArray(const char*haystack0, qsizetype l, qsizetype from,const char*needle, qsizetype sl){/* We use some hashing for efficiency's sake. Instead of comparing strings, we compare the hash value of str with that of a part of this QByteArray. Only if that matches, we call memcmp(). */const char*haystack = haystack0 + from;const char*end = haystack0 + (l - sl);const qregisteruint sl_minus_1 = sl -1; qregisteruint hashNeedle =0, hashHaystack =0; qsizetype idx;for(idx =0; idx < sl; ++idx) { hashNeedle = ((hashNeedle<<1) + needle[idx]); hashHaystack = ((hashHaystack<<1) + haystack[idx]);} hashHaystack -= *(haystack + sl_minus_1);while(haystack <= end) { hashHaystack += *(haystack + sl_minus_1);if(hashHaystack == hashNeedle && *needle == *haystack &&memcmp(needle, haystack, sl) ==0)return haystack - haystack0;if(sl_minus_1 <sizeof(sl_minus_1) * CHAR_BIT) hashHaystack -=qregisteruint(*haystack) << sl_minus_1; hashHaystack <<=1;++haystack;}return-1;}/*! \class QStaticByteArrayMatcherBase \since 5.9 \internal \brief Non-template base class of QStaticByteArrayMatcher.*//*! \class QStaticByteArrayMatcher \since 5.9 \inmodule QtCore \brief The QStaticByteArrayMatcher class is a compile-time version of QByteArrayMatcher. \ingroup tools \ingroup string-processing This class is useful when you have a sequence of bytes that you want to repeatedly match against some byte arrays (perhaps in a loop), or when you want to search for the same sequence of bytes multiple times in the same byte array. Using a matcher object and indexIn() is faster than matching a plain QByteArray with QByteArray::indexOf(), in particular if repeated matching takes place. Unlike QByteArrayMatcher, this class calculates the internal representation at \e{compile-time}, so it can even benefit if you are doing one-off byte array matches. Create the QStaticByteArrayMatcher by calling qMakeStaticByteArrayMatcher(), passing it the C string literal you want to search for. Store the return value of that function in a \c{static const auto} variable, so you don't need to pass the \c{N} template parameter explicitly: \snippet code/src_corelib_text_qbytearraymatcher.cpp 0 Then call indexIn() on the QByteArray in which you want to search, just like with QByteArrayMatcher. Since this class is designed to do all the up-front calculations at compile-time, it does not offer a setPattern() method. \sa QByteArrayMatcher, QStringMatcher*//*! \fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const char *haystack, qsizetype hlen, qsizetype from = 0) const Searches the char string \a haystack, which has length \a hlen, from byte position \a from (default 0, i.e. from the first byte), for the byte array pattern() that was set in the constructor. Returns the position where the pattern() matched in \a haystack, or -1 if no match was found.*//*! \fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const QByteArray &haystack, qsizetype from = 0) const Searches the char string \a haystack, from byte position \a from (default 0, i.e. from the first byte), for the byte array pattern() that was set in the constructor. Returns the position where the pattern() matched in \a haystack, or -1 if no match was found.*//*! \fn template <size_t N> QByteArray QStaticByteArrayMatcher<N>::pattern() const Returns the byte array pattern that this byte array matcher will search for. \sa QByteArrayMatcher::setPattern()*//*! \internal*/ qsizetype QStaticByteArrayMatcherBase::indexOfIn(const char*needle,size_t nlen,const char*haystack, qsizetype hlen, qsizetype from)const noexcept {if(from <0) from =0;returnbm_find(reinterpret_cast<const uchar *>(haystack), hlen, from,reinterpret_cast<const uchar *>(needle), nlen, m_skiptable.data);}/*! \fn template <size_t N> QStaticByteArrayMatcher<N>::QStaticByteArrayMatcher(const char (&pattern)[N]) \internal*//*! \fn template <size_t N> QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N]) \since 5.9 \relates QStaticByteArrayMatcher Return a QStaticByteArrayMatcher with the correct \c{N} determined automatically from the \a pattern passed. To take full advantage of this function, assign the result to an \c{auto} variable: \snippet code/src_corelib_text_qbytearraymatcher.cpp 1*/ QT_END_NAMESPACE 
close