summaryrefslogtreecommitdiff
path: root/prism/util/pm_char.c
blob: a51dc11645ff63085cf1c76aaed2114e1d2f2a1c (plain)
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
#include "prism/util/pm_char.h"

/*
 * Bit flags stored in pm_byte_table. Each entry of that table is the bitwise
 * OR of the categories the corresponding byte belongs to.
 */
#define PRISM_CHAR_BIT_WHITESPACE (1 << 0)
#define PRISM_CHAR_BIT_INLINE_WHITESPACE (1 << 1)
#define PRISM_CHAR_BIT_REGEXP_OPTION (1 << 2)

/*
 * Bit flags stored in pm_number_table. The *_DIGIT flags mark bytes that are
 * digits of the given base; the *_NUMBER flags additionally cover the
 * underscore separator.
 */
#define PRISM_NUMBER_BIT_BINARY_DIGIT (1 << 0)
#define PRISM_NUMBER_BIT_BINARY_NUMBER (1 << 1)
#define PRISM_NUMBER_BIT_OCTAL_DIGIT (1 << 2)
#define PRISM_NUMBER_BIT_OCTAL_NUMBER (1 << 3)
#define PRISM_NUMBER_BIT_DECIMAL_DIGIT (1 << 4)
#define PRISM_NUMBER_BIT_DECIMAL_NUMBER (1 << 5)
#define PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6)
#define PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7)

/*
 * Character classification table, indexed by byte value.
 *
 *   3 (whitespace | inline whitespace): 0x09, 0x0B, 0x0C, 0x0D and 0x20
 *   1 (whitespace only):                0x0A (newline)
 *   4 (regexp option):                  'A'-'Z' and 'a'-'z'
 */
static const uint8_t pm_byte_table[256] = {
//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
    3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3x
    0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 4x
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 5x
    0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 6x
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 7x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
};

/*
 * Numeric classification table, indexed by byte value.
 *
 *   0xff: '0' and '1'            (every digit and number flag)
 *   0xfc: '2'-'7'                (octal, decimal and hexadecimal flags)
 *   0xf0: '8' and '9'            (decimal and hexadecimal flags)
 *   0xc0: 'A'-'F' and 'a'-'f'    (hexadecimal flags)
 *   0xaa: '_'                    (the four *_NUMBER flags, no *_DIGIT flag)
 */
static const uint8_t pm_number_table[256] = {
    // 0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 1x
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 2x
    0xff, 0xff, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xf0, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 3x
    0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 4x
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, // 5x
    0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 6x
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 7x
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 8x
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 9x
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Ax
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Bx
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Cx
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Dx
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Ex
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Fx
};

/**
 * Counts the leading bytes of string whose pm_byte_table entry has at least
 * one of the bits in kind set. At most length bytes are examined; a length
 * that is zero or negative yields 0 without reading the string.
 */
static inline size_t pm_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
    if (length <= 0) return 0;

    const size_t limit = (size_t) length;
    size_t count;

    for (count = 0; count < limit; count++) {
        if ((pm_byte_table[string[count]] & kind) == 0) break;
    }

    return count;
}

/**
 * Counts the leading whitespace bytes of string, examining at most length
 * bytes.
 */
size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
    return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_WHITESPACE);
}

/**
 * Counts the leading whitespace bytes of string, examining at most length
 * bytes. Every '\n' encountered along the way is passed to
 * pm_newline_list_append together with newline_list.
 */
size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list) {
    if (length <= 0) return 0;

    const size_t limit = (size_t) length;
    size_t count = 0;

    for (; count < limit; count++) {
        const uint8_t byte = string[count];
        if ((pm_byte_table[byte] & PRISM_CHAR_BIT_WHITESPACE) == 0) break;

        if (byte == '\n') {
            pm_newline_list_append(newline_list, string + count);
        }
    }

    return count;
}

/**
 * Counts the leading inline whitespace bytes of string, examining at most
 * length bytes.
 */
size_t pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
    return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_INLINE_WHITESPACE);
}

/**
 * Counts the leading regexp option bytes of string, examining at most length
 * bytes.
 */
size_t pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
    return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_REGEXP_OPTION);
}

/**
 * Tests whether the pm_byte_table entry for b has any of the bits in kind set.
 */
static inline bool pm_char_is_char_kind(const uint8_t b, uint8_t kind) {
    const uint8_t flags = pm_byte_table[b];
    return (flags & kind) != 0;
}

/**
 * Tests whether b is a whitespace byte.
 */
bool pm_char_is_whitespace(const uint8_t b) {
    return pm_char_is_char_kind(b, PRISM_CHAR_BIT_WHITESPACE);
}

/**
 * Tests whether b is an inline whitespace byte.
 */
bool pm_char_is_inline_whitespace(const uint8_t b) {
    return pm_char_is_char_kind(b, PRISM_CHAR_BIT_INLINE_WHITESPACE);
}

/**
 * Counts the leading bytes of string whose pm_number_table entry has at least
 * one of the bits in kind set. At most length bytes are examined; a length
 * that is zero or negative yields 0 without reading the string.
 */
static inline size_t pm_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
    if (length <= 0) return 0;

    const size_t limit = (size_t) length;
    size_t count;

    for (count = 0; count < limit; count++) {
        if ((pm_number_table[string[count]] & kind) == 0) break;
    }

    return count;
}

/**
 * Counts the leading bytes of string whose pm_number_table entry has at least
 * one of the bits in kind set, examining at most length bytes.
 *
 * While scanning, *invalid is overwritten each time an underscore directly
 * follows another underscore, and once more if the matched run ends in an
 * underscore. As a result it ends up pointing at the last offending
 * underscore. When no such underscore exists (or length is not positive),
 * *invalid is left untouched.
 */
static inline size_t pm_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
    if (length <= 0) return 0;

    const size_t limit = (size_t) length;
    size_t count = 0;
    bool previous_underscore = false;

    for (; count < limit; count++) {
        const uint8_t byte = string[count];
        if ((pm_number_table[byte] & kind) == 0) break;

        const bool is_underscore = (byte == '_');
        if (is_underscore && previous_underscore) *invalid = string + count;
        previous_underscore = is_underscore;
    }

    // The flag describes the last byte that matched, so when it is set the
    // run is non-empty and ends with an underscore.
    if (previous_underscore) *invalid = string + count - 1;

    return count;
}

/**
 * Counts the leading bytes of string that are binary digits or underscores,
 * examining at most length bytes.
 *
 * If an underscore directly follows another underscore, or the matched run
 * ends with an underscore, *invalid is pointed at the last such underscore.
 */
size_t pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
    return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_BINARY_NUMBER);
}

/**
 * Counts the leading bytes of string that are octal digits or underscores,
 * examining at most length bytes.
 *
 * If an underscore directly follows another underscore, or the matched run
 * ends with an underscore, *invalid is pointed at the last such underscore.
 */
size_t pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
    return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_OCTAL_NUMBER);
}

/**
 * Counts the leading bytes of string that are decimal digits, examining at
 * most length bytes.
 */
size_t pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
    return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
}

/**
 * Counts the leading bytes of string that are decimal digits or underscores,
 * examining at most length bytes.
 *
 * If an underscore directly follows another underscore, or the matched run
 * ends with an underscore, *invalid is pointed at the last such underscore.
 */
size_t pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
    return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_DECIMAL_NUMBER);
}

/**
 * Counts the leading bytes of string that are hexadecimal digits, examining
 * at most length bytes.
 */
size_t pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
    return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
}

/**
 * Counts the leading bytes of string that are hexadecimal digits or
 * underscores, examining at most length bytes.
 *
 * If an underscore directly follows another underscore, or the matched run
 * ends with an underscore, *invalid is pointed at the last such underscore.
 */
size_t pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
    return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER);
}

/**
 * Tests whether the pm_number_table entry for b has any of the bits in kind
 * set.
 */
static inline bool pm_char_is_number_kind(const uint8_t b, uint8_t kind) {
    const uint8_t flags = pm_number_table[b];
    return (flags & kind) != 0;
}

/**
 * Tests whether b is a binary digit.
 */
bool pm_char_is_binary_digit(const uint8_t b) {
    return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_BINARY_DIGIT);
}

/**
 * Tests whether b is an octal digit.
 */
bool pm_char_is_octal_digit(const uint8_t b) {
    return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_OCTAL_DIGIT);
}

/**
 * Tests whether b is a decimal digit.
 */
bool pm_char_is_decimal_digit(const uint8_t b) {
    return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
}

/**
 * Tests whether b is a hexadecimal digit.
 */
bool pm_char_is_hexadecimal_digit(const uint8_t b) {
    return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
}

// The flag macros are private to this file, so drop them again here.
#undef PRISM_CHAR_BIT_WHITESPACE
#undef PRISM_CHAR_BIT_INLINE_WHITESPACE
#undef PRISM_CHAR_BIT_REGEXP_OPTION

#undef PRISM_NUMBER_BIT_BINARY_DIGIT
#undef PRISM_NUMBER_BIT_BINARY_NUMBER
#undef PRISM_NUMBER_BIT_OCTAL_DIGIT
#undef PRISM_NUMBER_BIT_OCTAL_NUMBER
#undef PRISM_NUMBER_BIT_DECIMAL_DIGIT
#undef PRISM_NUMBER_BIT_DECIMAL_NUMBER
#undef PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER
#undef PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT
close