| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // | ||
| 2 | // Copyright (c) 2021 Vinnie Falco (vinnie dot falco at gmail dot com) | ||
| 3 | // | ||
| 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | ||
| 5 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | ||
| 6 | // | ||
| 7 | // Official repository: https://github.com/boostorg/url | ||
| 8 | // | ||
| 9 | |||
| 10 | #ifndef BOOST_URL_GRAMMAR_LUT_CHARS_HPP | ||
| 11 | #define BOOST_URL_GRAMMAR_LUT_CHARS_HPP | ||
| 12 | |||
| 13 | #include <boost/url/detail/config.hpp> | ||
| 14 | #include <boost/url/grammar/detail/charset.hpp> | ||
| 15 | #include <cstdint> | ||
| 16 | #include <type_traits> | ||
| 17 | |||
| 18 | // Credit to Peter Dimov for ideas regarding | ||
| 19 | // SIMD constexpr, and character set masks. | ||
| 20 | |||
| 21 | namespace boost { | ||
| 22 | namespace urls { | ||
| 23 | namespace grammar { | ||
| 24 | |||
| 25 | #ifndef BOOST_URL_DOCS | ||
| 26 | namespace detail { | ||
| 27 | template<class T, class = void> | ||
| 28 | struct is_pred : std::false_type {}; | ||
| 29 | |||
| 30 | template<class T> | ||
| 31 | struct is_pred<T, void_t< | ||
| 32 | decltype( | ||
| 33 | std::declval<bool&>() = | ||
| 34 | std::declval<T const&>().operator()( | ||
| 35 | std::declval<char>()) | ||
| 36 | ) > > : std::true_type | ||
| 37 | { | ||
| 38 | }; | ||
| 39 | } // detail | ||
| 40 | #endif | ||
| 41 | |||
| 42 | /** A set of characters | ||
| 43 | |||
| 44 | The characters defined by instances of | ||
| 45 | this set are provided upon construction. | ||
| 46 | The `constexpr` implementation allows | ||
| 47 | these to become compile-time constants. | ||
| 48 | |||
| 49 | @par Example | ||
| 50 | Character sets are used with rules and the | ||
| 51 | functions @ref find_if and @ref find_if_not. | ||
| 52 | @code | ||
| 53 | constexpr lut_chars vowel_chars = "AEIOU" "aeiou"; | ||
| 54 | |||
| 55 | system::result< core::string_view > rv = parse( "Aiea", token_rule( vowel_chars ) ); | ||
| 56 | @endcode | ||
| 57 | |||
| 58 | @see | ||
| 59 | @ref find_if, | ||
| 60 | @ref find_if_not, | ||
| 61 | @ref parse, | ||
| 62 | @ref token_rule. | ||
| 63 | */ | ||
| 64 | class lut_chars | ||
| 65 | { | ||
| 66 | std::uint64_t mask_[4] = {}; | ||
| 67 | |||
| 68 | constexpr | ||
| 69 | static | ||
| 70 | std::uint64_t | ||
| 71 | 155542 | lo(char c) noexcept | |
| 72 | { | ||
| 73 | 155542 | return static_cast< | |
| 74 | 155542 | unsigned char>(c) & 3; | |
| 75 | } | ||
| 76 | |||
| 77 | constexpr | ||
| 78 | static | ||
| 79 | std::uint64_t | ||
| 80 | 136531 | hi(char c) noexcept | |
| 81 | { | ||
| 82 | 136531 | return 1ULL << (static_cast< | |
| 83 | 136531 | unsigned char>(c) >> 2); | |
| 84 | } | ||
| 85 | |||
| 86 | constexpr | ||
| 87 | static | ||
| 88 | lut_chars | ||
| 89 | construct( | ||
| 90 | char const* s) noexcept | ||
| 91 | { | ||
| 92 | return *s | ||
| 93 | ? lut_chars(*s) + | ||
| 94 | construct(s+1) | ||
| 95 | : lut_chars(); | ||
| 96 | } | ||
| 97 | |||
| 98 | constexpr | ||
| 99 | static | ||
| 100 | lut_chars | ||
| 101 | 34048 | construct( | |
| 102 | unsigned char ch, | ||
| 103 | bool b) noexcept | ||
| 104 | { | ||
| 105 | return b | ||
| 106 | 5824 | ? lut_chars(ch) | |
| 107 |
4/4✓ Branch 0 taken 5824 times.
✓ Branch 1 taken 28224 times.
✓ Branch 3 taken 84672 times.
✓ Branch 4 taken 28224 times.
|
118720 | : lut_chars(); |
| 108 | } | ||
| 109 | |||
| 110 | template<class Pred> | ||
| 111 | constexpr | ||
| 112 | static | ||
| 113 | lut_chars | ||
| 114 | 68096 | construct( | |
| 115 | Pred pred, | ||
| 116 | unsigned char ch) noexcept | ||
| 117 | { | ||
| 118 | return ch == 255 | ||
| 119 | 266 | ? construct(ch, pred(static_cast<char>(ch))) | |
| 120 | 67830 | : construct(ch, pred(static_cast<char>(ch))) + | |
| 121 |
2/2✓ Branch 0 taken 133 times.
✓ Branch 1 taken 33915 times.
|
136192 | construct(pred, ch + 1); |
| 122 | } | ||
| 123 | |||
| 124 | constexpr | ||
| 125 | 28224 | lut_chars() = default; | |
| 126 | |||
| 127 | constexpr | ||
| 128 | 34105 | lut_chars( | |
| 129 | std::uint64_t m0, | ||
| 130 | std::uint64_t m1, | ||
| 131 | std::uint64_t m2, | ||
| 132 | std::uint64_t m3) noexcept | ||
| 133 | 34105 | : mask_{ m0, m1, m2, m3 } | |
| 134 | { | ||
| 135 | 34105 | } | |
| 136 | |||
| 137 | public: | ||
| 138 | /** Constructor | ||
| 139 | |||
| 140 | This function constructs a character | ||
| 141 | set which has as a single member, | ||
| 142 | the character `ch`. | ||
| 143 | |||
| 144 | @par Example | ||
| 145 | @code | ||
| 146 | constexpr lut_chars asterisk( '*' ); | ||
| 147 | @endcode | ||
| 148 | |||
| 149 | @par Complexity | ||
| 150 | Constant. | ||
| 151 | |||
| 152 | @par Exception Safety | ||
| 153 | Throws nothing. | ||
| 154 | |||
| 155 | @param ch A character. | ||
| 156 | */ | ||
| 157 | constexpr | ||
| 158 | 6337 | lut_chars(char ch) noexcept | |
| 159 | 6337 | : mask_ { | |
| 160 |
2/2✓ Branch 1 taken 1362 times.
✓ Branch 2 taken 4975 times.
|
6337 | lo(ch) == 0 ? hi(ch) : 0, |
| 161 |
2/2✓ Branch 0 taken 1901 times.
✓ Branch 1 taken 4436 times.
|
6337 | lo(ch) == 1 ? hi(ch) : 0, |
| 162 |
2/2✓ Branch 0 taken 1738 times.
✓ Branch 1 taken 4599 times.
|
6337 | lo(ch) == 2 ? hi(ch) : 0, |
| 163 |
2/2✓ Branch 3 taken 1336 times.
✓ Branch 4 taken 5001 times.
|
19011 | lo(ch) == 3 ? hi(ch) : 0 } |
| 164 | { | ||
| 165 | 6337 | } | |
| 166 | |||
| 167 | /** Constructor | ||
| 168 | |||
| 169 | This function constructs a character | ||
| 170 | set which has as members, all of the | ||
| 171 | characters present in the null-terminated | ||
| 172 | string `s`. | ||
| 173 | |||
| 174 | @par Example | ||
| 175 | @code | ||
| 176 | constexpr lut_chars digits = "0123456789"; | ||
| 177 | @endcode | ||
| 178 | |||
| 179 | @par Complexity | ||
| 180 | Linear in `::strlen(s)`, or constant | ||
| 181 | if `s` is a constant expression. | ||
| 182 | |||
| 183 | @par Exception Safety | ||
| 184 | Throws nothing. | ||
| 185 | |||
| 186 | @param s A null-terminated string. | ||
| 187 | */ | ||
| 188 | constexpr | ||
| 189 | lut_chars( | ||
| 190 | char const* s) noexcept | ||
| 191 | : lut_chars(construct(s)) | ||
| 192 | { | ||
| 193 | } | ||
| 194 | |||
| 195 | /** Constructor. | ||
| 196 | |||
| 197 | This function constructs a character | ||
| 198 | set which has as members, every value | ||
| 199 | of `char ch` for which the expression | ||
| 200 | `pred(ch)` returns `true`. | ||
| 201 | |||
| 202 | @par Example | ||
| 203 | @code | ||
| 204 | struct is_digit | ||
| 205 | { | ||
| 206 | constexpr bool | ||
| 207 | operator()(char c ) const noexcept | ||
| 208 | { | ||
| 209 | return c >= '0' && c <= '9'; | ||
| 210 | } | ||
| 211 | }; | ||
| 212 | |||
| 213 | constexpr lut_chars digits( is_digit{} ); | ||
| 214 | @endcode | ||
| 215 | |||
| 216 | @par Complexity | ||
| 217 | One call to `pred` for each of the 256 possible character values, or constant if | ||
| 218 | `pred(ch)` is a constant expression. | ||
| 219 | |||
| 220 | @par Exception Safety | ||
| 221 | Throws nothing. | ||
| 222 | |||
| 223 | @param pred The function object to | ||
| 224 | use for determining membership in | ||
| 225 | the character set. | ||
| 226 | */ | ||
| 227 | template<class Pred | ||
| 228 | #ifndef BOOST_URL_DOCS | ||
| 229 | ,class = typename std::enable_if< | ||
| 230 | detail::is_pred<Pred>::value && | ||
| 231 | ! std::is_base_of< | ||
| 232 | lut_chars, Pred>::value>::type | ||
| 233 | #endif | ||
| 234 | > | ||
| 235 | constexpr | ||
| 236 | 266 | lut_chars(Pred const& pred) noexcept | |
| 237 | : lut_chars( | ||
| 238 | 266 | construct(pred, 0)) | |
| 239 | { | ||
| 240 | 266 | } | |
| 241 | |||
| 242 | /** Return true if ch is in the character set. | ||
| 243 | |||
| 244 | This function returns true if the | ||
| 245 | character `ch` is in the set, otherwise | ||
| 246 | it returns false. | ||
| 247 | |||
| 248 | @par Complexity | ||
| 249 | Constant. | ||
| 250 | |||
| 251 | @par Exception Safety | ||
| 252 | Throws nothing. | ||
| 253 | |||
| 254 | @param ch The character to test. | ||
| 255 | */ | ||
| 256 | constexpr | ||
| 257 | bool | ||
| 258 | 1280 | operator()( | |
| 259 | unsigned char ch) const noexcept | ||
| 260 | { | ||
| 261 | 1280 | return operator()(static_cast<char>(ch)); | |
| 262 | } | ||
| 263 | |||
| 264 | /// @copydoc operator()(unsigned char) const | ||
| 265 | constexpr | ||
| 266 | bool | ||
| 267 | 130194 | operator()(char ch) const noexcept | |
| 268 | { | ||
| 269 | 130194 | return mask_[lo(ch)] & hi(ch); | |
| 270 | } | ||
| 271 | |||
| 272 | /** Return the union of two character sets. | ||
| 273 | |||
| 274 | This function returns a new character | ||
| 275 | set which contains all of the characters | ||
| 276 | in `cs0` as well as all of the characters | ||
| 277 | in `cs1`. | ||
| 278 | |||
| 279 | @par Example | ||
| 280 | This creates a character set which | ||
| 281 | includes all letters and numbers | ||
| 282 | @code | ||
| 283 | constexpr lut_chars alpha_chars( | ||
| 284 | "ABCDEFGHIJKLMNOPQRSTUVWXYZ" | ||
| 285 | "abcdefghijklmnopqrstuvwxyz"); | ||
| 286 | |||
| 287 | constexpr lut_chars alnum_chars = alpha_chars + "0123456789"; | ||
| 288 | @endcode | ||
| 289 | |||
| 290 | @par Complexity | ||
| 291 | Constant. | ||
| 292 | |||
| 293 | @return The new character set. | ||
| 294 | |||
| 295 | @param cs0 A character set to join | ||
| 296 | |||
| 297 | @param cs1 A character set to join | ||
| 298 | */ | ||
| 299 | friend | ||
| 300 | constexpr | ||
| 301 | lut_chars | ||
| 302 | 33919 | operator+( | |
| 303 | lut_chars const& cs0, | ||
| 304 | lut_chars const& cs1) noexcept | ||
| 305 | { | ||
| 306 | return lut_chars( | ||
| 307 | 33919 | cs0.mask_[0] | cs1.mask_[0], | |
| 308 | 33919 | cs0.mask_[1] | cs1.mask_[1], | |
| 309 | 33919 | cs0.mask_[2] | cs1.mask_[2], | |
| 310 | 33919 | cs0.mask_[3] | cs1.mask_[3]); | |
| 311 | } | ||
| 312 | |||
| 313 | /** Return a new character set by subtracting | ||
| 314 | |||
| 315 | This function returns a new character | ||
| 316 | set which is formed from all of the | ||
| 317 | characters in `cs0` which are not in `cs1`. | ||
| 318 | |||
| 319 | @par Example | ||
| 320 | This statement declares a character set | ||
| 321 | containing all the lowercase letters | ||
| 322 | which are not vowels: | ||
| 323 | @code | ||
| 324 | constexpr lut_chars consonants = lut_chars("abcdefghijklmnopqrstuvwxyz") - "aeiou"; | ||
| 325 | @endcode | ||
| 326 | |||
| 327 | @par Complexity | ||
| 328 | Constant. | ||
| 329 | |||
| 330 | @return The new character set. | ||
| 331 | |||
| 332 | @param cs0 The character set to subtract from. | ||
| 333 | |||
| 334 | @param cs1 The character set whose members are removed. | ||
| 335 | */ | ||
| 336 | friend | ||
| 337 | constexpr | ||
| 338 | lut_chars | ||
| 339 | 186 | operator-( | |
| 340 | lut_chars const& cs0, | ||
| 341 | lut_chars const& cs1) noexcept | ||
| 342 | { | ||
| 343 | return lut_chars( | ||
| 344 | 186 | cs0.mask_[0] & ~cs1.mask_[0], | |
| 345 | 186 | cs0.mask_[1] & ~cs1.mask_[1], | |
| 346 | 186 | cs0.mask_[2] & ~cs1.mask_[2], | |
| 347 | 186 | cs0.mask_[3] & ~cs1.mask_[3]); | |
| 348 | } | ||
| 349 | |||
| 350 | /** Return a new character set which is the complement of another character set. | ||
| 351 | |||
| 352 | This function returns a new character | ||
| 353 | set which contains all of the characters | ||
| 354 | that are not in `*this`. | ||
| 355 | |||
| 356 | @par Example | ||
| 357 | This statement declares a character set | ||
| 358 | containing everything but vowels: | ||
| 359 | @code | ||
| 360 | constexpr lut_chars not_vowels = ~lut_chars( "AEIOU" "aeiou" ); | ||
| 361 | @endcode | ||
| 362 | |||
| 363 | @par Complexity | ||
| 364 | Constant. | ||
| 365 | |||
| 366 | @par Exception Safety | ||
| 367 | Throws nothing. | ||
| 368 | |||
| 369 | @return The new character set. | ||
| 370 | */ | ||
| 371 | constexpr | ||
| 372 | lut_chars | ||
| 373 | operator~() const noexcept | ||
| 374 | { | ||
| 375 | return lut_chars( | ||
| 376 | ~mask_[0], | ||
| 377 | ~mask_[1], | ||
| 378 | ~mask_[2], | ||
| 379 | ~mask_[3] | ||
| 380 | ); | ||
| 381 | } | ||
| 382 | |||
| 383 | #ifndef BOOST_URL_DOCS | ||
| 384 | #ifdef BOOST_URL_USE_SSE2 | ||
| 385 | char const* | ||
| 386 | 1603 | find_if( | |
| 387 | char const* first, | ||
| 388 | char const* last) const noexcept | ||
| 389 | { | ||
| 390 | 1603 | return detail::find_if_pred( | |
| 391 | 1603 | *this, first, last); | |
| 392 | } | ||
| 393 | |||
| 394 | char const* | ||
| 395 | 13897 | find_if_not( | |
| 396 | char const* first, | ||
| 397 | char const* last) const noexcept | ||
| 398 | { | ||
| 399 | 13897 | return detail::find_if_not_pred( | |
| 400 | 13897 | *this, first, last); | |
| 401 | } | ||
| 402 | #endif | ||
| 403 | #endif | ||
| 404 | }; | ||
| 405 | |||
| 406 | } // grammar | ||
| 407 | } // urls | ||
| 408 | } // boost | ||
| 409 | |||
| 410 | #endif | ||
| 411 |