GCC Code Coverage Report


Directory: libs/url/
File: boost/url/grammar/lut_chars.hpp
Date: 2024-01-19 15:42:54
Exec Total Coverage
Lines: 45 45 100.0%
Functions: 15 15 100.0%
Branches: 14 14 100.0%

Line Branch Exec Source
1 //
2 // Copyright (c) 2021 Vinnie Falco (vinnie dot falco at gmail dot com)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 //
7 // Official repository: https://github.com/boostorg/url
8 //
9
10 #ifndef BOOST_URL_GRAMMAR_LUT_CHARS_HPP
11 #define BOOST_URL_GRAMMAR_LUT_CHARS_HPP
12
13 #include <boost/url/detail/config.hpp>
14 #include <boost/url/grammar/detail/charset.hpp>
15 #include <cstdint>
16 #include <type_traits>
17
18 // Credit to Peter Dimov for ideas regarding
19 // SIMD constexpr, and character set masks.
20
21 namespace boost {
22 namespace urls {
23 namespace grammar {
24
25 #ifndef BOOST_URL_DOCS
26 namespace detail {
27 template<class T, class = void>
28 struct is_pred : std::false_type {};
29
30 template<class T>
31 struct is_pred<T, void_t<
32 decltype(
33 std::declval<bool&>() =
34 std::declval<T const&>().operator()(
35 std::declval<char>())
36 ) > > : std::true_type
37 {
38 };
39 } // detail
40 #endif
41
42 /** A set of characters
43
44 The characters defined by instances of
45 this set are provided upon construction.
46 The `constexpr` implementation allows
47 these to become compile-time constants.
48
49 @par Example
50 Character sets are used with rules and the
51 functions @ref find_if and @ref find_if_not.
52 @code
53 constexpr lut_chars vowel_chars = "AEIOU" "aeiou";
54
55 system::result< core::string_view > rv = parse( "Aiea", token_rule( vowel_chars ) );
56 @endcode
57
58 @see
59 @ref find_if,
60 @ref find_if_not,
61 @ref parse,
62 @ref token_rule.
63 */
64 class lut_chars
65 {
66 std::uint64_t mask_[4] = {};
67
68 constexpr
69 static
70 std::uint64_t
71 154840 lo(char c) noexcept
72 {
73 154840 return static_cast<
74 154840 unsigned char>(c) & 3;
75 }
76
77 constexpr
78 static
79 std::uint64_t
80 135829 hi(char c) noexcept
81 {
82 135829 return 1ULL << (static_cast<
83 135829 unsigned char>(c) >> 2);
84 }
85
86 constexpr
87 static
88 lut_chars
89 construct(
90 char const* s) noexcept
91 {
92 return *s
93 ? lut_chars(*s) +
94 construct(s+1)
95 : lut_chars();
96 }
97
98 constexpr
99 static
100 lut_chars
101 34048 construct(
102 unsigned char ch,
103 bool b) noexcept
104 {
105 return b
106 5824 ? lut_chars(ch)
107
4/4
✓ Branch 0 taken 5824 times.
✓ Branch 1 taken 28224 times.
✓ Branch 3 taken 84672 times.
✓ Branch 4 taken 28224 times.
118720 : lut_chars();
108 }
109
110 template<class Pred>
111 constexpr
112 static
113 lut_chars
114 68096 construct(
115 Pred pred,
116 unsigned char ch) noexcept
117 {
118 return ch == 255
119 266 ? construct(ch, pred(ch))
120 67830 : construct(ch, pred(ch)) +
121
2/2
✓ Branch 0 taken 133 times.
✓ Branch 1 taken 33915 times.
136192 construct(pred, ch + 1);
122 }
123
124 constexpr
125 28224 lut_chars() = default;
126
127 constexpr
128 34105 lut_chars(
129 std::uint64_t m0,
130 std::uint64_t m1,
131 std::uint64_t m2,
132 std::uint64_t m3) noexcept
133 34105 : mask_{ m0, m1, m2, m3 }
134 {
135 34105 }
136
137 public:
138 /** Constructor
139
140 This function constructs a character
141 set which has as a single member,
142 the character `ch`.
143
144 @par Example
145 @code
146 constexpr lut_chars asterisk( '*' );
147 @endcode
148
149 @par Complexity
150 Constant.
151
152 @par Exception Safety
153 Throws nothing.
154
155 @param ch A character.
156 */
157 constexpr
158 6337 lut_chars(char ch) noexcept
159 6337 : mask_ {
160
2/2
✓ Branch 1 taken 1362 times.
✓ Branch 2 taken 4975 times.
6337 lo(ch) == 0 ? hi(ch) : 0,
161
2/2
✓ Branch 0 taken 1901 times.
✓ Branch 1 taken 4436 times.
6337 lo(ch) == 1 ? hi(ch) : 0,
162
2/2
✓ Branch 0 taken 1738 times.
✓ Branch 1 taken 4599 times.
6337 lo(ch) == 2 ? hi(ch) : 0,
163
2/2
✓ Branch 3 taken 1336 times.
✓ Branch 4 taken 5001 times.
19011 lo(ch) == 3 ? hi(ch) : 0 }
164 {
165 6337 }
166
167 /** Constructor
168
169 This function constructs a character
170 set which has as members, all of the
171 characters present in the null-terminated
172 string `s`.
173
174 @par Example
175 @code
176 constexpr lut_chars digits = "0123456789";
177 @endcode
178
179 @par Complexity
180 Linear in `::strlen(s)`, or constant
181 if `s` is a constant expression.
182
183 @par Exception Safety
184 Throws nothing.
185
186 @param s A null-terminated string.
187 */
188 constexpr
189 lut_chars(
190 char const* s) noexcept
191 : lut_chars(construct(s))
192 {
193 }
194
195 /** Constructor.
196
197 This function constructs a character
198 set which has as members, every value
199 of `char ch` for which the expression
200 `pred(ch)` returns `true`.
201
202 @par Example
203 @code
204 struct is_digit
205 {
206 constexpr bool
207 operator()(char c ) const noexcept
208 {
209 return c >= '0' && c <= '9';
210 }
211 };
212
213 constexpr lut_chars digits( is_digit{} );
214 @endcode
215
216 @par Complexity
217 One call to `pred` for each `unsigned char` value (0 through 255), or constant if
218 `pred(ch)` is a constant expression.
219
220 @par Exception Safety
221 Throws nothing.
222
223 @param pred The function object to
224 use for determining membership in
225 the character set.
226 */
227 template<class Pred
228 #ifndef BOOST_URL_DOCS
229 ,class = typename std::enable_if<
230 detail::is_pred<Pred>::value &&
231 ! std::is_base_of<
232 lut_chars, Pred>::value>::type
233 #endif
234 >
235 constexpr
236 266 lut_chars(Pred const& pred) noexcept
237 : lut_chars(
238 266 construct(pred, 0))
239 {
240 266 }
241
242 /** Return true if ch is in the character set.
243
244 This function returns true if the
245 character `ch` is in the set, otherwise
246 it returns false.
247
248 @par Complexity
249 Constant.
250
251 @par Exception Safety
252 Throws nothing.
253
254 @param ch The character to test.
255 */
256 constexpr
257 bool
258 129492 operator()(
259 unsigned char ch) const noexcept
260 {
261 129492 return mask_[lo(ch)] & hi(ch);
262 }
263
264 /** Return the union of two character sets.
265
266 This function returns a new character
267 set which contains all of the characters
268 in `cs0` as well as all of the characters
269 in `cs1`.
270
271 @par Example
272 This creates a character set which
273 includes all letters and numbers
274 @code
275 constexpr lut_chars alpha_chars(
276 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
277 "abcdefghijklmnopqrstuvwxyz");
278
279 constexpr lut_chars alnum_chars = alpha_chars + "0123456789";
280 @endcode
281
282 @par Complexity
283 Constant.
284
285 @return The new character set.
286
287 @param cs0 A character set to join
288
289 @param cs1 A character set to join
290 */
291 friend
292 constexpr
293 lut_chars
294 33919 operator+(
295 lut_chars const& cs0,
296 lut_chars const& cs1) noexcept
297 {
298 return lut_chars(
299 33919 cs0.mask_[0] | cs1.mask_[0],
300 33919 cs0.mask_[1] | cs1.mask_[1],
301 33919 cs0.mask_[2] | cs1.mask_[2],
302 33919 cs0.mask_[3] | cs1.mask_[3]);
303 }
304
305 /** Return the difference of two character sets.
306
307 This function returns a new character
308 set which is formed from all of the
309 characters in `cs0` which are not in `cs1`.
310
311 @par Example
312 This statement declares a character set
313 containing all the lowercase letters
314 which are not vowels:
315 @code
316 constexpr lut_chars consonants = lut_chars("abcdefghijklmnopqrstuvwxyz") - "aeiou";
317 @endcode
318
319 @par Complexity
320 Constant.
321
322 @return The new character set.
323
324 @param cs0 The character set to subtract from.
325
326 @param cs1 The character set whose members are removed.
327 */
328 friend
329 constexpr
330 lut_chars
331 186 operator-(
332 lut_chars const& cs0,
333 lut_chars const& cs1) noexcept
334 {
335 return lut_chars(
336 186 cs0.mask_[0] & ~cs1.mask_[0],
337 186 cs0.mask_[1] & ~cs1.mask_[1],
338 186 cs0.mask_[2] & ~cs1.mask_[2],
339 186 cs0.mask_[3] & ~cs1.mask_[3]);
340 }
341
342 /** Return a new character set which is the complement of another character set.
343
344 This function returns a new character
345 set which contains all of the characters
346 that are not in `*this`.
347
348 @par Example
349 This statement declares a character set
350 containing everything but vowels:
351 @code
352 constexpr lut_chars not_vowels = ~lut_chars( "AEIOU" "aeiou" );
353 @endcode
354
355 @par Complexity
356 Constant.
357
358 @par Exception Safety
359 Throws nothing.
360
361 @return The new character set.
362 */
363 constexpr
364 lut_chars
365 operator~() const noexcept
366 {
367 return lut_chars(
368 ~mask_[0],
369 ~mask_[1],
370 ~mask_[2],
371 ~mask_[3]
372 );
373 }
374
375 #ifndef BOOST_URL_DOCS
376 #ifdef BOOST_URL_USE_SSE2
377 char const*
378 1603 find_if(
379 char const* first,
380 char const* last) const noexcept
381 {
382 1603 return detail::find_if_pred(
383 1603 *this, first, last);
384 }
385
386 char const*
387 13841 find_if_not(
388 char const* first,
389 char const* last) const noexcept
390 {
391 13841 return detail::find_if_not_pred(
392 13841 *this, first, last);
393 }
394 #endif
395 #endif
396 };
397
398 } // grammar
399 } // urls
400 } // boost
401
402 #endif
403