Line | Branch | Exec | Source |
---|---|---|---|
1 | // | ||
2 | // Copyright (c) 2022 Alan de Freitas (alandefreitas@gmail.com) | ||
3 | // | ||
4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | ||
5 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | ||
6 | // | ||
7 | // Official repository: https://github.com/boostorg/url | ||
8 | // | ||
9 | |||
10 | #ifndef BOOST_URL_DETAIL_IMPL_PATTERN_IPP | ||
11 | #define BOOST_URL_DETAIL_IMPL_PATTERN_IPP | ||
12 | |||
13 | #include <boost/url/detail/config.hpp> | ||
14 | #include "pattern.hpp" | ||
15 | #include "pct_format.hpp" | ||
16 | #include "boost/url/detail/replacement_field_rule.hpp" | ||
17 | #include <boost/url/grammar/alpha_chars.hpp> | ||
18 | #include <boost/url/grammar/optional_rule.hpp> | ||
19 | #include <boost/url/grammar/token_rule.hpp> | ||
20 | #include "../rfc/detail/charsets.hpp" | ||
21 | #include "../rfc/detail/host_rule.hpp" | ||
22 | #include "boost/url/rfc/detail/path_rules.hpp" | ||
23 | #include "../rfc/detail/port_rule.hpp" | ||
24 | #include "../rfc/detail/scheme_rule.hpp" | ||
25 | |||
26 | namespace boost { | ||
27 | namespace urls { | ||
28 | namespace detail { | ||
29 | |||
30 | static constexpr auto lhost_chars = host_chars + ':'; | ||
31 | |||
32 | void | ||
33 | 140 | pattern:: | |
34 | apply( | ||
35 | url_base& u, | ||
36 | format_args const& args) const | ||
37 | { | ||
38 | // measure total | ||
39 | struct sizes | ||
40 | { | ||
41 | std::size_t scheme = 0; | ||
42 | std::size_t user = 0; | ||
43 | std::size_t pass = 0; | ||
44 | std::size_t host = 0; | ||
45 | std::size_t port = 0; | ||
46 | std::size_t path = 0; | ||
47 | std::size_t query = 0; | ||
48 | std::size_t frag = 0; | ||
49 | }; | ||
50 | 140 | sizes n; | |
51 | |||
52 | 140 | format_parse_context pctx(nullptr, nullptr, 0); | |
53 |
1/2✓ Branch 1 taken 140 times.
✗ Branch 2 not taken.
|
140 | measure_context mctx(args); |
54 |
2/2✓ Branch 1 taken 54 times.
✓ Branch 2 taken 86 times.
|
140 | if (!scheme.empty()) |
55 | { | ||
56 | 54 | pctx = {scheme, pctx.next_arg_id()}; | |
57 |
1/2✓ Branch 2 taken 54 times.
✗ Branch 3 not taken.
|
54 | n.scheme = pct_vmeasure( |
58 | grammar::alpha_chars, pctx, mctx); | ||
59 | 54 | mctx.advance_to(0); | |
60 | } | ||
61 |
2/2✓ Branch 0 taken 47 times.
✓ Branch 1 taken 93 times.
|
140 | if (has_authority) |
62 | { | ||
63 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 39 times.
|
47 | if (has_user) |
64 | { | ||
65 | 8 | pctx = {user, pctx.next_arg_id()}; | |
66 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | n.user = pct_vmeasure( |
67 | user_chars, pctx, mctx); | ||
68 | 8 | mctx.advance_to(0); | |
69 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
|
8 | if (has_pass) |
70 | { | ||
71 | 6 | pctx = {pass, pctx.next_arg_id()}; | |
72 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | n.pass = pct_vmeasure( |
73 | password_chars, pctx, mctx); | ||
74 | 6 | mctx.advance_to(0); | |
75 | } | ||
76 | } | ||
77 |
2/2✓ Branch 1 taken 1 times.
✓ Branch 2 taken 46 times.
|
47 | if (host.starts_with('[')) |
78 | { | ||
79 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
|
1 | BOOST_ASSERT(host.ends_with(']')); |
80 |
1/2✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
|
1 | pctx = {host.substr(1, host.size() - 2), pctx.next_arg_id()}; |
81 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | n.host = pct_vmeasure( |
82 | 1 | lhost_chars, pctx, mctx) + 2; | |
83 | 1 | mctx.advance_to(0); | |
84 | } | ||
85 | else | ||
86 | { | ||
87 | 46 | pctx = {host, pctx.next_arg_id()}; | |
88 |
1/2✓ Branch 1 taken 46 times.
✗ Branch 2 not taken.
|
46 | n.host = pct_vmeasure( |
89 | host_chars, pctx, mctx); | ||
90 | 46 | mctx.advance_to(0); | |
91 | } | ||
92 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 34 times.
|
47 | if (has_port) |
93 | { | ||
94 | 13 | pctx = {port, pctx.next_arg_id()}; | |
95 |
1/2✓ Branch 2 taken 13 times.
✗ Branch 3 not taken.
|
13 | n.port = pct_vmeasure( |
96 | grammar::digit_chars, pctx, mctx); | ||
97 | 13 | mctx.advance_to(0); | |
98 | } | ||
99 | } | ||
100 |
2/2✓ Branch 1 taken 102 times.
✓ Branch 2 taken 38 times.
|
140 | if (!path.empty()) |
101 | { | ||
102 | 102 | pctx = {path, pctx.next_arg_id()}; | |
103 |
2/2✓ Branch 1 taken 100 times.
✓ Branch 2 taken 2 times.
|
102 | n.path = pct_vmeasure( |
104 | path_chars, pctx, mctx); | ||
105 | 100 | mctx.advance_to(0); | |
106 | } | ||
107 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 125 times.
|
138 | if (has_query) |
108 | { | ||
109 | 13 | pctx = {query, pctx.next_arg_id()}; | |
110 |
1/2✓ Branch 1 taken 13 times.
✗ Branch 2 not taken.
|
13 | n.query = pct_vmeasure( |
111 | query_chars, pctx, mctx); | ||
112 | 13 | mctx.advance_to(0); | |
113 | } | ||
114 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 131 times.
|
138 | if (has_frag) |
115 | { | ||
116 | 7 | pctx = {frag, pctx.next_arg_id()}; | |
117 |
1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
|
7 | n.frag = pct_vmeasure( |
118 | fragment_chars, pctx, mctx); | ||
119 | 7 | mctx.advance_to(0); | |
120 | } | ||
121 | 138 | std::size_t const n_total = | |
122 | 138 | n.scheme + | |
123 | 138 | (n.scheme != 0) * 1 + // ":" | |
124 | 138 | has_authority * 2 + // "//" | |
125 | 138 | n.user + | |
126 | 138 | has_pass * 1 + // ":" | |
127 | 138 | n.pass + | |
128 | 138 | has_user * 1 + // "@" | |
129 | 138 | n.host + | |
130 | 138 | has_port * 1 + // ":" | |
131 | 138 | n.port + | |
132 | 138 | n.path + | |
133 | 138 | has_query * 1 + // "?" | |
134 | 138 | n.query + | |
135 | 138 | has_frag * 1 + // "#" | |
136 | 138 | n.frag; | |
137 |
2/2✓ Branch 1 taken 137 times.
✓ Branch 2 taken 1 times.
|
138 | u.reserve(n_total); |
138 | |||
139 | // Apply | ||
140 | 137 | pctx = {nullptr, nullptr, 0}; | |
141 | 137 | format_context fctx(nullptr, args); | |
142 | 274 | url_base::op_t op(u); | |
143 | using parts = parts_base; | ||
144 |
2/2✓ Branch 1 taken 53 times.
✓ Branch 2 taken 84 times.
|
137 | if (!scheme.empty()) |
145 | { | ||
146 | 106 | auto dest = u.resize_impl( | |
147 | parts::id_scheme, | ||
148 |
1/2✓ Branch 1 taken 53 times.
✗ Branch 2 not taken.
|
53 | n.scheme + 1, op); |
149 | 53 | pctx = {scheme, pctx.next_arg_id()}; | |
150 | 53 | fctx.advance_to(dest); | |
151 |
1/2✓ Branch 2 taken 53 times.
✗ Branch 3 not taken.
|
53 | const char* dest1 = pct_vformat( |
152 | grammar::alpha_chars, pctx, fctx); | ||
153 | 53 | dest[n.scheme] = ':'; | |
154 | // validate | ||
155 |
2/2✓ Branch 3 taken 1 times.
✓ Branch 4 taken 52 times.
|
53 | if (!grammar::parse({dest, dest1}, scheme_rule())) |
156 | { | ||
157 | 1 | throw_invalid_argument(); | |
158 | } | ||
159 | } | ||
160 |
2/2✓ Branch 0 taken 45 times.
✓ Branch 1 taken 91 times.
|
136 | if (has_authority) |
161 | { | ||
162 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 37 times.
|
45 | if (has_user) |
163 | { | ||
164 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | auto dest = u.set_user_impl( |
165 | n.user, op); | ||
166 | 8 | pctx = {user, pctx.next_arg_id()}; | |
167 | 8 | fctx.advance_to(dest); | |
168 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | char const* dest1 = pct_vformat( |
169 | user_chars, pctx, fctx); | ||
170 | 8 | u.impl_.decoded_[parts::id_user] = | |
171 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | pct_string_view(dest, dest1 - dest) |
172 | 8 | ->decoded_size(); | |
173 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
|
8 | if (has_pass) |
174 | { | ||
175 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | char* destp = u.set_password_impl( |
176 | n.pass, op); | ||
177 | 6 | pctx = {pass, pctx.next_arg_id()}; | |
178 | 6 | fctx.advance_to(destp); | |
179 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | dest1 = pct_vformat( |
180 | password_chars, pctx, fctx); | ||
181 | 6 | u.impl_.decoded_[parts::id_pass] = | |
182 |
1/2✓ Branch 2 taken 6 times.
✗ Branch 3 not taken.
|
6 | pct_string_view({destp, dest1}) |
183 | 6 | ->decoded_size() + 1; | |
184 | } | ||
185 | } | ||
186 |
1/2✓ Branch 1 taken 45 times.
✗ Branch 2 not taken.
|
45 | auto dest = u.set_host_impl( |
187 | n.host, op); | ||
188 |
2/2✓ Branch 1 taken 1 times.
✓ Branch 2 taken 44 times.
|
45 | if (host.starts_with('[')) |
189 | { | ||
190 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
|
1 | BOOST_ASSERT(host.ends_with(']')); |
191 |
1/2✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
|
1 | pctx = {host.substr(1, host.size() - 2), pctx.next_arg_id()}; |
192 | 1 | *dest++ = '['; | |
193 | 1 | fctx.advance_to(dest); | |
194 | char* dest1 = | ||
195 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | pct_vformat(lhost_chars, pctx, fctx); |
196 | 1 | *dest1++ = ']'; | |
197 | 1 | u.impl_.decoded_[parts::id_host] = | |
198 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
2 | pct_string_view(dest - 1, dest1 - dest) |
199 | 1 | ->decoded_size(); | |
200 | } | ||
201 | else | ||
202 | { | ||
203 | 44 | pctx = {host, pctx.next_arg_id()}; | |
204 | 44 | fctx.advance_to(dest); | |
205 | char const* dest1 = | ||
206 |
1/2✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
|
44 | pct_vformat(host_chars, pctx, fctx); |
207 | 44 | u.impl_.decoded_[parts::id_host] = | |
208 |
1/2✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
|
88 | pct_string_view(dest, dest1 - dest) |
209 | 44 | ->decoded_size(); | |
210 | } | ||
211 | 45 | auto uh = u.encoded_host(); | |
212 |
1/2✓ Branch 4 taken 45 times.
✗ Branch 5 not taken.
|
45 | auto h = grammar::parse(uh, host_rule).value(); |
213 | 45 | std::memcpy( | |
214 | 45 | u.impl_.ip_addr_, | |
215 | h.addr, | ||
216 | sizeof(u.impl_.ip_addr_)); | ||
217 | 45 | u.impl_.host_type_ = h.host_type; | |
218 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 32 times.
|
45 | if (has_port) |
219 | { | ||
220 |
1/2✓ Branch 1 taken 13 times.
✗ Branch 2 not taken.
|
13 | dest = u.set_port_impl(n.port, op); |
221 | 13 | pctx = {port, pctx.next_arg_id()}; | |
222 | 13 | fctx.advance_to(dest); | |
223 |
1/2✓ Branch 2 taken 13 times.
✗ Branch 3 not taken.
|
13 | char const* dest1 = pct_vformat( |
224 | grammar::digit_chars, pctx, fctx); | ||
225 | 13 | u.impl_.decoded_[parts::id_port] = | |
226 |
1/2✓ Branch 1 taken 13 times.
✗ Branch 2 not taken.
|
13 | pct_string_view(dest, dest1 - dest) |
227 | 13 | ->decoded_size() + 1; | |
228 | 13 | core::string_view up = {dest - 1, dest1}; | |
229 |
1/2✓ Branch 3 taken 13 times.
✗ Branch 4 not taken.
|
13 | auto p = grammar::parse(up, detail::port_part_rule).value(); |
230 |
1/2✓ Branch 0 taken 13 times.
✗ Branch 1 not taken.
|
13 | if (p.has_port) |
231 | 13 | u.impl_.port_number_ = p.port_number; | |
232 | } | ||
233 | } | ||
234 |
2/2✓ Branch 1 taken 100 times.
✓ Branch 2 taken 36 times.
|
136 | if (!path.empty()) |
235 | { | ||
236 |
1/2✓ Branch 1 taken 100 times.
✗ Branch 2 not taken.
|
100 | auto dest = u.resize_impl( |
237 | parts::id_path, | ||
238 | n.path, op); | ||
239 | 100 | pctx = {path, pctx.next_arg_id()}; | |
240 | 100 | fctx.advance_to(dest); | |
241 |
1/2✓ Branch 1 taken 100 times.
✗ Branch 2 not taken.
|
100 | auto dest1 = pct_vformat( |
242 | path_chars, pctx, fctx); | ||
243 |
1/2✓ Branch 1 taken 100 times.
✗ Branch 2 not taken.
|
100 | pct_string_view npath(dest, dest1 - dest); |
244 | 100 | u.impl_.decoded_[parts::id_path] += | |
245 | 100 | npath.decoded_size(); | |
246 |
1/2✓ Branch 1 taken 100 times.
✗ Branch 2 not taken.
|
100 | if (!npath.empty()) |
247 | { | ||
248 |
1/2✓ Branch 1 taken 100 times.
✗ Branch 2 not taken.
|
100 | u.impl_.nseg_ = std::count( |
249 | 100 | npath.begin() + 1, | |
250 | 200 | npath.end(), '/') + 1; | |
251 | } | ||
252 | // handle edge cases | ||
253 | // 1) path is first component and the | ||
254 | // first segment contains an unencoded ':' | ||
255 | // This is impossible because the template | ||
256 | // "{}" would be a host. | ||
257 |
4/4✓ Branch 2 taken 78 times.
✓ Branch 3 taken 22 times.
✓ Branch 4 taken 78 times.
✓ Branch 5 taken 22 times.
|
178 | if (u.scheme().empty() && |
258 |
1/2✓ Branch 1 taken 78 times.
✗ Branch 2 not taken.
|
78 | !u.has_authority()) |
259 | { | ||
260 | 78 | auto fseg = u.encoded_segments().front(); | |
261 |
1/2✓ Branch 2 taken 78 times.
✗ Branch 3 not taken.
|
78 | std::size_t nc = std::count( |
262 | 78 | fseg.begin(), fseg.end(), ':'); | |
263 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 74 times.
|
78 | if (nc) |
264 | { | ||
265 | 4 | std::size_t diff = nc * 2; | |
266 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | u.reserve(n_total + diff); |
267 | 8 | dest = u.resize_impl( | |
268 | parts::id_path, | ||
269 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | n.path + diff, op); |
270 | 4 | char* dest0 = dest + diff; | |
271 | 4 | std::memmove(dest0, dest, n.path); | |
272 |
2/2✓ Branch 0 taken 23 times.
✓ Branch 1 taken 4 times.
|
27 | while (dest0 != dest) |
273 | { | ||
274 |
2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 8 times.
|
23 | if (*dest0 != ':') |
275 | { | ||
276 | 15 | *dest++ = *dest0++; | |
277 | } | ||
278 | else | ||
279 | { | ||
280 | 8 | *dest++ = '%'; | |
281 | 8 | *dest++ = '3'; | |
282 | 8 | *dest++ = 'A'; | |
283 | 8 | dest0++; | |
284 | } | ||
285 | } | ||
286 | } | ||
287 | } | ||
288 | // 2) url has no authority and path | ||
289 | // starts with "//" | ||
290 |
4/4✓ Branch 1 taken 86 times.
✓ Branch 2 taken 14 times.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 84 times.
|
186 | if (!u.has_authority() && |
291 |
2/2✓ Branch 2 taken 2 times.
✓ Branch 3 taken 98 times.
|
186 | u.encoded_path().starts_with("//")) |
292 | { | ||
293 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | u.reserve(n_total + 2); |
294 | 4 | dest = u.resize_impl( | |
295 | parts::id_path, | ||
296 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | n.path + 2, op); |
297 | 2 | std::memmove(dest + 2, dest, n.path); | |
298 | 2 | *dest++ = '/'; | |
299 | 2 | *dest = '.'; | |
300 | } | ||
301 | } | ||
302 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 123 times.
|
136 | if (has_query) |
303 | { | ||
304 | 26 | auto dest = u.resize_impl( | |
305 | parts::id_query, | ||
306 |
1/2✓ Branch 1 taken 13 times.
✗ Branch 2 not taken.
|
13 | n.query + 1, op); |
307 | 13 | *dest++ = '?'; | |
308 | 13 | pctx = {query, pctx.next_arg_id()}; | |
309 | 13 | fctx.advance_to(dest); | |
310 |
1/2✓ Branch 1 taken 13 times.
✗ Branch 2 not taken.
|
13 | auto dest1 = pct_vformat( |
311 | query_chars, pctx, fctx); | ||
312 |
1/2✓ Branch 1 taken 13 times.
✗ Branch 2 not taken.
|
13 | pct_string_view nquery(dest, dest1 - dest); |
313 | 13 | u.impl_.decoded_[parts::id_query] += | |
314 | 13 | nquery.decoded_size() + 1; | |
315 |
1/2✓ Branch 1 taken 13 times.
✗ Branch 2 not taken.
|
13 | if (!nquery.empty()) |
316 | { | ||
317 |
1/2✓ Branch 2 taken 13 times.
✗ Branch 3 not taken.
|
13 | u.impl_.nparam_ = std::count( |
318 | nquery.begin(), | ||
319 | 26 | nquery.end(), '&') + 1; | |
320 | } | ||
321 | } | ||
322 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 129 times.
|
136 | if (has_frag) |
323 | { | ||
324 | 14 | auto dest = u.resize_impl( | |
325 | parts::id_frag, | ||
326 |
1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
|
7 | n.frag + 1, op); |
327 | 7 | *dest++ = '#'; | |
328 | 7 | pctx = {frag, pctx.next_arg_id()}; | |
329 | 7 | fctx.advance_to(dest); | |
330 |
1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
|
7 | auto dest1 = pct_vformat( |
331 | fragment_chars, pctx, fctx); | ||
332 | 7 | u.impl_.decoded_[parts::id_frag] += | |
333 | 7 | make_pct_string_view( | |
334 | 7 | core::string_view(dest, dest1 - dest)) | |
335 | 7 | ->decoded_size() + 1; | |
336 | } | ||
337 | 136 | } | |
338 | |||
339 | // This rule represents a pct-encoded string | ||
340 | // that contains an arbitrary number of | ||
341 | // replacement ids in it | ||
342 | template<class CharSet> | ||
343 | struct pct_encoded_fmt_string_rule_t | ||
344 | { | ||
345 | using value_type = pct_string_view; | ||
346 | |||
347 | constexpr | ||
348 | pct_encoded_fmt_string_rule_t( | ||
349 | CharSet const& cs) noexcept | ||
350 | : cs_(cs) | ||
351 | { | ||
352 | } | ||
353 | |||
354 | template<class CharSet_> | ||
355 | friend | ||
356 | constexpr | ||
357 | auto | ||
358 | pct_encoded_fmt_string_rule( | ||
359 | CharSet_ const& cs) noexcept -> | ||
360 | pct_encoded_fmt_string_rule_t<CharSet_>; | ||
361 | |||
362 | system::result<value_type> | ||
363 | 482 | parse( | |
364 | char const*& it, | ||
365 | char const* end) const noexcept | ||
366 | { | ||
367 | 482 | auto const start = it; | |
368 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 240 times.
|
482 | if(it == end) |
369 | { | ||
370 | // this might be empty | ||
371 | 2 | return {}; | |
372 | } | ||
373 | |||
374 | // consume some with literal rule | ||
375 | // this might be an empty literal | ||
376 | 480 | auto literal_rule = pct_encoded_rule(cs_); | |
377 | 480 | auto rv = literal_rule.parse(it, end); | |
378 |
1/2✓ Branch 1 taken 470 times.
✗ Branch 2 not taken.
|
940 | while (rv) |
379 | { | ||
380 | 940 | auto it0 = it; | |
381 | // consume some with replacement id | ||
382 | // rule | ||
383 |
2/2✓ Branch 2 taken 240 times.
✓ Branch 3 taken 230 times.
|
940 | if (!replacement_field_rule.parse(it, end)) |
384 | { | ||
385 | 480 | it = it0; | |
386 | 480 | break; | |
387 | } | ||
388 | 460 | rv = literal_rule.parse(it, end); | |
389 | } | ||
390 | |||
391 | 480 | return core::string_view(start, it - start); | |
392 | } | ||
393 | |||
394 | private: | ||
395 | CharSet cs_; | ||
396 | }; | ||
397 | |||
398 | template<class CharSet> | ||
399 | constexpr | ||
400 | auto | ||
401 | pct_encoded_fmt_string_rule( | ||
402 | CharSet const& cs) noexcept -> | ||
403 | pct_encoded_fmt_string_rule_t<CharSet> | ||
404 | { | ||
405 | // If an error occurs here it means that | ||
406 | // the value of your type does not meet | ||
407 | // the requirements. Please check the | ||
408 | // documentation! | ||
409 | static_assert( | ||
410 | grammar::is_charset<CharSet>::value, | ||
411 | "CharSet requirements not met"); | ||
412 | |||
413 | return pct_encoded_fmt_string_rule_t<CharSet>(cs); | ||
414 | } | ||
415 | |||
416 | // This rule represents a regular string with | ||
417 | // only chars from the specified charset and | ||
418 | // an arbitrary number of replacement ids in it | ||
419 | template<class CharSet> | ||
420 | struct fmt_token_rule_t | ||
421 | { | ||
422 | using value_type = pct_string_view; | ||
423 | |||
424 | constexpr | ||
425 | fmt_token_rule_t( | ||
426 | CharSet const& cs) noexcept | ||
427 | : cs_(cs) | ||
428 | { | ||
429 | } | ||
430 | |||
431 | template<class CharSet_> | ||
432 | friend | ||
433 | constexpr | ||
434 | auto | ||
435 | fmt_token_rule( | ||
436 | CharSet_ const& cs) noexcept -> | ||
437 | fmt_token_rule_t<CharSet_>; | ||
438 | |||
439 | system::result<value_type> | ||
440 | 13 | parse( | |
441 | char const*& it, | ||
442 | char const* end) const noexcept | ||
443 | { | ||
444 | 13 | auto const start = it; | |
445 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 13 times.
|
13 | BOOST_ASSERT(it != end); |
446 | /* | ||
447 | // This should never happen because | ||
448 | // all tokens are optional and will | ||
449 | // already return `none`: | ||
450 | if(it == end) | ||
451 | { | ||
452 | BOOST_URL_RETURN_EC( | ||
453 | grammar::error::need_more); | ||
454 | } | ||
455 | */ | ||
456 | |||
457 | // consume some with literal rule | ||
458 | // this might be an empty literal | ||
459 | auto partial_token_rule = | ||
460 | 13 | grammar::optional_rule( | |
461 | 13 | grammar::token_rule(cs_)); | |
462 | 26 | auto rv = partial_token_rule.parse(it, end); | |
463 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | while (rv) |
464 | { | ||
465 | 24 | auto it0 = it; | |
466 | // consume some with replacement id | ||
467 |
2/2✓ Branch 2 taken 13 times.
✓ Branch 3 taken 11 times.
|
24 | if (!replacement_field_rule.parse(it, end)) |
468 | { | ||
469 | // no replacement and no more cs | ||
470 | // before: nothing else to consume | ||
471 | 13 | it = it0; | |
472 | 13 | break; | |
473 | } | ||
474 | // after {...}, consume any more chars | ||
475 | // in the charset | ||
476 | 11 | rv = partial_token_rule.parse(it, end); | |
477 | } | ||
478 | |||
479 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 12 times.
|
13 | if(it == start) |
480 | { | ||
481 | // it != end but we consumed nothing | ||
482 | 1 | BOOST_URL_RETURN_EC( | |
483 | grammar::error::need_more); | ||
484 | } | ||
485 | |||
486 | 12 | return core::string_view(start, it - start); | |
487 | } | ||
488 | |||
489 | private: | ||
490 | CharSet cs_; | ||
491 | }; | ||
492 | |||
493 | template<class CharSet> | ||
494 | constexpr | ||
495 | auto | ||
496 | fmt_token_rule( | ||
497 | CharSet const& cs) noexcept -> | ||
498 | fmt_token_rule_t<CharSet> | ||
499 | { | ||
500 | // If an error occurs here it means that | ||
501 | // the value of your type does not meet | ||
502 | // the requirements. Please check the | ||
503 | // documentation! | ||
504 | static_assert( | ||
505 | grammar::is_charset<CharSet>::value, | ||
506 | "CharSet requirements not met"); | ||
507 | |||
508 | return fmt_token_rule_t<CharSet>(cs); | ||
509 | } | ||
510 | |||
511 | struct userinfo_template_rule_t | ||
512 | { | ||
513 | struct value_type | ||
514 | { | ||
515 | core::string_view user; | ||
516 | core::string_view password; | ||
517 | bool has_password = false; | ||
518 | }; | ||
519 | |||
520 | auto | ||
521 | 48 | parse( | |
522 | char const*& it, | ||
523 | char const* end | ||
524 | ) const noexcept -> | ||
525 | system::result<value_type> | ||
526 | { | ||
527 | static constexpr auto uchars = | ||
528 | unreserved_chars + | ||
529 | sub_delim_chars; | ||
530 | static constexpr auto pwchars = | ||
531 | uchars + ':'; | ||
532 | |||
533 | 48 | value_type t; | |
534 | |||
535 | // user | ||
536 | static constexpr auto user_fmt_rule = | ||
537 | pct_encoded_fmt_string_rule(uchars); | ||
538 | auto rv = grammar::parse( | ||
539 | 48 | it, end, user_fmt_rule); | |
540 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 48 times.
|
48 | BOOST_ASSERT(rv); |
541 | 48 | t.user = *rv; | |
542 | |||
543 | // ':' | ||
544 |
2/2✓ Branch 0 taken 31 times.
✓ Branch 1 taken 17 times.
|
48 | if( it == end || |
545 |
2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 16 times.
|
31 | *it != ':') |
546 | { | ||
547 | 32 | t.has_password = false; | |
548 | 32 | t.password = {}; | |
549 | 32 | return t; | |
550 | } | ||
551 | 16 | ++it; | |
552 | |||
553 | // pass | ||
554 | static constexpr auto pass_fmt_rule = | ||
555 | pct_encoded_fmt_string_rule(grammar::ref(pwchars)); | ||
556 | rv = grammar::parse( | ||
557 | 16 | it, end, pass_fmt_rule); | |
558 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
|
16 | BOOST_ASSERT(rv); |
559 | 16 | t.has_password = true; | |
560 | 16 | t.password = *rv; | |
561 | |||
562 | 16 | return t; | |
563 | } | ||
564 | }; | ||
565 | |||
566 | constexpr userinfo_template_rule_t userinfo_template_rule{}; | ||
567 | |||
568 | struct host_template_rule_t | ||
569 | { | ||
570 | using value_type = core::string_view; | ||
571 | |||
572 | auto | ||
573 | 49 | parse( | |
574 | char const*& it, | ||
575 | char const* end | ||
576 | ) const noexcept -> | ||
577 | system::result<value_type> | ||
578 | { | ||
579 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 48 times.
|
49 | if(it == end) |
580 | { | ||
581 | // empty host | ||
582 | 1 | return {}; | |
583 | } | ||
584 | |||
585 | // the host type will be ultimately | ||
586 | // validated when applying the replacement | ||
587 | // strings. Any chars allowed in hosts | ||
588 | // are allowed here. | ||
589 |
2/2✓ Branch 0 taken 46 times.
✓ Branch 1 taken 2 times.
|
48 | if (*it != '[') |
590 | { | ||
591 | // IPv4address and reg-name have the | ||
592 | // same char sets. | ||
593 | 46 | constexpr auto any_host_template_rule = | |
594 | pct_encoded_fmt_string_rule(host_chars); | ||
595 | auto rv = grammar::parse( | ||
596 | 46 | it, end, any_host_template_rule); | |
597 | // any_host_template_rule can always | ||
598 | // be empty, so it's never invalid | ||
599 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 46 times.
|
46 | BOOST_ASSERT(rv); |
600 | 46 | return detail::to_sv(*rv); | |
601 | } | ||
602 | // IP-literals need to be enclosed in | ||
603 | // "[]" if using ':' in the template | ||
604 | // string, because the ':' would be | ||
605 | // ambiguous with the port in fmt string. | ||
606 | // The "[]:" can be used in replacement | ||
607 | // strings without the "[]" though. | ||
608 | 2 | constexpr auto ip_literal_template_rule = | |
609 | pct_encoded_fmt_string_rule(lhost_chars); | ||
610 | 2 | auto it0 = it; | |
611 | auto rv = grammar::parse( | ||
612 | it, end, | ||
613 | 2 | grammar::optional_rule( | |
614 | 2 | grammar::tuple_rule( | |
615 | 2 | grammar::squelch( | |
616 | 2 | grammar::delim_rule('[')), | |
617 | ip_literal_template_rule, | ||
618 | 2 | grammar::squelch( | |
619 | 4 | grammar::delim_rule(']'))))); | |
620 | // ip_literal_template_rule can always | ||
621 | // be empty, so it's never invalid, but | ||
622 | // the rule might fail to match the | ||
623 | // closing "]" | ||
624 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
|
2 | BOOST_ASSERT(rv); |
625 | 2 | return core::string_view{it0, it}; | |
626 | } | ||
627 | }; | ||
628 | |||
629 | constexpr host_template_rule_t host_template_rule{}; | ||
630 | |||
631 | struct authority_template_rule_t | ||
632 | { | ||
633 | using value_type = pattern; | ||
634 | |||
635 | system::result<value_type> | ||
636 | 49 | parse( | |
637 | char const*& it, | ||
638 | char const* end | ||
639 | ) const noexcept | ||
640 | { | ||
641 | 49 | pattern u; | |
642 | |||
643 | // [ userinfo "@" ] | ||
644 | { | ||
645 | auto rv = grammar::parse( | ||
646 | it, end, | ||
647 | 49 | grammar::optional_rule( | |
648 | 49 | grammar::tuple_rule( | |
649 | userinfo_template_rule, | ||
650 | 49 | grammar::squelch( | |
651 | 98 | grammar::delim_rule('@'))))); | |
652 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 49 times.
|
49 | BOOST_ASSERT(rv); |
653 |
2/2✓ Branch 2 taken 9 times.
✓ Branch 3 taken 40 times.
|
49 | if(rv->has_value()) |
654 | { | ||
655 | 9 | auto& r = **rv; | |
656 | 9 | u.has_user = true; | |
657 | 9 | u.user = r.user; | |
658 | 9 | u.has_pass = r.has_password; | |
659 | 9 | u.pass = r.password; | |
660 | } | ||
661 | } | ||
662 | |||
663 | // host | ||
664 | { | ||
665 | auto rv = grammar::parse( | ||
666 | it, end, | ||
667 | 49 | host_template_rule); | |
668 | // host is allowed to be empty | ||
669 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 49 times.
|
49 | BOOST_ASSERT(rv); |
670 | 49 | u.host = *rv; | |
671 | } | ||
672 | |||
673 | // [ ":" port ] | ||
674 | { | ||
675 | constexpr auto port_template_rule = | ||
676 | grammar::optional_rule( | ||
677 | fmt_token_rule(grammar::digit_chars)); | ||
678 | 49 | auto it0 = it; | |
679 | auto rv = grammar::parse( | ||
680 | it, end, | ||
681 | 49 | grammar::tuple_rule( | |
682 | 49 | grammar::squelch( | |
683 | 49 | grammar::delim_rule(':')), | |
684 | 98 | port_template_rule)); | |
685 |
2/2✓ Branch 1 taken 35 times.
✓ Branch 2 taken 14 times.
|
49 | if (!rv) |
686 | { | ||
687 | 35 | it = it0; | |
688 | } | ||
689 | else | ||
690 | { | ||
691 | 14 | u.has_port = true; | |
692 |
2/2✓ Branch 2 taken 12 times.
✓ Branch 3 taken 2 times.
|
14 | if (rv->has_value()) |
693 | { | ||
694 | 12 | u.port = **rv; | |
695 | } | ||
696 | } | ||
697 | } | ||
698 | |||
699 | 49 | return u; | |
700 | } | ||
701 | }; | ||
702 | |||
703 | constexpr authority_template_rule_t authority_template_rule{}; | ||
704 | |||
705 | struct scheme_template_rule_t | ||
706 | { | ||
707 | using value_type = core::string_view; | ||
708 | |||
709 | system::result<value_type> | ||
710 | 147 | parse( | |
711 | char const*& it, | ||
712 | char const* end) const noexcept | ||
713 | { | ||
714 | 147 | auto const start = it; | |
715 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 146 times.
|
147 | if(it == end) |
716 | { | ||
717 | // scheme can't be empty | ||
718 | 1 | BOOST_URL_RETURN_EC( | |
719 | grammar::error::mismatch); | ||
720 | } | ||
721 |
4/4✓ Branch 1 taken 124 times.
✓ Branch 2 taken 22 times.
✓ Branch 3 taken 20 times.
✓ Branch 4 taken 126 times.
|
270 | if(!grammar::alpha_chars(*it) && |
722 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 104 times.
|
124 | *it != '{') |
723 | { | ||
724 | // expected alpha | ||
725 | 20 | BOOST_URL_RETURN_EC( | |
726 | grammar::error::mismatch); | ||
727 | } | ||
728 | |||
729 | // it starts with replacement id or alpha char | ||
730 |
2/2✓ Branch 1 taken 104 times.
✓ Branch 2 taken 22 times.
|
126 | if (!grammar::alpha_chars(*it)) |
731 | { | ||
732 |
2/2✓ Branch 2 taken 2 times.
✓ Branch 3 taken 102 times.
|
104 | if (!replacement_field_rule.parse(it, end)) |
733 | { | ||
734 | // replacement_field_rule is invalid | ||
735 | 2 | BOOST_URL_RETURN_EC( | |
736 | grammar::error::mismatch); | ||
737 | } | ||
738 | } | ||
739 | else | ||
740 | { | ||
741 | // skip first | ||
742 | 22 | ++it; | |
743 | } | ||
744 | |||
745 | static | ||
746 | constexpr | ||
747 | grammar::lut_chars scheme_chars( | ||
748 | "0123456789" "+-." | ||
749 | "ABCDEFGHIJKLMNOPQRSTUVWXYZ" | ||
750 | "abcdefghijklmnopqrstuvwxyz"); | ||
751 | |||
752 | // non-scheme chars might be a new | ||
753 | // replacement-id or just an invalid char | ||
754 | 124 | it = grammar::find_if_not( | |
755 | it, end, scheme_chars); | ||
756 |
2/2✓ Branch 0 taken 75 times.
✓ Branch 1 taken 51 times.
|
126 | while (it != end) |
757 | { | ||
758 | 75 | auto it0 = it; | |
759 |
2/2✓ Branch 2 taken 73 times.
✓ Branch 3 taken 2 times.
|
75 | if (!replacement_field_rule.parse(it, end)) |
760 | { | ||
761 | 73 | it = it0; | |
762 | 73 | break; | |
763 | } | ||
764 | 2 | it = grammar::find_if_not( | |
765 | it, end, scheme_chars); | ||
766 | } | ||
767 | 124 | return core::string_view(start, it - start); | |
768 | } | ||
769 | }; | ||
770 | |||
771 | constexpr scheme_template_rule_t scheme_template_rule{}; | ||
772 | |||
773 | // Grammar rule that parses a url format string into a `pattern`: optional scheme, optional "//" authority, path, optional "?" query and optional "#" fragment. | ||
774 | // It should consider all url types at the same time according to the format string, for instance: | ||
775 | // - relative urls with no scheme/authority | ||
776 | // - absolute urls, which have no fragment | ||
777 | struct pattern_rule_t | ||
778 | { | ||
779 | using value_type = pattern; | ||
780 | |||
781 | system::result<value_type> | ||
782 | 147 | parse( | |
783 | char const*& it, | ||
784 | char const* const end | ||
785 | ) const noexcept | ||
786 | { | ||
787 | 147 | pattern u; | |
788 | |||
789 | // optional scheme: a scheme template followed by ':'; if that does not match, `it` is rewound so nothing is consumed | ||
790 | { | ||
791 | 147 | auto it0 = it; | |
792 | auto rv = grammar::parse( | ||
793 | it, end, | ||
794 | 147 | grammar::tuple_rule( | |
795 | scheme_template_rule, | ||
796 | 147 | grammar::squelch( | |
797 | 147 | grammar::delim_rule(':')))); | |
798 |
2/2✓ Branch 1 taken 59 times.
✓ Branch 2 taken 88 times.
|
147 | if(rv) |
799 | 59 | u.scheme = *rv; | |
800 | else | ||
801 | 88 | it = it0; | |
802 | } | ||
803 | |||
804 | // hier_part (authority + path) | ||
805 | // if there are fewer than 2 chars left there is no room for the "//" that | ||
806 | // introduces an authority, so we are parsing the path | ||
807 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 143 times.
|
147 | if (it == end) |
808 | { | ||
809 | // the input is exhausted, so we can consider | ||
810 | // that a "path-empty" and return what was parsed so far | ||
811 | 4 | return u; | |
812 | } | ||
813 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 138 times.
|
143 | if(end - it == 1) |
814 | { | ||
815 | // only one char left | ||
816 | // it can be a single separator "/", | ||
817 | // representing an empty absolute path, | ||
818 | // or a single-char segment | ||
819 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 3 times.
|
5 | if(*it == '/') |
820 | { | ||
821 | // path-absolute: the "/" is stored as the path and consumed | ||
822 | 2 | u.path = {it, 1}; | |
823 | 2 | ++it; | |
824 | 2 | return u; | |
825 | } | ||
826 | // this can be a: | ||
827 | // - path-noscheme if there's no scheme, or | ||
828 | // - path-rootless with a single char, or | ||
829 | // - path-empty (and consume nothing), when there is no scheme and the char is ':' | ||
830 |
3/4✓ Branch 1 taken 1 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 3 times.
✗ Branch 4 not taken.
|
4 | if (!u.scheme.empty() || |
831 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | *it != ':') |
832 | { | ||
833 | // path-rootless with a single char | ||
834 | // this needs to be a segment because | ||
835 | // the authority needs two slashes | ||
836 | // "//" | ||
837 | // path-noscheme also matches here | ||
838 | // because we already validated the | ||
839 | // first char is not ':'; if segment_rule fails, its error is returned | ||
840 | auto rv = grammar::parse( | ||
841 | 3 | it, end, urls::detail::segment_rule); | |
842 |
2/2✓ Branch 1 taken 1 times.
✓ Branch 2 taken 2 times.
|
3 | if(! rv) |
843 | 1 | return rv.error(); | |
844 | 2 | u.path = *rv; | |
845 | } | ||
846 | 2 | return u; | |
847 | } | ||
848 | |||
849 | // authority | ||
850 |
2/2✓ Branch 0 taken 62 times.
✓ Branch 1 taken 76 times.
|
138 | if( it[0] == '/' && |
851 |
2/2✓ Branch 0 taken 49 times.
✓ Branch 1 taken 13 times.
|
62 | it[1] == '/') |
852 | { | ||
853 | // "//" always indicates authority | ||
854 | 49 | it += 2; | |
855 | auto rv = grammar::parse( | ||
856 | it, end, | ||
857 | 49 | authority_template_rule); | |
858 | // authority is allowed to be empty, so this parse is asserted rather than checked | ||
859 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 49 times.
|
49 | BOOST_ASSERT(rv); |
860 | 49 | u.has_authority = true; | |
861 | 49 | u.has_user = rv->has_user; | |
862 | 49 | u.user = rv->user; | |
863 | 49 | u.has_pass = rv->has_pass; | |
864 | 49 | u.pass = rv->pass; | |
865 | 49 | u.host = rv->host; | |
866 | 49 | u.has_port = rv->has_port; | |
867 | 49 | u.port = rv->port; | |
868 | } | ||
869 | |||
870 | // the authority requires an absolute path or an empty path: we are done if the input is exhausted, | ||
871 | // or if an authority was parsed and the next char is none of '/', '?' and '#' | ||
872 |
2/2✓ Branch 0 taken 111 times.
✓ Branch 1 taken 27 times.
|
138 | if (it == end || |
873 |
2/2✓ Branch 0 taken 22 times.
✓ Branch 1 taken 89 times.
|
111 | (u.has_authority && |
874 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 14 times.
|
22 | (*it != '/' && |
875 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
|
8 | *it != '?' && |
876 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
2 | *it != '#'))) |
877 | { | ||
878 | // path-empty | ||
879 | 29 | return u; | |
880 | } | ||
881 | |||
882 | // path-abempty | ||
883 | // consume the whole path at once because | ||
884 | // we're going to count number of segments | ||
885 | // later after the replacements happen | ||
886 | static constexpr auto segment_fmt_rule = | ||
887 | pct_encoded_fmt_string_rule(path_chars); | ||
888 | auto rp = grammar::parse( | ||
889 | 109 | it, end, segment_fmt_rule); | |
890 | // path-abempty is allowed to be empty, so this parse is asserted rather than checked | ||
891 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 109 times.
|
109 | BOOST_ASSERT(rp); |
892 | 109 | u.path = *rp; | |
893 | |||
894 | // [ "?" query ] | ||
895 | { | ||
896 | static constexpr auto query_fmt_rule = | ||
897 | pct_encoded_fmt_string_rule(query_chars); | ||
898 | auto rv = grammar::parse( | ||
899 | it, end, | ||
900 | 109 | grammar::tuple_rule( | |
901 | 109 | grammar::squelch( | |
902 | 109 | grammar::delim_rule('?')), | |
903 | 109 | query_fmt_rule)); | |
904 | // query is allowed to be empty but | ||
905 | // delim rule is not, so the query is only recorded when the parse succeeded | ||
906 |
2/2✓ Branch 1 taken 13 times.
✓ Branch 2 taken 96 times.
|
109 | if (rv) |
907 | { | ||
908 | 13 | u.has_query = true; | |
909 | 13 | u.query = *rv; | |
910 | } | ||
911 | } | ||
912 | |||
913 | // [ "#" fragment ] | ||
914 | { | ||
915 | static constexpr auto frag_fmt_rule = | ||
916 | pct_encoded_fmt_string_rule(fragment_chars); | ||
917 | auto rv = grammar::parse( | ||
918 | it, end, | ||
919 | 109 | grammar::tuple_rule( | |
920 | 109 | grammar::squelch( | |
921 | 109 | grammar::delim_rule('#')), | |
922 | 109 | frag_fmt_rule)); | |
923 | // frag is allowed to be empty but | ||
924 | // delim rule is not, so the fragment is only recorded when the parse succeeded | ||
925 |
2/2✓ Branch 1 taken 7 times.
✓ Branch 2 taken 102 times.
|
109 | if (rv) |
926 | { | ||
927 | 7 | u.has_frag = true; | |
928 | 7 | u.frag = *rv; | |
929 | } | ||
930 | } | ||
931 | |||
932 | 109 | return u; | |
933 | } | ||
934 | }; | ||
935 | |||
// Value-initialized instance of pattern_rule_t; parse_pattern below passes it to grammar::parse.
936 | constexpr pattern_rule_t pattern_rule{}; | ||
937 | |||
// Parses the string `s` with pattern_rule by calling grammar::parse,
// and returns that call's result unchanged: a system::result holding
// either the parsed pattern or the error reported by the parse.
938 | system::result<pattern> | ||
939 | 147 | parse_pattern( | |
940 | core::string_view s) | ||
941 | { | ||
942 | return grammar::parse( | ||
943 | 147 | s, pattern_rule); | |
944 | } | ||
945 | |||
946 | } // detail | ||
947 | } // urls | ||
948 | } // boost | ||
949 | |||
950 | #endif | ||
951 |