Line | Branch | Exec | Source |
---|---|---|---|
1 | // | ||
2 | // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com) | ||
3 | // Copyright (c) 2022 Alan de Freitas (alandefreitas@gmail.com) | ||
4 | // | ||
5 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | ||
6 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | ||
7 | // | ||
8 | // Official repository: https://github.com/boostorg/url | ||
9 | // | ||
10 | |||
11 | #ifndef BOOST_URL_DETAIL_IMPL_NORMALIZE_IPP | ||
12 | #define BOOST_URL_DETAIL_IMPL_NORMALIZE_IPP | ||
13 | |||
14 | #include <boost/url/detail/config.hpp> | ||
15 | #include <boost/url/decode_view.hpp> | ||
16 | #include "decode.hpp" | ||
17 | #include <boost/url/segments_encoded_view.hpp> | ||
18 | #include <boost/url/grammar/ci_string.hpp> | ||
19 | #include <boost/assert.hpp> | ||
20 | #include <boost/core/ignore_unused.hpp> | ||
21 | #include <cstring> | ||
22 | #include "normalize.hpp" | ||
23 | |||
24 | namespace boost { | ||
25 | namespace urls { | ||
26 | namespace detail { | ||
27 | |||
28 | void | ||
29 | 5604 | pop_encoded_front( | |
30 | core::string_view& s, | ||
31 | char& c, | ||
32 | std::size_t& n) noexcept | ||
33 | { | ||
34 |
2/2✓ Branch 1 taken 5514 times.
✓ Branch 2 taken 90 times.
|
5604 | if(s.front() != '%') |
35 | { | ||
36 | 5514 | c = s.front(); | |
37 | 5514 | s.remove_prefix(1); | |
38 | } | ||
39 | else | ||
40 | { | ||
41 | 90 | detail::decode_unsafe( | |
42 | &c, | ||
43 | &c + 1, | ||
44 | s.substr(0, 3)); | ||
45 | 90 | s.remove_prefix(3); | |
46 | } | ||
47 | 5604 | ++n; | |
48 | 5604 | } | |
49 | |||
50 | int | ||
51 | 77 | compare_encoded( | |
52 | core::string_view lhs, | ||
53 | core::string_view rhs) noexcept | ||
54 | { | ||
55 | 77 | std::size_t n0 = 0; | |
56 | 77 | std::size_t n1 = 0; | |
57 | 77 | char c0 = 0; | |
58 | 77 | char c1 = 0; | |
59 | 205 | while( | |
60 |
4/4✓ Branch 1 taken 253 times.
✓ Branch 2 taken 29 times.
✓ Branch 3 taken 240 times.
✓ Branch 4 taken 42 times.
|
535 | !lhs.empty() && |
61 |
2/2✓ Branch 1 taken 240 times.
✓ Branch 2 taken 13 times.
|
253 | !rhs.empty()) |
62 | { | ||
63 | 240 | pop_encoded_front(lhs, c0, n0); | |
64 | 240 | pop_encoded_front(rhs, c1, n1); | |
65 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 220 times.
|
240 | if (c0 < c1) |
66 | 20 | return -1; | |
67 |
2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 205 times.
|
220 | if (c1 < c0) |
68 | 15 | return 1; | |
69 | } | ||
70 | 42 | n0 += detail::decode_bytes_unsafe(lhs); | |
71 | 42 | n1 += detail::decode_bytes_unsafe(rhs); | |
72 |
2/2✓ Branch 0 taken 21 times.
✓ Branch 1 taken 21 times.
|
42 | if (n0 == n1) |
73 | 21 | return 0; | |
74 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 13 times.
|
21 | if (n0 < n1) |
75 | 8 | return -1; | |
76 | 13 | return 1; | |
77 | } | ||
78 | |||
79 | void | ||
80 | 1104 | digest_encoded( | |
81 | core::string_view s, | ||
82 | fnv_1a& hasher) noexcept | ||
83 | { | ||
84 | 1104 | char c = 0; | |
85 | 1104 | std::size_t n = 0; | |
86 |
2/2✓ Branch 1 taken 452 times.
✓ Branch 2 taken 1104 times.
|
1556 | while(!s.empty()) |
87 | { | ||
88 | 452 | pop_encoded_front(s, c, n); | |
89 | 452 | hasher.put(c); | |
90 | } | ||
91 | 1104 | } | |
92 | |||
93 | int | ||
94 | 119 | ci_compare_encoded( | |
95 | core::string_view lhs, | ||
96 | core::string_view rhs) noexcept | ||
97 | { | ||
98 | 119 | std::size_t n0 = 0; | |
99 | 119 | std::size_t n1 = 0; | |
100 | 119 | char c0 = 0; | |
101 | 119 | char c1 = 0; | |
102 | 1505 | while ( | |
103 |
4/4✓ Branch 1 taken 1521 times.
✓ Branch 2 taken 103 times.
✓ Branch 3 taken 1515 times.
✓ Branch 4 taken 109 times.
|
3145 | !lhs.empty() && |
104 |
2/2✓ Branch 1 taken 1515 times.
✓ Branch 2 taken 6 times.
|
1521 | !rhs.empty()) |
105 | { | ||
106 | 1515 | pop_encoded_front(lhs, c0, n0); | |
107 | 1515 | pop_encoded_front(rhs, c1, n1); | |
108 | 1515 | c0 = grammar::to_lower(c0); | |
109 | 1515 | c1 = grammar::to_lower(c1); | |
110 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1507 times.
|
1515 | if (c0 < c1) |
111 | 8 | return -1; | |
112 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1505 times.
|
1507 | if (c1 < c0) |
113 | 2 | return 1; | |
114 | } | ||
115 | 109 | n0 += detail::decode_bytes_unsafe(lhs); | |
116 | 109 | n1 += detail::decode_bytes_unsafe(rhs); | |
117 |
2/2✓ Branch 0 taken 102 times.
✓ Branch 1 taken 7 times.
|
109 | if (n0 == n1) |
118 | 102 | return 0; | |
119 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 6 times.
|
7 | if (n0 < n1) |
120 | 1 | return -1; | |
121 | 6 | return 1; | |
122 | } | ||
123 | |||
124 | void | ||
125 | 276 | ci_digest_encoded( | |
126 | core::string_view s, | ||
127 | fnv_1a& hasher) noexcept | ||
128 | { | ||
129 | 276 | char c = 0; | |
130 | 276 | std::size_t n = 0; | |
131 |
2/2✓ Branch 1 taken 1642 times.
✓ Branch 2 taken 276 times.
|
1918 | while(!s.empty()) |
132 | { | ||
133 | 1642 | pop_encoded_front(s, c, n); | |
134 | 1642 | c = grammar::to_lower(c); | |
135 | 1642 | hasher.put(c); | |
136 | } | ||
137 | 276 | } | |
138 | |||
139 | int | ||
140 | 8 | compare( | |
141 | core::string_view lhs, | ||
142 | core::string_view rhs) noexcept | ||
143 | { | ||
144 | 8 | auto rlen = (std::min)(lhs.size(), rhs.size()); | |
145 |
2/2✓ Branch 0 taken 17 times.
✓ Branch 1 taken 1 times.
|
18 | for (std::size_t i = 0; i < rlen; ++i) |
146 | { | ||
147 | 17 | char c0 = lhs[i]; | |
148 | 17 | char c1 = rhs[i]; | |
149 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 11 times.
|
17 | if (c0 < c1) |
150 | 6 | return -1; | |
151 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 10 times.
|
11 | if (c1 < c0) |
152 | 1 | return 1; | |
153 | } | ||
154 |
1/2✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
|
1 | if ( lhs.size() == rhs.size() ) |
155 | 1 | return 0; | |
156 | ✗ | if ( lhs.size() < rhs.size() ) | |
157 | ✗ | return -1; | |
158 | ✗ | return 1; | |
159 | } | ||
160 | |||
161 | int | ||
162 | 156 | ci_compare( | |
163 | core::string_view lhs, | ||
164 | core::string_view rhs) noexcept | ||
165 | { | ||
166 | 156 | auto rlen = (std::min)(lhs.size(), rhs.size()); | |
167 |
2/2✓ Branch 0 taken 640 times.
✓ Branch 1 taken 149 times.
|
789 | for (std::size_t i = 0; i < rlen; ++i) |
168 | { | ||
169 | 640 | char c0 = grammar::to_lower(lhs[i]); | |
170 | 640 | char c1 = grammar::to_lower(rhs[i]); | |
171 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 634 times.
|
640 | if (c0 < c1) |
172 | 6 | return -1; | |
173 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 633 times.
|
634 | if (c1 < c0) |
174 | 1 | return 1; | |
175 | } | ||
176 |
2/2✓ Branch 2 taken 142 times.
✓ Branch 3 taken 7 times.
|
149 | if ( lhs.size() == rhs.size() ) |
177 | 142 | return 0; | |
178 |
2/2✓ Branch 2 taken 6 times.
✓ Branch 3 taken 1 times.
|
7 | if ( lhs.size() < rhs.size() ) |
179 | 6 | return -1; | |
180 | 1 | return 1; | |
181 | } | ||
182 | |||
183 | void | ||
184 | 276 | ci_digest( | |
185 | core::string_view s, | ||
186 | fnv_1a& hasher) noexcept | ||
187 | { | ||
188 |
2/2✓ Branch 2 taken 590 times.
✓ Branch 3 taken 276 times.
|
866 | for (char c: s) |
189 | { | ||
190 | 590 | c = grammar::to_lower(c); | |
191 | 590 | hasher.put(c); | |
192 | } | ||
193 | 276 | } | |
194 | |||
195 | std::size_t | ||
196 | ✗ | path_starts_with( | |
197 | core::string_view lhs, | ||
198 | core::string_view rhs) noexcept | ||
199 | { | ||
200 | ✗ | auto consume_one = []( | |
201 | core::string_view::iterator& it, | ||
202 | char &c) | ||
203 | { | ||
204 | ✗ | if(*it != '%') | |
205 | { | ||
206 | ✗ | c = *it; | |
207 | ✗ | ++it; | |
208 | ✗ | return; | |
209 | } | ||
210 | ✗ | detail::decode_unsafe( | |
211 | &c, | ||
212 | &c + 1, | ||
213 | core::string_view(it, 3)); | ||
214 | ✗ | if (c != '/') | |
215 | { | ||
216 | ✗ | it += 3; | |
217 | ✗ | return; | |
218 | } | ||
219 | ✗ | c = *it; | |
220 | ✗ | ++it; | |
221 | }; | ||
222 | |||
223 | ✗ | auto it0 = lhs.begin(); | |
224 | ✗ | auto it1 = rhs.begin(); | |
225 | ✗ | auto end0 = lhs.end(); | |
226 | ✗ | auto end1 = rhs.end(); | |
227 | ✗ | char c0 = 0; | |
228 | ✗ | char c1 = 0; | |
229 | ✗ | while ( | |
230 | ✗ | it0 < end0 && | |
231 | ✗ | it1 < end1) | |
232 | { | ||
233 | ✗ | consume_one(it0, c0); | |
234 | ✗ | consume_one(it1, c1); | |
235 | ✗ | if (c0 != c1) | |
236 | ✗ | return 0; | |
237 | } | ||
238 | ✗ | if (it1 == end1) | |
239 | ✗ | return it0 - lhs.begin(); | |
240 | ✗ | return 0; | |
241 | } | ||
242 | |||
243 | std::size_t | ||
244 | 2104 | path_ends_with( | |
245 | core::string_view lhs, | ||
246 | core::string_view rhs) noexcept | ||
247 | { | ||
248 | 5800 | auto consume_last = []( | |
249 | core::string_view::iterator& it, | ||
250 | core::string_view::iterator& end, | ||
251 | char& c) | ||
252 | { | ||
253 |
4/4✓ Branch 0 taken 3932 times.
✓ Branch 1 taken 1868 times.
✓ Branch 2 taken 5768 times.
✓ Branch 3 taken 32 times.
|
9732 | if ((end - it) < 3 || |
254 |
2/2✓ Branch 1 taken 3900 times.
✓ Branch 2 taken 32 times.
|
3932 | *(std::prev(end, 3)) != '%') |
255 | { | ||
256 | 5768 | c = *--end; | |
257 | 5768 | return; | |
258 | } | ||
259 |
1/2✓ Branch 2 taken 32 times.
✗ Branch 3 not taken.
|
32 | detail::decode_unsafe( |
260 | &c, | ||
261 | &c + 1, | ||
262 | core::string_view(std::prev( | ||
263 | end, 3), 3)); | ||
264 |
1/2✓ Branch 0 taken 32 times.
✗ Branch 1 not taken.
|
32 | if (c != '/') |
265 | { | ||
266 | 32 | end -= 3; | |
267 | 32 | return; | |
268 | } | ||
269 | ✗ | c = *--end; | |
270 | }; | ||
271 | |||
272 | 2104 | auto it0 = lhs.begin(); | |
273 | 2104 | auto it1 = rhs.begin(); | |
274 | 2104 | auto end0 = lhs.end(); | |
275 | 2104 | auto end1 = rhs.end(); | |
276 | 2104 | char c0 = 0; | |
277 | 2104 | char c1 = 0; | |
278 | 1104 | while( | |
279 |
2/2✓ Branch 0 taken 2974 times.
✓ Branch 1 taken 234 times.
|
3208 | it0 < end0 && |
280 |
2/2✓ Branch 0 taken 2900 times.
✓ Branch 1 taken 74 times.
|
2974 | it1 < end1) |
281 | { | ||
282 | 2900 | consume_last(it0, end0, c0); | |
283 | 2900 | consume_last(it1, end1, c1); | |
284 |
2/2✓ Branch 0 taken 1796 times.
✓ Branch 1 taken 1104 times.
|
2900 | if (c0 != c1) |
285 | 1796 | return 0; | |
286 | } | ||
287 |
2/2✓ Branch 0 taken 110 times.
✓ Branch 1 taken 198 times.
|
308 | if (it1 == end1) |
288 | 110 | return lhs.end() - end0; | |
289 | 198 | return 0; | |
290 | } | ||
291 | |||
292 | std::size_t | ||
293 | 831 | remove_dot_segments( | |
294 | char* dest0, | ||
295 | char const* end, | ||
296 | core::string_view input) noexcept | ||
297 | { | ||
298 | // 1. The input buffer `s` is initialized with | ||
299 | // the now-appended path components and the | ||
300 | // output buffer `dest0` is initialized to | ||
301 | // the empty string. | ||
302 | 831 | char* dest = dest0; | |
303 | 831 | bool const is_absolute = input.starts_with('/'); | |
304 | |||
305 | // Step 2 is a loop through 5 production rules: | ||
306 | // https://www.rfc-editor.org/rfc/rfc3986#section-5.2.4 | ||
307 | // | ||
308 | // There are no transitions between all rules, | ||
309 | // which enables some optimizations. | ||
310 | // | ||
311 | // Initial: | ||
312 | // - Rule A: handle initial dots | ||
313 | // If the input buffer begins with a | ||
314 | // prefix of "../" or "./", then remove | ||
315 | // that prefix from the input buffer. | ||
316 | // Rule A can only happen at the beginning. | ||
317 | // Errata 4547: Keep "../" in the beginning | ||
318 | // https://www.rfc-editor.org/errata/eid4547 | ||
319 | // | ||
320 | // Then: | ||
321 | // - Rule D: ignore a final ".." or "." | ||
322 | // if the input buffer consists only of "." | ||
323 | // or "..", then remove that from the input | ||
324 | // buffer. | ||
325 | // Rule D can only happen after Rule A because: | ||
326 | // - B and C write "/" to the input | ||
327 | // - E writes "/" to input or returns | ||
328 | // | ||
329 | // Then: | ||
330 | // - Rule B: ignore ".": write "/" to the input | ||
331 | // - Rule C: apply "..": remove seg and write "/" | ||
332 | // - Rule E: copy complete segment | ||
333 | auto append = | ||
334 | 1491 | [](char*& first, char const* last, core::string_view in) | |
335 | { | ||
336 | // append `in` to `dest` | ||
337 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1491 times.
|
1491 | BOOST_ASSERT(in.size() <= std::size_t(last - first)); |
338 | 1491 | std::memmove(first, in.data(), in.size()); | |
339 | 1491 | first += in.size(); | |
340 | ignore_unused(last); | ||
341 | 1491 | }; | |
342 | |||
343 | 9555 | auto dot_starts_with = []( | |
344 | core::string_view str, core::string_view dots, std::size_t& n) | ||
345 | { | ||
346 | // starts_with for encoded/decoded dots | ||
347 | // or decoded otherwise. return how many | ||
348 | // chars in str match the dots | ||
349 | 9555 | n = 0; | |
350 |
2/2✓ Branch 2 taken 16356 times.
✓ Branch 3 taken 550 times.
|
16906 | for (char c: dots) |
351 | { | ||
352 |
2/2✓ Branch 1 taken 7351 times.
✓ Branch 2 taken 9005 times.
|
16356 | if (str.starts_with(c)) |
353 | { | ||
354 | 7351 | str.remove_prefix(1); | |
355 | 7351 | ++n; | |
356 | } | ||
357 | 9005 | else if (str.size() > 2 && | |
358 |
2/2✓ Branch 1 taken 24 times.
✓ Branch 2 taken 6219 times.
|
6243 | str[0] == '%' && |
359 |
5/6✓ Branch 0 taken 6243 times.
✓ Branch 1 taken 2762 times.
✓ Branch 3 taken 16 times.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 9005 times.
|
15264 | str[1] == '2' && |
360 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | (str[2] == 'e' || |
361 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
|
16 | str[2] == 'E')) |
362 | { | ||
363 | ✗ | str.remove_prefix(3); | |
364 | ✗ | n += 3; | |
365 | } | ||
366 | else | ||
367 | { | ||
368 | 9005 | n = 0; | |
369 | 9005 | return false; | |
370 | } | ||
371 | } | ||
372 | 550 | return true; | |
373 | }; | ||
374 | |||
375 | 4773 | auto dot_equal = [&dot_starts_with]( | |
376 | 4773 | core::string_view str, core::string_view dots) | |
377 | { | ||
378 | 4773 | std::size_t n = 0; | |
379 | 4773 | dot_starts_with(str, dots, n); | |
380 | 4773 | return n == str.size(); | |
381 | 831 | }; | |
382 | |||
383 | // Rule A | ||
384 | std::size_t n; | ||
385 |
2/2✓ Branch 1 taken 766 times.
✓ Branch 2 taken 81 times.
|
847 | while (!input.empty()) |
386 | { | ||
387 |
2/2✓ Branch 2 taken 4 times.
✓ Branch 3 taken 762 times.
|
766 | if (dot_starts_with(input, "../", n)) |
388 | { | ||
389 | // Errata 4547 | ||
390 | 4 | append(dest, end, "../"); | |
391 | 4 | input.remove_prefix(n); | |
392 | 4 | continue; | |
393 | } | ||
394 |
2/2✓ Branch 2 taken 750 times.
✓ Branch 3 taken 12 times.
|
762 | else if (!dot_starts_with(input, "./", n)) |
395 | { | ||
396 | 750 | break; | |
397 | } | ||
398 | 12 | input.remove_prefix(n); | |
399 | } | ||
400 | |||
401 | // Rule D | ||
402 |
2/2✓ Branch 2 taken 82 times.
✓ Branch 3 taken 749 times.
|
831 | if( dot_equal(input, ".")) |
403 | { | ||
404 | 82 | input = {}; | |
405 | } | ||
406 |
2/2✓ Branch 2 taken 3 times.
✓ Branch 3 taken 746 times.
|
749 | else if( dot_equal(input, "..") ) |
407 | { | ||
408 | // Errata 4547 | ||
409 | 3 | append(dest, end, ".."); | |
410 | 3 | input = {}; | |
411 | } | ||
412 | |||
413 | // 2. While the input buffer is not empty, | ||
414 | // loop as follows: | ||
415 |
2/2✓ Branch 1 taken 1647 times.
✓ Branch 2 taken 792 times.
|
2439 | while (!input.empty()) |
416 | { | ||
417 | // Rule B | ||
418 | 1647 | bool const is_dot_seg = dot_starts_with(input, "/./", n); | |
419 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 1615 times.
|
1647 | if (is_dot_seg) |
420 | { | ||
421 | 32 | input.remove_prefix(n - 1); | |
422 | 32 | continue; | |
423 | } | ||
424 | |||
425 | 1615 | bool const is_final_dot_seg = dot_equal(input, "/."); | |
426 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1607 times.
|
1615 | if (is_final_dot_seg) |
427 | { | ||
428 | // We can't remove "." from a core::string_view | ||
429 | // So what we do here is equivalent to | ||
430 | // replacing s with '/' as required | ||
431 | // in Rule B and executing the next | ||
432 | // iteration, which would append this | ||
433 | // '/' to the output, as required by | ||
434 | // Rule E | ||
435 | 8 | append(dest, end, input.substr(0, 1)); | |
436 | 8 | input = {}; | |
437 | 8 | break; | |
438 | } | ||
439 | |||
440 | // Rule C | ||
441 | 1607 | bool const is_dotdot_seg = dot_starts_with(input, "/../", n); | |
442 |
2/2✓ Branch 0 taken 193 times.
✓ Branch 1 taken 1414 times.
|
1607 | if (is_dotdot_seg) |
443 | { | ||
444 | 193 | core::string_view cur_out(dest0, dest - dest0); | |
445 | 193 | std::size_t p = cur_out.find_last_of('/'); | |
446 | 193 | bool const has_multiple_segs = p != core::string_view::npos; | |
447 |
2/2✓ Branch 0 taken 132 times.
✓ Branch 1 taken 61 times.
|
193 | if (has_multiple_segs) |
448 | { | ||
449 | // output has multiple segments | ||
450 | // "erase" [p, end] if not "/.." | ||
451 | 132 | core::string_view last_seg(dest0 + p, dest - (dest0 + p)); | |
452 | 132 | bool const prev_is_dotdot_seg = dot_equal(last_seg, "/.."); | |
453 |
2/2✓ Branch 0 taken 121 times.
✓ Branch 1 taken 11 times.
|
132 | if (!prev_is_dotdot_seg) |
454 | { | ||
455 | 121 | dest = dest0 + p; | |
456 | } | ||
457 | else | ||
458 | { | ||
459 | 11 | append(dest, end, "/.."); | |
460 | } | ||
461 | } | ||
462 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 50 times.
|
61 | else if (dest0 != dest) |
463 | { | ||
464 | // Only one segment in the output: remove it | ||
465 | 11 | core::string_view last_seg(dest0, dest - dest0); | |
466 | 11 | bool const prev_is_dotdot_seg = dot_equal(last_seg, ".."); | |
467 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 2 times.
|
11 | if (!prev_is_dotdot_seg) |
468 | { | ||
469 | 9 | dest = dest0; | |
470 |
1/2✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
|
9 | if (!is_absolute) |
471 | { | ||
472 | 9 | input.remove_prefix(1); | |
473 | } | ||
474 | } | ||
475 | else | ||
476 | { | ||
477 | 2 | append(dest, end, "/.."); | |
478 | } | ||
479 | } | ||
480 | else | ||
481 | { | ||
482 | // Output is empty | ||
483 |
1/2✓ Branch 0 taken 50 times.
✗ Branch 1 not taken.
|
50 | if (is_absolute) |
484 | { | ||
485 | 50 | append(dest, end, "/.."); | |
486 | } | ||
487 | else | ||
488 | { | ||
489 | ✗ | append(dest, end, ".."); | |
490 | } | ||
491 | } | ||
492 | 193 | input.remove_prefix(n - 1); | |
493 | 193 | continue; | |
494 | } | ||
495 | |||
496 | 1414 | bool const is_final_dotdot_seg = dot_equal(input, "/.."); | |
497 |
2/2✓ Branch 0 taken 31 times.
✓ Branch 1 taken 1383 times.
|
1414 | if (is_final_dotdot_seg) |
498 | { | ||
499 | 31 | core::string_view cur_out(dest0, dest - dest0); | |
500 | 31 | std::size_t p = cur_out.find_last_of('/'); | |
501 | 31 | bool const has_multiple_segs = p != core::string_view::npos; | |
502 |
2/2✓ Branch 0 taken 18 times.
✓ Branch 1 taken 13 times.
|
31 | if (has_multiple_segs) |
503 | { | ||
504 | // output has multiple segments | ||
505 | // "erase" [p, end] if not "/.." | ||
506 | 18 | core::string_view last_seg(dest0 + p, dest - (dest0 + p)); | |
507 | 18 | bool const prev_is_dotdot_seg = dot_equal(last_seg, "/.."); | |
508 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 4 times.
|
18 | if (!prev_is_dotdot_seg) |
509 | { | ||
510 | 14 | dest = dest0 + p; | |
511 | 14 | append(dest, end, "/"); | |
512 | } | ||
513 | else | ||
514 | { | ||
515 | 4 | append(dest, end, "/.."); | |
516 | } | ||
517 | } | ||
518 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 10 times.
|
13 | else if (dest0 != dest) |
519 | { | ||
520 | // Only one segment in the output: remove it | ||
521 | 3 | core::string_view last_seg(dest0, dest - dest0); | |
522 | 3 | bool const prev_is_dotdot_seg = dot_equal(last_seg, ".."); | |
523 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
|
3 | if (!prev_is_dotdot_seg) { |
524 | 1 | dest = dest0; | |
525 | } | ||
526 | else | ||
527 | { | ||
528 | 2 | append(dest, end, "/.."); | |
529 | } | ||
530 | } | ||
531 | else | ||
532 | { | ||
533 | // Output is empty: append dotdot | ||
534 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | if (is_absolute) |
535 | { | ||
536 | 10 | append(dest, end, "/.."); | |
537 | } | ||
538 | else | ||
539 | { | ||
540 | ✗ | append(dest, end, ".."); | |
541 | } | ||
542 | } | ||
543 | 31 | input = {}; | |
544 | 31 | break; | |
545 | } | ||
546 | |||
547 | // Rule E | ||
548 | 1383 | std::size_t p = input.find_first_of('/', 1); | |
549 |
2/2✓ Branch 0 taken 676 times.
✓ Branch 1 taken 707 times.
|
1383 | if (p != core::string_view::npos) |
550 | { | ||
551 | 676 | append(dest, end, input.substr(0, p)); | |
552 | 676 | input.remove_prefix(p); | |
553 | } | ||
554 | else | ||
555 | { | ||
556 | 707 | append(dest, end, input); | |
557 | 707 | input = {}; | |
558 | } | ||
559 | } | ||
560 | |||
561 | // 3. Finally, the output buffer is set | ||
562 | // as the result of remove_dot_segments, | ||
563 | // and we return its size | ||
564 | 831 | return dest - dest0; | |
565 | } | ||
566 | |||
567 | char | ||
568 | 1138 | path_pop_back( core::string_view& s ) | |
569 | { | ||
570 |
4/4✓ Branch 1 taken 518 times.
✓ Branch 2 taken 620 times.
✓ Branch 3 taken 1090 times.
✓ Branch 4 taken 48 times.
|
1656 | if (s.size() < 3 || |
571 |
3/4✓ Branch 2 taken 518 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 470 times.
✓ Branch 5 taken 48 times.
|
518 | *std::prev(s.end(), 3) != '%') |
572 | { | ||
573 | 1090 | char c = s.back(); | |
574 | 1090 | s.remove_suffix(1); | |
575 | 1090 | return c; | |
576 | } | ||
577 | 48 | char c = 0; | |
578 |
1/2✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
|
96 | detail::decode_unsafe( |
579 | 96 | &c, &c + 1, s.substr(s.size() - 3)); | |
580 |
2/2✓ Branch 0 taken 44 times.
✓ Branch 1 taken 4 times.
|
48 | if (c != '/') |
581 | { | ||
582 | 44 | s.remove_suffix(3); | |
583 | 44 | return c; | |
584 | } | ||
585 | 4 | c = s.back(); | |
586 | 4 | s.remove_suffix(1); | |
587 | 4 | return c; | |
588 | }; | ||
589 | |||
590 | void | ||
591 | 506 | pop_last_segment( | |
592 | core::string_view& s, | ||
593 | core::string_view& c, | ||
594 | std::size_t& level, | ||
595 | bool r) noexcept | ||
596 | { | ||
597 | 506 | c = {}; | |
598 | 506 | std::size_t n = 0; | |
599 |
2/2✓ Branch 1 taken 550 times.
✓ Branch 2 taken 118 times.
|
668 | while (!s.empty()) |
600 | { | ||
601 | // B. if the input buffer begins with a | ||
602 | // prefix of "/./" or "/.", where "." is | ||
603 | // a complete path segment, then replace | ||
604 | // that prefix with "/" in the input | ||
605 | // buffer; otherwise, | ||
606 | 550 | n = detail::path_ends_with(s, "/./"); | |
607 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 540 times.
|
550 | if (n) |
608 | { | ||
609 | 10 | c = s.substr(s.size() - n); | |
610 | 10 | s.remove_suffix(n); | |
611 | 10 | continue; | |
612 | } | ||
613 | 540 | n = detail::path_ends_with(s, "/."); | |
614 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 528 times.
|
540 | if (n) |
615 | { | ||
616 | 12 | c = s.substr(s.size() - n, 1); | |
617 | 12 | s.remove_suffix(n); | |
618 | 12 | continue; | |
619 | } | ||
620 | |||
621 | // C. if the input buffer begins with a | ||
622 | // prefix of "/../" or "/..", where ".." | ||
623 | // is a complete path segment, then | ||
624 | // replace that prefix with "/" in the | ||
625 | // input buffer and remove the last | ||
626 | // segment and its preceding "/" | ||
627 | // (if any) from the output buffer | ||
628 | // otherwise, | ||
629 | 528 | n = detail::path_ends_with(s, "/../"); | |
630 |
2/2✓ Branch 0 taken 42 times.
✓ Branch 1 taken 486 times.
|
528 | if (n) |
631 | { | ||
632 | 42 | c = s.substr(s.size() - n); | |
633 | 42 | s.remove_suffix(n); | |
634 | 42 | ++level; | |
635 | 42 | continue; | |
636 | } | ||
637 | 486 | n = detail::path_ends_with(s, "/.."); | |
638 |
2/2✓ Branch 0 taken 46 times.
✓ Branch 1 taken 440 times.
|
486 | if (n) |
639 | { | ||
640 | 46 | c = s.substr(s.size() - n); | |
641 | 46 | s.remove_suffix(n); | |
642 | 46 | ++level; | |
643 | 46 | continue; | |
644 | } | ||
645 | |||
646 | // E. move the first path segment in the | ||
647 | // input buffer to the end of the output | ||
648 | // buffer, including the initial "/" | ||
649 | // character (if any) and any subsequent | ||
650 | // characters up to, but not including, | ||
651 | // the next "/" character or the end of | ||
652 | // the input buffer. | ||
653 | 440 | std::size_t p = s.size() > 1 | |
654 |
2/2✓ Branch 0 taken 370 times.
✓ Branch 1 taken 70 times.
|
440 | ? s.find_last_of('/', s.size() - 2) |
655 | 440 | : core::string_view::npos; | |
656 |
2/2✓ Branch 0 taken 272 times.
✓ Branch 1 taken 168 times.
|
440 | if (p != core::string_view::npos) |
657 | { | ||
658 | 272 | c = s.substr(p + 1); | |
659 | 272 | s.remove_suffix(c.size()); | |
660 | } | ||
661 | else | ||
662 | { | ||
663 | 168 | c = s; | |
664 | 168 | s = {}; | |
665 | } | ||
666 | |||
667 |
2/2✓ Branch 0 taken 388 times.
✓ Branch 1 taken 52 times.
|
440 | if (level == 0) |
668 | 388 | return; | |
669 |
2/2✓ Branch 1 taken 42 times.
✓ Branch 2 taken 10 times.
|
52 | if (!s.empty()) |
670 | 42 | --level; | |
671 | } | ||
672 | // we still need to skip n_skip + 1 | ||
673 | // but the string is empty | ||
674 |
4/4✓ Branch 0 taken 42 times.
✓ Branch 1 taken 76 times.
✓ Branch 2 taken 34 times.
✓ Branch 3 taken 8 times.
|
118 | if (r && level) |
675 | { | ||
676 | 34 | c = "/"; | |
677 | 34 | level = 0; | |
678 | 34 | return; | |
679 | } | ||
680 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 80 times.
|
84 | else if (level) |
681 | { | ||
682 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 4 times.
|
4 | if (c.empty()) |
683 | ✗ | c = "/.."; | |
684 | else | ||
685 | 4 | c = "/../"; | |
686 | 4 | --level; | |
687 | 4 | return; | |
688 | } | ||
689 | 80 | c = {}; | |
690 | } | ||
691 | |||
692 | void | ||
693 | 276 | normalized_path_digest( | |
694 | core::string_view s, | ||
695 | bool remove_unmatched, | ||
696 | fnv_1a& hasher) noexcept | ||
697 | { | ||
698 | 276 | core::string_view child; | |
699 | 276 | std::size_t level = 0; | |
700 | 230 | do | |
701 | { | ||
702 | 506 | pop_last_segment( | |
703 | s, child, level, remove_unmatched); | ||
704 |
2/2✓ Branch 1 taken 1138 times.
✓ Branch 2 taken 506 times.
|
1644 | while (!child.empty()) |
705 | { | ||
706 | 1138 | char c = path_pop_back(child); | |
707 | 1138 | hasher.put(c); | |
708 | } | ||
709 | } | ||
710 |
2/2✓ Branch 1 taken 230 times.
✓ Branch 2 taken 276 times.
|
506 | while (!s.empty()); |
711 | 276 | } | |
712 | |||
713 | // compare segments as if there were a normalized | ||
714 | int | ||
715 | 168 | segments_compare( | |
716 | segments_encoded_view seg0, | ||
717 | segments_encoded_view seg1) noexcept | ||
718 | { | ||
719 | // calculate path size as if it were normalized | ||
720 | auto normalized_size = | ||
721 | 336 | [](segments_encoded_view seg) -> std::size_t | |
722 | { | ||
723 |
2/2✓ Branch 1 taken 102 times.
✓ Branch 2 taken 234 times.
|
336 | if (seg.empty()) |
724 | 102 | return seg.is_absolute(); | |
725 | |||
726 | 234 | std::size_t n = 0; | |
727 | 234 | std::size_t skip = 0; | |
728 | 234 | auto begin = seg.begin(); | |
729 | 234 | auto it = seg.end(); | |
730 |
2/2✓ Branch 1 taken 658 times.
✓ Branch 2 taken 234 times.
|
892 | while (it != begin) |
731 | { | ||
732 | 658 | --it; | |
733 | 658 | decode_view dseg = **it; | |
734 |
2/2✓ Branch 1 taken 167 times.
✓ Branch 2 taken 491 times.
|
658 | if (dseg == "..") |
735 | 167 | ++skip; | |
736 |
2/2✓ Branch 1 taken 453 times.
✓ Branch 2 taken 38 times.
|
491 | else if (dseg != ".") |
737 | { | ||
738 |
2/2✓ Branch 0 taken 85 times.
✓ Branch 1 taken 368 times.
|
453 | if (skip) |
739 | 85 | --skip; | |
740 | else | ||
741 | 368 | n += dseg.size() + 1; | |
742 | } | ||
743 | } | ||
744 | 234 | n += skip * 3; | |
745 | 234 | n -= !seg.is_absolute(); | |
746 | 234 | return n; | |
747 | }; | ||
748 | |||
749 | // find the normalized size for the comparison | ||
750 | 168 | std::size_t n0 = normalized_size(seg0); | |
751 | 168 | std::size_t n1 = normalized_size(seg1); | |
752 | 168 | std::size_t n00 = n0; | |
753 | 168 | std::size_t n10 = n1; | |
754 | |||
755 | // consume the last char from a segment range | ||
756 | auto consume_last = | ||
757 | 1632 | []( | |
758 | std::size_t& n, | ||
759 | decode_view& dseg, | ||
760 | segments_encoded_view::iterator& begin, | ||
761 | segments_encoded_view::iterator& it, | ||
762 | decode_view::iterator& cit, | ||
763 | std::size_t& skip, | ||
764 | bool& at_slash) -> char | ||
765 | { | ||
766 |
2/2✓ Branch 2 taken 1005 times.
✓ Branch 3 taken 627 times.
|
1632 | if (cit != dseg.begin()) |
767 | { | ||
768 | // return last char from current segment | ||
769 | 1005 | at_slash = false; | |
770 | 1005 | --cit; | |
771 | 1005 | --n; | |
772 | 1005 | return *cit; | |
773 | } | ||
774 | |||
775 |
2/2✓ Branch 0 taken 367 times.
✓ Branch 1 taken 260 times.
|
627 | if (!at_slash) |
776 | { | ||
777 | // current segment dseg is over and | ||
778 | // previous char was not a slash | ||
779 | // so we output one | ||
780 | 367 | at_slash = true; | |
781 | 367 | --n; | |
782 | 367 | return '/'; | |
783 | } | ||
784 | |||
785 | // current segment dseg is over and | ||
786 | // last char was already the slash | ||
787 | // between segments, so take the | ||
788 | // next final segment to consume | ||
789 | 260 | at_slash = false; | |
790 |
1/2✓ Branch 2 taken 498 times.
✗ Branch 3 not taken.
|
498 | while (cit == dseg.begin()) |
791 | { | ||
792 | // take next segment | ||
793 |
2/2✓ Branch 1 taken 376 times.
✓ Branch 2 taken 122 times.
|
498 | if (it != begin) |
794 | 376 | --it; | |
795 | else | ||
796 | 122 | break; | |
797 |
2/2✓ Branch 3 taken 140 times.
✓ Branch 4 taken 236 times.
|
376 | if (**it == "..") |
798 | { | ||
799 | // skip next if this is ".." | ||
800 | 140 | ++skip; | |
801 | } | ||
802 |
2/2✓ Branch 3 taken 208 times.
✓ Branch 4 taken 28 times.
|
236 | else if (**it != ".") |
803 | { | ||
804 |
2/2✓ Branch 0 taken 70 times.
✓ Branch 1 taken 138 times.
|
208 | if (skip) |
805 | { | ||
806 | // discount skips | ||
807 | 70 | --skip; | |
808 | } | ||
809 | else | ||
810 | { | ||
811 | // or update current seg | ||
812 | 138 | dseg = **it; | |
813 | 138 | cit = dseg.end(); | |
814 | 138 | break; | |
815 | } | ||
816 | } | ||
817 | } | ||
818 | // consume from the new current | ||
819 | // segment | ||
820 | 260 | --n; | |
821 |
2/2✓ Branch 2 taken 123 times.
✓ Branch 3 taken 137 times.
|
260 | if (cit != dseg.begin()) |
822 | { | ||
823 | // in the general case, we consume | ||
824 | // one more character from the end | ||
825 | 123 | --cit; | |
826 | 123 | return *cit; | |
827 | } | ||
828 | |||
829 | // nothing left to consume in the | ||
830 | // current and new segment | ||
831 |
2/2✓ Branch 1 taken 128 times.
✓ Branch 2 taken 9 times.
|
137 | if (it == begin) |
832 | { | ||
833 | // if this is the first | ||
834 | // segment, the segments are | ||
835 | // over and there can only | ||
836 | // be repetitions of "../" to | ||
837 | // output | ||
838 | 128 | return "/.."[n % 3]; | |
839 | } | ||
840 | // at other segments, we need | ||
841 | // a slash to transition to the | ||
842 | // next segment | ||
843 | 9 | at_slash = true; | |
844 | 9 | return '/'; | |
845 | }; | ||
846 | |||
847 | // consume final segments from seg0 that | ||
848 | // should not influence the comparison | ||
849 | 168 | auto begin0 = seg0.begin(); | |
850 | 168 | auto it0 = seg0.end(); | |
851 | 168 | decode_view dseg0; | |
852 |
2/2✓ Branch 2 taken 117 times.
✓ Branch 3 taken 51 times.
|
168 | if (it0 != seg0.begin()) |
853 | { | ||
854 | 117 | --it0; | |
855 | 117 | dseg0 = **it0; | |
856 | } | ||
857 | 168 | decode_view::iterator cit0 = dseg0.end(); | |
858 | 168 | std::size_t skip0 = 0; | |
859 | 168 | bool at_slash0 = true; | |
860 |
2/2✓ Branch 0 taken 134 times.
✓ Branch 1 taken 168 times.
|
302 | while (n0 > n1) |
861 | { | ||
862 | 134 | consume_last(n0, dseg0, begin0, it0, cit0, skip0, at_slash0); | |
863 | } | ||
864 | |||
865 | // consume final segments from seg1 that | ||
866 | // should not influence the comparison | ||
867 | 168 | auto begin1 = seg1.begin(); | |
868 | 168 | auto it1 = seg1.end(); | |
869 | 168 | decode_view dseg1; | |
870 |
2/2✓ Branch 2 taken 117 times.
✓ Branch 3 taken 51 times.
|
168 | if (it1 != seg1.begin()) |
871 | { | ||
872 | 117 | --it1; | |
873 | 117 | dseg1 = **it1; | |
874 | } | ||
875 | 168 | decode_view::iterator cit1 = dseg1.end(); | |
876 | 168 | std::size_t skip1 = 0; | |
877 | 168 | bool at_slash1 = true; | |
878 |
2/2✓ Branch 0 taken 34 times.
✓ Branch 1 taken 168 times.
|
202 | while (n1 > n0) |
879 | { | ||
880 | 34 | consume_last(n1, dseg1, begin1, it1, cit1, skip1, at_slash1); | |
881 | } | ||
882 | |||
883 | 168 | int cmp = 0; | |
884 |
2/2✓ Branch 0 taken 732 times.
✓ Branch 1 taken 168 times.
|
900 | while (n0) |
885 | { | ||
886 | 732 | char c0 = consume_last( | |
887 | n0, dseg0, begin0, it0, cit0, skip0, at_slash0); | ||
888 | 732 | char c1 = consume_last( | |
889 | n1, dseg1, begin1, it1, cit1, skip1, at_slash1); | ||
890 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 696 times.
|
732 | if (c0 < c1) |
891 | 36 | cmp = -1; | |
892 |
2/2✓ Branch 0 taken 41 times.
✓ Branch 1 taken 655 times.
|
696 | else if (c1 < c0) |
893 | 41 | cmp = +1; | |
894 | } | ||
895 | |||
896 |
2/2✓ Branch 0 taken 41 times.
✓ Branch 1 taken 127 times.
|
168 | if (cmp != 0) |
897 | 41 | return cmp; | |
898 |
2/2✓ Branch 0 taken 125 times.
✓ Branch 1 taken 2 times.
|
127 | if ( n00 == n10 ) |
899 | 125 | return 0; | |
900 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if ( n00 < n10 ) |
901 | 1 | return -1; | |
902 | 1 | return 1; | |
903 | } | ||
904 | |||
905 | } // detail | ||
906 | } // urls | ||
907 | } // boost | ||
908 | |||
909 | #endif | ||
910 |