GCC Code Coverage Report


Directory: libs/url/
File: libs/url/src/detail/normalize.cpp
Date: 2024-03-08 17:32:04
Exec Total Coverage
Lines: 399 402 99.3%
Functions: 20 20 100.0%
Branches: 213 228 93.4%

Line Branch Exec Source
1 //
2 // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
3 // Copyright (c) 2022 Alan de Freitas (alandefreitas@gmail.com)
4 //
5 // Distributed under the Boost Software License, Version 1.0. (See accompanying
6 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 //
8 // Official repository: https://github.com/boostorg/url
9 //
10
11
12 #include <boost/url/detail/config.hpp>
13 #include <boost/url/decode_view.hpp>
14 #include "decode.hpp"
15 #include <boost/url/segments_encoded_view.hpp>
16 #include <boost/url/grammar/ci_string.hpp>
17 #include <boost/assert.hpp>
18 #include <boost/core/ignore_unused.hpp>
19 #include <cstring>
20 #include "normalize.hpp"
21
22 namespace boost {
23 namespace urls {
24 namespace detail {
25
26 void
27 7280 pop_encoded_front(
28 core::string_view& s,
29 char& c,
30 std::size_t& n) noexcept
31 {
32
2/2
✓ Branch 1 taken 7190 times.
✓ Branch 2 taken 90 times.
7280 if(s.front() != '%')
33 {
34 7190 c = s.front();
35 7190 s.remove_prefix(1);
36 }
37 else
38 {
39 90 detail::decode_unsafe(
40 &c,
41 &c + 1,
42 s.substr(0, 3));
43 90 s.remove_prefix(3);
44 }
45 7280 ++n;
46 7280 }
47
48 int
49 77 compare_encoded(
50 core::string_view lhs,
51 core::string_view rhs) noexcept
52 {
53 77 std::size_t n0 = 0;
54 77 std::size_t n1 = 0;
55 77 char c0 = 0;
56 77 char c1 = 0;
57 205 while(
58
4/4
✓ Branch 1 taken 253 times.
✓ Branch 2 taken 29 times.
✓ Branch 3 taken 240 times.
✓ Branch 4 taken 42 times.
535 !lhs.empty() &&
59
2/2
✓ Branch 1 taken 240 times.
✓ Branch 2 taken 13 times.
253 !rhs.empty())
60 {
61 240 pop_encoded_front(lhs, c0, n0);
62 240 pop_encoded_front(rhs, c1, n1);
63
2/2
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 220 times.
240 if (c0 < c1)
64 20 return -1;
65
2/2
✓ Branch 0 taken 15 times.
✓ Branch 1 taken 205 times.
220 if (c1 < c0)
66 15 return 1;
67 }
68 42 n0 += detail::decode_bytes_unsafe(lhs);
69 42 n1 += detail::decode_bytes_unsafe(rhs);
70
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 21 times.
42 if (n0 == n1)
71 21 return 0;
72
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 13 times.
21 if (n0 < n1)
73 8 return -1;
74 13 return 1;
75 }
76
77 void
78 1216 digest_encoded(
79 core::string_view s,
80 fnv_1a& hasher) noexcept
81 {
82 1216 char c = 0;
83 1216 std::size_t n = 0;
84
2/2
✓ Branch 1 taken 508 times.
✓ Branch 2 taken 1216 times.
1724 while(!s.empty())
85 {
86 508 pop_encoded_front(s, c, n);
87 508 hasher.put(c);
88 }
89 1216 }
90
91 int
92 159 ci_compare_encoded(
93 core::string_view lhs,
94 core::string_view rhs) noexcept
95 {
96 159 std::size_t n0 = 0;
97 159 std::size_t n1 = 0;
98 159 char c0 = 0;
99 159 char c1 = 0;
100 2105 while (
101
4/4
✓ Branch 1 taken 2121 times.
✓ Branch 2 taken 143 times.
✓ Branch 3 taken 2115 times.
✓ Branch 4 taken 149 times.
4385 !lhs.empty() &&
102
2/2
✓ Branch 1 taken 2115 times.
✓ Branch 2 taken 6 times.
2121 !rhs.empty())
103 {
104 2115 pop_encoded_front(lhs, c0, n0);
105 2115 pop_encoded_front(rhs, c1, n1);
106 2115 c0 = grammar::to_lower(c0);
107 2115 c1 = grammar::to_lower(c1);
108
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2107 times.
2115 if (c0 < c1)
109 8 return -1;
110
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2105 times.
2107 if (c1 < c0)
111 2 return 1;
112 }
113 149 n0 += detail::decode_bytes_unsafe(lhs);
114 149 n1 += detail::decode_bytes_unsafe(rhs);
115
2/2
✓ Branch 0 taken 142 times.
✓ Branch 1 taken 7 times.
149 if (n0 == n1)
116 142 return 0;
117
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 6 times.
7 if (n0 < n1)
118 1 return -1;
119 6 return 1;
120 }
121
122 void
123 304 ci_digest_encoded(
124 core::string_view s,
125 fnv_1a& hasher) noexcept
126 {
127 304 char c = 0;
128 304 std::size_t n = 0;
129
2/2
✓ Branch 1 taken 2062 times.
✓ Branch 2 taken 304 times.
2366 while(!s.empty())
130 {
131 2062 pop_encoded_front(s, c, n);
132 2062 c = grammar::to_lower(c);
133 2062 hasher.put(c);
134 }
135 304 }
136
137 int
138 46 compare(
139 core::string_view lhs,
140 core::string_view rhs) noexcept
141 {
142 46 auto rlen = (std::min)(lhs.size(), rhs.size());
143
2/2
✓ Branch 0 taken 79 times.
✓ Branch 1 taken 25 times.
104 for (std::size_t i = 0; i < rlen; ++i)
144 {
145 79 char c0 = lhs[i];
146 79 char c1 = rhs[i];
147
2/2
✓ Branch 0 taken 13 times.
✓ Branch 1 taken 66 times.
79 if (c0 < c1)
148 13 return -1;
149
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 58 times.
66 if (c1 < c0)
150 8 return 1;
151 }
152
2/2
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 21 times.
25 if ( lhs.size() == rhs.size() )
153 4 return 0;
154
2/2
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 13 times.
21 if ( lhs.size() < rhs.size() )
155 8 return -1;
156 13 return 1;
157 }
158
159 int
160 196 ci_compare(
161 core::string_view lhs,
162 core::string_view rhs) noexcept
163 {
164 196 auto rlen = (std::min)(lhs.size(), rhs.size());
165
2/2
✓ Branch 0 taken 800 times.
✓ Branch 1 taken 189 times.
989 for (std::size_t i = 0; i < rlen; ++i)
166 {
167 800 char c0 = grammar::to_lower(lhs[i]);
168 800 char c1 = grammar::to_lower(rhs[i]);
169
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 794 times.
800 if (c0 < c1)
170 6 return -1;
171
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 793 times.
794 if (c1 < c0)
172 1 return 1;
173 }
174
2/2
✓ Branch 2 taken 182 times.
✓ Branch 3 taken 7 times.
189 if ( lhs.size() == rhs.size() )
175 182 return 0;
176
2/2
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 1 times.
7 if ( lhs.size() < rhs.size() )
177 6 return -1;
178 1 return 1;
179 }
180
181 void
182 304 ci_digest(
183 core::string_view s,
184 fnv_1a& hasher) noexcept
185 {
186
2/2
✓ Branch 2 taken 730 times.
✓ Branch 3 taken 304 times.
1034 for (char c: s)
187 {
188 730 c = grammar::to_lower(c);
189 730 hasher.put(c);
190 }
191 304 }
192
193 /* Check if a string ends with the specified suffix (decoded comparison)
194
195 This function determines if a string ends with the specified suffix
196 when the string and suffix are compared after percent-decoding.
197
198 @param str The string to check (percent-encoded)
199 @param suffix The suffix to check for (percent-decoded)
200 @return The number of encoded chars consumed in the string
201 */
202 std::size_t
203 2136 path_ends_with(
204 core::string_view str,
205 core::string_view suffix) noexcept
206 {
207
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 2136 times.
2136 BOOST_ASSERT(!str.empty());
208
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 2136 times.
2136 BOOST_ASSERT(!suffix.empty());
209
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 2136 times.
2136 BOOST_ASSERT(!suffix.contains("%2F"));
210
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 2136 times.
2136 BOOST_ASSERT(!suffix.contains("%2f"));
211 5848 auto consume_last = [](
212 core::string_view::iterator& it,
213 core::string_view::iterator& end,
214 char& c)
215 {
216
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5848 times.
5848 BOOST_ASSERT(end > it);
217
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5848 times.
5848 BOOST_ASSERT(it != end);
218
4/4
✓ Branch 0 taken 3960 times.
✓ Branch 1 taken 1888 times.
✓ Branch 2 taken 5800 times.
✓ Branch 3 taken 48 times.
9808 if ((end - it) < 3 ||
219
2/2
✓ Branch 1 taken 3912 times.
✓ Branch 2 taken 48 times.
3960 *(std::prev(end, 3)) != '%')
220 {
221 5800 c = *--end;
222 5800 return false;
223 }
224
1/2
✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
48 detail::decode_unsafe(
225 &c,
226 &c + 1,
227 core::string_view(std::prev(
228 end, 3), 3));
229 48 end -= 3;
230 48 return true;
231 };
232
233 2136 auto it0 = str.begin();
234 2136 auto end0 = str.end();
235 2136 auto it1 = suffix.begin();
236 2136 auto end1 = suffix.end();
237 2136 char c0 = 0;
238 2136 char c1 = 0;
239 1112 while(
240
2/2
✓ Branch 0 taken 3006 times.
✓ Branch 1 taken 242 times.
3248 it0 < end0 &&
241
2/2
✓ Branch 0 taken 2932 times.
✓ Branch 1 taken 74 times.
3006 it1 < end1)
242 {
243 2932 bool const is_encoded = consume_last(it0, end0, c0);
244 // The suffix never contains an encoded slash (%2F), and a decoded
245 // slash is not equivalent to an encoded slash
246
4/4
✓ Branch 0 taken 48 times.
✓ Branch 1 taken 2884 times.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 32 times.
2932 if (is_encoded && c0 == '/')
247 16 return 0;
248 2916 consume_last(it1, end1, c1);
249
2/2
✓ Branch 0 taken 1804 times.
✓ Branch 1 taken 1112 times.
2916 if (c0 != c1)
250 1804 return 0;
251 }
252 316 bool const consumed_suffix = it1 == end1;
253
2/2
✓ Branch 0 taken 110 times.
✓ Branch 1 taken 206 times.
316 if (consumed_suffix)
254 {
255 110 std::size_t const consumed_encoded = str.end() - end0;
256 110 return consumed_encoded;
257 }
258 206 return 0;
259 }
260
261 std::size_t
262 832 remove_dot_segments(
263 char* dest0,
264 char const* end,
265 core::string_view input) noexcept
266 {
267 // 1. The input buffer `s` is initialized with
268 // the now-appended path components and the
269 // output buffer `dest0` is initialized to
270 // the empty string.
271 832 char* dest = dest0;
272 832 bool const is_absolute = input.starts_with('/');
273
274 // Step 2 is a loop through 5 production rules:
275 // https://www.rfc-editor.org/rfc/rfc3986#section-5.2.4
276 //
277 // There are no transitions between all rules,
278 // which enables some optimizations.
279 //
280 // Initial:
281 // - Rule A: handle initial dots
282 // If the input buffer begins with a
283 // prefix of "../" or "./", then remove
284 // that prefix from the input buffer.
285 // Rule A can only happen at the beginning.
286 // Errata 4547: Keep "../" in the beginning
287 // https://www.rfc-editor.org/errata/eid4547
288 //
289 // Then:
290 // - Rule D: ignore a final ".." or "."
291 // if the input buffer consists only of "."
292 // or "..", then remove that from the input
293 // buffer.
294 // Rule D can only happen after Rule A because:
295 // - B and C write "/" to the input
296 // - E writes "/" to input or returns
297 //
298 // Then:
299 // - Rule B: ignore ".": write "/" to the input
300 // - Rule C: apply "..": remove seg and write "/"
301 // - Rule E: copy complete segment
302 auto append =
303 1492 [](char*& first, char const* last, core::string_view in)
304 {
305 // append `in` to `dest`
306
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 1492 times.
1492 BOOST_ASSERT(in.size() <= std::size_t(last - first));
307 1492 std::memmove(first, in.data(), in.size());
308 1492 first += in.size();
309 ignore_unused(last);
310 1492 };
311
312 9563 auto dot_starts_with = [](
313 core::string_view str, core::string_view dots, std::size_t& n)
314 {
315 // starts_with for encoded/decoded dots
316 // or decoded otherwise. return how many
317 // chars in str match the dots
318 9563 n = 0;
319
2/2
✓ Branch 2 taken 16368 times.
✓ Branch 3 taken 550 times.
16918 for (char c: dots)
320 {
321
2/2
✓ Branch 1 taken 7355 times.
✓ Branch 2 taken 9013 times.
16368 if (str.starts_with(c))
322 {
323 7355 str.remove_prefix(1);
324 7355 ++n;
325 7355 continue;
326 }
327
328 // In the general case, we would need to
329 // check if the next char is an encoded
330 // dot.
331 // However, an encoded dot in `str`
332 // would have already been decoded in
333 // url_base::normalize_path().
334 // This needs to be undone if
335 // `remove_dot_segments` is used in a
336 // different context.
337 // if (str.size() > 2 &&
338 // c == '.'
339 // &&
340 // str[0] == '%' &&
341 // str[1] == '2' &&
342 // (str[2] == 'e' ||
343 // str[2] == 'E'))
344 // {
345 // str.remove_prefix(3);
346 // n += 3;
347 // continue;
348 // }
349
350 9013 n = 0;
351 9013 return false;
352 }
353 550 return true;
354 };
355
356 4777 auto dot_equal = [&dot_starts_with](
357 4777 core::string_view str, core::string_view dots)
358 {
359 4777 std::size_t n = 0;
360 4777 dot_starts_with(str, dots, n);
361 4777 return n == str.size();
362 832 };
363
364 // Rule A
365 std::size_t n;
366
2/2
✓ Branch 1 taken 767 times.
✓ Branch 2 taken 81 times.
848 while (!input.empty())
367 {
368
2/2
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 763 times.
767 if (dot_starts_with(input, "../", n))
369 {
370 // Errata 4547
371 4 append(dest, end, "../");
372 4 input.remove_prefix(n);
373 4 continue;
374 }
375
2/2
✓ Branch 2 taken 751 times.
✓ Branch 3 taken 12 times.
763 else if (!dot_starts_with(input, "./", n))
376 {
377 751 break;
378 }
379 12 input.remove_prefix(n);
380 }
381
382 // Rule D
383
2/2
✓ Branch 2 taken 82 times.
✓ Branch 3 taken 750 times.
832 if( dot_equal(input, "."))
384 {
385 82 input = {};
386 }
387
2/2
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 747 times.
750 else if( dot_equal(input, "..") )
388 {
389 // Errata 4547
390 3 append(dest, end, "..");
391 3 input = {};
392 }
393
394 // 2. While the input buffer is not empty,
395 // loop as follows:
396
2/2
✓ Branch 1 taken 1648 times.
✓ Branch 2 taken 793 times.
2441 while (!input.empty())
397 {
398 // Rule B
399 1648 bool const is_dot_seg = dot_starts_with(input, "/./", n);
400
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 1616 times.
1648 if (is_dot_seg)
401 {
402 32 input.remove_prefix(n - 1);
403 32 continue;
404 }
405
406 1616 bool const is_final_dot_seg = dot_equal(input, "/.");
407
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1608 times.
1616 if (is_final_dot_seg)
408 {
409 // We can't remove "." from a core::string_view
410 // So what we do here is equivalent to
411 // replacing s with '/' as required
412 // in Rule B and executing the next
413 // iteration, which would append this
414 // '/' to the output, as required by
415 // Rule E
416 8 append(dest, end, input.substr(0, 1));
417 8 input = {};
418 8 break;
419 }
420
421 // Rule C
422 1608 bool const is_dotdot_seg = dot_starts_with(input, "/../", n);
423
2/2
✓ Branch 0 taken 193 times.
✓ Branch 1 taken 1415 times.
1608 if (is_dotdot_seg)
424 {
425 193 core::string_view cur_out(dest0, dest - dest0);
426 193 std::size_t p = cur_out.find_last_of('/');
427 193 bool const has_multiple_segs = p != core::string_view::npos;
428
2/2
✓ Branch 0 taken 132 times.
✓ Branch 1 taken 61 times.
193 if (has_multiple_segs)
429 {
430 // output has multiple segments
431 // "erase" [p, end] if not "/.."
432 132 core::string_view last_seg(dest0 + p, dest - (dest0 + p));
433 132 bool const prev_is_dotdot_seg = dot_equal(last_seg, "/..");
434
2/2
✓ Branch 0 taken 121 times.
✓ Branch 1 taken 11 times.
132 if (!prev_is_dotdot_seg)
435 {
436 121 dest = dest0 + p;
437 }
438 else
439 {
440 11 append(dest, end, "/..");
441 }
442 }
443
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 50 times.
61 else if (dest0 != dest)
444 {
445 // Only one segment in the output: remove it
446 11 core::string_view last_seg(dest0, dest - dest0);
447 11 bool const prev_is_dotdot_seg = dot_equal(last_seg, "..");
448
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 2 times.
11 if (!prev_is_dotdot_seg)
449 {
450 9 dest = dest0;
451
1/2
✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
9 if (!is_absolute)
452 {
453 9 input.remove_prefix(1);
454 }
455 }
456 else
457 {
458 2 append(dest, end, "/..");
459 }
460 }
461 else
462 {
463 // Output is empty
464
1/2
✓ Branch 0 taken 50 times.
✗ Branch 1 not taken.
50 if (is_absolute)
465 {
466 50 append(dest, end, "/..");
467 }
468 else
469 {
470 // AFREITAS: Although we have no formal proof
471 // for that, the output can't be relative
472 // and empty at this point because relative
473 // paths will fall in the `dest0 != dest`
474 // case above of this rule C and then the
475 // general case of rule E for "..".
476 append(dest, end, "..");
477 }
478 }
479 193 input.remove_prefix(n - 1);
480 193 continue;
481 }
482
483 1415 bool const is_final_dotdot_seg = dot_equal(input, "/..");
484
2/2
✓ Branch 0 taken 31 times.
✓ Branch 1 taken 1384 times.
1415 if (is_final_dotdot_seg)
485 {
486 31 core::string_view cur_out(dest0, dest - dest0);
487 31 std::size_t p = cur_out.find_last_of('/');
488 31 bool const has_multiple_segs = p != core::string_view::npos;
489
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 13 times.
31 if (has_multiple_segs)
490 {
491 // output has multiple segments
492 // "erase" [p, end] if not "/.."
493 18 core::string_view last_seg(dest0 + p, dest - (dest0 + p));
494 18 bool const prev_is_dotdot_seg = dot_equal(last_seg, "/..");
495
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 4 times.
18 if (!prev_is_dotdot_seg)
496 {
497 14 dest = dest0 + p;
498 14 append(dest, end, "/");
499 }
500 else
501 {
502 4 append(dest, end, "/..");
503 }
504 }
505
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 10 times.
13 else if (dest0 != dest)
506 {
507 // Only one segment in the output: remove it
508 3 core::string_view last_seg(dest0, dest - dest0);
509 3 bool const prev_is_dotdot_seg = dot_equal(last_seg, "..");
510
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
3 if (!prev_is_dotdot_seg) {
511 1 dest = dest0;
512 }
513 else
514 {
515 2 append(dest, end, "/..");
516 }
517 }
518 else
519 {
520 // Output is empty: append dotdot
521
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
10 if (is_absolute)
522 {
523 10 append(dest, end, "/..");
524 }
525 else
526 {
527 // AFREITAS: Although we have no formal proof
528 // for that, the output can't be relative
529 // and empty at this point because relative
530 // paths will fall in the `dest0 != dest`
531 // case above of this rule C and then the
532 // general case of rule E for "..".
533 append(dest, end, "..");
534 }
535 }
536 31 input = {};
537 31 break;
538 }
539
540 // Rule E
541 1384 std::size_t p = input.find_first_of('/', 1);
542
2/2
✓ Branch 0 taken 676 times.
✓ Branch 1 taken 708 times.
1384 if (p != core::string_view::npos)
543 {
544 676 append(dest, end, input.substr(0, p));
545 676 input.remove_prefix(p);
546 }
547 else
548 {
549 708 append(dest, end, input);
550 708 input = {};
551 }
552 }
553
554 // 3. Finally, the output buffer is set
555 // as the result of remove_dot_segments,
556 // and we return its size
557 832 return dest - dest0;
558 }
559
560 char
561 1154 path_pop_back( core::string_view& s )
562 {
563
4/4
✓ Branch 1 taken 522 times.
✓ Branch 2 taken 632 times.
✓ Branch 3 taken 1102 times.
✓ Branch 4 taken 52 times.
1676 if (s.size() < 3 ||
564
3/4
✓ Branch 2 taken 522 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 470 times.
✓ Branch 5 taken 52 times.
522 *std::prev(s.end(), 3) != '%')
565 {
566 1102 char c = s.back();
567 1102 s.remove_suffix(1);
568 1102 return c;
569 }
570 52 char c = 0;
571
1/2
✓ Branch 2 taken 52 times.
✗ Branch 3 not taken.
104 detail::decode_unsafe(
572 104 &c, &c + 1, s.substr(s.size() - 3));
573
2/2
✓ Branch 0 taken 44 times.
✓ Branch 1 taken 8 times.
52 if (c != '/')
574 {
575 44 s.remove_suffix(3);
576 44 return c;
577 }
578 8 c = s.back();
579 8 s.remove_suffix(1);
580 8 return c;
581 };
582
583 void
584 538 pop_last_segment(
585 core::string_view& str,
586 core::string_view& seg,
587 std::size_t& level,
588 bool remove_unmatched) noexcept
589 {
590 538 seg = {};
591 538 std::size_t n = 0;
592
2/2
✓ Branch 1 taken 558 times.
✓ Branch 2 taken 142 times.
700 while (!str.empty())
593 {
594 // B. if the input buffer begins with a
595 // prefix of "/./" or "/.", where "." is
596 // a complete path segment, then replace
597 // that prefix with "/" in the input
598 // buffer; otherwise,
599 558 n = detail::path_ends_with(str, "/./");
600
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 548 times.
558 if (n)
601 {
602 10 seg = str.substr(str.size() - n);
603 10 str.remove_suffix(n);
604 10 continue;
605 }
606 548 n = detail::path_ends_with(str, "/.");
607
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 536 times.
548 if (n)
608 {
609 12 seg = str.substr(str.size() - n, 1);
610 12 str.remove_suffix(n);
611 12 continue;
612 }
613
614 // C. if the input buffer begins with a
615 // prefix of "/../" or "/..", where ".."
616 // is a complete path segment, then
617 // replace that prefix with "/" in the
618 // input buffer and remove the last
619 // segment and its preceding "/"
620 // (if any) from the output buffer
621 // otherwise,
622 536 n = detail::path_ends_with(str, "/../");
623
2/2
✓ Branch 0 taken 42 times.
✓ Branch 1 taken 494 times.
536 if (n)
624 {
625 42 seg = str.substr(str.size() - n);
626 42 str.remove_suffix(n);
627 42 ++level;
628 42 continue;
629 }
630 494 n = detail::path_ends_with(str, "/..");
631
2/2
✓ Branch 0 taken 46 times.
✓ Branch 1 taken 448 times.
494 if (n)
632 {
633 46 seg = str.substr(str.size() - n);
634 46 str.remove_suffix(n);
635 46 ++level;
636 46 continue;
637 }
638
639 // E. move the first path segment in the
640 // input buffer to the end of the output
641 // buffer, including the initial "/"
642 // character (if any) and any subsequent
643 // characters up to, but not including,
644 // the next "/" character or the end of
645 // the input buffer.
646 448 std::size_t p = str.size() > 1
647
2/2
✓ Branch 0 taken 374 times.
✓ Branch 1 taken 74 times.
448 ? str.find_last_of('/', str.size() - 2)
648 448 : core::string_view::npos;
649
2/2
✓ Branch 0 taken 276 times.
✓ Branch 1 taken 172 times.
448 if (p != core::string_view::npos)
650 {
651 276 seg = str.substr(p + 1);
652 276 str.remove_suffix(seg.size());
653 }
654 else
655 {
656 172 seg = str;
657 172 str = {};
658 }
659
660
2/2
✓ Branch 0 taken 396 times.
✓ Branch 1 taken 52 times.
448 if (level == 0)
661 396 return;
662
2/2
✓ Branch 1 taken 42 times.
✓ Branch 2 taken 10 times.
52 if (!str.empty())
663 42 --level;
664 }
665 // we still need to skip n_skip + 1
666 // but the string is empty
667
4/4
✓ Branch 0 taken 42 times.
✓ Branch 1 taken 100 times.
✓ Branch 2 taken 34 times.
✓ Branch 3 taken 8 times.
142 if (remove_unmatched && level)
668 {
669 34 seg = "/";
670 34 level = 0;
671 34 return;
672 }
673
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 104 times.
108 else if (level)
674 {
675
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 if (!seg.empty())
676 {
677 4 seg = "/../";
678 }
679 else
680 {
681 // AFREITAS: this condition
682 // is correct, but it might
683 // unreachable.
684 seg = "/..";
685 }
686 4 --level;
687 4 return;
688 }
689 104 seg = {};
690 }
691
692 void
693 304 normalized_path_digest(
694 core::string_view str,
695 bool remove_unmatched,
696 fnv_1a& hasher) noexcept
697 {
698 304 core::string_view seg;
699 304 std::size_t level = 0;
700 234 do
701 {
702 538 pop_last_segment(
703 str, seg, level, remove_unmatched);
704
2/2
✓ Branch 1 taken 1154 times.
✓ Branch 2 taken 538 times.
1692 while (!seg.empty())
705 {
706 1154 char c = path_pop_back(seg);
707 1154 hasher.put(c);
708 }
709 }
710
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 304 times.
538 while (!str.empty());
711 304 }
712
713 // compare segments as if there were a normalized
714 int
715 173 segments_compare(
716 segments_encoded_view seg0,
717 segments_encoded_view seg1) noexcept
718 {
719 // calculate path size as if it were normalized
720 auto normalized_size =
721 346 [](segments_encoded_view seg) -> std::size_t
722 {
723
2/2
✓ Branch 1 taken 108 times.
✓ Branch 2 taken 238 times.
346 if (seg.empty())
724 108 return seg.is_absolute();
725
726 238 std::size_t n = 0;
727 238 std::size_t skip = 0;
728 238 auto begin = seg.begin();
729 238 auto it = seg.end();
730
2/2
✓ Branch 1 taken 662 times.
✓ Branch 2 taken 238 times.
900 while (it != begin)
731 {
732 662 --it;
733 662 decode_view dseg = **it;
734
2/2
✓ Branch 1 taken 167 times.
✓ Branch 2 taken 495 times.
662 if (dseg == "..")
735 167 ++skip;
736
2/2
✓ Branch 1 taken 457 times.
✓ Branch 2 taken 38 times.
495 else if (dseg != ".")
737 {
738
2/2
✓ Branch 0 taken 85 times.
✓ Branch 1 taken 372 times.
457 if (skip)
739 85 --skip;
740 else
741 372 n += dseg.size() + 1;
742 }
743 }
744 238 n += skip * 3;
745 238 n -= !seg.is_absolute();
746 238 return n;
747 };
748
749 // find the normalized size for the comparison
750 173 std::size_t n0 = normalized_size(seg0);
751 173 std::size_t n1 = normalized_size(seg1);
752 173 std::size_t n00 = n0;
753 173 std::size_t n10 = n1;
754
755 // consume the last char from a segment range
756 auto consume_last =
757 1640 [](
758 std::size_t& n,
759 decode_view& dseg,
760 segments_encoded_view::iterator& begin,
761 segments_encoded_view::iterator& it,
762 decode_view::iterator& cit,
763 std::size_t& skip,
764 bool& at_slash) -> char
765 {
766
2/2
✓ Branch 2 taken 1009 times.
✓ Branch 3 taken 631 times.
1640 if (cit != dseg.begin())
767 {
768 // return last char from current segment
769 1009 at_slash = false;
770 1009 --cit;
771 1009 --n;
772 1009 return *cit;
773 }
774
775
2/2
✓ Branch 0 taken 371 times.
✓ Branch 1 taken 260 times.
631 if (!at_slash)
776 {
777 // current segment dseg is over and
778 // previous char was not a slash
779 // so we output one
780 371 at_slash = true;
781 371 --n;
782 371 return '/';
783 }
784
785 // current segment dseg is over and
786 // last char was already the slash
787 // between segments, so take the
788 // next final segment to consume
789 260 at_slash = false;
790
1/2
✓ Branch 2 taken 498 times.
✗ Branch 3 not taken.
498 while (cit == dseg.begin())
791 {
792 // take next segment
793
2/2
✓ Branch 1 taken 376 times.
✓ Branch 2 taken 122 times.
498 if (it != begin)
794 376 --it;
795 else
796 122 break;
797
2/2
✓ Branch 3 taken 140 times.
✓ Branch 4 taken 236 times.
376 if (**it == "..")
798 {
799 // skip next if this is ".."
800 140 ++skip;
801 }
802
2/2
✓ Branch 3 taken 208 times.
✓ Branch 4 taken 28 times.
236 else if (**it != ".")
803 {
804
2/2
✓ Branch 0 taken 70 times.
✓ Branch 1 taken 138 times.
208 if (skip)
805 {
806 // discount skips
807 70 --skip;
808 }
809 else
810 {
811 // or update current seg
812 138 dseg = **it;
813 138 cit = dseg.end();
814 138 break;
815 }
816 }
817 }
818 // consume from the new current
819 // segment
820 260 --n;
821
2/2
✓ Branch 2 taken 123 times.
✓ Branch 3 taken 137 times.
260 if (cit != dseg.begin())
822 {
823 // in the general case, we consume
824 // one more character from the end
825 123 --cit;
826 123 return *cit;
827 }
828
829 // nothing left to consume in the
830 // current and new segment
831
2/2
✓ Branch 1 taken 128 times.
✓ Branch 2 taken 9 times.
137 if (it == begin)
832 {
833 // if this is the first
834 // segment, the segments are
835 // over and there can only
836 // be repetitions of "../" to
837 // output
838 128 return "/.."[n % 3];
839 }
840 // at other segments, we need
841 // a slash to transition to the
842 // next segment
843 9 at_slash = true;
844 9 return '/';
845 };
846
847 // consume final segments from seg0 that
848 // should not influence the comparison
849 173 auto begin0 = seg0.begin();
850 173 auto it0 = seg0.end();
851 173 decode_view dseg0;
852
2/2
✓ Branch 2 taken 119 times.
✓ Branch 3 taken 54 times.
173 if (it0 != seg0.begin())
853 {
854 119 --it0;
855 119 dseg0 = **it0;
856 }
857 173 decode_view::iterator cit0 = dseg0.end();
858 173 std::size_t skip0 = 0;
859 173 bool at_slash0 = true;
860
2/2
✓ Branch 0 taken 134 times.
✓ Branch 1 taken 173 times.
307 while (n0 > n1)
861 {
862 134 consume_last(n0, dseg0, begin0, it0, cit0, skip0, at_slash0);
863 }
864
865 // consume final segments from seg1 that
866 // should not influence the comparison
867 173 auto begin1 = seg1.begin();
868 173 auto it1 = seg1.end();
869 173 decode_view dseg1;
870
2/2
✓ Branch 2 taken 119 times.
✓ Branch 3 taken 54 times.
173 if (it1 != seg1.begin())
871 {
872 119 --it1;
873 119 dseg1 = **it1;
874 }
875 173 decode_view::iterator cit1 = dseg1.end();
876 173 std::size_t skip1 = 0;
877 173 bool at_slash1 = true;
878
2/2
✓ Branch 0 taken 34 times.
✓ Branch 1 taken 173 times.
207 while (n1 > n0)
879 {
880 34 consume_last(n1, dseg1, begin1, it1, cit1, skip1, at_slash1);
881 }
882
883 173 int cmp = 0;
884
2/2
✓ Branch 0 taken 736 times.
✓ Branch 1 taken 173 times.
909 while (n0)
885 {
886 736 char c0 = consume_last(
887 n0, dseg0, begin0, it0, cit0, skip0, at_slash0);
888 736 char c1 = consume_last(
889 n1, dseg1, begin1, it1, cit1, skip1, at_slash1);
890
2/2
✓ Branch 0 taken 36 times.
✓ Branch 1 taken 700 times.
736 if (c0 < c1)
891 36 cmp = -1;
892
2/2
✓ Branch 0 taken 41 times.
✓ Branch 1 taken 659 times.
700 else if (c1 < c0)
893 41 cmp = +1;
894 }
895
896
2/2
✓ Branch 0 taken 41 times.
✓ Branch 1 taken 132 times.
173 if (cmp != 0)
897 41 return cmp;
898
2/2
✓ Branch 0 taken 130 times.
✓ Branch 1 taken 2 times.
132 if ( n00 == n10 )
899 130 return 0;
900
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 if ( n00 < n10 )
901 1 return -1;
902 1 return 1;
903 }
904
905 } // detail
906 } // urls
907 } // boost
908
909
910