header_utils
Loading...
Searching...
No Matches
uri_impl.h
1
4
5namespace ghassanpl
6{
7 namespace detail
8 {
9 using namespace string_ops;
10 using namespace parsing;
11
12 template <typename T>
13 void eat(uri_view& uri, T&& c, uri_error_code err)
14 {
15 if (!consume(uri, std::forward<T>(c)))
16 throw err;
17 }
18
19 static constexpr auto isscheme(char c) { return ascii::isalnum(c) || c == '+' || c == '-' || c == '.'; };
20 static constexpr auto isunreserved(char c) { return ascii::isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~'; };
21 static constexpr auto isgendelims(char c) { return isany(c, ":/?#[]@"); };
22 static constexpr auto issubdelims(char c) { return isany(c, "!$&'()*+,;="); };
23 static constexpr auto isreserved(char c) { return isany(c, ":/?#[]@!$&'()*+,;="); };
24
25 template <typename T>
26 std::string condlower(T&& s, enum_flags<uri_decompose_flags> const flags)
27 {
28 return flags.contain(uri_decompose_flags::lowercase_when_appropriate) ? ascii::tolower(std::move(s)) : std::move(s);
29 }
30
31 static std::string parse_scheme(uri_view& uri, enum_flags<uri_decompose_flags> const flags)
32 {
33 const auto start = uri;
34 eat(uri, ascii::isalpha, uri_error_code::scheme_malformed);
35 trim_while(uri, isscheme);
36
37 return condlower(make_string(start.begin(), uri.begin()), flags);
38 }
39
40 static std::string parse_authority(uri_view& uri, enum_flags<uri_decompose_flags> const flags)
41 {
42 return std::string(consume_until(uri, [](char c) { return c == '/' || c == '?' || c == '#'; }));
43 }
44
45 static char parse_pct(std::string_view& str)
46 {
47 uint8_t value = 0;
48 if (auto h1 = consume(str, ascii::isxdigit))
49 value += ascii::xdigit_to_number(h1) * 16;
50 else
51 throw uri_error_code::invalid_percent_encoding;
52 if (auto h2 = consume(str, ascii::isxdigit))
54 else
55 throw uri_error_code::invalid_percent_encoding;
56 return (char)value;
57 }
58
59 template <typename PRED>
60 static std::string consume_with_pct(std::string_view& str, PRED&& pred, std::string_view prefix = {})
61 {
62 std::string result{ prefix };
63 do
64 {
65 result += consume_while(str, pred);
66 if (consume(str, '%'))
67 result += parse_pct(str);
68 else
69 break;
70 } while (true);
71 return result;
72 }
73
74 static std::string try_parse_ipv4(std::string_view& str, enum_flags<uri_decompose_flags> const flags)
75 {
76 auto start = str;
77 auto [_, n1] = consume_c_unsigned(str);
78 if (n1 < 0 || n1 > 255) return {};
79 if (!consume(str, '.')) return {};
80 auto [__, n2] = consume_c_unsigned(str);
81 if (n2 < 0 || n2 > 255) return {};
82 if (!consume(str, '.')) return {};
83 auto [___, n3] = consume_c_unsigned(str);
84 if (n3 < 0 || n3 > 255) return {};
85 if (!consume(str, '.')) return {};
86 auto [____, n4] = consume_c_unsigned(str);
87 if (n4 < 0 || n4 > 255) return {};
88
89 return make_string(start.begin(), str.begin());
90 }
91
92 static std::string parse_host(std::string_view& authority, enum_flags<uri_decompose_flags> const flags)
93 {
94 if (consume(authority, '['))
95 {
100
101 auto result = consume_until(authority, ']');
102 if (result.empty() || !consume(authority, ']'))
103 throw uri_error_code::host_malformed;
104 return std::string{ result };
105 }
106 else
107 {
108 auto start = authority;
109 auto ipv4 = try_parse_ipv4(authority, flags);
110 if (!ipv4.empty())
111 return ipv4;
112
114 authority = start;
115
117 return condlower(consume_with_pct(authority, [](char c) { return isunreserved(c) || c == '-' || c == '.'; }), flags);
118 }
119 }
120
121 static std::tuple<std::string, std::string, std::string> parse_authority_elements(std::string_view authority, enum_flags<uri_decompose_flags> const flags)
122 {
123 std::tuple<std::string, std::string, std::string> result;
124
125 if (authority.contains('@'))
126 {
127 std::get<0>(result) = consume_with_pct(authority, [](char c) { return isunreserved(c) || issubdelims(c) || c == ':'; });
128 eat(authority, '@', uri_error_code::authority_malformed);
129 std::get<1>(result) = parse_host(authority, flags);
130 }
131 else
132 {
133 std::get<1>(result) = parse_host(authority, flags);
134 }
135
136 if (consume(authority, ':'))
137 {
138 std::get<2>(result) = std::string(consume_while(authority, ascii::isdigit));
139 }
140
141 return result;
142 }
143
144 static constexpr auto ispchar(char c) { return isunreserved(c) || issubdelims(c) || c == ':' || c == '@'; }
145 static constexpr auto ispcharnc(char c) { return isunreserved(c) || issubdelims(c) || c == '@'; }
146
147 static std::string parse_segment(uri_view& uri, enum_flags<uri_decompose_flags> const flags)
148 {
149 return consume_with_pct(uri, ispchar);
150 }
151
152 static std::string parse_segment_nonzero(uri_view& uri, enum_flags<uri_decompose_flags> const flags)
153 {
154 auto result = consume_with_pct(uri, ispchar);
155 if (result.empty())
156 throw uri_error_code::path_element_malformed;
157 return result;
158 }
159
160 static std::string parse_segment_nonzero_noncolon(uri_view& uri, enum_flags<uri_decompose_flags> const flags)
161 {
162 auto result = consume_with_pct(uri, ispcharnc);
163 if (result.empty())
164 throw uri_error_code::path_element_malformed;
165 return result;
166 }
167
168 static std::tuple<std::string, std::vector<std::string>> parse_path(bool with_authority, uri_view& uri, enum_flags<uri_decompose_flags> const flags)
169 {
171 auto path = consume_until(uri, [](char c) { return c == '?' || c == '#'; });
172
173 if (with_authority && !path.empty() && path[0] != '/')
174 throw uri_error_code::path_malformed;
175
176 std::vector<std::string> elements;
177
178 if (!path.empty())
179 {
180 uri_view pv = path;
181
182 trim(pv, '/');
183
184 if (!pv.empty())
185 {
186 do
187 {
188 if (flags.contain(uri_decompose_flags::split_path_elements)) elements.push_back(parse_segment(pv, flags));
189 } while (consume(pv, '/'));
190
191 if (!pv.empty())
192 throw uri_error_code::path_malformed;
193 }
194 }
195
196 return { std::string{ path }, std::move(elements) };
197 }
198
199 static constexpr auto isqorf(char c) { return ispchar(c) || c == '/' || c == '?'; }
200
201 template <char QUERY_DELIMITER = '&', char KEY_DELIMITER = '='>
202 void split_query_elements(std::string_view path, std::vector<std::pair<std::string, std::string>>& elements)
203 {
204 split(path, QUERY_DELIMITER, [&elements](std::string_view el, bool last) {
205 auto k = consume_until(el, KEY_DELIMITER); std::ignore = consume(el);
206 elements.push_back({ std::string{k}, std::string{el} });
207 });
208 }
209
210 static std::tuple<std::string, std::vector<std::pair<std::string, std::string>>> parse_query(uri_view& uri, enum_flags<uri_decompose_flags> const flags)
211 {
212 std::tuple<std::string, std::vector<std::pair<std::string, std::string>>> result;
213
214 auto path = consume_with_pct(uri, isqorf, "?");
215 if (!uri.empty() && uri[0] != '#')
216 throw uri_error_code::query_malformed;
217
218 std::get<0>(result) = path;
219 if (flags.contain(uri_decompose_flags::split_query_elements))
220 split_query_elements(std::string_view{ path }.substr(1), std::get<1>(result));
221
222 return result;
223 }
224
225 static std::string parse_fragment(uri_view& uri, enum_flags<uri_decompose_flags> const flags)
226 {
227 auto fragment = consume_with_pct(uri, isqorf, "#");
228 if (!uri.empty())
229 throw uri_error_code::query_malformed;
230 return fragment;
231 }
232
233 static std::string deduce_port_from_scheme(std::string_view scheme)
234 {
235 auto known = query_uri_scheme(scheme);
236 if (known)
237 return std::string{ known->default_port() };
238 return {};
239 }
240 }
241
242
243 uri_expected<decomposed_uri> decompose_uri(uri_view uri, enum_flags<uri_decompose_flags> const flags)
244 {
245 try
246 {
247 decomposed_uri result{};
248
249 result.scheme = detail::parse_scheme(uri, flags);
250
251 if (!string_ops::consume(uri, ':'))
252 return unexpected(uri_error_code::scheme_malformed);
253
254 if (string_ops::consume(uri, "//"))
255 {
256 result.authority = detail::parse_authority(uri, flags);
257
258 auto [user_info, host, port] = detail::parse_authority_elements(result.authority, flags);
259 if (port.empty() && flags.contain(uri_decompose_flags::use_well_known_port_numbers))
260 port = detail::deduce_port_from_scheme(result.scheme);
261 result.user_info = std::move(user_info);
262 result.host = std::move(host);
263 result.port = std::move(port);
264 }
265
266 auto [path, elements] = detail::parse_path(!result.authority.empty(), uri, flags);
267 result.path = std::move(path);
268 result.path_elements = std::move(elements);
269 if (flags.contain(uri_decompose_flags::normalize_path))
270 result.path_elements = result.normalized_path();
271
272 if (string_ops::consume(uri, '?'))
273 {
274 auto [query, elements] = detail::parse_query(uri, flags);
275 result.query = std::move(query);
276 result.query_elements = std::move(elements);
277 }
278
279 if (string_ops::consume(uri, '#'))
280 result.fragment = detail::parse_fragment(uri, flags);
281
282 if (flags.contains_all_of(uri_decompose_flags::lowercase_when_appropriate, uri_decompose_flags::normalize_path))
283 result.canonical_form = true;
284
285 return result;
286 }
287 catch (uri_error_code code)
288 {
289 return unexpected(code);
290 }
291 }
292
293 uri_expected<uri> compose_uri(decomposed_uri const& decomposed, enum_flags<uri_decompose_flags> const flags)
294 {
295 return {};
296 }
297
298 bool decomposed_uri::operator==(decomposed_uri const& other) const noexcept
299 {
300 return
301 string_ops::ascii::strings_equal_ignore_case(scheme, other.scheme) &&
302 user_info == other.user_info &&
303 string_ops::ascii::strings_equal_ignore_case(host, other.host) &&
304 string_ops::trimmed(port, '0') == string_ops::trimmed(other.port, '0') &&
305 normalized_path() == other.normalized_path() &&
306 query == other.query &&
307 fragment == other.fragment;
308 }
309
311 {
312 std::vector<std::string> result;
313 for (auto& el : path_elements)
314 {
315 if (el == ".")
316 continue;
317 else if (el == ".." && !result.empty())
318 result.pop_back();
319 else
320 result.push_back(el);
321 }
322 return result;
323 }
324
325 namespace known_schemes
326 {
327
328 struct url_schemes : known_uri_scheme
329 {
331 virtual uri_error validate_host(std::string_view element) const noexcept override {
332 if (element.empty())
333 return unexpected(uri_error_code::host_required_in_scheme);
334 return {};
335 }
336 };
337
338 struct http_schemes : url_schemes
339 {
340 virtual uri_error validate_user_info(std::string_view element) const noexcept override { return {}; }
341 virtual uri_error validate_path(std::string_view element) const noexcept override {
342 if (!element.empty() || element.starts_with("/"))
343 return unexpected(uri_error_code::path_malformed);
344 return {};
345 }
346
347 virtual std::string normalize_port(std::string_view element) const noexcept override { if (string_ops::trimmed(element, '0') == default_port()) return {}; return std::string{ element }; }
348 virtual std::string normalize_path(std::string_view element) const noexcept { if (element.empty()) return "/"; return std::string{ element }; }
349 virtual std::string normalize_host(std::string_view element) const noexcept { return string_ops::ascii::tolower(element); }
350 };
351
352 struct http_scheme : http_schemes
353 {
354 virtual std::string_view scheme() const noexcept override { return "http"; }
355 virtual std::string_view default_port() const noexcept override { return "80"; }
356 };
357
358 struct https_scheme : http_schemes
359 {
360 virtual std::string_view scheme() const noexcept override { return "https"; }
361 virtual std::string_view default_port() const noexcept override { return "443"; }
362 };
363
364 static http_scheme http;
365 static https_scheme https;
366
368
369 }
370
371 known_uri_scheme const* query_uri_scheme(std::string_view scheme)
372 {
373 static std::map<std::string, known_uri_scheme const*, std::less<>> const schemes = {
374 {"file", &known_schemes::file},
375 {"http", &known_schemes::http},
376 {"https", &known_schemes::https}
377 };
378 if (auto s = schemes.find(scheme); s != schemes.end())
379 return s->second;
380 return nullptr;
381 }
382
383 std::vector<std::pair<std::string, std::string>> known_uri_scheme::split_query_elements(std::string_view query) const noexcept
384 {
385 std::vector<std::pair<std::string, std::string>> result;
386 detail::split_query_elements(query, result);
387 return result;
388 }
389}
constexpr int xdigit_to_number(char32_t cp) noexcept
Convert an ASCII xdigit to its numerical value (only gives meaningful results with valid xdigit arguments).
Definition string_ops.h:460
constexpr auto bit_count
Equal to the number of bits in the type.
Definition bits.h:33
std::string_view consume_while(std::string_view &str, FUNC &&pred)
Consumes characters from the beginning of str while they match pred(str[0]).
Definition string_ops.h:755
std::string_view consume_until(std::string_view &str, FUNC &&pred)
Consumes characters from the beginning of str until one matches pred(str[0]), exclusive.
Definition string_ops.h:788
constexpr void split(std::string_view source, char delim, FUNC &&func) noexcept(noexcept(func(std::string_view{}, true)))
Performs a basic "split" operation, calling func for each part of source delimited by delim.
Definition string_ops.h:955
char consume(std::string_view &str)
Consumes and returns the first character in the str, or \0 if no more characters.
Definition string_ops.h:652
constexpr bool isany(char32_t cp, T &&chars) noexcept
Checks if cp is any of the characters in chars
Definition string_ops.h:638
std::string_view prefix(std::string_view str, size_t count) noexcept
Returns a substring containing the count leftmost characters of str. Always valid,...
Definition string_ops.h:571
std::string uri
URIs are stored in a UTF-8 encoding where both non-ASCII code unit bytes as well as URI-reserved char...
Definition uri.h:28
uri_expected< decomposed_uri > decompose_uri(uri_view uri, enum_flags< uri_decompose_flags > flags=enum_flags< uri_decompose_flags >::all())
This function will decompose URI into its composite elements, which includes percent-decoding all the...
Definition uri_impl.h:243
@ use_well_known_port_numbers
if a port is not specified in the uri, the result will guess the port based on the scheme
The below code is based on Sun's libm library code, which is licensed under the following license:
Primary namespace for everything in this library.
Definition align+rec2.h:10
Holds the constituents of a URI.
Definition uri.h:83
std::vector< std::string > normalized_path() const noexcept
Returns the path normalized by applying any "." or ".." elements.
Definition uri_impl.h:310
A (constexpr) value struct that represents a set of bits mapped to an enum (implemented as a bitset)
Definition enum_flags.h:29