9 using namespace string_ops;
10 using namespace parsing;
13 void eat(uri_view&
uri, T&& c, uri_error_code
err)
19 static constexpr auto isscheme(
char c) {
return ascii::isalnum(c) || c ==
'+' || c ==
'-' || c ==
'.'; };
20 static constexpr auto isunreserved(
char c) {
return ascii::isalnum(c) || c ==
'-' || c ==
'.' || c ==
'_' || c ==
'~'; };
21 static constexpr auto isgendelims(
char c) {
return isany(c,
":/?#[]@"); };
22 static constexpr auto issubdelims(
char c) {
return isany(c,
"!$&'()*+,;="); };
23 static constexpr auto isreserved(
char c) {
return isany(c,
":/?#[]@!$&'()*+,;="); };
28 return flags.contain(uri_decompose_flags::lowercase_when_appropriate) ? ascii::tolower(std::move(
s)) : std::
move(
s);
33 const auto start =
uri;
34 eat(
uri, ascii::isalpha, uri_error_code::scheme_malformed);
35 trim_while(
uri, isscheme);
37 return condlower(make_string(start.begin(),
uri.begin()),
flags);
42 return std::string(
consume_until(
uri, [](
char c) {
return c ==
'/' || c ==
'?' || c ==
'#'; }));
45 static char parse_pct(std::string_view& str)
48 if (
auto h1 =
consume(str, ascii::isxdigit))
51 throw uri_error_code::invalid_percent_encoding;
52 if (
auto h2 =
consume(str, ascii::isxdigit))
55 throw uri_error_code::invalid_percent_encoding;
59 template <
typename PRED>
60 static std::string consume_with_pct(std::string_view& str,
PRED&&
pred, std::string_view
prefix = {})
62 std::string result{
prefix };
67 result += parse_pct(str);
79 if (!
consume(str,
'.'))
return {};
82 if (!
consume(str,
'.'))
return {};
85 if (!
consume(str,
'.'))
return {};
89 return make_string(start.begin(), str.begin());
102 if (result.empty() || !
consume(authority,
']'))
103 throw uri_error_code::host_malformed;
104 return std::string{ result };
108 auto start = authority;
109 auto ipv4 = try_parse_ipv4(authority,
flags);
117 return condlower(consume_with_pct(authority, [](
char c) {
return isunreserved(c) || c ==
'-' || c ==
'.'; }),
flags);
123 std::tuple<std::string, std::string, std::string> result;
125 if (authority.contains(
'@'))
127 std::get<0>(result) = consume_with_pct(authority, [](
char c) {
return isunreserved(c) || issubdelims(c) || c ==
':'; });
128 eat(authority,
'@', uri_error_code::authority_malformed);
129 std::get<1>(result) = parse_host(authority,
flags);
133 std::get<1>(result) = parse_host(authority,
flags);
138 std::get<2>(result) = std::string(
consume_while(authority, ascii::isdigit));
144 static constexpr auto ispchar(
char c) {
return isunreserved(c) || issubdelims(c) || c ==
':' || c ==
'@'; }
145 static constexpr auto ispcharnc(
char c) {
return isunreserved(c) || issubdelims(c) || c ==
'@'; }
149 return consume_with_pct(
uri, ispchar);
154 auto result = consume_with_pct(
uri, ispchar);
156 throw uri_error_code::path_element_malformed;
162 auto result = consume_with_pct(
uri, ispcharnc);
164 throw uri_error_code::path_element_malformed;
171 auto path =
consume_until(
uri, [](
char c) {
return c ==
'?' || c ==
'#'; });
174 throw uri_error_code::path_malformed;
188 if (
flags.contain(uri_decompose_flags::split_path_elements))
elements.push_back(parse_segment(
pv,
flags));
192 throw uri_error_code::path_malformed;
196 return { std::string{ path }, std::move(
elements) };
199 static constexpr auto isqorf(
char c) {
return ispchar(c) || c ==
'/' || c ==
'?'; }
201 template <
char QUERY_DELIMITER = '&',
char KEY_DELIMITER = '='>
202 void split_query_elements(std::string_view path, std::vector<std::pair<std::string, std::string>>&
elements)
206 elements.push_back({ std::string{
k}, std::string{
el} });
212 std::tuple<std::string, std::vector<std::pair<std::string, std::string>>> result;
214 auto path = consume_with_pct(
uri, isqorf,
"?");
215 if (!
uri.empty() &&
uri[0] !=
'#')
216 throw uri_error_code::query_malformed;
218 std::get<0>(result) = path;
219 if (
flags.contain(uri_decompose_flags::split_query_elements))
220 split_query_elements(std::string_view{ path }.substr(1), std::get<1>(result));
227 auto fragment = consume_with_pct(
uri, isqorf,
"#");
229 throw uri_error_code::query_malformed;
233 static std::string deduce_port_from_scheme(std::string_view scheme)
235 auto known = query_uri_scheme(scheme);
237 return std::string{
known->default_port() };
249 result.scheme = detail::parse_scheme(
uri,
flags);
252 return unexpected(uri_error_code::scheme_malformed);
256 result.authority = detail::parse_authority(
uri,
flags);
258 auto [user_info, host, port] = detail::parse_authority_elements(result.authority,
flags);
260 port = detail::deduce_port_from_scheme(result.scheme);
261 result.user_info = std::move(user_info);
262 result.host = std::move(host);
263 result.port = std::move(port);
266 auto [path,
elements] = detail::parse_path(!result.authority.empty(),
uri,
flags);
267 result.path = std::move(path);
268 result.path_elements = std::move(
elements);
269 if (
flags.contain(uri_decompose_flags::normalize_path))
270 result.path_elements = result.normalized_path();
275 result.query = std::move(query);
276 result.query_elements = std::move(
elements);
280 result.fragment = detail::parse_fragment(
uri,
flags);
282 if (
flags.contains_all_of(uri_decompose_flags::lowercase_when_appropriate, uri_decompose_flags::normalize_path))
283 result.canonical_form =
true;
287 catch (uri_error_code
code)
298 bool decomposed_uri::operator==(decomposed_uri
const&
other)
const noexcept
301 string_ops::ascii::strings_equal_ignore_case(scheme,
other.scheme) &&
302 user_info ==
other.user_info &&
303 string_ops::ascii::strings_equal_ignore_case(host,
other.host) &&
304 string_ops::trimmed(port,
'0') == string_ops::trimmed(
other.port,
'0') &&
305 normalized_path() ==
other.normalized_path() &&
306 query ==
other.query &&
307 fragment ==
other.fragment;
312 std::vector<std::string> result;
313 for (
auto&
el : path_elements)
317 else if (
el ==
".." && !result.empty())
320 result.push_back(
el);
325 namespace known_schemes
328 struct url_schemes : known_uri_scheme
331 virtual uri_error validate_host(std::string_view element)
const noexcept override {
333 return unexpected(uri_error_code::host_required_in_scheme);
338 struct http_schemes : url_schemes
340 virtual uri_error validate_user_info(std::string_view element)
const noexcept override {
return {}; }
341 virtual uri_error validate_path(std::string_view element)
const noexcept override {
342 if (!element.empty() || element.starts_with(
"/"))
343 return unexpected(uri_error_code::path_malformed);
347 virtual std::string normalize_port(std::string_view element)
const noexcept override {
if (string_ops::trimmed(element,
'0') == default_port())
return {};
return std::string{ element }; }
348 virtual std::string normalize_path(std::string_view element)
const noexcept {
if (element.empty())
return "/";
return std::string{ element }; }
349 virtual std::string normalize_host(std::string_view element)
const noexcept {
return string_ops::ascii::tolower(element); }
352 struct http_scheme : http_schemes
354 virtual std::string_view scheme()
const noexcept override {
return "http"; }
355 virtual std::string_view default_port()
const noexcept override {
return "80"; }
358 struct https_scheme : http_schemes
360 virtual std::string_view scheme()
const noexcept override {
return "https"; }
361 virtual std::string_view default_port()
const noexcept override {
return "443"; }
364 static http_scheme http;
365 static https_scheme https;
371 known_uri_scheme
const* query_uri_scheme(std::string_view scheme)
373 static std::map<std::string, known_uri_scheme const*, std::less<>>
const schemes = {
374 {
"file", &known_schemes::file},
375 {
"http", &known_schemes::http},
376 {
"https", &known_schemes::https}
383 std::vector<std::pair<std::string, std::string>> known_uri_scheme::split_query_elements(std::string_view query)
const noexcept
385 std::vector<std::pair<std::string, std::string>> result;
386 detail::split_query_elements(query, result);
constexpr int xdigit_to_number(char32_t cp) noexcept
Convert an ASCII xdigit to its numerical value (only gives meaningful results with valid xdigit arguments)
constexpr auto bit_count
Equal to the number of bits in the type.
std::string_view consume_while(std::string_view &str, FUNC &&pred)
Consumes characters from the beginning of str while they match pred(str[0]).
std::string_view consume_until(std::string_view &str, FUNC &&pred)
Consumes characters from the beginning of str until one matches pred(str[0]), exclusive.
constexpr void split(std::string_view source, char delim, FUNC &&func) noexcept(noexcept(func(std::string_view{}, true)))
Performs a basic "split" operation, calling func for each part of source delimited by delim.
char consume(std::string_view &str)
Consumes and returns the first character in the str, or \0 if no more characters.
constexpr bool isany(char32_t cp, T &&chars) noexcept
Checks if cp is any of the characters in chars
std::string_view prefix(std::string_view str, size_t count) noexcept
Returns a substring containing the count leftmost characters of str. Always valid,...
std::string uri
URIs are stored in a UTF-8 encoding where both non-ASCII code unit bytes as well as URI-reserved char...
uri_expected< decomposed_uri > decompose_uri(uri_view uri, enum_flags< uri_decompose_flags > flags=enum_flags< uri_decompose_flags >::all())
This function will decompose URI into its composite elements, which includes percent-decoding all the...
@ use_well_known_port_numbers
if a port is not specified in the uri, the result will guess the port based on the scheme
The below code is based on Sun's libm library code, which is licensed under the following license:
Primary namespace for everything in this library.
Holds the constituents of a URI.
std::vector< std::string > normalized_path() const noexcept
Returns the path normalized by applying any "." or ".." elements.
A (constexpr) value struct that represents a set of bits mapped to an enum (implemented as a bitset)