header_utils
Loading...
Searching...
No Matches
parsing.h
1
4
5#pragma once
6
#include "unicode.h"
#include <functional>
#include <optional>
#include <variant>
10
12{
13 using namespace string_ops;
14
15
16
/// Returns the 1-based line number at which `of_string` starts inside `in_document`.
///
/// `of_string` is expected to be a view into the same buffer as `in_document`;
/// the line is found by counting the '\n' characters that precede it.
///
/// @param of_string   Sub-view of `in_document` whose position is wanted.
/// @param in_document The document that contains `of_string`.
/// @return The 1-based line number, or 0 if either view is empty or `of_string`
///         does not start inside `in_document`.
[[nodiscard]] inline size_t find_line_number(std::string_view of_string, std::string_view in_document) noexcept
{
    if (of_string.empty() || in_document.empty())
        return 0;

    const auto start_line = of_string.data();
    const auto start_doc = in_document.data();
    const auto end_doc = start_doc + in_document.size();

    // std::less yields a well-defined total order even for pointers into unrelated
    // buffers, which a plain `<` does not guarantee.
    const std::less<const char*> before{};
    if (before(start_line, start_doc) || !before(start_line, end_doc))
        return 0;

    return static_cast<size_t>(std::count(start_doc, start_line, '\n')) + 1;
}
27
/// Returns the 1-based (line, column) at which `of_string` starts inside `in_document`.
///
/// `of_string` is expected to be a view into the same buffer as `in_document`.
/// Lines are separated by '\n'; the column is the byte offset from the start of
/// the line, plus one.
///
/// @param of_string   Sub-view of `in_document` whose position is wanted.
/// @param in_document The document that contains `of_string`.
/// @return `{line, column}`, or `{0, 0}` if either view is empty or `of_string`
///         does not start inside `in_document`.
[[nodiscard]] inline std::pair<size_t, size_t> find_line_and_column(std::string_view of_string, std::string_view in_document) noexcept
{
    if (of_string.empty() || in_document.empty())
        return {};

    const auto start_line = of_string.data();
    const auto start_doc = in_document.data();
    const auto end_doc = start_doc + in_document.size();

    // std::less yields a well-defined total order even for pointers into unrelated
    // buffers; without this guard the scan below could run past the document.
    const std::less<const char*> before{};
    if (before(start_line, start_doc) || !before(start_line, end_doc))
        return {};

    size_t newline_count = 0;
    auto line_begin = start_doc;
    for (auto cursor = start_doc; cursor != start_line; ++cursor)
    {
        if (*cursor == '\n')
        {
            ++newline_count;
            line_begin = cursor + 1; // the next line starts right after the newline
        }
    }

    return { newline_count + 1, static_cast<size_t>(start_line - line_begin) + 1 };
}
49
50 [[nodiscard]] inline std::string_view consume_c_identifier(std::string_view& str)
51 {
52 if (str.empty() || !ascii::isidentstart(str[0]))
53 return {};
54
55 const auto start = str.begin();
56 str.remove_prefix(1);
57 trim_while(str, ascii::isident);
58 return make_sv(start, str.begin());
59 }
60
61 [[nodiscard]] inline std::string_view consume_c_identifier_with(std::string_view& str, std::string_view additional_chars)
62 {
63 if (str.empty() || !(ascii::isidentstart(str[0]) || contains(additional_chars, str[0])))
64 return {};
65
66 const auto start = str.begin();
67 str.remove_prefix(1);
68 trim_while(str, [additional_chars](char c) { return ascii::isident(c) || contains(additional_chars, c); });
69 return make_sv(start, str.begin());
70 }
71
72
73#if __cpp_lib_to_chars
74 [[nodiscard]] inline std::pair<std::string_view, double> consume_c_float(std::string_view& str)
75 {
76 if (str.empty() || !(ascii::isdigit(str[0]) || str[0] == '-'))
77 return {};
78
79 std::pair<std::string_view, double> result;
80
81 const auto from_chars_result = from_chars(str, result.second);
82 if (from_chars_result.ec != std::errc{})
83 return { {}, std::numeric_limits<double>::quiet_NaN() };
84
85 result.first = make_sv(str.data(), from_chars_result.ptr);
86 str.remove_prefix(result.first.size());
87 return result;
88 }
89
90 [[nodiscard]] inline std::pair<std::string_view, int64_t> consume_c_integer(std::string_view& str, int base = 10)
91 {
92 if (str.empty() || !(ascii::isdigit(str[0]) || str[0] == '-'))
93 return {};
94
95 std::pair<std::string_view, int64_t> result;
96
97 const auto from_chars_result = from_chars(str, result.second, base);
98 if (from_chars_result.ec != std::errc{})
99 return { {}, 0 };
100
101 result.first = make_sv(str.data(), from_chars_result.ptr);
102 str.remove_prefix(result.first.size());
103 return result;
104 }
105
106 [[nodiscard]] inline std::pair<std::string_view, uint64_t> consume_c_unsigned(std::string_view& str, int base = 10)
107 {
108 if (str.empty() || !ascii::isdigit(str[0]))
109 return {};
110
111 std::pair<std::string_view, uint64_t> result;
112
113 const auto from_chars_result = std::from_chars(str.data(), str.data() + str.size(), result.second, base);
114 if (from_chars_result.ec != std::errc{})
115 return { {}, 0 };
116
117 result.first = make_sv(str.data(), from_chars_result.ptr);
118 str.remove_prefix(result.first.size());
119 return result;
120 }
121
122 template <char DELIMITER = '\''>
123 [[nodiscard]] inline std::pair<std::string_view, std::string> consume_c_string(std::string_view& strv)
124 {
125 if (strv.empty() || strv[0] != DELIMITER)
126 throw std::runtime_error("C string must start with delimiter");
127
128 std::pair<std::string_view, std::string> result;
129
130 auto view = strv;
131 auto start = view.begin();
132 view.remove_prefix(1);
133 while (view[0] != DELIMITER)
134 {
135 auto cp = consume(view);
136 if (cp == '\\')
137 {
138 cp = consume(view);
139 if (view.empty())
140 throw std::runtime_error("unterminated C string");
141
142 switch (cp)
143 {
144 case 'n': result.second += '\n'; break;
145 case '"': result.second += '"'; break;
146 case '\'': result.second += '\''; break;
147 case '\\': result.second += '\\'; break;
148 case 'b': result.second += '\b'; break;
149 case 'r': result.second += '\r'; break;
150 case 'f': result.second += '\f'; break;
151 case 't': result.second += '\t'; break;
152 case '0': result.second += '\0'; break;
153 case 'o':
154 {
155 auto num = consume_n(view, 3);
156 if (num.size() < 3 || view.empty()) return {};
157
158 auto parsed = consume_c_integer(num, 8);
159 if (parsed.first.empty() || !num.empty()) return {};
160
161 if (parsed.second > 255) return {};
162 result.second.push_back((char)parsed.second);
163 break;
164 }
165 case 'x':
166 {
167 auto num = consume_n(view, 2);
168 if (num.size() < 2 || view.empty()) return {};
169
170 auto parsed = consume_c_integer(num, 16);
171 if (parsed.first.empty() || !num.empty()) return {};
172
173 //append_utf8(result.second, (char32_t)parsed.second);
174 result.second += (char)parsed.second;
175 break;
176 }
177 case 'u':
178 {
179 auto num = consume_n(view, 4);
180 if (num.size() < 4 || view.empty()) return {};
181
182 auto parsed = consume_c_integer(num, 16);
183 if (parsed.first.empty() || !num.empty()) return {};
184
185 append_utf8(result.second, (char32_t)parsed.second);
186 break;
187 }
188 case 'U':
189 {
190 auto num = consume_n(view, 8);
191 if (num.size() < 8 || view.empty()) return {};
192
193 auto parsed = consume_c_integer(num, 16);
194 if (parsed.first.empty() || !num.empty()) return {};
195
196 append_utf8(result.second, (char32_t)parsed.second);
197 break;
198 }
199 default:
200 throw std::runtime_error("unknown escape character");
201 }
202 }
203 else
204 {
205 result.second += cp;
206 }
207
208 if (view.empty())
209 throw std::runtime_error("unterminated C string");
210 }
211
212 if (!consume(view, DELIMITER))
213 throw std::runtime_error("C string must end with delimiter");
214
215 result.first = make_sv(start, view.begin());
216 strv = view;
217 return result;
218 }
219
220 [[deprecated("WARNING: This function is incomplete")]]
221 [[nodiscard]] inline std::tuple<std::string_view, std::variant<double, uint64_t, int64_t>> consume_c_number(std::string_view& str)
222 {
223 if (str.empty())
224 return {};
225
226 if (auto first_char = str[0]; first_char == '-')
227 {
230 return {};
231 }
232 else if (consume(str, "0x"))
233 return consume_c_unsigned(str, 16);
234 else if (consume(str, "0b"))
235 return consume_c_unsigned(str, 1);
236 else if (consume(str, "0"))
237 return consume_c_unsigned(str, 8);
238 else if (ascii::isdigit(first_char))
239 {
240 if (auto result = consume_c_float(str); !result.first.empty())
241 return result;
242
243 if (auto result = consume_c_unsigned(str); !result.first.empty())
244 return result;
245
246 if (auto result = consume_c_integer(str); !result.first.empty())
247 return result;
248 }
249 return {};
250 }
251
252#endif
253
254 struct parse_error : std::runtime_error
255 {
256 std::string_view Where;
257
258 template <GHPL_FORMAT_TEMPLATE>
259 parse_error(std::string_view where, GHPL_FORMAT_ARGS)
260 : runtime_error(GHPL_FORMAT_CALL)
261 , Where(where)
262 {
263
264 }
265 };
266
267 inline bool try_eat(std::string_view& str, std::string_view what)
268 {
269 string_ops::trim_whitespace_left(str);
270 if (!str.starts_with(what))
271 return false;
272 str.remove_prefix(what.size());
273 return true;
274 }
275
276 inline bool try_eat(std::string_view& str, char what)
277 {
278 string_ops::trim_whitespace_left(str);
279 if (!str.starts_with(what))
280 return false;
281 str.remove_prefix(1);
282 return true;
283 }
284
285 inline void eat(std::string_view& str, std::string_view what)
286 {
287 if (!try_eat(str, what))
288 throw parse_error(str, "expected '{}'", what);
289 }
290
291 inline void eat(std::string_view& str, char what)
292 {
293 if (!try_eat(str, what))
294 throw parse_error(str, "expected '{}'", what);
295 }
296
297 inline std::string_view try_eat_identifier(std::string_view& str)
298 {
299 string_ops::trim_whitespace_left(str);
300 return consume_c_identifier(str);
301 }
302
303 inline std::string_view eat_identifier(std::string_view& str)
304 {
305 const auto result = try_eat_identifier(str);
306 if (result.empty())
307 throw parse_error(str, "expected identifier");
308 return result;
309 }
310
311 inline std::string_view try_eat_identifier_with(std::string_view& str, std::string_view additional_chars)
312 {
313 string_ops::trim_whitespace_left(str);
314 return consume_c_identifier_with(str, additional_chars);
315 }
316
317 inline std::string_view eat_identifier_with(std::string_view& str, std::string_view additional_chars)
318 {
319 const auto result = try_eat_identifier_with(str, additional_chars);
320 if (result.empty())
321 throw parse_error(str, "expected identifier");
322 return result;
323 }
324
325 inline std::string_view eat_whitespace(std::string_view& str)
326 {
327 return string_ops::consume_while(str, string_ops::ascii::isspace);
328 }
329
330 inline bool try_eat_line_comment(std::string_view& str, std::string_view comment_start = "//")
331 {
332 string_ops::trim_whitespace_left(str);
333 if (!try_eat(str, comment_start))
334 return false;
335 std::ignore = string_ops::consume_until(str, '\n');
336 return true;
337 }
338
339 inline bool try_eat_unsigned(std::string_view& str, uint64_t& result, int base = 10)
340 {
341 string_ops::trim_whitespace_left(str);
342 auto [parsed, value] = consume_c_unsigned(str, base);
343 if (parsed.empty()) return false;
344 result = value;
345 return true;
346 }
347
348 inline std::optional<uint64_t> try_eat_unsigned(std::string_view& str, int base = 10)
349 {
350 if (uint64_t result = 0; try_eat_unsigned(str, result, base))
351 return result;
352 return std::nullopt;
353 }
354
355 inline bool try_eat_integer(std::string_view& str, int64_t& result, int base = 10)
356 {
357 string_ops::trim_whitespace_left(str);
358 auto [parsed, value] = consume_c_integer(str, base);
359 if (parsed.empty()) return false;
360 result = value;
361 return true;
362 }
363
364 inline std::optional<int64_t> try_eat_integer(std::string_view& str, int base = 10)
365 {
366 if (int64_t result = 0; try_eat_integer(str, result, base))
367 return result;
368 return std::nullopt;
369 }
370
371 inline uint64_t eat_unsigned(std::string_view& str, int base = 10)
372 {
373 uint64_t result{};
374 if (!try_eat_unsigned(str, result, base))
375 throw parse_error(str, "expected unsigned integer of base {}", base);
376 return result;
377 }
378
379 inline int64_t eat_integer(std::string_view& str, int base = 10)
380 {
381 int64_t result{};
382 if (!try_eat_integer(str, result, base))
383 throw parse_error(str, "expected integer of base {}", base);
384 return result;
385 }
386
387 inline char32_t try_eat_utf8_codepoint(std::string_view& str)
388 {
389 return string_ops::consume_utf8(str);
390 }
391
392 inline char32_t eat_utf8_codepoint(std::string_view& str)
393 {
394 if (const auto cp = string_ops::consume_utf8(str))
395 return cp;
396 throw parse_error(str, "expected UTF-8 codepoint");
397 }
398
399#if 0
400 namespace decade
401 {
402 template <GHPL_FORMAT_TEMPLATE>
403 [[nodiscard]] inline parsing::parse_error parse_error(token_range const& range, GHPL_FORMAT_ARGS) { return parsing::parse_error(to_string_view(range), GHPL_FORMAT_FORWARD); }
404 template <GHPL_FORMAT_TEMPLATE>
405 [[nodiscard]] inline parsing::parse_error parse_error(token_it const& it, GHPL_FORMAT_ARGS) { return parsing::parse_error(it->range, GHPL_FORMAT_FORWARD); }
406
407 struct expression
408 {
409 virtual ~expression() noexcept = default;
410 token_range source_range;
411
412 expression(token_it it) : source_range(it, std::next(it)) {}
413 expression(token_range range) : source_range(range) {}
414 };
415
416 struct function_call_expression : public expression
417 {
418 std::vector<std::unique_ptr<expression>> arguments;
419 std::string name;
420 static std::string make_name(std::span<token_it const> name_parts, bool infix)
421 {
422 std::string name;
423 if (infix)
424 name += ':';
425 for (token_it it : name_parts)
426 {
427 name += it->range;
428 name += ':';
429 }
430 return name;
431 }
432 function_call_expression(token_range range, std::span<token_it const> name_, std::vector<std::unique_ptr<expression>> arguments_, bool infix)
433 : expression(range)
434 , name(make_name(name_, infix))
435 , arguments(std::move(arguments_))
436 {
437 }
438 };
439
440 struct identifier_expression : public expression
441 {
442 std::string identifier;
443
444 identifier_expression(token_it it) : expression(it), identifier(it->range) {}
445 };
446
447 struct literal_expression : public expression
448 {
449 token literal;
450
451 literal_expression(token_it it) : expression(it), literal(*it) {}
452 };
453
454 inline std::unique_ptr<expression> parse_expression(token_range& tokens)
455 {
456 std::unique_ptr<expression> result;
457
458 auto start = tokens.begin();
459
460 std::vector<std::unique_ptr<expression>> constituents;
461 while (tokens && tokens.front().type >= token::word)
462 {
463 if (tokens.front().type == token::word)
464 {
465 constituents.push_back(std::make_unique<identifier_expression>(tokens.begin()));
466 tokens.advance(1);
467 }
468 else if (tokens.front().type == token::number || tokens.front().type == token::string)
469 {
470 constituents.push_back(std::make_unique<literal_expression>(tokens.begin()));
471 tokens.advance(1);
472 }
473 else if (tokens.front().type == token::start_sub_expression)
474 {
475 tokens.advance(1);
476 constituents.push_back(parse_expression(tokens));
477
478 if (tokens.front().type != token::end_sub_expression)
479 throw parse_error(tokens.begin(), "unexpected end of line");
480 tokens.advance(1);
481 }
482 else
483 throw parse_error(tokens.begin(), "expected expression part");
484 }
485 if (constituents.empty())
486 throw parse_error(tokens.begin(), "empty expression encountered");
487
488 const auto constitutent_count = constituents.size();
489 if (constitutent_count == 1)
490 return std::move(constituents[0]);
491
492 const bool infix = (constitutent_count % 2) == 1;
493
494 std::vector<token_it> function_name;
495 std::vector<std::unique_ptr<expression>> arguments;
496 if (infix)
497 arguments.push_back(std::exchange(constituents[0], {}));
499 for (size_t i = infix; i < constitutent_count; i += 2)
500 {
501 auto& function_identifier = constituents[i];
502 if (auto identifier = dynamic_cast<identifier_expression*>(function_identifier.get()))
503 {
504 function_name.push_back(identifier->source_range.begin());
505 arguments.push_back(std::exchange(constituents[i + 1], {}));
506 }
507 else
508 throw parse_error(function_identifier->source_range, "expected function name part");
509 }
510
511 return std::make_unique<function_call_expression>(std::ranges::subrange(start, tokens.begin()), std::move(function_name), std::move(arguments), infix);
512 }
513
514 inline std::unique_ptr<expression> parse_expression(std::string_view& str)
515 {
516 const auto tokens = lex(str);
517 using tokenit = std::ranges::iterator_t<decltype(tokens)>;
518 token_range range = tokens;
519 return parse_expression(range);
520 }
521
522 }
523#endif
524}
constexpr auto bit_count
Equal to the number of bits in the type.
Definition bits.h:33
constexpr __contains_fn contains
contains(range, el)
Definition ranges.h:247
std::string_view consume_while(std::string_view &str, FUNC &&pred)
Consumes characters from the beginning of str while they match pred(str[0]).
Definition string_ops.h:755
auto from_chars(std::string_view str, T &value, const int base=10) noexcept
A version of std::from_chars that takes a std::string_view as the first argument.
std::string_view consume_until(std::string_view &str, FUNC &&pred)
Consumes characters from the beginning of str until one matches pred(str[0]), exclusive.
Definition string_ops.h:788
char consume(std::string_view &str)
Consumes and returns the first character in the str, or \0 if no more characters.
Definition string_ops.h:652
std::string_view consume_n(std::string_view &str, size_t n)
Consumes at most n characters from the beginning of str.
Definition string_ops.h:857
constexpr size_t append_utf8(string8 auto &buffer, char32_t cp)
Appends octets to buffer by encoding cp into UTF-8.
Definition unicode.h:738
constexpr char32_t consume_utf8(string_view8 auto &str)
Consumes (see consume()) a UTF-8 codepoint from str.
Definition unicode.h:674