unsynn/lib.rs
#![allow(rustdoc::bare_urls)]
#![doc = include_str!("../README.md")]
//!
#![doc = include_str!("../COOKBOOK.md")]
//!
//! # Roadmap
//!
#![doc = include_str!("../ROADMAP.md")]
//!
//! For a history of how unsynn evolved, check the [CHANGELOG].
// PLANNED: currently the Error type is a tad big. This could be resolved either by
// refactoring the Error type (only keep the refined typename, remove 'at' and have the iter
// start at the error, not after it) or by just using Box<Error>. A conclusive optimization
// is postponed until we have benchmarks to decide what's the best approach. It may as well
// just stay as is...
#![allow(clippy::result_large_err)]

// When not using proc_macro2, we need the built-in proc_macro.
// This is only available when unsynn is used from a proc-macro crate.
#[cfg(not(feature = "proc_macro2"))]
extern crate proc_macro;

pub mod CHANGELOG {
    #![allow(non_snake_case)]
    #![allow(clippy::doc_markdown)]
    #![doc = include_str!("../CHANGELOG.md")]
}

// TokenIter
mod token_iter;
#[doc(inline)]
pub use token_iter::*;

/// The `Parser` trait that must be implemented by anything we want to parse. We are parsing
/// over a [`TokenIter`] (a [`TokenStream`] iterator).
pub trait Parser
where
    Self: Sized,
{
    /// The actual parsing function that must be implemented. This mutates the `tokens`
    /// iterator directly. It should not be called from user code except when implementing
    /// parsers, and then only when the rules below are followed.
    ///
    /// # Implementing Parsers
    ///
    /// The parsers for [`TokenStream`], [`TokenTree`], [`Group`], [`Ident`], [`Punct`],
    /// [`Literal`], [`Except`] and [`Nothing`] (and a few more) are the fundamental parsers.
    /// Any other parser is composed from those.
    ///
    /// Calling another `T::parser()` implementation is only valid when this is a conjunctive
    /// operation and a failure is returned immediately by the `?` operator. This can be used
    /// as a performance optimization. Any other call to a parser must be done within a
    /// transaction, otherwise the iterator will be left in a partially consumed state, which
    /// breaks further parsing.
    ///
    /// Transactions can be done by calling [`Parse::parse()`] or with the
    /// [`Transaction::transaction()`] method on the iterator.
    ///
    /// # Errors
    ///
    /// The `parser()` implementation must return an error when it cannot parse the
    /// input. This error must be an [`Error`]. User code will parse a grammar by calling
    /// [`Parse::parse_all()`], [`Parse::parse()`] or [`Parse::parse_with()`] which will call
    /// this method within a transaction and roll back on error.
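    ///
    /// # Example
    ///
    /// A minimal sketch of a hand-written parser built from the fundamental parsers. The
    /// `KeyValue` type here is made up for illustration; each `?` is a conjunctive call as
    /// described above, and the enclosing transaction is provided by [`Parse::parse()`]:
    ///
    /// ```rust
    /// # use unsynn::*;
    /// struct KeyValue {
    ///     key: Ident,
    ///     value: LiteralInteger,
    /// }
    ///
    /// impl Parser for KeyValue {
    ///     fn parser(tokens: &mut TokenIter) -> Result<Self> {
    ///         // Conjunctive calls: any failure propagates immediately via `?`,
    ///         // the caller rolls the transaction back.
    ///         Ok(KeyValue {
    ///             key: Ident::parser(tokens)?,
    ///             value: LiteralInteger::parser(tokens)?,
    ///         })
    ///     }
    /// }
    ///
    /// let mut input = "answer 42".to_token_iter();
    /// let kv = KeyValue::parse(&mut input).unwrap();
    /// assert_eq!(kv.key.to_string(), "answer");
    /// ```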
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn parser(tokens: &mut TokenIter) -> Result<Self>;
}

/// This trait provides the user facing API to parse grammatical entities. It is implemented
/// for anything that implements the [`Parser`] trait. The methods here wrap the iterator
/// used for parsing in a transaction. This iterator is always `Clone`. Instead of using a
/// peekable iterator or implementing deeper peeking, the `parse()` methods clone this
/// iterator to make access transactional: when parsing succeeds the transaction is
/// committed, otherwise it is rolled back.
///
/// This trait cannot be implemented by user code.
pub trait Parse: Parser {
    /// This is the user facing API to parse grammatical entities. Calls a `parser()` within a
    /// transaction. Commits changes on success and returns the parsed value.
    ///
    /// # Errors
    ///
    /// When the parser returns an error the transaction is rolled back and the error is
    /// returned.
    #[inline]
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn parse(tokens: &mut TokenIter) -> Result<Self> {
        tokens.transaction(Self::parser)
    }

    /// Exhaustive parsing within a transaction. This is a convenience method that implies an
    /// `EndOfStream` at the end. Thus it will error if parsing is not exhaustive.
    ///
    /// # Errors
    ///
    /// When the parser returns an error or there are tokens left in the stream the
    /// transaction is rolled back and an error is returned.
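    ///
    /// For example (a small sketch), a single [`Ident`] does not cover a two token stream,
    /// while a parser for the whole stream does:
    ///
    /// ```rust
    /// # use unsynn::*;
    /// let mut input = "foo bar".to_token_iter();
    /// // one Ident leaves "bar" unparsed, parse_all errors and rolls back
    /// assert!(Ident::parse_all(&mut input).is_err());
    /// // both idents are still available after the rollback
    /// assert!(Cons::<Ident, Ident>::parse_all(&mut input).is_ok());
    /// ```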
    #[inline]
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn parse_all(tokens: &mut TokenIter) -> Result<Self> {
        tokens
            .transaction(Cons::<Self, EndOfStream>::parser)
            .map(|result| result.first)
    }

    /// Parse a value in a transaction and pass it to a
    /// `FnOnce(Self, &mut TokenIter) -> Result<T>` closure, which
    /// creates a new result or returns an error.
    ///
    /// This method is a very powerful tool as it allows anything from simple validations to
    /// complete transformations into a new type. You may find this useful to implement
    /// parsers for complex types that need some runtime logic.
    ///
    /// The closure's first argument is the parsed value and the second argument is the
    /// transactional iterator pointing after the parsed `Self`. This can be used to create
    /// errors or parse further. In many cases it can be ignored with `_`.
    ///
    /// # Using with the `unsynn!` macro
    ///
    /// The [`unsynn!`] macro provides convenient syntax sugar for this method via the `parse_with`
    /// clause. See the [macro documentation](crate::unsynn#custom-parsing-with-parse_with) for details.
    ///
    /// ```rust
    /// # use unsynn::*;
    /// unsynn! {
    ///     struct PositiveInt(LiteralInteger);
    ///     parse_with |this, tokens| {
    ///         if this.0.value() > 0 {
    ///             Ok(this)
    ///         } else {
    ///             Error::other(None, tokens, "must be positive".into())
    ///         }
    ///     };
    /// }
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// # use unsynn::*;
    /// // A parser that parses a comma delimited list of anything but commas
    /// // and stores the entries lexically sorted.
    /// struct OrderedStrings {
    ///     strings: Vec<String>
    /// }
    ///
    /// impl Parser for OrderedStrings {
    ///     fn parser(tokens: &mut TokenIter) -> Result<Self> {
    ///         // Our input is a CommaDelimitedVec<String>, we'll transform that into
    ///         // OrderedStrings.
    ///         Parse::parse_with(tokens, |this: CommaDelimitedVec<String>, _| -> Result<OrderedStrings> {
    ///             let mut strings: Vec<String> = this.into_iter()
    ///                 .map(|s| s.value)
    ///                 .collect();
    ///             strings.sort();
    ///             Ok(OrderedStrings { strings })
    ///         })
    ///     }
    /// }
    /// let mut input = "a, d, b, e, c,".to_token_iter();
    /// let ordered_strings: OrderedStrings = input.parse().unwrap();
    /// assert_eq!(ordered_strings.strings, vec!["a", "b", "c", "d", "e"]);
    /// ```
    ///
    /// # Errors
    ///
    /// When the parser or the closure returns an error, the transaction is rolled back and
    /// the error is returned.
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn parse_with<T>(
        tokens: &mut TokenIter,
        f: impl FnOnce(Self, &mut TokenIter) -> Result<T>,
    ) -> Result<T> {
        tokens.transaction(|tokens| {
            let result = Self::parser(tokens)?;
            f(result, tokens)
        })
    }
}

/// Parse is implemented for anything that implements [`Parser`].
impl<T: Parser> Parse for T {}

/// unsynn defines its own [`ToTokens`] trait to be able to implement it for std container types.
/// This is similar to the `ToTokens` from the quote crate but adds some extra methods and is
/// implemented for more types. Moreover, the `to_token_iter()` method is the main entry point
/// for creating an iterator that can be used for parsing.
///
/// # Using with the `unsynn!` macro
///
/// The [`unsynn!`] macro provides convenient syntax sugar for customizing token emission via the
/// `to_tokens` clause. See the [macro documentation](crate::unsynn#custom-token-emission-with-to_tokens)
/// for details.
///
/// ```rust
/// # use unsynn::*;
/// unsynn! {
///     struct BoolKeyword(bool);
///     to_tokens |s, tokens| {
///         let keyword = if s.0 { "true" } else { "false" };
///         Ident::new(keyword, Span::call_site()).to_tokens(tokens);
///     };
/// }
/// ```
pub trait ToTokens {
    /// Write `&self` to the given [`TokenStream`].
    ///
    /// This is the core method that needs to be implemented. All other methods in this trait
    /// have default implementations based on this method.
    ///
    /// # Using with the `unsynn!` macro
    ///
    /// The [`unsynn!`] macro's `to_tokens` clause provides syntax sugar for implementing this
    /// method. See the [`unsynn!` documentation](crate::unsynn#custom-token-emission-with-to_tokens).
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn to_tokens(&self, tokens: &mut TokenStream);

    /// Convert `&self` into a [`TokenIter`] object.
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn to_token_iter(&self) -> TokenIter {
        TokenIter::new(self.to_token_stream())
    }

    /// Convert `self` into a [`TokenIter`] object.
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn into_token_iter(self) -> TokenIter
    where
        Self: Sized,
    {
        TokenIter::new(self.into_token_stream())
    }

    /// Convert `&self` into a [`TokenStream`] object.
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn to_token_stream(&self) -> TokenStream {
        let mut tokens = TokenStream::new();
        self.to_tokens(&mut tokens);
        tokens
    }

    /// Convert `self` into a [`TokenStream`] object.
    #[inline]
    #[mutants::skip]
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn into_token_stream(self) -> TokenStream
    where
        Self: Sized,
    {
        self.to_token_stream()
    }

    /// Convert `&self` into a [`String`] object. This is mostly used in the test suite to
    /// compare the outputs. When the input is a `&str`, this parses it and returns a
    /// normalized [`String`].
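    ///
    /// A small sketch of the normalization, assuming the usual `TokenStream` rendering with
    /// single spaces between tokens:
    ///
    /// ```rust
    /// # use unsynn::*;
    /// assert_eq!("foo   (  bar )".tokens_to_string(), "foo (bar)");
    /// ```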
    #[inline]
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn tokens_to_string(&self) -> String {
        self.to_token_stream().to_string()
    }
}

// Full circle
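/// Emitting a [`TokenIter`] copies its remaining tokens, so an iterator can be turned back
/// into a stream at any point. A small sketch:
///
/// ```rust
/// # use unsynn::*;
/// let mut input = "one two".to_token_iter();
/// let _first: Ident = input.parse().unwrap();
/// // the iterator still holds the unparsed rest
/// assert_eq!(input.tokens_to_string(), "two");
/// ```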
impl ToTokens for TokenIter {
    fn to_tokens(&self, tokens: &mut TokenStream) {
        tokens.extend(self.clone());
    }
}

/// `ToTokens` for arrays and slices
///
/// # Example
///
/// ```rust
/// use unsynn::*;
/// let arr: [Ident; 3] = [
///     Ident::new("a", Span::call_site()),
///     Ident::new("b", Span::call_site()),
///     Ident::new("c", Span::call_site())
/// ];
/// let mut tokens = TokenStream::new();
/// arr.to_tokens(&mut tokens);
/// assert_eq!(tokens.to_string(), "a b c");
/// # let vec = vec![Ident::new("a", Span::call_site()), Ident::new("b", Span::call_site()), Ident::new("c", Span::call_site())];
/// # let mut tokens = TokenStream::new();
/// # vec[1..3].to_tokens(&mut tokens);
/// # assert_eq!(tokens.to_string(), "b c");
/// ```
impl<T: ToTokens> ToTokens for [T] {
    fn to_tokens(&self, tokens: &mut TokenStream) {
        for element in self {
            element.to_tokens(tokens);
        }
    }
}

/// Implement `Display` using `ToTokens::tokens_to_string()` for all types that implement `ToTokens`.
impl std::fmt::Display for dyn ToTokens {
    #[mutants::skip]
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.tokens_to_string())
    }
}

/// Extension trait for [`TokenIter`] that calls [`Parse::parse()`].
#[allow(clippy::missing_errors_doc)]
pub trait IParse: private::Sealed {
    /// Parse a value from the iterator. This is a convenience method that calls
    /// [`Parse::parse()`].
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn parse<T: Parse>(self) -> Result<T>;

    /// Parse a value from the iterator. This is a convenience method that calls
    /// [`Parse::parse_all()`].
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn parse_all<T: Parse>(self) -> Result<T>;
}

impl private::Sealed for &mut TokenIter {}

/// Implements [`IParse`] for `&mut TokenIter`. This API is more convenient in cases where
/// the compiler can infer the types because no turbofish notation is required.
///
/// # Example
///
/// ```rust
/// # use unsynn::*;
///
/// struct MyStruct {
///     number: LiteralInteger,
///     name: Ident,
/// }
///
/// fn example() -> Result<MyStruct> {
///     let mut input = " 1234 name ".to_token_iter();
///     Ok(
///         MyStruct {
///             // types are inferred here
///             number: input.parse()?,
///             name: input.parse()?
///         }
///     )
/// }
/// ```
impl IParse for &mut TokenIter {
    #[inline]
    fn parse<T: Parse>(self) -> Result<T> {
        T::parse(self)
    }

    #[inline]
    fn parse_all<T: Parse>(self) -> Result<T> {
        T::parse_all(self)
    }
}

/// Helper trait to make [`TokenIter`] transactional.
pub trait Transaction: Clone {
    /// Transaction on a [`TokenIter`]; calls a `FnOnce(&mut TokenIter) -> Result<T>` within a
    /// transaction. When the closure succeeds, the transaction is committed and its result
    /// is returned.
    ///
    /// # Errors
    ///
    /// When the closure returns an error, the transaction is rolled back and the error
    /// is returned.
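    ///
    /// # Example
    ///
    /// A sketch of the rollback behavior; the closure consumes a token before it fails,
    /// yet the caller's iterator is left untouched:
    ///
    /// ```rust
    /// # use unsynn::*;
    /// let mut input = "one 2".to_token_iter();
    /// let result = input.transaction(|tokens| {
    ///     let _first = Ident::parser(tokens)?;
    ///     // fails on the literal `2`, rolling the whole transaction back
    ///     Ident::parser(tokens)
    /// });
    /// assert!(result.is_err());
    /// // "one" was not consumed
    /// assert_eq!(input.parse::<Ident>().unwrap().to_string(), "one");
    /// ```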
    fn transaction<R>(&mut self, f: impl FnOnce(&mut Self) -> Result<R>) -> Result<R> {
        let mut ttokens = self.clone();
        #[allow(clippy::manual_inspect)] // not pre 1.81
        f(&mut ttokens).map(|result| {
            *self = ttokens;
            result
        })
    }
}

impl Transaction for TokenIter {}

// Result and error type
mod error;
pub use error::*;

// various declarative macros
mod macros;

// Parsers for the `proc_macro2` entities and other fundamental types
pub mod fundamental;
#[doc(inline)]
pub use fundamental::*;

// Groups by explicit bracket types
pub mod group;
#[doc(inline)]
pub use group::*;

// Punctuation, delimiters
pub mod punct;
#[doc(inline)]
pub use punct::*;

// operators
pub mod operator;
#[doc(inline)]
pub use operator::{names::*, *};

// Literals
pub mod literal;
#[doc(inline)]
pub use literal::*;

// Parse into certain rust types
pub mod rust_types;
#[doc(inline)]
/* is this a bug in the linter when the module only implements traits? */
//#[expect(unused_imports)] // don't want to bump msrv to 1.81 just for this
#[allow(unused_imports)]
pub use rust_types::*;

// Delimited sequences
pub mod delimited;
#[doc(inline)]
pub use delimited::*;

// containers and smart pointers
pub mod container;
#[doc(inline)]
pub use container::*;

// combinators
pub mod combinator;
#[doc(inline)]
pub use combinator::*;

// parse time transformers
pub mod transform;
#[doc(inline)]
pub use transform::*;

// dynamic transformers
pub mod dynamic;
#[doc(inline)]
pub use dynamic::*;

// expression parser building blocks
pub mod expressions;
#[doc(inline)]
pub use expressions::*;

// Parse predicates for compile-time parser control
pub mod predicates;
#[doc(inline)]
pub use predicates::*;

// helpers for the keyword macro
#[doc(hidden)]
pub mod keyword_group;
pub use keyword_group::*;

// debug utilities
pub mod debug;
#[doc(inline)]
pub use debug::*;

/// `unsynn` reexports the entities from `proc_macro2` it implements `Parse` and `ToTokens` for.
#[cfg(feature = "proc_macro2")]
pub use proc_macro2::{
    Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree,
};

/// `unsynn` reexports the entities from `proc_macro` it implements `Parse` and `ToTokens` for.
#[cfg(not(feature = "proc_macro2"))]
pub use proc_macro::{
    Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree,
};

mod private {
    pub trait Sealed {}
}

/// Helper macro that asserts that two entities implementing `ToTokens` result in the same
/// `TokenStream`. Used in tests to ensure that the output of parsing is as expected. This
/// macro supports two forms:
///
/// * The first form takes two expressions; both expressions are converted into canonical
///   strings with `.tokens_to_string()` to be compared.
/// * The second form takes a string literal prefixed with `str` as the second parameter. This
///   string literal is then taken verbatim for the comparison.
///
/// The latter form is used for testing `Joint` punctuation and whitespace placement.
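///
/// # Example
///
/// A small sketch of both forms, using `&str` inputs which are tokenized and normalized as
/// described for [`ToTokens::tokens_to_string()`]:
///
/// ```rust
/// # use unsynn::*;
/// // first form: both sides are canonicalized before comparison
/// assert_tokens_eq!("foo   ( bar )", "foo (bar)");
/// // second form: the right side is compared verbatim
/// assert_tokens_eq!("1 + 2", str "1 + 2");
/// ```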
#[macro_export]
macro_rules! assert_tokens_eq {
    ($a:expr, $b:expr$(, $($arg:tt)*)?) => {
        assert_eq!($a.tokens_to_string(), $b.tokens_to_string() $(, $($arg)*)?);
    };
    ($a:expr, str $b:literal$(, $($arg:tt)*)?) => {
        assert_eq!($a.tokens_to_string(), $b $(, $($arg)*)?);
    };
}