// unsynn/lib.rs

1#![allow(rustdoc::bare_urls)]
2#![doc = include_str!("../README.md")]
3//!
4#![doc = include_str!("../COOKBOOK.md")]
5//!
6//! # Roadmap
7//!
8#![doc = include_str!("../ROADMAP.md")]
9//!
//! For a history of how unsynn evolved, check the [CHANGELOG].
11// PLANNED: currently the Error type is a tad big. This could be either resolved by
12// refactoring the Error type (only keep refine typename, remove 'at' and have the iter start
13// at the error not after) or just Box<Error>. A conclusive optimization for this is postponed
// until we have benchmarks to decide what's the best approach. It may as well just stay as
15// is...
16#![allow(clippy::result_large_err)]
17
18// When not using proc_macro2, we need the built-in proc_macro.
19// This is only available when unsynn is used from a proc-macro crate.
20#[cfg(not(feature = "proc_macro2"))]
21extern crate proc_macro;
22
// Documentation-only module: renders CHANGELOG.md into rustdoc so the crate
// docs can link to it as [CHANGELOG]. The non_snake_case allow is needed
// because the module is deliberately named in upper case to match the file.
pub mod CHANGELOG {
    #![allow(non_snake_case)]
    #![allow(clippy::doc_markdown)]
    #![doc = include_str!("../CHANGELOG.md")]
}
28
29// TokenIter
30mod token_iter;
31#[doc(inline)]
32pub use token_iter::*;
33
/// The `Parser` trait that must be implemented by anything we want to parse. We are parsing
/// over a [`TokenIter`] ([`TokenStream`] iterator).
pub trait Parser
where
    Self: Sized,
{
    /// The actual parsing function that must be implemented. This mutates the `tokens`
    /// iterator directly. It should not be called from user code except for implementing
    /// parsers themselves, and then only when the rules below are followed.
    ///
    /// # Implementing Parsers
    ///
    /// The parsers for [`TokenStream`], [`TokenTree`], [`Group`], [`Ident`], [`Punct`],
    /// [`Literal`], [`Except`] and [`Nothing`] (and a few more) are the fundamental parsers.
    /// Any other parser is composed from those.
    ///
    /// Calling another `T::parser()` implementation is only valid when this is a conjunctive
    /// operation and a failure is returned immediately by the `?` operator. This can be used
    /// as a performance optimization. Any other call to a parser must be done within a transaction.
    /// Otherwise the iterator will be left in a consumed state which breaks further parsing.
    ///
    /// Transactions can be done by calling [`Parse::parse()`] or with the
    /// [`Transaction::transaction()`] method on the iterator.
    ///
    /// # Errors
    ///
    /// The `parser()` implementation must return an error when it cannot parse the
    /// input. This error must be an [`Error`]. User code will parse a grammar by calling
    /// [`Parse::parse_all()`], [`Parse::parse()`] or [`Parse::parse_with()`] which will call
    /// this method within a transaction and roll back on error.
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn parser(tokens: &mut TokenIter) -> Result<Self>;
}
67
/// This trait provides the user facing API to parse grammatical entities. It is implemented
/// for anything that implements the [`Parser`] trait. The methods here encapsulate the
/// iterator that is used for parsing into a transaction. This iterator is always
/// `Clone`. Instead of using a peekable iterator or implementing deeper peeking, parse
/// clones this iterator to make access transactional: when parsing succeeds the transaction
/// becomes committed, otherwise it is rolled back.
///
/// This trait cannot be implemented by user code.
pub trait Parse: Parser {
    /// This is the user facing API to parse grammatical entities. Calls a `parser()` within a
    /// transaction. Commits changes on success and returns the parsed value.
    ///
    /// # Errors
    ///
    /// When the parser returns an error the transaction is rolled back and the error is
    /// returned.
    #[inline]
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn parse(tokens: &mut TokenIter) -> Result<Self> {
        tokens.transaction(Self::parser)
    }

    /// Exhaustive parsing within a transaction. This is a convenience method that implies an
    /// `EndOfStream` at the end. Thus it will error if parsing is not exhaustive.
    ///
    /// # Errors
    ///
    /// When the parser returns an error or there are tokens left in the stream the
    /// transaction is rolled back and an error is returned.
    #[inline]
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn parse_all(tokens: &mut TokenIter) -> Result<Self> {
        // Parse `Self` followed by `EndOfStream`, then discard the end marker.
        tokens
            .transaction(Cons::<Self, EndOfStream>::parser)
            .map(|result| result.first)
    }

    /// Parse a value in a transaction, pass it to a
    /// `FnOnce(Self, &mut TokenIter) -> Result<T>` closure which
    /// creates a new result or returns an Error.
    ///
    /// This method is a very powerful tool as it allows anything from simple validations to
    /// complete transformations into a new type. You may find this useful to implement
    /// parsers for complex types that need some runtime logic.
    ///
    /// The closure's first argument is the parsed value and the second argument is the
    /// transactional iterator pointing after parsing `Self`. This can be used to create
    /// errors or parse further. In many cases it can be ignored with `_`.
    ///
    /// # Using with the `unsynn!` macro
    ///
    /// The [`unsynn!`] macro provides convenient syntax sugar for this method via the `parse_with`
    /// clause. See the [macro documentation](crate::unsynn#custom-parsing-with-parse_with) for details.
    ///
    /// ```rust
    /// # use unsynn::*;
    /// unsynn! {
    ///     struct PositiveInt(LiteralInteger);
    ///     parse_with |this, tokens| {
    ///         if this.0.value() > 0 {
    ///             Ok(this)
    ///         } else {
    ///             Error::other(None, tokens, "must be positive".into())
    ///         }
    ///     };
    /// }
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// # use unsynn::*;
    /// # use std::collections::BTreeSet;
    /// // A parser that parses a comma delimited list of anything but commas
    /// // and stores these lexical sorted.
    /// struct OrderedStrings {
    ///     strings: Vec<String>
    /// }
    ///
    /// impl Parser for OrderedStrings {
    ///     fn parser(tokens: &mut TokenIter) -> Result<Self> {
    ///         // Our input is CommaDelimitedVec<String>, we'll transform that into
    ///         // OrderedStrings.
    ///         Parse::parse_with(tokens, |this : CommaDelimitedVec<String>, _| -> Result<OrderedStrings> {
    ///             let mut strings: Vec<String> = this.into_iter()
    ///                 .map(|s| s.value)
    ///                 .collect();
    ///             strings.sort();
    ///             Ok(OrderedStrings { strings })
    ///         })
    ///     }
    /// }
    /// let mut input = "a, d, b, e, c,".to_token_iter();
    /// let ordered_strings: OrderedStrings = input.parse().unwrap();
    /// assert_eq!(ordered_strings.strings, vec!["a", "b", "c", "d", "e"]);
    /// ```
    ///
    /// # Errors
    ///
    /// When the parser or the closure returns an error, the transaction is rolled back and
    /// the error is returned.
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn parse_with<T>(
        tokens: &mut TokenIter,
        f: impl FnOnce(Self, &mut TokenIter) -> Result<T>,
    ) -> Result<T> {
        // Both the parse and the closure run inside one transaction: a failure in
        // either rolls the iterator back to its pre-parse position.
        tokens.transaction(|tokens| {
            let result = Self::parser(tokens)?;
            f(result, tokens)
        })
    }
}
180
/// Parse is implemented for anything that implements [`Parser`].
// Blanket impl: together with `Parse` having only defaulted methods this makes
// `Parse` effectively non-implementable by hand — users implement `Parser` only.
impl<T: Parser> Parse for T {}
183
/// unsynn defines its own [`ToTokens`] trait to be able to implement it for std container types.
/// This is similar to the `ToTokens` from the quote crate but adds some extra methods and is
/// implemented for more types. Moreover the `to_token_iter()` method is the main entry point
/// for creating an iterator that can be used for parsing.
///
/// # Using with the `unsynn!` macro
///
/// The [`unsynn!`] macro provides convenient syntax sugar for customizing token emission via the
/// `to_tokens` clause. See the [macro documentation](crate::unsynn#custom-token-emission-with-to_tokens)
/// for details.
///
/// ```rust
/// # use unsynn::*;
/// unsynn! {
///     struct BoolKeyword(bool);
///     to_tokens |s, tokens| {
///         let keyword = if s.0 { "true" } else { "false" };
///         Ident::new(keyword, Span::call_site()).to_tokens(tokens);
///     };
/// }
/// ```
pub trait ToTokens {
    /// Write `&self` to the given [`TokenStream`].
    ///
    /// This is the core method that needs to be implemented. All other methods in this trait
    /// have default implementations based on this method.
    ///
    /// # Using with the `unsynn!` macro
    ///
    /// The [`unsynn!`] macro's `to_tokens` clause provides syntax sugar for implementing this
    /// method. See [`unsynn!` documentation](crate::unsynn#custom-token-emission-with-to_tokens).
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn to_tokens(&self, tokens: &mut TokenStream);

    /// Convert `&self` into a [`TokenIter`] object.
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn to_token_iter(&self) -> TokenIter {
        TokenIter::new(self.to_token_stream())
    }

    /// Convert `self` into a [`TokenIter`] object.
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn into_token_iter(self) -> TokenIter
    where
        Self: Sized,
    {
        TokenIter::new(self.into_token_stream())
    }

    /// Convert `&self` into a [`TokenStream`] object.
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn to_token_stream(&self) -> TokenStream {
        let mut tokens = TokenStream::new();
        self.to_tokens(&mut tokens);
        tokens
    }

    /// Convert `self` into a [`TokenStream`] object.
    ///
    /// The default just delegates to `to_token_stream()`; implementors that can
    /// move their tokens out may override this to avoid a copy.
    #[inline]
    #[mutants::skip]
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn into_token_stream(self) -> TokenStream
    where
        Self: Sized,
    {
        self.to_token_stream()
    }

    /// Convert `&self` into a [`String`] object.  This is mostly used in the test suite to
    /// compare the outputs.  When the input is a `&str` then this parses it and returns a
    /// normalized [`String`].
    #[inline]
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn tokens_to_string(&self) -> String {
        self.to_token_stream().to_string()
    }
}
261
262// Full circle
263impl ToTokens for TokenIter {
264    fn to_tokens(&self, tokens: &mut TokenStream) {
265        tokens.extend(self.clone());
266    }
267}
268
269/// `ToTokens` for arrays and slices
270///
271/// # Example
272///
273/// ```rust
274/// use unsynn::*;
275/// let arr: [Ident; 3] = [
276///     Ident::new("a", Span::call_site()),
277///     Ident::new("b", Span::call_site()),
278///     Ident::new("c", Span::call_site())
279/// ];
280/// let mut tokens = TokenStream::new();
281/// arr.to_tokens(&mut tokens);
282/// assert_eq!(tokens.to_string(), "a b c");
283/// # let vec = vec![Ident::new("a", Span::call_site()), Ident::new("b", Span::call_site()), Ident::new("c", Span::call_site())];
284/// # let mut tokens = TokenStream::new();
285/// # vec[1..3].to_tokens(&mut tokens);
286/// # assert_eq!(tokens.to_string(), "b c");
287/// ```
288impl<T: ToTokens> ToTokens for [T] {
289    fn to_tokens(&self, tokens: &mut TokenStream) {
290        for element in self {
291            element.to_tokens(tokens);
292        }
293    }
294}
295
296/// implement `Display` using `ToTokens::tokens_to_string()` for all types that implement `ToTokens`
297impl std::fmt::Display for dyn ToTokens {
298    #[mutants::skip]
299    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
300        write!(f, "{}", self.tokens_to_string())
301    }
302}
303
/// Extension trait for [`TokenIter`] that calls [`Parse::parse()`].
///
/// This trait is sealed (via the `private::Sealed` bound) and cannot be
/// implemented outside this crate.
#[allow(clippy::missing_errors_doc)]
pub trait IParse: private::Sealed {
    /// Parse a value from the iterator. This is a convenience method that calls
    /// [`Parse::parse()`].
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn parse<T: Parse>(self) -> Result<T>;

    /// Parse a value from the iterator. This is a convenience method that calls
    /// [`Parse::parse_all()`].
    #[cfg_attr(feature = "trait_methods_track_caller", track_caller)]
    fn parse_all<T: Parse>(self) -> Result<T>;
}
317
318impl private::Sealed for &mut TokenIter {}
319
320/// Implements [`IParse`] for [`&mut TokenIter`]. This API is more convenient in cases where the
321/// compiler can infer types because no turbofish notations are required.
322///
323/// # Example
324///
325/// ```rust
326/// # use unsynn::*;
327///
328/// struct MyStruct {
329///     number: LiteralInteger,
330///     name:   Ident,
331/// }
332///
333/// fn example() -> Result<MyStruct> {
334///     let mut input = " 1234 name ".to_token_iter();
335///     Ok(
336///         MyStruct {
337///             // types are inferred here
338///             number: input.parse()?,
339///             name: input.parse()?
340///         }
341///     )
342/// }
343/// ```
344impl IParse for &mut TokenIter {
345    #[inline]
346    fn parse<T: Parse>(self) -> Result<T> {
347        T::parse(self)
348    }
349
350    #[inline]
351    fn parse_all<T: Parse>(self) -> Result<T> {
352        T::parse_all(self)
353    }
354}
355
356/// Helper trait to make [`TokenIter`] transactional
357pub trait Transaction: Clone {
358    /// Transaction on a [`TokenIter`], calls a `FnOnce(&mut TokenIter) -> Result<T>` within a
359    /// transaction. When the closure succeeds, then the transaction is committed and its result
360    /// is returned.
361    ///
362    /// # Errors
363    ///
364    /// When the closure returns an error, the transaction is rolled back and the error
365    /// is returned.
366    fn transaction<R>(&mut self, f: impl FnOnce(&mut Self) -> Result<R>) -> Result<R> {
367        let mut ttokens = self.clone();
368        #[allow(clippy::manual_inspect)] // not pre 1.81
369        f(&mut ttokens).map(|result| {
370            *self = ttokens;
371            result
372        })
373    }
374}
375
376impl Transaction for TokenIter {}
377
378// Result and error type
379mod error;
380pub use error::*;
381
382// various declarative macros
383mod macros;
384
385// Parsers for the `proc_macro2` entities and other fundamental types
386pub mod fundamental;
387#[doc(inline)]
388pub use fundamental::*;
389
390// Groups by explicit bracket types
391pub mod group;
392#[doc(inline)]
393pub use group::*;
394
395// Punctuation, delimiters
396pub mod punct;
397#[doc(inline)]
398pub use punct::*;
399
400// operators
401pub mod operator;
402#[doc(inline)]
403pub use operator::{names::*, *};
404
405// Literals
406pub mod literal;
407#[doc(inline)]
408pub use literal::*;
409
410// Parse into certain rust types
411pub mod rust_types;
412#[doc(inline)]
413/* is this a bug in the linter when the module only implements traits? */
414//#[expect(unused_imports)] // don't want to bump msrv to 1.81 just for this
415#[allow(unused_imports)]
416pub use rust_types::*;
417
418// Delimited sequences
419pub mod delimited;
420#[doc(inline)]
421pub use delimited::*;
422
423// containers and smart pointers
424pub mod container;
425#[doc(inline)]
426pub use container::*;
427
428// combinators
429pub mod combinator;
430#[doc(inline)]
431pub use combinator::*;
432
433// parse time transformers
434pub mod transform;
435#[doc(inline)]
436pub use transform::*;
437
438// dynamic transformers
439pub mod dynamic;
440#[doc(inline)]
441pub use dynamic::*;
442
443// expression parser building blocks
444pub mod expressions;
445#[doc(inline)]
446pub use expressions::*;
447
448// Parse predicates for compile-time parser control
449pub mod predicates;
450#[doc(inline)]
451pub use predicates::*;
452
453// helpers for the keyword macro
454#[doc(hidden)]
455pub mod keyword_group;
456pub use keyword_group::*;
457
458// debug utilities
459pub mod debug;
460#[doc(inline)]
461pub use debug::*;
462
463/// `unsynn` reexports the entities from `proc_macro2` it implements `Parse` and `ToTokens` for.
464#[cfg(feature = "proc_macro2")]
465pub use proc_macro2::{
466    Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree,
467};
468
469/// `unsynn` reexports the entities from `proc_macro` it implements `Parse` and `ToTokens` for.
470#[cfg(not(feature = "proc_macro2"))]
471pub use proc_macro::{
472    Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree,
473};
474
// Sealed-trait pattern: traits bounded on `private::Sealed` (e.g. `IParse`)
// cannot be implemented outside this crate because this module is private.
mod private {
    pub trait Sealed {}
}
478
/// Helper macro that asserts that two entities implementing `ToTokens` result in the same
/// `TokenStream`. Used in tests to ensure that the output of parsing is as expected.  This
/// macro allows two forms:
///
///  * The first form takes two expressions, both expressions are converted into canonical
///    strings with `.tokens_to_string()` to be compared.
///  * The second form takes a string literal prefixed with `str` as second parameter. This
///    string literal is then taken literally for the comparison.
///
/// The latter form is used for testing `Joint` punctuation and whitespace placement.
#[macro_export]
macro_rules! assert_tokens_eq {
    // Compare two `ToTokens` values by canonical token-string form; extra
    // arguments are forwarded to `assert_eq!` as the failure message.
    ($a:expr, $b:expr$(, $($arg:tt)*)?) => {
        assert_eq!($a.tokens_to_string(), $b.tokens_to_string() $(, $($arg)*)?);
    };
    // Compare against a verbatim string literal (`str "..."`), preserving the
    // exact spacing rather than normalizing through `tokens_to_string()`.
    ($a:expr, str $b:literal$(, $($arg:tt)*)?) => {
        assert_eq!($a.tokens_to_string(), $b $(, $($arg)*)?);
    };
}
497}