From 537a8b772552a3c239330483c71b40936d8d4c4b Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Thu, 29 Apr 2021 18:09:26 +0200 Subject: [PATCH] Add version from other project This was mostly what I implemented as utility library for another project. But I figured I can also extract it as it's useful on its own. --- Cargo.toml | 7 +- examples/simple.rs | 22 +++ macro/Cargo.toml | 14 ++ macro/src/ast.rs | 42 +++++ macro/src/gen.rs | 394 +++++++++++++++++++++++++++++++++++++++++++++ macro/src/lib.rs | 17 ++ macro/src/parse.rs | 143 ++++++++++++++++ src/lib.rs | 4 + 8 files changed, 641 insertions(+), 2 deletions(-) create mode 100644 examples/simple.rs create mode 100644 macro/Cargo.toml create mode 100644 macro/src/ast.rs create mode 100644 macro/src/gen.rs create mode 100644 macro/src/lib.rs create mode 100644 macro/src/parse.rs diff --git a/Cargo.toml b/Cargo.toml index ba3ef96..b80b805 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,9 @@ version = "0.1.0" authors = ["Lukas Kalbertodt "] edition = "2018" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - [dependencies] +confique-macro = { path = "macro" } +serde = { version = "1", features = ["derive"] } + +[dev-dependencies] +log = { version = "0.4", features = ["serde", "std"] } diff --git a/examples/simple.rs b/examples/simple.rs new file mode 100644 index 0000000..8b11f20 --- /dev/null +++ b/examples/simple.rs @@ -0,0 +1,22 @@ + +mod config { + use std::path::PathBuf; + + confique::config! { + log: { + /// Determines how many messages are logged. Log messages below + /// this level are not emitted. Possible values: "trace", "debug", + /// "info", "warn", "error" and "off". + level: log::LevelFilter = "debug", + + /// If this is set, log messages are also written to this file. 
+ #[example = "/var/log/tobira.log"] + file: Option, + } + } +} + + +fn main() { + +} diff --git a/macro/Cargo.toml b/macro/Cargo.toml new file mode 100644 index 0000000..0c8cdc0 --- /dev/null +++ b/macro/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "confique-macro" +version = "0.0.1" +authors = ["Lukas Kalbertodt "] +edition = "2018" + +[lib] +proc-macro = true + +[dependencies] +syn = "1.0" +quote = "1.0" +proc-macro2 = "1.0" +heck = "0.3.2" diff --git a/macro/src/ast.rs b/macro/src/ast.rs new file mode 100644 index 0000000..081ea80 --- /dev/null +++ b/macro/src/ast.rs @@ -0,0 +1,42 @@ +//! Definition of the intermediate representation or AST. + +/// The parsed input to the `config` macro. +pub(crate) struct Input { + pub(crate) root: Node, +} + +/// One node in the tree of the configuration format. Can either be a leaf node +/// (a string, int, float or bool value) or an internal node that contains +/// children. +pub(crate) enum Node { + Internal { + doc: Vec, + name: syn::Ident, + children: Vec, + }, + Leaf { + doc: Vec, + name: syn::Ident, + ty: syn::Type, + default: Option, + example: Option, + }, +} + +/// The kinds of expressions (just literals) we allow for default or example +/// values. +pub(crate) enum Expr { + Str(syn::LitStr), + Int(syn::LitInt), + Float(syn::LitFloat), + Bool(syn::LitBool), +} + +impl Node { + pub(crate) fn name(&self) -> &syn::Ident { + match self { + Self::Internal { name, .. } => name, + Self::Leaf { name, .. } => name, + } + } +} diff --git a/macro/src/gen.rs b/macro/src/gen.rs new file mode 100644 index 0000000..2e7b9c4 --- /dev/null +++ b/macro/src/gen.rs @@ -0,0 +1,394 @@ +use proc_macro2::TokenStream; +use quote::{quote, ToTokens}; +use syn::Ident; +use std::fmt::{self, Write}; + +use crate::ast::{Expr, Input, Node}; + + +pub(crate) fn gen(input: Input) -> TokenStream { + let visibility = quote! 
{ pub(crate) }; + let toml = gen_toml(&input); + let root_mod = gen_root_mod(&input, &visibility); + let raw_mod = gen_raw_mod(&input, &visibility); + let util_mod = gen_util_mod(&visibility); + + quote! { + const TOML_TEMPLATE: &str = #toml; + + #root_mod + #raw_mod + #util_mod + } +} + +fn gen_util_mod(visibility: &TokenStream) -> TokenStream { + quote! { + mod util { + use std::fmt::{self, Write}; + + #[derive(Debug)] + #visibility struct TryFromError { + #visibility path: &'static str, + } + + impl fmt::Display for TryFromError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + std::write!(f, "required configuration value is missing: '{}'", self.path) + } + } + + impl std::error::Error for TryFromError {} + } + } +} + +fn gen_raw_mod(input: &Input, visibility: &TokenStream) -> TokenStream { + let mut contents = TokenStream::new(); + visit(input, |node, path| { + if let Node::Internal { name, children, .. } = node { + let type_name = to_camel_case(name); + + let raw_fields = collect_tokens(children, |node| { + match node { + Node::Leaf { name, ty, .. } => { + let inner = as_option(&ty).unwrap_or(&ty); + quote! { #visibility #name: Option<#inner>, } + }, + Node::Internal { name, .. } => { + let child_type_name = to_camel_case(name); + quote! { + #[serde(default)] + #visibility #name: #child_type_name, + } + }, + } + }); + + let default_fields = collect_tokens(children, |node| { + match node { + Node::Leaf { name, default: None, .. } => quote! { #name: None, }, + Node::Leaf { name, default: Some(expr), ty, .. } => { + let inner_type = as_option(ty).unwrap_or(ty); + let path = format!("{}.{}", path.join("."), name); + let msg = format!( + "default configuration value for '{}' cannot be deserialized as '{}'", + path, + inner_type.to_token_stream(), + ); + + quote! 
{ + #name: Some({ + let result: Result<_, ::confique::serde::de::value::Error> + = Deserialize::deserialize(#expr.into_deserializer()); + result.expect(#msg) + }), + } + }, + Node::Internal { name, .. } => { + let child_type_name = to_camel_case(name); + quote! { + #name: #child_type_name::default_values(), + } + } + } + }); + + let overwrite_with_fields = collect_tokens(children, |node| { + match node { + Node::Leaf { name, .. } => quote! { + #name: other.#name.or(self.#name), + }, + Node::Internal { name, .. } => quote! { + #name: self.#name.overwrite_with(other.#name), + } + } + }); + + contents.extend(quote! { + #[derive(Debug, Default, ::confique::serde::Deserialize)] + #[serde(deny_unknown_fields)] + #visibility struct #type_name { + #raw_fields + } + + impl #type_name { + #visibility fn default_values() -> Self { + Self { #default_fields } + } + + #visibility fn overwrite_with(self, other: Self) -> Self { + Self { #overwrite_with_fields } + } + } + }); + } + }); + + quote! { + /// Types where all configuration values are optional. + /// + /// The types in this module also represent the full configuration tree, + /// but all values are optional. That's useful for intermediate steps or + /// "layers" of configuration sources. Imagine that the three layers: + /// environment variables, a TOML file and the fixed default values. The + /// only thing that matters is that required values are present after + /// merging all sources, but each individual source can be missing + /// required values. + /// + /// These types implement `serde::Deserialize`. 
+ mod raw { + use super::*; + use ::confique::serde::{Deserialize, de::IntoDeserializer}; + + #contents + } + } +} + +fn gen_root_mod(input: &Input, visibility: &TokenStream) -> TokenStream { + let mut out = TokenStream::new(); + visit(input, |node, path| { + if let Node::Internal { name, doc, children } = node { + let type_name = to_camel_case(name); + + let user_fields = collect_tokens(children, |node| { + match node { + Node::Leaf { name, doc, ty, .. } => quote! { + #( #[doc = #doc] )* + #visibility #name: #ty, + }, + Node::Internal { name, .. } => { + let child_type_name = to_camel_case(name); + quote! { + #visibility #name: #child_type_name, + } + }, + } + }); + + let try_from_fields = collect_tokens(children, |node| { + match node { + Node::Leaf { name, ty, .. } => { + match as_option(ty) { + // If this value is optional, we just move it as it can never fail. + Some(_) => quote! { #name: src.#name, }, + + // Otherwise, we return an error if the value hasn't been specified. + None => { + let path = match path.is_empty() { + true => name.to_string(), + false => format!("{}.{}", path.join("."), name), + }; + + quote! { + #name: src.#name + .ok_or(self::util::TryFromError { path: #path })?, + } + } + } + }, + Node::Internal { name, .. } => quote! { + #name: std::convert::TryFrom::try_from(src.#name)?, + }, + } + }); + + out.extend(quote! { + #( #[doc = #doc] )* + #[derive(Debug)] + #visibility struct #type_name { + #user_fields + } + + impl std::convert::TryFrom for #type_name { + type Error = util::TryFromError; + fn try_from(src: raw::#type_name) -> Result { + Ok(Self { + #try_from_fields + }) + } + } + }); + } + }); + + out +} + +/// Generates the TOML template file. +fn gen_toml(input: &Input) -> String { + /// Writes all doc comments to the file. 
+ fn write_doc(out: &mut String, doc: &[String]) { + for line in doc { + writeln!(out, "#{}", line).unwrap(); + } + } + + /// Adds zero, one or two line breaks to make sure that there are at least + /// two line breaks at the end of the string. + fn add_empty_line(out: &mut String) { + match () { + () if out.ends_with("\n\n") => {}, + () if out.ends_with('\n') => out.push('\n'), + _ => out.push_str("\n\n"), + } + } + + + let mut out = String::new(); + visit(input, |node, path| { + match node { + Node::Internal { doc, .. } => { + write_doc(&mut out, doc); + + // If a new subsection starts, we always print the header, even if not + // strictly necessary. + if path.is_empty() { + add_empty_line(&mut out); + } else { + writeln!(out, "[{}]", path.join(".")).unwrap(); + } + } + + Node::Leaf { doc, name, ty, default, example } => { + write_doc(&mut out, doc); + + // Add note about default value or the value being required. + match default { + Some(default) => { + if !doc.is_empty() { + writeln!(out, "#").unwrap(); + } + writeln!(out, "# Default: {}", default).unwrap(); + } + None if as_option(ty).is_some() => {} + None => { + if !doc.is_empty() { + writeln!(out, "#").unwrap(); + } + writeln!(out, "# Required! This value must be specified.").unwrap(); + } + } + + // We check that already when parsing. + let example = example.as_ref() + .or(default.as_ref()) + .expect("neither example nor default"); + + // Commented out example. + writeln!(out, "#{} = {}", name, example).unwrap(); + add_empty_line(&mut out); + } + } + }); + + // Make sure there is only a single trailing newline. + while out.ends_with("\n\n") { + out.pop(); + } + + out +} + +/// Visits all nodes in depth-first order (visiting the parent before its +/// children). +fn visit(input: &Input, mut visitor: F) +where + F: FnMut(&Node, &[String]), +{ + let mut stack = vec![(&input.root, vec![])]; + while let Some((node, path)) = stack.pop() { + visitor(&node, &path); + + if let Node::Internal { children, .. 
} = node { + for child in children.iter().rev() { + let mut child_path = path.clone(); + child_path.push(child.name().to_string()); + stack.push((child, child_path)); + } + } + } +} + +/// Iterates over `it`, calling `f` for each element, collecting all returned +/// token streams into one. +fn collect_tokens( + it: impl IntoIterator, + f: impl FnMut(T) -> TokenStream, +) -> TokenStream { + it.into_iter().map(f).collect() +} + +fn to_camel_case(ident: &Ident) -> Ident { + use heck::CamelCase; + + Ident::new(&ident.to_string().to_camel_case(), ident.span()) +} + +/// Checks if the given type is an `Option` and if so, return the inner type. +/// +/// Note: this function clearly shows one of the major shortcomings of proc +/// macros right now: we do not have access to the compiler's type tables and +/// can only check if it "looks" like an `Option`. Of course, stuff can go +/// wrong. But that's the best we can do and it's highly unlikely that someone +/// shadows `Option`. +fn as_option(ty: &syn::Type) -> Option<&syn::Type> { + let ty = match ty { + syn::Type::Path(path) => path, + _ => return None, + }; + + if ty.qself.is_some() || ty.path.leading_colon.is_some() { + return None; + } + + let valid_paths = [ + &["Option"] as &[_], + &["std", "option", "Option"], + &["core", "option", "Option"], + ]; + if !valid_paths.iter().any(|vp| ty.path.segments.iter().map(|s| &s.ident).eq(*vp)) { + return None; + } + + let args = match &ty.path.segments.last().unwrap().arguments { + syn::PathArguments::AngleBracketed(args) => args, + _ => return None, + }; + + if args.args.len() != 1 { + return None; + } + + match &args.args[0] { + syn::GenericArgument::Type(t) => Some(t), + _ => None, + } +} + + +impl ToTokens for Expr { + fn to_tokens(&self, tokens: &mut TokenStream) { + match self { + Self::Str(lit) => lit.to_tokens(tokens), + Self::Int(lit) => lit.to_tokens(tokens), + Self::Float(lit) => lit.to_tokens(tokens), + Self::Bool(lit) => lit.to_tokens(tokens), + } + } +} + +// 
This `Display` impl is for writing into a TOML file. +impl fmt::Display for Expr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + // TODO: not sure if `escape_debug` is really what we want here, but + // it's working for now. + Self::Str(lit) => write!(f, "\"{}\"", lit.value().escape_debug()), + Self::Int(lit) => lit.fmt(f), + Self::Float(lit) => lit.fmt(f), + Self::Bool(lit) => lit.value.fmt(f), + } + } +} diff --git a/macro/src/lib.rs b/macro/src/lib.rs new file mode 100644 index 0000000..a213226 --- /dev/null +++ b/macro/src/lib.rs @@ -0,0 +1,17 @@ +use proc_macro::TokenStream as TokenStream1; + + +mod ast; +mod gen; +mod parse; + + +/// Defines a configuration in a special syntax. TODO: explain what this +/// generates. +#[proc_macro] +pub fn config(input: TokenStream1) -> TokenStream1 { + syn::parse2::(input.into()) + .map(gen::gen) + .unwrap_or_else(|e| e.to_compile_error()) + .into() +} diff --git a/macro/src/parse.rs b/macro/src/parse.rs new file mode 100644 index 0000000..a89cd38 --- /dev/null +++ b/macro/src/parse.rs @@ -0,0 +1,143 @@ +use proc_macro2::{Span, TokenStream}; +use syn::{ + Error, Ident, + parse::{Parse, ParseStream}, + punctuated::Punctuated, + spanned::Spanned, +}; + +use crate::ast::{Expr, Input, Node}; + + + +impl Parse for Input { + fn parse(input: ParseStream) -> Result { + let mut outer_attrs = input.call(syn::Attribute::parse_inner)?; + let doc = extract_doc(&mut outer_attrs)?; + let children = input.call(>::parse_terminated)?; + assert_no_extra_attrs(&outer_attrs)?; + + let root = Node::Internal { + doc, + name: Ident::new("config", Span::call_site()), + children: children.into_iter().collect(), + }; + + Ok(Self { root }) + } +} + + +impl Parse for Node { + fn parse(input: ParseStream) -> Result { + let mut attrs = input.call(syn::Attribute::parse_outer)?; + let doc = extract_doc(&mut attrs)?; + + // All nodes start with an identifier and a colon. 
+ let name = input.parse()?; + let _: syn::Token![:] = input.parse()?; + + let out = if input.lookahead1().peek(syn::token::Brace) { + // --- A nested Internal --- + + let inner; + syn::braced!(inner in input); + let fields = inner.call(>::parse_terminated)?; + + Self::Internal { + doc, + name, + children: fields.into_iter().collect(), + } + } else { + // --- A single value --- + + // Type is mandatory. + let ty = input.parse()?; + + // Optional default value. + let default = if input.lookahead1().peek(syn::Token![=]) { + let _: syn::Token![=] = input.parse()?; + Some(input.parse()?) + } else { + None + }; + + // Optional example value. + let example = attrs.iter() + .position(|attr| attr.path.is_ident("example")) + .map(|i| { + let attr = attrs.remove(i); + parse_attr_value::(attr.tokens) + }) + .transpose()?; + + if example.is_none() && default.is_none() { + let msg = "either a default value or an example value has to be specified"; + return Err(Error::new(name.span(), msg)); + } + + Self::Leaf { doc, name, ty, default, example } + }; + + assert_no_extra_attrs(&attrs)?; + + Ok(out) + } +} + +impl Parse for Expr { + fn parse(input: ParseStream) -> Result { + let lit = input.parse::()?; + let out = match lit { + syn::Lit::Str(l) => Self::Str(l), + syn::Lit::Int(l) => Self::Int(l), + syn::Lit::Float(l) => Self::Float(l), + syn::Lit::Bool(l) => Self::Bool(l), + + _ => { + let msg = "only string, integer, float and bool literals are allowed here"; + return Err(Error::new(lit.span(), msg)); + } + }; + + Ok(out) + } +} + +/// Makes sure that the given list is empty or returns an error otherwise. +fn assert_no_extra_attrs(attrs: &[syn::Attribute]) -> Result<(), Error> { + if let Some(attr) = attrs.get(0) { + let msg = "unknown/unexpected/duplicate attribute in this position"; + return Err(Error::new(attr.span(), msg)); + } + + Ok(()) +} + +/// Parses the token stream as a `T` preceded by a `=`. This is useful for +/// attributes of the form `#[foo = <value>]`. 
+fn parse_attr_value(tokens: TokenStream) -> Result { + use syn::parse::Parser; + + fn parser(input: ParseStream) -> Result { + let _: syn::Token![=] = input.parse()?; + input.parse() + } + + parser.parse2(tokens) +} + +/// Extract all doc attributes from the list and return them as simple strings. +fn extract_doc(attrs: &mut Vec) -> Result, Error> { + let out = attrs.iter() + .filter(|attr| attr.path.is_ident("doc")) + .map(|attr| parse_attr_value::(attr.tokens.clone()).map(|lit| lit.value())) + .collect::>()?; + + // I know this is algorithmically not optimal, but `drain_filter` is still + // unstable and I can't be bothered to write the proper algorithm right now. + attrs.retain(|attr| !attr.path.is_ident("doc")); + + Ok(out) +} diff --git a/src/lib.rs b/src/lib.rs index e69de29..77737a1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -0,0 +1,4 @@ + +pub use confique_macro::config as config; + +pub use serde;