How to parse mustache with Boost.Xpressive correctly?
Here is the correct full code from @sehe that now works under GCC >4.8 and CLANG under Linux and Windows. Again many thanks mate for this awesome help, even though this means that I can bury XPressive :D
The following lines have changed or been added:
// --
#define BOOST_RESULT_OF_USE_DECLTYPE
// --
// Function object that forwards to the argument's own to_string() member.
struct to_string_f {
    template <typename Source>
    auto operator()(Source const& source) const -> std::string {
        return source.to_string();
    }
};
// --
section %= "{{" >> sense >> reference [ section_id = to_string(_1) ] >> "}}"
>> sequence // contents
> ("{{" >> ('/' >> lexeme [ lit(section_id) ]) >> "}}");
// --
phx::function<to_string_f> to_string;
//#define BOOST_SPIRIT_DEBUG
#define BOOST_RESULT_OF_USE_DECLTYPE
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/utility/string_ref.hpp>
#include <functional>
#include <map>
namespace mustache {
    using boost::string_ref;

    // An atom wraps a string_ref that refers directly to the source text.
    // `Kind` is only a tag type, so each kind of atom is a distinct type.
    template <typename Kind> struct atom {
        string_ref value;

        atom() { }
        atom(string_ref const& text) : value(text) { }

        // Streams the atom as <typeid name>[<text>].
        friend std::ostream& operator<<(std::ostream& os, atom const& v) {
            os << typeid(v).name() << "[" << v.value << "]";
            return os;
        }
    };

    // The three atom kinds.
    using verbatim = atom<struct verbatim_tag>;
    using variable = atom<struct variable_tag>;
    using partial  = atom<struct partial_tag>;

    // A template element is any atom or a (recursive) section.
    // TODO comments and set-separators
    struct section;
    using melement = boost::variant<
        verbatim,
        variable,
        partial,
        boost::recursive_wrapper<section>
    >;

    // A template is a sequence of elements.
    using sequence = std::vector<melement>;

    // A section holds its own nested template sequence.
    struct section {
        bool       sense;   // positive or negative
        string_ref control;
        sequence   content;
    };
}
BOOST_FUSION_ADAPT_STRUCT(mustache::section, (bool, sense)(boost::string_ref, control)(mustache::sequence, content))
namespace qi = boost::spirit::qi;
namespace phx= boost::phoenix;
// Function object that forwards to the argument's own to_string() member.
struct to_string_f {
    template <typename Source>
    auto operator()(Source const& source) const -> std::string {
        return source.to_string();
    }
};
// Spirit.Qi grammar that parses mustache template text into a mustache::sequence.
// No skipper is declared on the grammar or on any of its rules.
template <typename Iterator>
struct mustache_grammar : qi::grammar<Iterator, mustache::sequence()>
{
// Wires up all rules; `sequence` is the start rule.
mustache_grammar() : mustache_grammar::base_type(sequence)
{
using namespace qi;
// Placeholder for the rule-local declared via qi::locals<std::string> on `section`.
static const _a_type section_id = {}; // local
using boost::phoenix::construct;
using boost::phoenix::begin;
using boost::phoenix::size;
// A template is zero or more elements.
sequence = *element;
element =
!(lit("{{") >> '/') >> // section-end ends the current sequence
(partial | section | variable | verbatim);
// A tag name: one or more graph characters, stopping before "}}".
// The action builds a string_ref from the address of the first matched
// character and the match length, so the name is not copied.
reference = raw [ lexeme [ +(graph - "}}") ] ]
[ _val = construct<boost::string_ref>(&*begin(_1), size(_1)) ];
// {{> name}} -- note the literal requires exactly "> " (with the space).
partial = qi::lit("{{") >> "> " >> reference >> "}}";
// '#' yields true, '^' yields false.
sense = ('#' > attr(true))
| ('^' > attr(false));
// {{#name}} or {{^name}}, the nested contents, then a closing {{/name}}.
// The opening name is stored in the rule-local so the closing tag has to
// repeat it; the closing tag follows `>` (Qi's expectation operator).
section %= "{{" >> sense >> reference [ section_id = to_string(_1) ] >> "}}"
>> sequence // contents
> ("{{" >> ('/' >> lexeme [ lit(section_id) ]) >> "}}");
// {{name}}
variable = "{{" >> reference >> "}}";
// Literal text: one or more characters, stopping before "{{"; exposed as a
// string_ref over the matched range, like `reference`.
verbatim = raw [ lexeme [ +(char_ - "{{") ] ]
[ _val = construct<boost::string_ref>(&*begin(_1), size(_1)) ];
// NOTE(review): presumably only has an effect when BOOST_SPIRIT_DEBUG is defined -- confirm.
BOOST_SPIRIT_DEBUG_NODES(
(sequence)(element)(partial)(variable)(section)(verbatim)
(reference)(sense)
)
}
private:
// Lazy (Phoenix) wrapper around to_string_f, used inside the `section` action.
phx::function<to_string_f> to_string;
qi::rule<Iterator, mustache::sequence()> sequence;
qi::rule<Iterator, mustache::melement()> element;
qi::rule<Iterator, mustache::partial()> partial;
// Carries one std::string local: the name from the section's opening tag.
qi::rule<Iterator, mustache::section(), qi::locals<std::string> > section;
qi::rule<Iterator, bool()> sense; // positive or negative
qi::rule<Iterator, mustache::variable()> variable;
qi::rule<Iterator, mustache::verbatim()> verbatim;
qi::rule<Iterator, boost::string_ref()> reference;
};
namespace Dumping {
    // Visitor that writes a parsed template back out in mustache syntax.
    struct dumper : boost::static_visitor<std::ostream&>
    {
        // Dumps every element of the sequence, in order.
        std::ostream& operator()(std::ostream& os, mustache::sequence const& v) const {
            auto dump_one = std::bind(dumper(), std::ref(os), std::placeholders::_1);
            for (auto const& node : v)
                boost::apply_visitor(dump_one, node);
            return os;
        }

        // Literal text is written unchanged.
        std::ostream& operator()(std::ostream& os, mustache::verbatim const& v) const {
            os << v.value;
            return os;
        }

        // {{name}}
        std::ostream& operator()(std::ostream& os, mustache::variable const& v) const {
            os << "{{" << v.value << "}}";
            return os;
        }

        // {{> name}}
        std::ostream& operator()(std::ostream& os, mustache::partial const& v) const {
            os << "{{> " << v.value << "}}";
            return os;
        }

        // Opening tag ('#' for positive, '^' for negative), contents, closing tag.
        std::ostream& operator()(std::ostream& os, mustache::section const& v) const {
            char const sigil = v.sense ? '#' : '^';
            os << "{{" << sigil << v.control << "}}";
            this->operator()(os, v.content);
            os << "{{/" << v.control << "}}";
            return os;
        }
    };
}
namespace ContextExpander {
// Empty alternative; it is the first alternative of Value.
struct Nil { };

// A context value: nothing, a number, a string, a dictionary of values,
// or an array of values (recursively).
using Value = boost::make_recursive_variant<
    Nil,
    double,
    std::string,
    std::map<std::string, boost::recursive_variant_>,
    std::vector<boost::recursive_variant_>
>::type;

using Dict  = std::map<std::string, Value>;
using Array = std::vector<Value>;

// Placeholder renderings for alternatives that have no textual form of their own.
static inline std::ostream& operator<<(std::ostream& os, Nil const&) {
    os << "#NIL#";
    return os;
}

static inline std::ostream& operator<<(std::ostream& os, Dict const& v) {
    os << "#DICT(" << v.size() << ")#";
    return os;
}

static inline std::ostream& operator<<(std::ostream& os, Array const& v) {
    os << "#ARRAY(" << v.size() << ")#";
    return os;
}
// Visitor that renders a parsed template to a stream, resolving tags against a
// context Value. boost::apply_visitor picks the overload from the pair
// (context alternative, element alternative).
struct expander : boost::static_visitor<std::ostream&>
{
    // Renders every element of `v`, in order, against `ctx`.
    std::ostream& operator()(std::ostream& os, Value const& ctx, mustache::sequence const& v) const {
        for(auto& element : v)
            boost::apply_visitor(std::bind(expander(), std::ref(os), std::placeholders::_1, std::placeholders::_2), ctx, element);
        return os;
    }

    // Literal text is written unchanged, whatever the context is.
    template <typename Ctx>
    std::ostream& operator()(std::ostream& os, Ctx const&/*ignored*/, mustache::verbatim const& v) const {
        return os << v.value;
    }

    // {{name}} with a dictionary context: writes the value if the key exists,
    // nothing otherwise.
    std::ostream& operator()(std::ostream& os, Dict const& ctx, mustache::variable const& v) const {
        auto it = ctx.find(v.value.to_string());
        if (it != ctx.end())
            os << it->second;
        return os;
    }

    // {{name}} with any non-dictionary context: writes nothing.
    template <typename Ctx>
    std::ostream& operator()(std::ostream& os, Ctx const&, mustache::variable const&) const {
        return os;
    }

    // {{> name}}: looks up `name` in the dictionary, parses its string value as
    // a template and renders it against the same dictionary. Writes "#ERROR#"
    // when the key is missing, the value is not a string, or parsing fails.
    std::ostream& operator()(std::ostream& os, Dict const& ctx, mustache::partial const& v) const {
        auto it = ctx.find(v.value.to_string());
        if (it != ctx.end())
        {
            // Pointer form of boost::get: yields nullptr for a non-string value
            // instead of throwing boost::bad_get.
            if (auto const* subtemplate = boost::get<std::string>(&it->second))
            {
                static const mustache_grammar<std::string::const_iterator> p;
                std::string::const_iterator first = subtemplate->begin(), last = subtemplate->end();
                mustache::sequence dynamic_template;
                if (qi::parse(first, last, p, dynamic_template))
                    return (*this)(os, Value{ctx}, dynamic_template);
            }
        }
        return os << "#ERROR#";
    }

    // {{#name}}...{{/name}} / {{^name}}...{{/name}} with a dictionary context.
    // A present key dispatches on the type of its value (see do_section); a
    // missing key renders the contents only for a negative section, and then
    // against a Nil context.
    std::ostream& operator()(std::ostream& os, Dict const& ctx, mustache::section const& v) const {
        auto it = ctx.find(v.control.to_string());
        if (it != ctx.end())
            boost::apply_visitor(std::bind(do_section(), std::ref(os), std::placeholders::_1, std::cref(v)), it->second);
        else if (!v.sense)
            (*this)(os, Value{/*Nil*/}, v.content);
        return os;
    }

    // Fallback for every (context, element) combination not handled above:
    // writes a "to be implemented" marker naming the instantiated overload.
    // __PRETTY_FUNCTION__ is a GCC/Clang extension.
    template <typename Ctx, typename T>
    std::ostream& operator()(std::ostream& os, Ctx const&/* ctx*/, T const&/* element*/) const {
        return os << "[TBI:" << __PRETTY_FUNCTION__ << "]";
    }

private:
    // Renders a section whose control key was found, dispatching on the type of
    // the value stored under that key.
    struct do_section : boost::static_visitor<> {
        // Array value: the contents are rendered once per item, with the item
        // as context.
        void operator()(std::ostream& os, Array const& ctx, mustache::section const& v) const {
            for(auto& item : ctx)
                expander()(os, item, v.content);
        }

        // Any other value: the contents are rendered once, with the value as
        // context, when the section's sense matches the value's truthiness.
        template <typename Ctx>
        void operator()(std::ostream& os, Ctx const& ctx, mustache::section const& v) const {
            if (v.sense == truthiness(ctx))
                expander()(os, Value(ctx), v.content);
        }
    private:
        static bool truthiness(Nil) { return false; }
        // Zero is falsy and every other number truthy. The original comparison
        // was `0. == d`, which had it the wrong way round.
        static bool truthiness(double d) { return 0. != d; }
        // Strings and containers are truthy when non-empty.
        template <typename T> static bool truthiness(T const& v) { return !v.empty(); }
    };
};
}
// Demo driver: parses a sample mustache template, prints the input and a dump
// of the parsed template, then expands it against a sample context. All output
// goes to std::cout.
//
// Returns 0 after a normal run and 1 when a qi::expectation_failure was caught.
// The original flowed off the end of this non-void function without returning
// a value, which is undefined behaviour for any function other than main().
int myMain()
{
    std::cout << std::unitbuf;
    std::string input = "<ul>{{#time}}\n\t<li>{{> partial}}</li>{{/time}}</ul>\n "
        "<i>for all good men</i> to come to the {007} aid of "
        "their</bold> {{country}}. Result: {{^Res2}}(absent){{/Res2}}{{#Res2}}{{Res2}}{{/Res2}}"
        ;
    // Parser setup --------------------------------------------------------
    typedef std::string::const_iterator It;
    static const mustache_grammar<It> p;
    It first = input.begin(), last = input.end();
    try {
        mustache::sequence parsed_template;
        if (qi::parse(first, last, p, parsed_template))
        {
            std::cout << "Parse success\n";
        } else
        {
            std::cout << "Parse failed\n";
        }
        if (first != last)
        {
            std::cout << "Remaing unparsed input: '" << std::string(first, last) << "'\n";
        }
        std::cout << "Input: " << input << "\n";
        std::cout << "Dump: ";
        Dumping::dumper()(std::cout, parsed_template) << "\n";
        std::cout << "Evaluation: ";
        {
            using namespace ContextExpander;
            expander engine;
            Value const ctx = Dict {
                { "time", Array {
                    Dict { { "partial", "gugus {{zeit}} (a.k.a. <u>{{title}}</u>)"}, { "title", "noon" }, { "zeit", "12:00" } },
                    Dict { { "partial", "gugus {{zeit}} (a.k.a. <u>{{title}}</u>)"}, { "title", "evening" }, { "zeit", "19:30" } },
                    Dict { { "partial", "gugus <u>{{title}}</u> (expected at around {{zeit}})"}, { "title", "dawn" }, { "zeit", "06:00" } },
                } },
                { "country", "ESP" },
                { "Res3", "unused" }
            };
            engine(std::cout, ctx, parsed_template);
        }
    } catch(qi::expectation_failure<It> const& e)
    {
        std::cout << "Unexpected: '" << std::string(e.first, e.last) << "'\n";
        return 1;
    }
    return 0;
}
How to parse using boost if it is not json, but similar to it?
It looks like Yet Another YAML/Mustache/JSON/... derivative.
Without a formal spec it's hard to actually assess what effort would be required, but here's a list of implementations of similar grammars in Boost Spirit, with varying amounts of feature completeness:
- How to parse mustache with Boost.Xpressive correctly? <-- this is likely your best matching demonstration
- Parse a substring as JSON using QJsonDocument (minimal subset, use something like this to transform the input to proper JSON, e.g.?)
- Reading JSON file with C++ and BOOST A full featured JSON parser (with AST and escapes but no comments)
Applications of a toy JSON parser implementation:
- replace only some value from json to json
- How to manipulate leaves of a JSON tree
- more
Parsing white-spaces in between lexemes using boost-spirit
Generation is a fundamentally different job than parsing.
Parsing removes redundancy and normalizes data. Generation adds redundancy and chooses (one of typically many) representations according to some goals (stylistic guides, efficiency goals etc).
By allowing yourself to get side-tracked by the similarity to BNF, you've lost sight of your goals. After all, in BNF many instances of whitespace are simply not significant.
This is manifest in the direct observation that the AST does not contain the whitespace.
Hacking It
The simplest way would be to represent the skipped whitespace instead as "string literals" inside your AST:
_term = _literal | _rule_name | _whitespace;
With
_whitespace = +blank;
And then making the _list
rule a lexeme as well (so as to not skip blanks):
// lexemes
qi::rule<Iterator, Ast::List()> _list;
qi::rule<Iterator, std::string()> _literal, _whitespace;
See it Live On Compiler Explorer
Clean Solution
The above leaves a few "warts": there are spots where whitespace is still not significant (namely around |
and specifically before the list-attribute numbers):
<code> ::= <letter><digit> 34 | <letter><digit><code> 23
<letter> ::= "a" 1 | "b" 2 | "c" 3 | "d" 4 | "e" 5 | "f" 6 | "g" 7 | "h" 8 | "i" 9
<digit> ::= "9" 10 | "1" 11 | "2" 12 | "3" 13 | "4" 14
I don't see how it would usefully be significant there, unless of course your input doesn't look like the input you've been using. E.g. if it looks like this instead:
<code>::=<letter><digit>34|<letter><digit><code>23
<letter>::="a"1|"b"2|"c"3|"d"4|"e"5|"f"6|"g"7|"h"8|"i"9
<digit>::="9"10|"1"11|"2"12|"3"13|"4"14
You could make all the rules lexeme. However, this doesn't add up with the presence of quoted strings, at all. The whole notion of quoted strings is to mark regions where normal whitespace (and comment) skipping is suspended.
I have a nagging feeling that you are much farther away from your actual problem (see https://meta.stackexchange.com/questions/66377/what-is-the-xy-problem) than we can even currently see, and you might even have stripped the whole quoted-string-literals concept from the "BNF" already.
A clean solution would be to forget about misleading similarities with BNF and just devise your own grammar from the ground up.
If the goal is simply to have a (recursive) macro/template expansion engine, it should really turn out a lot simpler than what you currently have. Maybe you can describe your real task (input, desired output and required behaviors) so we can help you achieve that?
Problems with boost::phoenix::bind and boost::phoenix::actors in a semantic action for boost::spirit::qi
A few observations:
You don't seem to be using a skipper, so using
lexeme
is redundant (see Boost spirit skipper issues). You want to know how to detect the type of the attribute exposed by a parser expression: see Detecting the parameter types in a Spirit semantic action
The types are documented with the parser directives, though, so e.g.
as_string[(qi::char_("1-9") >> +qi::char_("0-9"))]
results in boost::fusion::vector2<char, std::vector<char> >
, which is directly reflected in the error message on GCC: boost/phoenix/bind/detail/preprocessed/function_ptr_10.hpp|50 col 39| error: could not convert ‘a0’ from ‘boost::fusion::vector2<char, std::vector<char> >’ to ‘std::vector<char>’
Prefer not to mix and match library placeholders/wrappers, e.g.
boost::ref
and boost::phoenix::ref
You seem to be reinventing integer parsing; consider using
qi::int_parser
instead. It seems that the case to parse
0
is missing :)
Assuming you want my_str
to simply reflect the input string including number base prefix, I could suggest using:
number =
as_string[(qi::char_("1-9") >> +qi::char_("0-9"))] [phx::bind(&fPushIntCV, qi::_1, phx::ref(code), phx::ref(variables))]
| as_string[("0x" >> +qi::char_("0-9a-fA-F")) ] [phx::bind(&fPushIntCV, qi::_1, phx::ref(code), phx::ref(variables))]
| as_string[("0b" >> +qi::char_("0-1")) ] [phx::bind(&fPushIntCV, qi::_1, phx::ref(code), phx::ref(variables))]
| as_string[("0" >> +qi::char_("0-7")) ] [phx::bind(&fPushIntCV, qi::_1, phx::ref(code), phx::ref(variables))]
//some other junk
;
However, this could be simplified to:
number = as_string[
(qi::char_("1-9") >> +qi::char_("0-9"))
| ("0x" >> +qi::char_("0-9a-fA-F"))
| ("0b" >> +qi::char_("01"))
| ("0" >> +qi::char_("0-7"))
] [phx::bind(&fPushIntCV, qi::_1, phx::ref(code), phx::ref(variables))]
;
Now, you would probably like to just parse an integer value instead:
number =
(
("0x" >> qi::int_parser<int, 16, 1>())
| ("0b" >> qi::int_parser<int, 2, 1>())
| ("0" >> qi::int_parser<int, 8, 1>())
| qi::int_ /* accepts "0" */) [phx::bind(&fPushIntCV, qi::_1, phx::ref(code), phx::ref(variables))]
;
Which handsomely does the conversions[1], and you can just take an int
:
// Semantic-action callback: prints the parsed integer on its own line.
// The Code and Variables arguments are accepted but not used here.
void fPushIntCV (int my_number, Code& /*c*/, Variables& /*v*/) {
    std::cout << "fPushIntCV: ";
    std::cout << my_number;
    std::cout << "\n";
}
[1] (there's also uint_parser
and you can parse long
, long long
etc.; even big integers like boost::multiprecision::cpp_int
should be no issue)
Here's a demo program using this, showing that the values are converted correctly (and: "0" is accepted :)): Live On Coliru
// Runs the Calculator grammar over a few sample literals and reports, for each
// one, whether parsing succeeded and whether any input was left over.
int main()
{
    Code code;
    Variables variables;
    Calculator g(code, variables);

    for (std::string const input : { "0", "0xef1A", "010", "0b10101" })
    {
        It f(input.begin()), l(input.end());

        bool const ok = qi::parse(f, l, g);
        if (!ok)
            std::cout << "Parse failed ('" << input << "')\n";
        else
            std::cout << "Parse success ('" << input << "')\n";

        bool const leftover = (f != l);
        if (leftover)
            std::cout << "Input remaining: '" << std::string(f, l) << "'\n";
    }
}
Prints
fPushIntCV: 0
Parse success ('0')
fPushIntCV: 61210
Parse success ('0xef1A')
fPushIntCV: 8
Parse success ('010')
fPushIntCV: 21
Parse success ('0b10101')
Strange static_cast compilation error while compile boost spirit parser
Firstly, I can only assume
data = (text | imgtag | vartag | inctag | blktag | reftag) >> *data;
was /meant/ as 1-or-more repeats of the (...) expression. Writing it as
data = +(text | imgtag | vartag | inctag | blktag | reftag);
expresses the same, but allows attribute propagation to match the exposed attribute type.
There are a number of
lexeme[]
directives that have no purpose when not using a skipper. There is a suspicious manual skipping of whitespace that might be better served by using a skipper.
Insofar as you do wish to require a mandatory space after the "tag name", consider using
operator&
operator. That way you can still use a skipper. Anyhow, it's possible you were looking for something like the Qi Repository
distinct()[]
parser directive. Even with a skipper
*(+lit(' ') >> lexeme[+(char_ - '{' - '}')])
doesn't make sense as
lexeme[...]
would eat any space up to closing '}' and hence the second repeat of the *()
would never apply. See also Boost spirit skipper issues
There is a lot of manual repetition between rules. Consider using
qi::symbols
to map the input to tag types. If you do, it becomes easier to avoid semantic actions (Good Thing: Boost Spirit: "Semantic actions are evil"?). Even if you didn't, you could use
qi::attr
to expose a specific value as the type
value. Consider adding debug information (see
BOOST_SPIRIT_DEBUG
in the demo below)
The grammar simplified
I'd reduce the whole grammar to just this:
data = +( ('{' >> tag >> '}') | text );
tag = lexeme[type >> &char_(" {}")] >> lexeme[*~char_("{}")];
text = attr("text") >> lexeme[+~char_("{}")];
Done! No more semantic actions, no more dozens of rules doing basically the same. No more complicated nested repeats with unclear multiplicities. type
is a qi::symbols
parser now, that contains the mapping of tag names:
type.add
("img", "img")
("var", "var")
("inc", "inc")
("blank", "blk")
("ref", "ref");
And here's a complete demo:
DEMO
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
// One parsed item: a tag name plus the tag's data text.
struct CMyTag {
    std::string tagName;   // tag type name
    std::string tagData;   // text carried by the item
};
BOOST_FUSION_ADAPT_STRUCT(::CMyTag, (std::string, tagName) (std::string, tagData))
// Spirit.Qi grammar that splits input into a list of CMyTag items: brace-
// delimited tags and the plain text between them. Uses a skipper
// (qi::space_type by default).
template <typename Iterator, typename Skipper = qi::space_type>
struct testTag_grammar : qi::grammar<Iterator, std::vector<CMyTag>(), Skipper>
{
// Wires up the rules and fills the tag-name table; `data` is the start rule.
testTag_grammar() :
testTag_grammar::base_type(data)
{
using namespace qi;
// One or more items, each either a {tag} or a run of plain text.
data = +( ('{' >> tag >> '}') | text );
// Accepted tag names and the string each one yields as tagName.
type.add
("img", "img")
("var", "var")
("inc", "inc")
("blank", "blk")
("ref", "ref");
// A known tag name that must be followed (lookahead only) by a space or a
// brace, then any run of non-brace characters as the tag data.
tag = lexeme[type >> &char_(" {}")] >> lexeme[*~char_("{}")];
// Plain text: one or more non-brace characters, with the fixed name "text".
text = attr("text") >> lexeme[+~char_("{}")];
// NOTE(review): presumably only has an effect when BOOST_SPIRIT_DEBUG is defined -- confirm.
BOOST_SPIRIT_DEBUG_NODES( (data) (tag) (text))
}
private:
// Symbol table mapping tag names in the input to tagName strings.
qi::symbols<char, std::string> type;
qi::rule<Iterator, CMyTag(), Skipper> tag, text;
qi::rule<Iterator, std::vector<CMyTag>(), Skipper> data;
};
// Parses a sample string with testTag_grammar and prints every item found,
// followed by any input the parser did not consume.
int main() {
    testTag_grammar<std::string::const_iterator> l_gramar;
    std::string const l_test = "asd {img} {ref I}sdkflsdlk {img} wmrwerml";
    std::vector<CMyTag> l_result;

    auto f = l_test.begin();
    auto l = l_test.end();
    bool const result = qi::phrase_parse(f, l, l_gramar, qi::space, l_result);

    if (!result) {
        std::cout << "Parse failed\n";
    }
    else {
        std::cout << "Parse success: " << l_result.size() << "\n";
        for (auto& item : l_result)
            std::cout << "Name '" << item.tagName << "', Data '" << item.tagData << "'\n";
    }

    bool const leftover = (f != l);
    if (leftover) {
        std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
    }
}
Prints
Parse success: 6
Name 'text', Data 'asd '
Name 'img', Data ''
Name 'ref', Data 'I'
Name 'text', Data 'sdkflsdlk '
Name 'img', Data ''
Name 'text', Data 'wmrwerml'
How do I systematically replace text within mustache tags in a large body of text?
This is a tough (but not impossible) task to do with a single regular expression. Fortunately, there's no reason we have to do it with a single one. A much easier (and more robust) approach is to use two regular expressions: one to match replacement tags (things contained in {{curly brackets}}
) and another to replace instances of array indexers with dot indexers. Here's my solution:
s.replace(/\{\{(.*?)\}\}/g, function (tag) {
    // `tag` is one whole replacement tag, curly brackets included;
    // inside it, every array indexer [N] becomes a dot indexer .N
    return tag.replace(/\[(\d+)\]/g, '.$1');
});
Note: I have not analyzed this solution with the entire mustache syntax, so I cannot guarantee it will work if you're using more than the standard tags.
Related Topics
How Is If Statement Evaluated in C++
How Does Excel Successfully Round Floating Point Numbers Even Though They Are Imprecise
Why Add Void to Method Parameter List
Calling Constructor with Braces
Use Const Wherever Possible in C++
Error C1083: Cannot Open Include File: 'Stdafx.H'
What Does the & (Ampersand) at the End of Member Function Signature Mean
Convert Shared Library to Static Library
C++ Array Size Dependent on Function Parameter Causes Compile Errors
Why Is Std::Cout So Time Consuming
Seeking Code Stub Generator (From Header Files)
C++ Get Description of an Exception Caught in Catch(...) Block
Equivalent of Console.Readline() in C++