Доброго времени суток, коллеги
Практический вопрос по spirit.
Возможно ли описать грамматику для сопоставления по образцу?
К примеру для проверки сопоставимости тексту `squeeze' паттерна `*ee*' используется такой код (увы, не работающий так, как ожидается):
using namespace boost;
using namespace std;
struct pm_state
{
};
struct string_consumer
{
void operator()( char ch ) const
{
cout << "ch = " << ch << '\n';
}
void operator()( char const * begin, char const * end ) const
{
cout << "str = " << string(begin, end) << '\n';
}
};
struct pm_grammar
: spirit::grammar<pm_grammar>
{
pm_state & m_state;
pm_grammar( pm_state & state )
: m_state( state )
{}
template<typename ScannerT>
struct definition
{
definition( const pm_grammar& self )
{
// imitate "*ee*"
m_sequence =
spirit::lexeme_d[
(*spirit::alpha_p)[string_consumer()] >>
spirit::chseq_p("ee") >>
(*spirit::alpha_p)[string_consumer()]
]
;
}
spirit::rule<ScannerT> m_sequence;
const spirit::rule<ScannerT>& start() const
{
return m_sequence;
}
}; // struct definition
}; // struct pm_grammar
static void test()
{
pm_state state;
pm_grammar grammar( state );
const char * str = "squeeze";
spirit::parse_info<> info = parse( str, grammar, spirit::nothing_p/*or space_p???*/ );
if( info.full )
{
cout << "Parsing succeeded\n";
}
else
{
cout << "Parsing failed at " << info.stop << "\n";
}
}
int main( int argc, char * argv[] )
{
try
{
test();
}
catch( const std::exception& e )
{
cout << "ERROR: " << e.what() << "\n";
}
getchar();
return 0;
}
можно поподробнее как с помощью spirit решать подобные задачи?
PS: поздравляю всех с
профессиональным праздником
Здравствуйте, Аноним, Вы писали:
А>...
Прошу прощения, не залогинился.
Ну вот, теперь
точноАвтор: StevenIvanov
Дата: 13.09.07
никто не ответит
Здравствуйте, StevenIvanov, Вы писали:
А>>...
Для тех, кто тоже ждал, верил и надеялся — вот черновой вариант решения (с использованием boost::spirit):
#include <iostream>
#include <string>
#include <boost/spirit.hpp>
#include <boost/spirit/dynamic/stored_rule.hpp>
using namespace boost;
using namespace std;
// help to build a pattern-matching rule
template<typename PatternMatchingRule>
struct pm_helper
: spirit::grammar< pm_helper<PatternMatchingRule> >
{
typedef typename pm_helper<PatternMatchingRule> self_type;
PatternMatchingRule & m_rule;
bool m_star_prev;
pm_helper( PatternMatchingRule & pm_rule )
: m_rule( pm_rule )
, m_star_prev( false )
{
// first rule is `fake`
m_rule = !spirit::nothing_p;
}
void operator()( const char * begin, const char * end ) const
{
//cout << "s = " << string( begin, end ) << "\n";
const_cast<self_type*>( this )->accept( begin, end );
}
template<typename ScannerT>
struct definition
{
definition( const self_type& self )
{
m_pattern = +((+(spirit::graph_p - '*')) | '*')[self];
}
const spirit::rule<ScannerT>& start() const
{
return m_pattern;
}
private:
spirit::rule<ScannerT> m_pattern;
};
private:
void accept( const char * begin, const char * end )
{
// checking *
char ch = *begin;
if( ch == '*' )
{
// checking whether the substring marker is the last one
if( *end == 0 )
m_rule = m_rule.copy() >> (*spirit::alpha_p);
m_star_prev = true;
return;
}
// text
if( m_star_prev )
{
// *
m_rule = m_rule.copy() >> (*(spirit::alpha_p - spirit::chseq_p(begin, end)));
// text
m_rule = m_rule.copy() >> (spirit::chseq_p(begin, end));
}
else
{
// text
m_rule = m_rule.copy() >> (spirit::chseq_p(begin, end));
}
m_star_prev = false;
}
};
struct pm_grammar
: spirit::grammar<pm_grammar>
{
string m_pattern;
pm_grammar( const string& pattern )
: m_pattern( pattern )
{}
template<typename ScannerT>
struct definition
{
definition( const pm_grammar& self )
{
pm_helper< typename spirit::stored_rule<typename ScannerT> > helper_grammar( m_sequence );
spirit::parse_info<> info = boost::spirit::parse(
self.m_pattern.c_str(),
helper_grammar,
spirit::nothing_p );
if( !info.full )
throw std::logic_error( "an error found in the pattern given" );
return;
}
const spirit::stored_rule<ScannerT>& start() const
{
return m_sequence;
}
private:
spirit::stored_rule<ScannerT> m_sequence;
}; // struct definition
}; // struct pm_grammar
static bool match( const char * source, const char * pattern )
{
pm_grammar grammar( pattern );
spirit::parse_info<> info = parse( source, grammar, spirit::space_p );
return info.full;
}
static void test_pattern_matching_using_spirit()
{
if( match("this is a string", "t*is*ing") )
{
cout << "Pattern matches the source\n";
}
else
{
cout << "Pattern does not match to the source\n";
}
}
И еще.
РанееАвтор: c-smile
Дата: 01.02.07
Замечательный Андрей (aka c-smile) давал ответ на подобный вопрос. Немного изменив его код можно так же получить обобщенное решение (применимое не только к char/wchar_t, но и к любой другой последовательности в любом STL-совместимом контейнере) (2 файла — pattern_matching.h match_receivers.h):
pattern_matching.h :
#ifndef PATTERN_MATCHING_H_INCLUDED
#define PATTERN_MATCHING_H_INCLUDED
#include "match_receivers.h"
namespace PatternMatching
{
/// \struct CharTraits
/// represents traits of the char/wchar_t elements
///
template<typename CharType>
struct CharTraits
{
static bool is_any_subsequence( CharType ct )
{
return ct == '*';
}
static bool is_any_element( CharType ct )
{
return ct == '?';
}
static bool is_equal( CharType lhs, CharType rhs )
{
return lhs == rhs;
}
};
///
/// matches given pattern sequence with the source sequence using
/// `ElementTraits' to identify whether certain pattern symbol is equal
/// to the certain element(s) of the source sequence
///
template<
typename ElementTraits, // source&pattern element's traits. For char/wchar_t types
// CharTraits class could be used
typename SourceIterator, // source iterator
typename PatternIterator, // pattern iterator
typename MatchReceiver // matches receiver like FakeMatchReceiver>
>
static bool match(
SourceIterator src_begin, SourceIterator src_end,
PatternIterator pat_begin, PatternIterator pat_end,
MatchReceiver& match_receiver
)
{
PatternIterator match_pos = pat_end;
// this variables used for match_receiver in order to
// provide matched subsequence in the source sequence given
SourceIterator src_subsequence_begin = src_end;
SourceIterator src_subsequence_end = src_end;
for( ;; )
{
// 1st case: check when come to an end of the source sequence given
if( src_begin == src_end )
{
// inform match_receiver about matched subsequence if any
if( src_subsequence_end != src_end && src_subsequence_begin != src_end )
match_receiver.match_found( src_subsequence_begin, src_subsequence_end );
// only one case is acceptable: `any subsequence' symbol at the end of the pattern given
if( pat_begin != pat_end )
{
bool any_sub_at_last = false; // indicates if pattern have symbol `any subsequence' at its tail
for( ; pat_begin != pat_end; ++ pat_begin )
{
if( !ElementTraits::is_any_subsequence(*pat_begin) )
return false;
any_sub_at_last = true;
}
// inform match_receiver about matched subsequence
// it will be empty here, because src_begin == src_end
if( any_sub_at_last )
match_receiver.match_found( src_begin, src_end );
}
return true;
}
// 2nd case: check when come to an end of the pattern sequence given
if( pat_begin == pat_end )
{
// check whether match_pos is not empty
// restore at previous matching pos and try again
if( match_pos != pat_end )
{
pat_begin = match_pos;
++ src_begin;
continue;
}
// check the end of the string
if( src_begin == src_end )
{
// inform match_receiver about previous matched subsequence if any
if( src_subsequence_end != src_end && src_subsequence_begin != src_end )
match_receiver.match_found( src_subsequence_begin, src_subsequence_end );
return true;
}
return false;
}
// 3rd case: check whether current pattern and source symbols is equal to each other
if( ElementTraits::is_equal(*src_begin, *pat_begin) )
{
src_subsequence_end = src_begin;
++ src_begin;
++ pat_begin;
continue;
}
// 4th case: check whether current pattern's symbol stands for any element in the source sequence
if( ElementTraits::is_any_element(*pat_begin) )
{
match_receiver.match_found( *src_begin );
// increase counters
++ src_begin;
++ pat_begin;
continue;
}
// 5th case: check whether current pattern's symbol stands for any subsequence in the source sequence
if( ElementTraits::is_any_subsequence(*pat_begin) )
{
match_pos = ++ pat_begin;
// inform match_receiver about previously matched subsequence if any
if( src_subsequence_end != src_end && src_subsequence_begin != src_end )
{
match_receiver.match_found( src_subsequence_begin, src_subsequence_end );
src_subsequence_end = src_end;
}
// if any subsequence symbol met at the end of the pattern given
if( match_pos == pat_end )
{
match_receiver.match_found( src_begin, src_end );
return true;
}
// mark source subsequence start
src_subsequence_begin = src_begin;
continue;
} // if any substring met
// 6th case: check whether it is possible to re-match source subsequence
if( match_pos != pat_end )
{
// reset marker of the subsequence's end
src_subsequence_end = src_end;
pat_begin = match_pos;
++ src_begin;
continue;
}
// 7th case: the pattern sequence does not match the source sequence
break;
} // for( ;; )
return false;
} // match
static bool do_match( const std::string& src, const std::string& pattern )
{
FakeMatchReceiver fake_receiver;
return match< CharTraits<char> >(
src.begin(), src.end(),
pattern.begin(), pattern.end(),
fake_receiver
);
}
template<typename ConcreteMatchReceiver>
static bool do_match( const char * source, const char * pattern, ConcreteMatchReceiver& match_receiver )
{
return match< CharTraits<char> >(
source, source + strlen(source),
pattern, pattern + strlen(pattern),
match_receiver
);
}
} // namespace PatternMatching
#endif // PATTERN_MATCHING_H_INCLUDED
match_receivers.h
#ifndef MATCH_RECEIVERS_H_INCLUDED
#define MATCH_RECEIVERS_H_INCLUDED
#include <sstream>
namespace PatternMatching
{
/// \struct FakeMatchReceiver
/// represents a fake receiver of the coincidences found during pattern matching
///
struct FakeMatchReceiver
{
FakeMatchReceiver()
{}
template<typename SourceElement>
static void match_found( SourceElement e ) {}
template<typename SourceIterator>
static void match_found( SourceIterator begin, SourceIterator end ) {}
};
/// \class LogMatchReceiver
/// logs matches and serializes it to the output string stream
///
class LogMatchReceiver
{
public:
LogMatchReceiver()
: m_match_index( 0 )
, m_at_least_one_found( false )
{
m_ostr << "Matches found in order of an appearance in the pattern given:\n";
}
template<typename ValueType>
void match_found( ValueType value )
{
m_at_least_one_found = true;
m_ostr << ++ m_match_index << ") ? = " << value << "\n";
}
template<typename SourceIterator>
void match_found( SourceIterator begin, SourceIterator end )
{
m_at_least_one_found = true;
m_ostr << ++ m_match_index << ") * = ";
typedef typename std::iterator_traits<SourceIterator>::value_type ValueType;
std::copy( begin, end, std::ostream_iterator<ValueType>(m_ostr, "") );
m_ostr << "\n";
}
std::string report() const
{
if( m_at_least_one_found )
return m_ostr.str();
return std::string();
}
private:
bool m_at_least_one_found;
size_t m_match_index;
std::ostringstream m_ostr;
};
} // namespace PatternMatching
#endif // MATCH_RECEIVERS_H_INCLUDED