boost::spirit & pattern matching
От: Аноним  
Дата: 13.09.07 07:15
Оценка:
Доброго времени суток, коллеги

Практический вопрос по spirit.

Возможно ли описать грамматику для сопоставления по образцу?
К примеру для проверки сопоставимости тексту `squeeze' паттерна `*ee*' используется такой код (увы, не работающий так, как ожидается):


using namespace boost;
using namespace std;

struct pm_state
{
};

struct string_consumer
{
    void operator()( char ch ) const
    {
        cout << "ch = " << ch << '\n';
    }

    void operator()( char const * begin, char const * end ) const
    {
        cout << "str = " << string(begin, end) << '\n';
    }
};


struct pm_grammar
    : spirit::grammar<pm_grammar>
{
    pm_state &      m_state;

    pm_grammar( pm_state & state )
        : m_state( state )
    {}

    template<typename ScannerT>
    struct definition
    {
        definition( const pm_grammar& self )
        {
            // imitate "*ee*"
            m_sequence =
                spirit::lexeme_d[
                    (*spirit::alpha_p)[string_consumer()] >>
                    spirit::chseq_p("ee") >>
                    (*spirit::alpha_p)[string_consumer()]
                ]
                ;
        }

        spirit::rule<ScannerT>  m_sequence;

        const spirit::rule<ScannerT>& start() const
        {
            return m_sequence;
        }
    }; // struct definition
}; // struct pm_grammar

static void test()
{
    pm_state    state;
    pm_grammar  grammar( state );

    const char * str = "squeeze";

    spirit::parse_info<> info = parse( str, grammar, spirit::nothing_p/*or space_p???*/ );

    if( info.full )
    {
        cout << "Parsing succeeded\n";
    }
    else
    {
        cout << "Parsing failed at " << info.stop << "\n";
    }
}

int main( int argc, char * argv[] )
{
    try
    {
        test();
    }
    catch( const std::exception& e )
    {
        cout << "ERROR: " << e.what() << "\n";
    }
    getchar();
    return 0;
}


можно поподробнее как с помощью spirit решать подобные задачи?

PS: поздравляю всех с профессиональным праздником
Re: boost::spirit & pattern matching
От: StevenIvanov США  
Дата: 13.09.07 08:49
Оценка:
Здравствуйте, Аноним, Вы писали:

А>...


Прошу прощения, не залогинился.
Ну вот, теперь точно
Автор: StevenIvanov
Дата: 13.09.07
никто не ответит
Re[2]: boost::spirit & pattern matching + небольшой бонус :)
От: StevenIvanov США  
Дата: 19.09.07 14:22
Оценка:
Здравствуйте, StevenIvanov, Вы писали:

А>>...


Для тех, кто тоже ждал, верил и надеялся — вот черновой вариант решения (с использованием boost::spirit):

#include <iostream>
#include <string>

#include <boost/spirit.hpp>
#include <boost/spirit/dynamic/stored_rule.hpp>

using namespace boost;
using namespace std;

// help to build a pattern-matching rule
template<typename PatternMatchingRule>
struct pm_helper
    : spirit::grammar< pm_helper<PatternMatchingRule> >
{
    typedef typename pm_helper<PatternMatchingRule>     self_type;

    PatternMatchingRule &   m_rule;
    bool                    m_star_prev;

    pm_helper( PatternMatchingRule & pm_rule )
        : m_rule( pm_rule )
        , m_star_prev( false )
    {
        // first rule is `fake`
        m_rule = !spirit::nothing_p;
    }

    void operator()( const char * begin, const char * end ) const
    {
        //cout << "s = " << string( begin, end ) << "\n";
        const_cast<self_type*>( this )->accept( begin, end );
    }

    template<typename ScannerT>
    struct definition
    {
        definition( const self_type& self )
        {
            m_pattern = +((+(spirit::graph_p - '*')) | '*')[self];
        }

        const spirit::rule<ScannerT>& start() const
        {
            return m_pattern;
        }

    private:
        spirit::rule<ScannerT>      m_pattern;
    };

private:
    void accept( const char * begin, const char * end )
    {
        // checking *
        char ch = *begin;
        if( ch == '*' )
        {
            // checking whether the substring marker is the last one
            if( *end == 0 )
                m_rule = m_rule.copy() >> (*spirit::alpha_p);

            m_star_prev = true;
            return;
        }

        // text
        if( m_star_prev )
        {
            // *
            m_rule = m_rule.copy() >> (*(spirit::alpha_p - spirit::chseq_p(begin, end)));
            // text
            m_rule = m_rule.copy() >> (spirit::chseq_p(begin, end));
        }
        else
        {
            // text
            m_rule = m_rule.copy() >> (spirit::chseq_p(begin, end));
        }

        m_star_prev = false;
    }
};

struct pm_grammar
    : spirit::grammar<pm_grammar>
{
    string      m_pattern;

    pm_grammar( const string& pattern )
        : m_pattern( pattern )
    {}

    template<typename ScannerT>
    struct definition
    {
        definition( const pm_grammar& self )
        {
            pm_helper< typename spirit::stored_rule<typename ScannerT> > helper_grammar( m_sequence );
            spirit::parse_info<> info = boost::spirit::parse(
                self.m_pattern.c_str(),
                helper_grammar,
                spirit::nothing_p );
            if( !info.full )
                throw std::logic_error( "an error found in the pattern given" );

            return;
        }

        const spirit::stored_rule<ScannerT>& start() const
        {
            return m_sequence;
        }

    private:
        spirit::stored_rule<ScannerT>   m_sequence;
    }; // struct definition
}; // struct pm_grammar

static bool match( const char * source, const char * pattern )
{
    pm_grammar  grammar( pattern );
    spirit::parse_info<> info = parse( source, grammar, spirit::space_p );
    return info.full;
}

static void test_pattern_matching_using_spirit()
{
    if( match("this is a string", "t*is*ing") )
    {
        cout << "Pattern matches the source\n";
    }
    else
    {
        cout << "Pattern does not match to the source\n";
    }
}


И еще.

Ранее
Автор: c-smile
Дата: 01.02.07
Замечательный Андрей (aka c-smile) давал ответ на подобный вопрос. Немного изменив его код можно так же получить обобщенное решение (применимое не только к char/wchar_t, но и к любой другой последовательности в любом STL-совместимом контейнере) (2 файла — pattern_matching.h match_receivers.h):

pattern_matching.h :
#ifndef PATTERN_MATCHING_H_INCLUDED
#define PATTERN_MATCHING_H_INCLUDED

#include "match_receivers.h"

namespace PatternMatching
{

/// \struct CharTraits
/// represents traits of the char/wchar_t elements
///
template<typename CharType>
struct CharTraits
{
    static bool is_any_subsequence( CharType ct )
    {
        return ct == '*';
    }

    static bool is_any_element( CharType ct )
    {
        return ct == '?';
    }

    static bool is_equal( CharType lhs, CharType rhs )
    {
        return lhs == rhs;
    }
};

///
/// matches given pattern sequence with the source sequence using
/// `ElementTraits' to identify whether certain pattern symbol is equal
/// to the certain element(s) of the source sequence
///
template<
        typename ElementTraits,     // source&pattern element's traits. For char/wchar_t types
                                    // CharTraits class could be used
        typename SourceIterator,    // source iterator
        typename PatternIterator,   // pattern iterator
        typename MatchReceiver      // matches receiver like FakeMatchReceiver>
        >
static bool match(
             SourceIterator src_begin, SourceIterator src_end,
             PatternIterator pat_begin, PatternIterator pat_end,
             MatchReceiver& match_receiver
             )
{
    PatternIterator     match_pos = pat_end;

    // this variables used for match_receiver in order to
    // provide matched subsequence in the source sequence given
    SourceIterator      src_subsequence_begin   = src_end;
    SourceIterator      src_subsequence_end     = src_end;

    for( ;; )
    {
        // 1st case: check when come to an end of the source sequence given
        if( src_begin == src_end )
        {
            // inform match_receiver about matched subsequence if any
            if( src_subsequence_end != src_end && src_subsequence_begin != src_end )
                match_receiver.match_found( src_subsequence_begin, src_subsequence_end );

            // only one case is acceptable: `any subsequence' symbol at the end of the pattern given
            if( pat_begin != pat_end )
            {
                bool    any_sub_at_last = false; // indicates if pattern have symbol `any subsequence' at its tail
                for( ; pat_begin != pat_end; ++ pat_begin )
                {
                    if( !ElementTraits::is_any_subsequence(*pat_begin) )
                        return false;

                    any_sub_at_last = true;
                }

                // inform match_receiver about matched subsequence
                // it will be empty here, because src_begin == src_end
                if( any_sub_at_last )
                    match_receiver.match_found( src_begin, src_end );
            }

            return true;
        }

        // 2nd case: check when come to an end of the pattern sequence given
        if( pat_begin == pat_end )
        {
            // check whether match_pos is not empty
            // restore at previous matching pos and try again
            if( match_pos != pat_end )
            {
                pat_begin = match_pos;
                ++ src_begin;
                continue;
            }

            // check the end of the string
            if( src_begin == src_end )
            {
                // inform match_receiver about previous matched subsequence if any
                if( src_subsequence_end != src_end && src_subsequence_begin != src_end )
                    match_receiver.match_found( src_subsequence_begin, src_subsequence_end );

                return true;
            }

            return false;
        }

        // 3rd case: check whether current pattern and source symbols is equal to each other
        if( ElementTraits::is_equal(*src_begin, *pat_begin) )
        {
            src_subsequence_end = src_begin;

            ++ src_begin;
            ++ pat_begin;
            continue;
        }

        // 4th case: check whether current pattern's symbol stands for any element in the source sequence
        if( ElementTraits::is_any_element(*pat_begin) )
        {
            match_receiver.match_found( *src_begin );

            // increase counters
            ++ src_begin;
            ++ pat_begin;
            continue;
        }

        // 5th case: check whether current pattern's symbol stands for any subsequence in the source sequence
        if( ElementTraits::is_any_subsequence(*pat_begin) )
        {
            match_pos = ++ pat_begin;

            // inform match_receiver about previously matched subsequence if any
            if( src_subsequence_end != src_end && src_subsequence_begin != src_end )
            {
                match_receiver.match_found( src_subsequence_begin, src_subsequence_end );

                src_subsequence_end = src_end;
            }

            // if any subsequence symbol met at the end of the pattern given
            if( match_pos == pat_end )
            {
                match_receiver.match_found( src_begin, src_end );
                return true;
            }

            // mark source subsequence start
            src_subsequence_begin = src_begin;

            continue;
        } // if any substring met

        // 6th case: check whether it is possible to re-match source subsequence
        if( match_pos != pat_end )
        {
            // reset marker of the subsequence's end
            src_subsequence_end = src_end;

            pat_begin = match_pos;
            ++ src_begin;
            continue;
        }

        // 7th case: the pattern sequence does not match the source sequence
        break;
    } // for( ;; )
    return false;
} // match


static bool do_match( const std::string& src, const std::string& pattern )
{
    FakeMatchReceiver   fake_receiver;
    return match< CharTraits<char> >(
                    src.begin(), src.end(),
                    pattern.begin(), pattern.end(),
                    fake_receiver
                    );
}


template<typename ConcreteMatchReceiver>
static bool do_match( const char * source, const char * pattern, ConcreteMatchReceiver& match_receiver )
{
    return match< CharTraits<char> >(
                    source, source + strlen(source),
                    pattern, pattern + strlen(pattern),
                    match_receiver
                    );
}



} // namespace PatternMatching

#endif // PATTERN_MATCHING_H_INCLUDED



match_receivers.h



#ifndef MATCH_RECEIVERS_H_INCLUDED
#define MATCH_RECEIVERS_H_INCLUDED

#include <sstream>

namespace PatternMatching
{


/// \struct FakeMatchReceiver
/// represents a fake receiver of the coincidences found during pattern matching
///
struct FakeMatchReceiver
{
    FakeMatchReceiver()
    {}
    
    template<typename SourceElement>
    static void match_found( SourceElement e ) {}

    template<typename SourceIterator>
    static void match_found( SourceIterator begin, SourceIterator end ) {}
};

/// \class LogMatchReceiver
/// logs matches and serializes it to the output string stream
///
class LogMatchReceiver
{
public:
    LogMatchReceiver()
        : m_match_index( 0 )
        , m_at_least_one_found( false )
    {
        m_ostr << "Matches found in order of an appearance in the pattern given:\n";
    }

    template<typename ValueType>
    void match_found( ValueType value )
    {
        m_at_least_one_found = true;
        m_ostr << ++ m_match_index << ") ? = " << value << "\n";
    }

    template<typename SourceIterator>
    void match_found( SourceIterator begin, SourceIterator end )
    {
        m_at_least_one_found = true;
        m_ostr << ++ m_match_index << ") * = ";
        typedef typename std::iterator_traits<SourceIterator>::value_type ValueType; 
        std::copy( begin, end, std::ostream_iterator<ValueType>(m_ostr, "") );
        m_ostr << "\n";
    }

    std::string report() const
    {
        if( m_at_least_one_found )
            return m_ostr.str();

        return std::string();
    }

private:
    bool                    m_at_least_one_found;
    size_t                  m_match_index;
    std::ostringstream      m_ostr;
};


} // namespace PatternMatching

#endif // MATCH_RECEIVERS_H_INCLUDED
 
Подождите ...
Wait...
Пока на собственное сообщение не было ответов, его можно удалить.