SRC: boost::(spirit|regex) для бедных - Исходники

...Работы у меня сегодня не было, а настроение поизвращаться с C++ — было.
В результате всего этого, появился "шедевер" — шаблонная библиотека статических регулярных выражений, в чем-то смахивающая на SPIRIT. Вот только она гаараздо меньше — всего 7 килобайт. Ну, и гораздо примитивнее, конечно.

Основная ее фишка в том, что компиляция регулярного выражения проводится на этапе компиляции программы — прямо в исполнимый код (довольно страшноватенький — т.к. оптимизация еще впереди). А на этапе выполнения остается только пользоваться функцией поиска совпадений...

пример:

//задаем регулярное выражение (обьяснение примитивов будет ниже)
namespace myrx
{
    using namespace rx_tmpl;
    
    typedef //определяем регулярное выражение для выделения числа 
            //с плавающей точкой: [+-]?[0-9]+(\.[0-9]+)?([Ee][+-]?[0-9]+)? 
        seq<
            opt<set<'+','-'> >,
            plus<digit>,
            opt<seq<c<'.'>, plus<digit> > >,
            opt<seq<set<'E','e'>, opt<set<'+','-'> >, plus<digit> > >
        >
    float_num;
}

//применяем его...
void function()
{
    string number("123.45E-56 some text");
    string::iterator it = number.begin();
    if( myrx::float_num::match(it, number.end()) )
    {
        //строка совпала.
        //итератор it указывает на позицию сразу за числом
    } else {
        //строка не совпала
        //позиция итератора не изменилась.
    }
}

примитивы:

c<'A'> - литеральный символ.
s<'H','E','L','L','O'> - литеральная строка
set<'A','B','C','D','E','F'> - набор символов - как [ABCDEF].
range<'A','Z'> - диапазон символов - как [A-Z].
range<'A','Z',false> - диапазон символов - как [^A-Z].
ws - пробел или таб [ \t]
digit - цифра - как [0-9]
alpha - английская буква без учета регистра - как [A-Za-z]
star<RX> - ноль или более совпадений - как (abcd)*
plus<RX> - одно или более или более совпадений - как (abcd)+
opt<RX> - необязательное совпадение - как (abcd)?
alt<RX1,RX2> - альтернатива - как (abc|def)
seq<RX1,RX2,RX3,....,RX10> - конкатенация регулярных выражений

ЗЫ. Код будет в следующем сообщении в этой же нитке.

Проверено и работает:
— VC 6.0SP5 и старше
— Mingw GCC 3.3.1

#include <stdio.h>
#include <algorithm>

namespace rx_tmpl
{
    namespace impl
    {
        template<wchar_t E, class TAIL>
        struct strn: TAIL
        {
            template<class InIt>
            static bool match(InIt& it, const InIt& end)
            {
                return (it != end) && (E == *it) && TAIL::match(++it,end);
            }
        };

        struct strnlast
        {
            template<class InIt>
            static bool match(InIt& it, const InIt& end)
            {
                return true;
            }
        };

        template<wchar_t E, class TAIL>
        struct setn: TAIL
        {
            static bool do_match(wchar_t ch)
            {
                return (E == ch) || TAIL::do_match(ch);
            }
        };

        struct setnlast
        {
            static bool do_match(wchar_t ch)
            {
                return false;
            }
        };


        template<class RX, class TAIL>
        struct seqn: TAIL
        {
            template<class InIt>
            static bool do_match(InIt& it, const InIt& end)
            {
                return RX::match(it,end) && TAIL::do_match(it,end);
            }
        };

        struct seqnlast
        {
            template<class InIt>
            static bool do_match(InIt& it, const InIt& end)
            {
                return true;
            }
        };
    }//namespace impl

template<wchar_t E>
struct c /* literal charachter */
{
    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        bool res = (it != end) && (E == *it);
        if( res ) ++it;
        return res;
    }
};

template<
int c0,     int c1,     int c2 =-1, int c3 =-1, int c4 =-1, int c5 =-1, int c6 =-1, int c7 =-1, int c8 =-1, int c9 =-1,
int c10=-1, int c11=-1, int c12=-1, int c13=-1, int c14=-1, int c15=-1, int c16=-1, int c17=-1, int c18=-1, int c19=-1,
int c20=-1, int c21=-1, int c22=-1, int c23=-1, int c24=-1, int c25=-1, int c26=-1, int c27=-1, int c28=-1, int c29=-1
>
struct s /* literal string - up to 30 symbols */
    :impl::strn<
        c0,
        s<
            c1,c2,c3,c4,c5,c6,c7,c8,c9,
            c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,
            c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,
            -1
        >
    >
{};

template<> 
struct s<
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1>
:impl::strnlast
{};

template<
int c0,     int c1,     int c2 =-1, int c3 =-1, int c4 =-1, int c5 =-1, int c6 =-1, int c7 =-1, int c8 =-1, int c9 =-1,
int c10=-1, int c11=-1, int c12=-1, int c13=-1, int c14=-1, int c15=-1, int c16=-1, int c17=-1, int c18=-1, int c19=-1,
int c20=-1, int c21=-1, int c22=-1, int c23=-1, int c24=-1, int c25=-1, int c26=-1, int c27=-1, int c28=-1, int c29=-1
>
struct set /* charachter set - up to 30 chars - like "[ABCDEFabcdef]"*/
{
    typedef impl::setn<
        c0,
        typename set<
            c1,c2,c3,c4,c5,c6,c7,c8,c9,
            c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,
            c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,
            -1
        >::node
    > node;

    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        bool res = (it != end) && node::do_match(*it);
        if( res ) ++it;
        return res;
    }
};

template<> 
struct set<
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1>
{
    typedef impl::setnlast node;
};

template<wchar_t FRONT, wchar_t BACK, bool INCLUSIVE=true>
struct range /*charachter range - like "[a-d]" or "[^a-d]" */
{
    template<bool> struct sel {};
    //PXT_CASSERT((FRONT < BACK));

    template<class InIt>
    static bool do_match(InIt& it, const InIt& end, sel<true>)
    {
        bool res = (it != end) && (FRONT >= *it) && (*it <= BACK);
        if( res ) ++it;
        return res;
    }

    template<class InIt>
    static bool do_match(InIt& it, const InIt& end, sel<false>)
    {
        return !do_match(it, end, sel<true>());
    }

    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        return do_match(it, end, sel<INCLUSIVE>());
    }
};

struct ws /* whitespace chars */
{
    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        bool res = (it != end) && ((*it == ' ') || (*it == '\t'));
        if( res ) ++it;
        return res;
    }
};

struct digit /* numeric chars */
{
    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        bool res = (it != end) && (*it >= '0') && (*it <= '9');
        if( res ) ++it;
        return res;
    }
};

struct alpha /* numeric chars */
{
    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        bool res = (it != end) && (
            ((*it >= 'A') && (*it <= 'Z')) || 
            ((*it >= 'a') && (*it <= 'z'))
        );
        if( res ) ++it;
        return res;
    }
};

template<class RX>
struct star /*zero or more occurences of RX - like "(abc)*" */
{
    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        while( true )
        {
            InIt t(it);
            if( false == RX::match(it, end) || it == t )
                break;
        }
        return true;
    }
};

template<class RX>
struct plus /*one or more occurences of RX - like "(abc)+" */
{
    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        return RX::match(it, end) && star<RX>::match(it, end);
    }
};

template<class RX>
struct opt /*optional occurence of RX - like "(abc)?" */
{
    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        RX::match(it, end);
        return true;
    }
};

template<class RX1, class RX2>
struct alt /*alternative - like "(abc|def)" */
{
    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        InIt i1(it); bool b1 = RX1::match(i1,end);
        InIt i2(it); bool b2 = RX2::match(i2,end);
        if( b1 && b2 )
            it = (std::max)(i1,i2);
        else if( b1 )
            it = i1;
        else if( b2 )
            it = i2;
        else
            return false;
        return true;
    }
};

struct seqe {};
template<
    class RX0,      class RX1,      class RX2=seqe, class RX3=seqe, class RX4=seqe, 
    class RX5=seqe, class RX6=seqe, class RX7=seqe, class RX8=seqe, class RX9=seqe
>
struct seq 
{
    typedef impl::seqn<
        RX0,
        typename seq<RX1,RX2,RX3,RX4,RX5,RX6,RX7,RX8,RX9,seqe>::node
    > node;

    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        InIt t(it);
        bool res = node::do_match(it,end);
        if( !res ) it = t;
        return res;
    }
};

template<>
struct seq<seqe,seqe,seqe,seqe,seqe,seqe,seqe,seqe,seqe,seqe>
{
    typedef impl::seqnlast node;
};

}//namespace rx_tmpl

namespace myrx
{
    using namespace rx_tmpl;

    typedef 
        seq<
            s<'H','E','L','L','O'>,
            alt< c<','>, ws>,
            star<ws>,
            alt<
                seq<s<'R','E','G','E','X'>, opt< c<'P'> > >,
                seq<s<'R','E','G','U','L','A','R'>, plus<ws>, s<'E','X','P','R','E','S','S','I','O','N'> >
            >
        >
    rx;

    typedef 
        //{D}+(\.{D}+)?([Ee][+-]?{D}+)? 
        seq<
            opt<set<'+','-'> >,
            plus<digit>,
            opt<seq<c<'.'>, plus<digit> > >,
            opt<seq<set<'E','e'>, opt<set<'+','-'> >, plus<digit> > >
        >
    float_num;
}

template<class RX>
void test(char* pp)
{
    char* p = pp;
    bool res = RX::match(p, p+strlen(p));
    printf("\"%s\" %s matched\n", pp, res?"":"not");
}

int main()
{
    test<myrx::float_num>("123");
    test<myrx::float_num>("-123");
    test<myrx::float_num>("123.45");
    test<myrx::float_num>("123E-10");
    test<myrx::float_num>("123.45E-10");
    test<myrx::float_num>("+123.45E-10");
}

Здравствуйте, TepMuHyc, Вы писали:

T> Проверено и работает:
T> — VC 6.0SP5 и старше
T> — Mingw GCC 3.3.1
T> ...

Офигеть! Как же всё это работает???

---------------------------------------------------------
СНП, Артёмов Денис. e-mail: artyomov <at> inbox.ru

Posted via RSDN NNTP Server 1.8 beta

Здравствуйте, TepMuHyc, Вы писали:

[]

А сравнения будут?

... << RSDN@Home 1.1.0 stable >>

давно собирался сделать такую же штуку со спиритом (т.к. у тебя все же регекс)
удивительно, как до этого еще бустовцы не додумались.
а вот такой вопрос — насколько это эффективнее (и эфективнее ли?) классического бустового варианта?
я имею в виду все параметры: размер получившегося ехе-шника, скорость выполнения, простоту написания (ну тут явно налицо некоторый проигрыш, хотя, в принципе, можно привыкнуть)

Здравствуйте, ArtDenis, Вы писали:

AD>Офигеть! Как же всё это работает???
Алямянтарно, Ватсон.

Регулярное выражение состоит из комбинации примитивов.
Каждый примитив определяет "функцию совпадения" следующего формата:

    template<class InIt>
    static bool match(InIt& it, const InIt& end);

Если совпадение найдено, match() возвращает true и продвигает итератор it на нужное количество символов. Если же совпадение не найдено, возвращается true и итератор остается в прежней позиции.

Собственно, для того чтобы класс стал "примитивом", все что ему необходимо — это реализовать функцию match(). То есть, "расширение и углубление" возможностей данной библиотеки — дело довольно простое.

Примитивы можно разделить на "терминальные" — это те, которые выполняют "реальную работу" — т.е. сравнение итератора в текущей позиции и символа — это шаблоны c<>, s<>, set<>, range<>, ws, alpha, digit,
И "нетерминальные" — которые выполняют логические операции над другими примитивами (как терминальными, так и нет) — это plus<>, star<>, opt<>, alt<>, seq<>.

В результате комбинирования терминальных примитивов при помощи нетерминальных, генерируется "функция совпадения" код которой представляет собой "дерево разбора" регулярного выражения. А выполнение этой функции — это как бы "обход" этого дерева разбора.

Что же касается реализации рекурсивных шаблонов set<> seq<> и s<>, то принцип их построения достаточно хорошо описан у Александреску — в главе про TypeList<>. Единственное, что можно в какой-то мере считать моим достижением в их реализации — удалось обойтись без частичной специализации (это важно для VC6.0) и сделать это достаточно красиво...

Кстати, о рекурсии: класс boost::tuple<> построен примерно на той же технологии, но имхо несколько более "некрасиво".

Здравствуйте, Зверёк Харьковский, Вы писали:

ЗХ>а вот такой вопрос — насколько это эффективнее (и эфективнее ли?) классического бустового варианта?
...Это второй человек, который захотел бенчмарок... Этакие вы ненасытные

Я ж этот код сотворил вчера за 4 часа на коленке

ЗХ>размер получившегося ехе-шника,
Тут надо учитывать количество уникальных регулярных выражений задействованных в программе.

На малом количестве выражений — моя библиотека будет выигрывать, причем на порядки — т.к. она генерит только функции поиска совпадений.
На большам количестве выражений — моя сначала сравняется, а потом начнет отставать.
Я ожидаю, что с boost::regex по размеру она сравнится при количестве рег.выражений около 100 или 200, но это надо проверять...

ЗХ>скорость выполнения,
boost::regex построена на недерминированном конечном автомате — что означает что она только один раз извлекает каждый элемент строки из итератора. Из этого, кстати, следует что она может использовать input_iterator — а это дорогого стОит.

Моя билиотека — это голый и примитивный обход дерева (увы, сгенерировать конечный автомат на шаблонах мне еще слабО

). Несколько спасает то, что этот обход выгоняется в исполнимый код.
Но "откаты назад" и повторные проверки участков последовательности неизбежны — в особенности этим страдает примитив alt<>. Так что, она может использовать только random_access_iterator. Я пробовал адаптировать ее к forward_iterator — получилось, но мне хочется именно input_iterator, поэтому я счел поддержку forward_iterator слишком обременительной и выкинул ее. (В принципе, у меня в башке бродит идея "кэширующего итератора", который мог бы превратить input_iterator в random_access_iterator, но это пока только идея и вопрос, надо ли ?)

Что же касается скорости, то моя билиотека будет слабже boost::regex как минимум в несколько раз. И только некоторые специально оптимизированные регулярные выражения могут позволить им сравниться в скорости.

ЗХ>простоту написания (ну тут явно налицо некоторый проигрыш, хотя, в принципе, можно привыкнуть)
Если привыкнуть, то просто — мне, например, очень просто

В целом, я бы рекомендовал применять мою библотеку когда:
1) Количество рег.выражений малО — не более нескольких 10-20.
2) Все они известны на compile-time
3) Скорость выполнения поиска совпадений не критична и/или рег.выражения достаточно простЫ.

Здравствуйте, TepMuHyc, Вы писали:

TMH>Моя билиотека — это голый и примитивный обход дерева (увы, сгенерировать конечный автомат на шаблонах мне еще слабО

).
а чо, давай сгенерируем

что мешает-то?

TMH>Что же касается скорости, то моя билиотека будет слабже boost::regex как минимум в несколько раз. И только некоторые специально оптимизированные регулярные выражения могут позволить им сравниться в скорости.

а чем это обусловлено?

TMH>В целом, я бы рекомендовал применять мою библотеку когда:
TMH>1) Количество рег.выражений малО — не более нескольких 10-20.
TMH>2) Все они известны на compile-time
TMH>3) Скорость выполнения поиска совпадений не критична и/или рег.выражения достаточно простЫ.

в этих условиях она будет лучше, чем regex?

Здравствуйте, Зверёк Харьковский, Вы писали:

ЗХ>а чо, давай сгенерируем

что мешает-то?

Если ты настолько кррррутттт, go ahead — генери.
А я данную тему знаю не понаслышке — даже сгенерить голый NFA по Тейлору — задачка далеко нетривиальная.
Причем, получим совершенно неоптимальное представление — в виде графа на указателях и (что самое подлое) без оптимизации e-closure(char).

И, если все же каким-то чудом удастся — исходников будет — дайбожЕ уместиться килобайт в 100

А компилироваться они будут... успеешь поесть, трахнуть жену, выспаться — и далее по циклу раз 5

TMH>>Что же касается скорости, то моя билиотека будет слабже boost::regex как минимум в несколько раз. И только некоторые специально оптимизированные регулярные выражения могут позволить им сравниться в скорости.

ЗХ>а чем это обусловлено?
Я же говорил чем — одни и те же участки строки могут просматриваться несколько раз — особенно этим грешит примитив alt<>.
boost::regex (да и любая себя уважающая библиотека рег.выражений) разбор строки делает за один проход.

TMH>>В целом, я бы рекомендовал применять мою библотеку когда:
TMH>>1) Количество рег.выражений малО — не более 10-20.
TMH>>2) Все они известны на compile-time
TMH>>3) Скорость выполнения поиска совпадений не критична и/или рег.выражения достаточно простЫ.

ЗХ>в этих условиях она будет лучше, чем regex?
Да — хотя бы по времени компиляции и размеру кода.
Что же касается скорости, то как известно, 20% от кода программы исполняются 80% времени жизни программы.
Надо только следить чтобы в эти 20% не попали функции rx_templ::some_regex::match()

Изменения:
— подчищен код — убраны ненужные наследования, реализация всех рекурсивных шаблонов приведена к единообразию
— убраны все реверансы в строну forward_iterator — библиотека теперь поддерживает ТОЛЬКО random_access_iterator
— примитив alt<> теперь поддерживает до 10 подвыражений (раньше только 2)
— оптимизирован примитив s<> — убраны ненужные проверки конца последовательности.

Планы на следующие версии:
— примитив select<RX> — выделение подвыражений — будут изменения формата функции match()
— терминальные примитивы нечувствительные к регистру букв — думаю как сделать максимально гибко (эх, где ты, template typedef)

namespace rx_tmpl
{
    namespace impl
    {
        template<wchar_t E, class TAIL>
        struct strn
        {
            enum{ length = 1 + TAIL::length };

            template<class InIt>
            static bool do_match(InIt& it)
            {
                return (E == *it) && TAIL::do_match(++it);
            }
        };

        struct strnlast
        {
            enum{ length = 0 };

            template<class InIt>
            static bool do_match(InIt& it)
            {
                return true;
            }
        };

        template<wchar_t E, class TAIL>
        struct setn
        {
            static bool do_match(wchar_t ch)
            {
                return (E == ch) || TAIL::do_match(ch);
            }
        };

        struct setnlast
        {
            static bool do_match(wchar_t ch)
            {
                return false;
            }
        };

        template<class RX, class TAIL>
        struct seqn
        {
            template<class InIt>
            static bool do_match(InIt& it, const InIt& end)
            {
                return RX::match(it,end) && TAIL::do_match(it,end);
            }
        };

        struct seqnlast
        {
            enum{ last = true };

            template<class InIt>
            static bool do_match(InIt& it, const InIt&)
            {
                return true;
            }
        };

        template<class RX, class TAIL>
        struct altn /*alternative - like "(abc|def)" */
        {
            template<bool> struct sel{};
            enum{ last = false };

            template<class InIt>
            static bool do_match(InIt& it, const InIt& end, sel<false>)
            {
                InIt i1(it); bool b1 = RX::match(i1,end);
                InIt i2(it); bool b2 = TAIL::do_match(i2,end);
                if( b1 && b2 )
                    it = (i1 < i2) ? i2 : i1;
                else if( b1 )
                    it = i1;
                else if( b2 )
                    it = i2;
                else
                    return false;
                return true;
            }

            template<class InIt>
            static bool do_match(InIt& it, const InIt& end, sel<true>)
            {
                return RX::match(it,end);
            }

            template<class InIt>
            static bool do_match(InIt& it, const InIt& end)
            {
                return do_match(it,end, sel<TAIL::last>());
            }
        };
    }//namespace impl

template<wchar_t E>
struct c /* literal charachter */
{
    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        bool res = (it != end) && (E == *it);
        if( res ) ++it;
        return res;
    }
};

template<
int c0,     int c1,     int c2 =-1, int c3 =-1, int c4 =-1, int c5 =-1, int c6 =-1, int c7 =-1, int c8 =-1, int c9 =-1,
int c10=-1, int c11=-1, int c12=-1, int c13=-1, int c14=-1, int c15=-1, int c16=-1, int c17=-1, int c18=-1, int c19=-1,
int c20=-1, int c21=-1, int c22=-1, int c23=-1, int c24=-1, int c25=-1, int c26=-1, int c27=-1, int c28=-1, int c29=-1
>
struct s /* literal string - up to 30 symbols */
{
    typedef impl::strn<
        c0,
        typename s<
            c1,c2,c3,c4,c5,c6,c7,c8,c9,
            c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,
            c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,
            -1
        >::node
    > node;

    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        InIt tend( it + node::length );
        if( !(tend < end || tend == end) )
            return false;

        InIt t(it);
        if( !node::do_match(it) )
            { it = t; return false;    }
        return true;
    }
};

template<> 
struct s<
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1>
{
    typedef impl::strnlast node;
};

template<
int c0,     int c1,     int c2 =-1, int c3 =-1, int c4 =-1, int c5 =-1, int c6 =-1, int c7 =-1, int c8 =-1, int c9 =-1,
int c10=-1, int c11=-1, int c12=-1, int c13=-1, int c14=-1, int c15=-1, int c16=-1, int c17=-1, int c18=-1, int c19=-1,
int c20=-1, int c21=-1, int c22=-1, int c23=-1, int c24=-1, int c25=-1, int c26=-1, int c27=-1, int c28=-1, int c29=-1
>
struct set /* charachter set - up to 30 chars - like "[ABCDEFabcdef]"*/
{
    typedef impl::setn<
        c0,
        typename set<
            c1,c2,c3,c4,c5,c6,c7,c8,c9,
            c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,
            c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,
            -1
        >::node
    > node;

    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        bool res = (it != end) && node::do_match(*it);
        if( res ) ++it;
        return res;
    }
};

template<> 
struct set<
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1>
{
    typedef impl::setnlast node;
};

template<wchar_t FRONT, wchar_t BACK, bool INCLUSIVE=true>
struct range /*charachter range - like "[a-d]" or "[^a-d]" */
{
    template<bool> struct sel {};
    //PXT_CASSERT((FRONT < BACK));

    template<class InIt>
    static bool do_match(InIt& it, const InIt& end, sel<true>)
    {
        bool res = (it != end) && (FRONT >= *it) && (*it <= BACK);
        if( res ) ++it;
        return res;
    }

    template<class InIt>
    static bool do_match(InIt& it, const InIt& end, sel<false>)
    {
        return !do_match(it, end, sel<true>());
    }

    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        return do_match(it, end, sel<INCLUSIVE>());
    }
};

struct ws /* whitespace chars */
{
    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        bool res = (it != end) && ((*it == ' ') || (*it == '\t'));
        if( res ) ++it;
        return res;
    }
};

struct digit /* numeric chars */
{
    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        bool res = (it != end) && (*it >= '0') && (*it <= '9');
        if( res ) ++it;
        return res;
    }
};

struct alpha /* numeric chars */
{
    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        bool res = (it != end) && (
            ((*it >= 'A') && (*it <= 'Z')) || 
            ((*it >= 'a') && (*it <= 'z'))
        );
        if( res ) ++it;
        return res;
    }
};

template<class RX>
struct star /*zero or more occurences of RX - like "(abc)*" */
{
    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        while( true )
        {
            InIt t(it);
            if( false == RX::match(it, end) || it == t )
                break;
        }
        return true;
    }
};

template<class RX>
struct plus /*one or more occurences of RX - like "(abc)+" */
{
    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        return RX::match(it, end) && star<RX>::match(it, end);
    }
};

template<class RX>
struct opt /*optional occurence of RX - like "(abc)?" */
{
    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        RX::match(it, end);
        return true;
    }
};

struct seqe {};

template<
    class RX0,      class RX1,      class RX2=seqe, class RX3=seqe, class RX4=seqe, 
    class RX5=seqe, class RX6=seqe, class RX7=seqe, class RX8=seqe, class RX9=seqe
>
struct seq 
{
    typedef impl::seqn<
        RX0,
        typename seq<RX1,RX2,RX3,RX4,RX5,RX6,RX7,RX8,RX9,seqe>::node
    > node;

    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        InIt t(it);
        bool res = node::do_match(it,end);
        if( !res ) it = t;
        return res;
    }
};

template<>
struct seq<seqe,seqe,seqe,seqe,seqe,seqe,seqe,seqe,seqe,seqe>
{
    typedef impl::seqnlast node;
};

template<
    class RX0,      class RX1,      class RX2=seqe, class RX3=seqe, class RX4=seqe, 
    class RX5=seqe, class RX6=seqe, class RX7=seqe, class RX8=seqe, class RX9=seqe
>
struct alt /*alternative - like "(abc|def)" */
{
    typedef impl::altn<
        RX0,
        typename alt<RX1,RX2,RX3,RX4,RX5,RX6,RX7,RX8,RX9,seqe>::node
    > node;

    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        return node::do_match(it,end);
    }
};

template<>
struct alt<seqe,seqe,seqe,seqe,seqe,seqe,seqe,seqe,seqe,seqe>
{
    typedef impl::seqnlast node;
};
}//namespace rx_tmpl

Здравствуйте, TepMuHyc, Вы писали:

А почему в качестве дефолтного символа в шаблонах использован -1 (русское "я"), а не 0 ?

Здравствуйте, Кодт, Вы писали:

К>А почему в качестве дефолтного символа в шаблонах использован -1 (русское "я"), а не 0 ?
...Вообще-то да... Спасибо.
В самом скором будущем заменю -1 на 0xBAADC0DE — надеюсь китайцы и прочие японцы против не будут

Здравствуйте, TepMuHyc, Вы писали:

TMH>- терминальные примитивы нечувствительные к регистру букв — думаю как сделать максимально гибко (эх, где ты, template typedef)
А так пойдет?

template<class T1, class T2>
struct some_else_template
{
    typedef some_template<T1, int, T2, float> type;
};
some_else_template<char, short>::type

... << RSDN@Home 1.1 beta 2 >>

Здравствуйте, TepMuHyc, Вы писали:

TMH>...Работы у меня сегодня не было, а настроение поизвращаться с C++ — было.
TMH>В результате всего этого, появился "шедевер" — шаблонная библиотека статических регулярных выражений, в чем-то смахивающая на SPIRIT. Вот только она гаараздо меньше — всего 7 килобайт. Ну, и гораздо примитивнее, конечно.

TMH>Основная ее фишка в том, что компиляция регулярного выражения проводится на этапе компиляции программы — прямо в исполнимый код (довольно страшноватенький — т.к. оптимизация еще впереди). А на этапе выполнения остается только пользоваться функцией поиска совпадений...

Почти один в один boost::spirit. Только в boost::spirit почти не нужно извращаться с typedef'ами, выражение собирается с помощью вызовов функций. Собственно говоря, твоему подходу это не противоречит, т. к. в boost::spirit на низком уровне имеются подобные шаблоны. А интерфейс в духе boost::spirit можно добавить уже поверх всего написаного.

Здравствуйте, TepMuHyc, Вы писали:

К>>А почему в качестве дефолтного символа в шаблонах использован -1 (русское "я"), а не 0 ?
TMH>...Вообще-то да... Спасибо.
TMH>В самом скором будущем заменю -1 на 0xBAADC0DE — надеюсь китайцы и прочие японцы против не будут

Где-нибудь наверху определи константу -- enum или просто #define. Чтобы потом можно было по-быстрому править...

Здравствуйте, Кодт, Вы писали:

К>Где-нибудь наверху определи константу -- enum или просто #define. Чтобы потом можно было по-быстрому править...
...это и ежу понятно — писать 0xBAADC0DE 28 раз — можно и мозоли на пальчиках заработать — проще
enum{ stopch = 0xBAADC0DE };

"TepMuHyc" <1583@news.rsdn.ru> wrote in message news:443651@news.rsdn.ru...
From: TepMuHyc http://vbourdo.cjb.net/

Изменения:
— подчищен код — убраны ненужные наследования, реализация всех рекурсивных шаблонов приведена к единообразию
— убраны все реверансы в строну forward_iterator — библиотека теперь поддерживает ТОЛЬКО random_access_iterator
— примитив alt<> теперь поддерживает до 10 подвыражений (раньше только 2)
— оптимизирован примитив s<> — убраны ненужные проверки конца последовательности.

Планы на следующие версии:
— примитив select<RX> — выделение подвыражений — будут изменения формата функции match()
— терминальные примитивы нечувствительные к регистру букв — думаю как сделать максимально гибко (эх, где ты, template typedef)
вот:

template <typename _Ty>
struct _typedef
{
    typedef _Ty value;
};

void main()
{
    _typedef<char>::value MyType;
}

Или... что ты имел в виду?
С уважением,
AlexanderFEDIN@iname.com

Posted via RSDN NNTP Server 1.8 beta

Изменения и прочая отдельным письмом...

#ifndef __STATIC_REGEX_H_INCLUDED__
#define    __STATIC_REGEX_H_INCLUDED__
namespace static_regex {

typedef wchar_t rx_char;
struct seqe {};
enum { stpc = 0xBAADC0DE };

template<rx_char E>
struct c {/* literal charachter */
    static bool char_match(rx_char ch)
        { return (E == ch); }

    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        bool res = (it != end) && (E == static_cast<rx_char>(*it));
        if( res ) ++it;
        return res;
    }

    template<class InIt>
    static bool match(InIt& it, const InIt& end, InIt*, int&)
    {
        return match(it,end);
    }
};

template<
    int c0,       int c1,       int c2 =stpc, int c3 =stpc, int c4 =stpc, int c5 =stpc, int c6 =stpc, int c7 =stpc, int c8 =stpc, int c9 =stpc,
    int c10=stpc, int c11=stpc, int c12=stpc, int c13=stpc, int c14=stpc, int c15=stpc, int c16=stpc, int c17=stpc, int c18=stpc, int c19=stpc,
    int c20=stpc, int c21=stpc, int c22=stpc, int c23=stpc, int c24=stpc, int c25=stpc, int c26=stpc, int c27=stpc, int c28=stpc, int c29=stpc>
struct s {/* literal string - up to 30 symbols */
    typedef s<
        c1, c2, c3, c4, c5, c6, c7, c8, c9, c10,
        c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,
        c21,c22,c23,c24,c25,c26,c27,c28,c29,stpc
    > tail;

    enum { length = 1 + tail::length };
    template<class InIt>
    static bool do_match(InIt& it)
    {
        return (c0 == static_cast<rx_char>(*it)) && tail::do_match(++it);
    }

    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        InIt tend( it + length );
        if( !(tend < end || tend == end) )
            return false;

        InIt t(it);
        bool res = do_match(it);
        if( !res ) it = t;
        return res;
    }

    template<class InIt>
    static bool match(InIt& it, const InIt& end, InIt*, int&)
    {
        return match(it,end);
    }
};
template<> struct s<
    stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc,
    stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc,
    stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc>
{
    enum { length = 0 };

    template<class InIt>
    static bool do_match(InIt&)
        { return true; }
};

template<
    bool INCLUSIVE,
    int c0,       int c1,       int c2 =stpc, int c3 =stpc, int c4 =stpc, int c5 =stpc, int c6 =stpc, int c7 =stpc, int c8 =stpc, int c9 =stpc,
    int c10=stpc, int c11=stpc, int c12=stpc, int c13=stpc, int c14=stpc, int c15=stpc, int c16=stpc, int c17=stpc, int c18=stpc, int c19=stpc,
    int c20=stpc, int c21=stpc, int c22=stpc, int c23=stpc, int c24=stpc, int c25=stpc, int c26=stpc, int c27=stpc, int c28=stpc, int c29=stpc>
struct cc_ /* charachter class - like "[abcdEF]" or "[^abcdEF]" */
{
    typedef cc_<INCLUSIVE,
        c1, c2, c3, c4, c5, c6, c7, c8, c9, c10,
        c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,
        c21,c22,c23,c24,c25,c26,c27,c28,c29,stpc
    > tail;

    static bool char_match(rx_char ch)
        { return (c0 == ch) || tail::char_match(ch); }

    template<bool> struct sel {};
    static bool char_match(rx_char ch, sel<true>)
        { return char_match(ch); }
    static bool char_match(rx_char ch, sel<false>)
        { return !char_match(ch); }

    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        bool res = (it != end) && 
            char_match(static_cast<rx_char>(*it), sel<INCLUSIVE>());
        if( res ) ++it;
        return res;
    }

    template<class InIt>
    static bool match(InIt& it, const InIt& end, InIt*, int&)
    {
        return match(it,end);
    }
};

template<> struct cc_<true,
    stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc,
    stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc,
    stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc>
{ 
    static bool char_match(rx_char ch)
        { return false; }
};

template<> struct cc_<false,
    stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc,
    stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc,
    stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc, stpc>
{
    static bool char_match(rx_char ch)
        { return false; }
};

template<
int c0,       int c1,       int c2 =stpc, int c3 =stpc, int c4 =stpc, int c5 =stpc, int c6 =stpc, int c7 =stpc, int c8 =stpc, int c9 =stpc,
int c10=stpc, int c11=stpc, int c12=stpc, int c13=stpc, int c14=stpc, int c15=stpc, int c16=stpc, int c17=stpc, int c18=stpc, int c19=stpc,
int c20=stpc, int c21=stpc, int c22=stpc, int c23=stpc, int c24=stpc, int c25=stpc, int c26=stpc, int c27=stpc, int c28=stpc, int c29=stpc>
struct cc:cc_<true,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29>
    {};

template<
int c0,       int c1,       int c2 =stpc, int c3 =stpc, int c4 =stpc, int c5 =stpc, int c6 =stpc, int c7 =stpc, int c8 =stpc, int c9 =stpc,
int c10=stpc, int c11=stpc, int c12=stpc, int c13=stpc, int c14=stpc, int c15=stpc, int c16=stpc, int c17=stpc, int c18=stpc, int c19=stpc,
int c20=stpc, int c21=stpc, int c22=stpc, int c23=stpc, int c24=stpc, int c25=stpc, int c26=stpc, int c27=stpc, int c28=stpc, int c29=stpc>
struct not_cc:cc_<false,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29>
    {};

template<rx_char F, rx_char B, bool INCLUSIVE=true>
struct range /* charachter range - like "[a-d]" or "[^a-d]" */
{
    static bool char_match(rx_char ch)
    {
        return (((F < B) ? F : B) <= ch) && (ch <= ((F < B) ? B : F));
    }

    template<bool> struct sel {};
    static bool char_match(rx_char ch, sel<true>)
        { return char_match(ch); }
    static bool char_match(rx_char ch, sel<false>)
        { return !char_match(ch); }

    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        bool res = (it != end) && 
            char_match(static_cast<rx_char>(*it), sel<INCLUSIVE>());
        if( res ) ++it;
        return res;
    }

    template<class InIt>
    static bool match(InIt& it, const InIt& end, InIt*, int&)
    {
        return match(it,end);
    }
};

template<
    class CC0,      class CC1,      class CC2=seqe, class CC3=seqe, class CC4=seqe, 
    class CC5=seqe, class CC6=seqe, class CC7=seqe, class CC8=seqe, class CC9=seqe>
struct set {/* charachter set */
    typedef set<CC1,CC2,CC3,CC4,CC5,CC6,CC7,CC8,CC9,seqe> tail;

    static bool char_match(rx_char ch)
    {
        return CC0::char_match(ch) || tail::char_match(ch);
    }

    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        bool res = (it != end) &&
            char_match( static_cast<rx_char>(*it) );
        if( res ) ++it;
        return res;
    }

    template<class InIt>
    static bool match(InIt& it, const InIt& end, InIt*, int&)
    {
        return match(it,end);
    }
};

template<> struct set<seqe,seqe,seqe,seqe,seqe,seqe,seqe,seqe,seqe,seqe>
{ 
    static bool char_match(rx_char)
        { return false; }
};

template<class RX>
struct star {/* zero or more occurences of RX - like "(abc)*" */
    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        while( RX::match(it,end) );
        return true;
    }

    template<class InIt>
    static bool match(InIt& it, const InIt& end, InIt* sel, int& nsel)
    {
        while( RX::match(it,end,sel,nsel) );
        return true;
    }
};

template<class RX>
struct plus {/* one or more occurences of RX - like "(abc)+" */
    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        return RX::match(it,end) && star<RX>::match(it,end);
    }

    template<class InIt>
    static bool match(InIt& it, const InIt& end, InIt* sel, int& nsel)
    {
        return RX::match(it,end,sel,nsel) && star<RX>::match(it,end,sel,nsel);
    }
};

template<class RX>
struct opt {/* optional occurence of RX - like "(abc)?" */
    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        RX::match(it, end);
        return true;
    }

    template<class InIt>
    static bool match(InIt& it, const InIt& end, InIt* sel, int& nsel)
    {
        RX::match(it,end,sel,nsel);
        return true;
    }
};

template<
    class RX0,      class RX1,      class RX2=seqe, class RX3=seqe, class RX4=seqe, 
    class RX5=seqe, class RX6=seqe, class RX7=seqe, class RX8=seqe, class RX9=seqe>
struct seq /* sequence of RX - like "(abc)(def)" */
{
    typedef seq<RX1,RX2,RX3,RX4,RX5,RX6,RX7,RX8,RX9,seqe> tail;

    template<class InIt>
    static bool do_match(InIt& it, const InIt& end)
    {
        return RX0::match(it,end) && tail::do_match(it,end);
    }

    template<class InIt>
    static bool do_match(InIt& it, const InIt& end, InIt* sel, int& nsel)
    {
        return RX0::match(it,end,sel,nsel) && tail::do_match(it,end,sel,nsel);
    }

    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        InIt itbk(it);
        if( !do_match(it,end) ) 
            { it = itbk; return false; }
        return true;
    }

    template<class InIt>
    static bool match(InIt& it, const InIt& end, InIt* sel, int& nsel)
    {
        InIt itbk(it);
        int nselbk = nsel;
        bool res = do_match(it,end,sel,nsel);
        if( !res ) {
            it = itbk;
            nsel = nselbk;
        }
        return res;
    }
};

template<> struct seq<seqe,seqe,seqe,seqe,seqe,seqe,seqe,seqe,seqe,seqe>
{
    template<class InIt>
    static bool do_match(InIt& it, const InIt& end)
        { return true; }
    template<class InIt>
    static bool do_match(InIt& it, const InIt& end, InIt* sel, int& nsel)
        { return true; }
};

template<
    class RX0,      class RX1,      class RX2=seqe, class RX3=seqe, class RX4=seqe, 
    class RX5=seqe, class RX6=seqe, class RX7=seqe, class RX8=seqe, class RX9=seqe>
struct alt /* alternative - like "(abc|def)" */
{
    typedef alt<RX1,RX2,RX3,RX4,RX5,RX6,RX7,RX8,RX9,seqe> tail;

    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        bool res;
        InIt rxit(it); 
        res  = RX0::match(rxit,end);
        res |= tail::match(it,end);
        if( it < rxit ) 
            it = rxit;
        return res;
    }

    template<class InIt>
    static bool match(InIt& it, const InIt& end, InIt* sel, int& nsel)
    {
        bool res;
        InIt rxit(it); 
        int rxnsel = nsel, tailnsel;
        res  = RX0::match(rxit,end,sel,rxnsel);
        res |= tail::match(it,end,sel,tailnsel = rxnsel);
        if( it <= rxit ) {
            it = rxit;
            nsel = rxnsel;
        } else
        if( res )
            for( ; rxnsel < tailnsel; rxnsel++, nsel++ )
            {
                sel[2*nsel  ] = sel[2*rxnsel  ];
                sel[2*nsel+1] = sel[2*rxnsel+1];
            }
        return res;
    }
};

template<> struct alt<seqe,seqe,seqe,seqe,seqe,seqe,seqe,seqe,seqe,seqe>
{
    template<class InIt>
    static bool match(InIt& it, const InIt& end)
        { return false; }
    template<class InIt>
    static bool match(InIt& it, const InIt& end, InIt*, int&)
        { return false; }
};

template<class RX>
struct select
{
    template<class InIt>
    static bool match(InIt& it, const InIt& end)
    {
        return RX::match(it,end);
    }

    template<class InIt>
    static bool match(InIt& it, const InIt& end, InIt* sel, int& nsel)
    {
        InIt first(it);
        int nselbk = nsel++;
        bool res = RX::match(it,end,sel,nsel);
        if( res ) {
            sel[2*nselbk  ] = first;
            sel[2*nselbk+1] = it;
        } else {
            nsel = nselbk;
        }
        return res;
    }
};

//-////////////////////////////////////////////////////////////////////////
template<bool INCLUSIVE> struct ws_
    :cc<INCLUSIVE, ' ', '\t'>
    {};
typedef ws_<true> ws;
typedef ws_<false> not_ws;

template<bool INCLUSIVE> struct digit_
    :range<'0','9',INCLUSIVE>
    {};
typedef digit_<true> digit;
typedef digit_<false> not_digit;

template<bool INCLUSIVE> struct xdigit_
    :set< range<'0','9',INCLUSIVE>, range<'A','F',INCLUSIVE>, range<'a','f',INCLUSIVE> >
    {};
typedef digit_<true> xdigit;
typedef digit_<false> not_xdigit;

template<bool INCLUSIVE> struct alpha_
    :set< range<'A','Z',INCLUSIVE>, range<'a','z',INCLUSIVE> >
    {};
typedef alpha_<true> alpha;
typedef alpha_<false> not_alpha;

template<bool INCLUSIVE> struct alphanum_
    :set< alpha_<INCLUSIVE>, digit_<INCLUSIVE> >
    {};
typedef alphanum_<true> alphanum;
typedef alphanum_<false> not_alphanum;

struct endl:seq< opt< c<'\r'> >, c<'\n'> > {};
}//namespace static_regex
#endif//__STATIC_REGEX_H_INCLUDED__

Изменения:
— как и было обещано, добавил примитив select<RX> для выделения подвыражений. Причем, без серьезной потери производительности, что особенно приятно. Теперь, немного о его использовании:
--выражение для выделения ответа FTP сервера (аналогичное приведенному в boost::regex) будет выглядеть так:

seq<
    select< plus<digit> >,               //код ответа
    select< alt<cc<'-',' '>, endl> >,    //символ переноса - если '-' - то ответ многострочный
    select< star<not_cc<'\r','\n'> > >,  //текст сообщения - все до конца строки
    opt<endl>                            //необязательный перевод строки
>

--вызов теста на совпадение будет выглядеть так:

int   nsubmatches;
char* submatches[2*3];//сколько всего планируется подвыражений - границы НЕ проверяются
char* text = "200 Hello world\n", ptr = text;
if( ftp_regex::match(ptr, ptr+strlen(ptr), submatches, nsubmatches=0) )
{//совпадает...
    submatches[0] //начало совпадения №1
    submatches[1] //конец совпадения №1
    
    submatches[2] //начало совпадения №2
    submatches[3] //конец совпадения №2
    
    submatches[4] //начало совпадения №3
    submatches[5] //конец совпадения №3
}

— основательно переработаны терминальные примитивы для работы с классами символов.

    c<'А'> - просто символ
    cс<'А','B','C'> - набор символов [ABC]
    not_cс<'А','B','C'> - набор символов [^ABC]
    range<'А','F',bool> - диапазон символов [A-F]
    set<CC0,CC1,...> - комбинанирование вышеуказанных примитивов -
        например класс символов [0-9A-Za-z_] будет выглядеть как 
        set<range<'0','9'>,range<'A','Z'>,range<'a','z'>,c<'_'> >

— добавлено еще несколько предопределенных примитивов

    ws - пробел или таб [ \t]
    not_ws - все кроме пробела или таба
    digit - десятичные цифры [0-9]
    not_digit - все кроме десятичных цифр
    xdigit - шестнадцатиричные цифры [0-9A-Fa-f]
    not_xdigit - все кроме шестнадцатиричных цифр
    alpha - английские буквы [A-Za-z]
    not_alpha - все кроме английских букв
    alphanum - английские буквы и цифры [0-9A-Za-z]
    not_alphanum - все кроме английских букв и цифр
    endl - конец строки (\r?\n)

— увы, не добавил case-insensitive примитивы — т.к. хорошо подумав, понял, что нечувствительность к регистру — штука очень уж зависящая от локали — а вариантов работы с локалями — масса и формализовать их не так уж легко. Тем более, совсем не хочется делать эту библиотечку зависящей от огромного и страшного пакета <locale>. Так что пусть данный вопрос остается желающим в качестве домашнего задания

...запустил нижеприведенную програмку и просто офигел...
Я, конечно ожидал, что инлайновый код быстрый, но чтобы настолько

# matches: 60000 # fails: 40000
my time: 40

# matches: 60000 # fails: 40000
boost time: 3055

Вот исходники тестовой программы.
Проверялось на VC 7.1 с ключом /O2.

#define BOOST_REGEX_STATIC_LINK
#include <windows.h>
#include <string>
#include <iostream>
#include "../static_regex.h"
#include <boost/regex.hpp>

using std::string;
namespace
{
    using namespace static_regex;
    typedef 
    seq<
        select< plus<digit> >,               //код ответа
        select< alt<cc<'-',' '>, endl> >,    //символ переноса - если '-' - то ответ многострочный
        select< star<not_cc<'\r','\n'> > >,  //текст сообщения - все до конца строки
        opt<endl>                            //необязательный перевод строки
    > 
    rx_ftp;

    boost::regex brx_ftp("([0-9]+)([\\- ]|(?:\r?\n))([^\r\n]*)(?:\r?\n)?");
}

string data[] = {
    "202Slightly malformed response\r\n",
    "200 Hello beutiful world\n",
    "201-Hello nice and beutiful world\n",
    "202-Hello sometimes nice and beutiful but mostly ugly world\r\n",
    "Not a response at all\r\n",
};

bool is_ftpanswer(const string& data)
{
    int nsubm;
    string::const_iterator subm[2*10];

    if( rx_ftp::match(data.begin(),data.end(),subm,nsubm=0) )
    {
        return true;
    }
    return false;
}

bool bis_ftpanswer(const string& data)
{
   boost::smatch what;
   if( boost::regex_match(data, what, brx_ftp) )
   {
      return true;
   }
   return false;
}

int main()
{
    DWORD start=0;
    int nmatches, nfails;
    const int NITER = 20000;

    nmatches = nfails = 0;
    start = GetTickCount();
    for(int i=0; i < NITER; i++ )
    {
        for( int r=0; r<5; r++ )
             is_ftpanswer( data[r] )
                ? ++nmatches : ++nfails;
    }
    std::cout << "# matches: " << nmatches << " # fails: " << nfails << std::endl;
    std::cout << "my time: " << (GetTickCount() - start) << std::endl;

    nmatches = nfails = 0;
    start = GetTickCount();
    for(int i=0; i < NITER; i++ )
    {
        for( int r=0; r<5; r++ )
             bis_ftpanswer( data[r] )
                ? ++nmatches : ++nfails;
    }
    std::cout << "# matches: " << nmatches << " # fails: " << nfails << std::endl;
    std::cout << "boost time: " << (GetTickCount() - start) << std::endl;
}

	От:	TepMuHyc
	Дата:	13.11.03 17:25
	Оценка:	254 (24)

	От:	TepMuHyc
	Дата:	13.11.03 17:30
	Оценка:	15 (1)

	От:	ArtDenis
	Дата:	14.11.03 04:26
	Оценка:

От:	Alexey Shirshov	http://wise-orm.com
Дата:	14.11.03 08:56
Оценка:

	От:	Зверёк Харьковский
	Дата:	14.11.03 10:22
	Оценка:

От:	alexanderfedin	http://alexander-fedin.pixels.com/
Дата:	18.11.03 16:48
Оценка:

	От:	WolfHound
	Дата:	15.11.03 06:33
	Оценка:

	От:	GvozdodeR
	Дата:	15.11.03 16:41
	Оценка:

	От:	Кодт
	Дата:	16.11.03 11:37
	Оценка: