Re: Boost.Locale под Windows
От: nen777w  
Дата: 22.08.12 07:07
Оценка:
L"Русский текст" <- Разве результатом этого не будет utf16?

В своё время, когда надо было конвертировать utf8 <-> utf16, пользовался вот таким откуда-то спёртым кодом:

  Скрытый текст
//.................................................................................................
    /// Encodes a range of wide-character code units as UTF-8.
    ///
    /// Each element of [in, end) is converted to wchar_t. When wchar_t is
    /// 16 bits wide the input is treated as UTF-16 and surrogate pairs are
    /// combined into a single code point; otherwise every element is taken
    /// as one code point.
    ///
    /// Anything that is not a Unicode scalar value is written as a single
    /// '?': an unpaired surrogate (including a high surrogate that is the
    /// last element of the input) and any value above U+10FFFF. The output
    /// therefore never contains sequences longer than four bytes.
    ///
    /// @param in   iterator to the first input code unit
    /// @param end  iterator one past the last input code unit
    /// @param out  output iterator receiving the UTF-8 bytes as char
    template<class InIt, class OutIt>
    inline void utf8_encode(InIt in, const InIt end, OutIt out)
    {
        const unsigned long replacement = '?';

        // A code unit that was read while looking for a low surrogate but
        // turned out not to be one; it is re-examined on the next iteration.
        bool has_carry = false;
        unsigned long carry = 0;

        while( has_carry || !(in == end) ) {
            unsigned long wc;
            if( has_carry ) {
                wc = carry;
                has_carry = false;
            } else {
                wc = static_cast<wchar_t>(*in); ++in;
            }

            if( wc >= 0xD800 && wc < 0xE000 ) {
                if( sizeof(wchar_t) == 2 && wc < 0xDC00 && !(in == end) ) {
                    // High surrogate: it must be followed by a low surrogate.
                    const unsigned long lo = static_cast<wchar_t>(*in); ++in;
                    if( lo >= 0xDC00 && lo < 0xE000 ) {
                        wc = 0x10000 + (((wc & 0x3FF) << 10) | (lo & 0x3FF));
                    } else {
                        // Unpaired high surrogate: replace it and reprocess
                        // the unit that followed it.
                        carry = lo;
                        has_carry = true;
                        wc = replacement;
                    }
                } else {
                    // Stray low surrogate, high surrogate at end of input,
                    // or a surrogate code point in 32-bit input.
                    wc = replacement;
                }
            } else if( wc > 0x10FFFF ) {
                // Outside the Unicode range (also catches negative values
                // of a signed wchar_t after conversion to unsigned long).
                wc = replacement;
            }

            if( wc < 0x80 ) {
                *out = static_cast<char>(wc); ++out;
                continue;
            }

            // Pick the lead-byte marker and the shift of the topmost 6-bit
            // group; the remaining groups follow as 10xxxxxx bytes.
            unsigned char marker;
            int shift;
            if( wc < 0x800 )        { shift = 6;  marker = 0xC0; }
            else if( wc < 0x10000 ) { shift = 12; marker = 0xE0; }
            else                    { shift = 18; marker = 0xF0; }

            do {
                const unsigned char byte =
                    static_cast<unsigned char>(marker | ((wc >> shift) & 0x3F));
                *out = static_cast<char>(byte); ++out;
                marker = 0x80;
                shift -= 6;
            } while( shift >= 0 );
        }
    }
    //.................................................................................................
    /// Decodes a range of UTF-8 bytes into wide characters.
    ///
    /// Well-formed sequences are written to `out` as wchar_t. When wchar_t
    /// is 16 bits wide, code points above U+FFFF are written as a UTF-16
    /// surrogate pair; otherwise each code point is one output element.
    ///
    /// Every ill-formed sequence is written as a single '?':
    ///   - a byte that cannot start a sequence (stray continuation byte,
    ///     0xFE, 0xFF);
    ///   - a sequence cut short by a non-continuation byte (that byte is
    ///     then decoded as the start of the next sequence) or by the end
    ///     of the input;
    ///   - an overlong encoding, i.e. a value below the minimum for its
    ///     sequence length;
    ///   - a surrogate code point (U+D800..U+DFFF) or a value above
    ///     U+10FFFF, which includes all legacy 5- and 6-byte forms.
    ///
    /// @param in   iterator to the first input byte
    /// @param end  iterator one past the last input byte
    /// @param out  output iterator receiving wchar_t values
    template<class InIt, class OutIt>
    inline void utf8_decode(InIt in, const InIt end, OutIt out)
    {
        const wchar_t replacement = L'?';

        while( !(in == end) ) {
            unsigned long wc = static_cast<unsigned char>(*in); ++in;

            if( wc < 0x80 ) {
                *out = static_cast<wchar_t>(wc); ++out;
                continue;
            }

            // Classify the lead byte: number of continuation bytes expected,
            // payload bits kept from the lead byte, and the smallest value
            // this length may legally encode (used to reject overlongs).
            int cnt;
            unsigned long min_cp;
            if( 0xC0 == (0xE0 & wc) )      { cnt = 1; wc &= 0x1F; min_cp = 0x80; }
            else if( 0xE0 == (0xF0 & wc) ) { cnt = 2; wc &= 0x0F; min_cp = 0x800; }
            else if( 0xF0 == (0xF8 & wc) ) { cnt = 3; wc &= 0x07; min_cp = 0x10000; }
            else if( 0xF8 == (0xFC & wc) ) { cnt = 4; wc &= 0x03; min_cp = 0x200000; }
            else if( 0xFC == (0xFE & wc) ) { cnt = 5; wc &= 0x01; min_cp = 0x4000000; }
            else {
                // Invalid start byte.
                *out = replacement; ++out;
                continue;
            }

            bool complete = true;
            for( ; cnt > 0; --cnt ) {
                if( in == end ) { complete = false; break; }
                const unsigned char c = static_cast<unsigned char>(*in);
                if( 0x80 != (0xC0 & c) ) {
                    // Not a continuation byte: leave it unconsumed so the
                    // outer loop decodes it as the start of a new sequence.
                    complete = false;
                    break;
                }
                ++in;
                wc = (wc << 6) | (c & 0x3F);
            }

            const bool is_surrogate = wc >= 0xD800 && wc < 0xE000;
            if( !complete || wc < min_cp || wc > 0x10FFFF || is_surrogate ) {
                *out = replacement; ++out;
                continue;
            }

            if( sizeof(wchar_t) == 2 && wc > 0xFFFF ) {
                // Split into a UTF-16 surrogate pair.
                wc -= 0x10000;
                *out = static_cast<wchar_t>(0xD800 | ((wc >> 10) & 0x3FF)); ++out;
                *out = static_cast<wchar_t>(0xDC00 | (wc & 0x3FF)); ++out;
                continue;
            }

            *out = static_cast<wchar_t>(wc); ++out;
        }
    }
    //.................................................................................................
    /// Returns the UTF-8 encoding of a wide string.
    ///
    /// Feeds every element of `utf16` through utf8_encode and collects the
    /// produced bytes in a freshly built std::string.
    ///
    /// @param utf16  wide string to convert
    /// @return       the bytes written by utf8_encode
    inline std::string utf16_to_utf8( const std::wstring& utf16 )
    {
        std::string encoded;
        std::back_insert_iterator<std::string> sink( encoded );
        utf8_encode( utf16.begin(), utf16.end(), sink );
        return encoded;
    }
    //.................................................................................................
    /// Returns the wide-character form of a UTF-8 byte string.
    ///
    /// Feeds every byte of `utf8` through utf8_decode and collects the
    /// produced wchar_t values in a freshly built std::wstring.
    ///
    /// @param utf8  byte string to convert
    /// @return      the wide characters written by utf8_decode
    inline std::wstring utf8_to_utf16( const std::string& utf8 )
    {
        std::wstring decoded;
        std::back_insert_iterator<std::wstring> sink( decoded );
        utf8_decode( utf8.begin(), utf8.end(), sink );
        return decoded;
    }
 
Подождите ...
Wait...
Пока на собственное сообщение не было ответов, его можно удалить.