Re: использование libiconv в *nix
От: zaufi Земля  
Дата: 04.09.06 11:27
Оценка:
Вот набросал примерчик:
#  include <iconv.h>
#  include <cassert>
#  include <cerrno>
#  include <clocale>
#  include <iostream>
#  include <stdexcept>
#  include <string>

/// Owns an iconv conversion descriptor for one (source, target) charset pair.
///
/// The descriptor is opened in the constructor and closed in the destructor.
/// Copying is disabled: a copy would hold the same descriptor and the second
/// destructor would call iconv_close() on an already closed handle.
class charset_converter
{
    iconv_t m_cd;

    // Declared private and intentionally left undefined (pre-C++11 way of
    // making a class non-copyable).
    charset_converter(const charset_converter&);
    charset_converter& operator=(const charset_converter&);

public:
    /// Opens a conversion descriptor.
    /// \param from name of the source encoding, as understood by iconv_open()
    /// \param to   name of the target encoding, as understood by iconv_open()
    /// \throw std::runtime_error if iconv_open() fails
    charset_converter(const std::string& from, const std::string& to)
      : m_cd(iconv_open(to.c_str(), from.c_str()))   // note: iconv_open takes (to, from)
    {
        if (m_cd == reinterpret_cast<iconv_t>(-1))
            throw std::runtime_error("iconv init failure");
    }
    /// Closes the descriptor; a failure here is treated as a programming error.
    ~charset_converter()
    {
        const int r = iconv_close(m_cd);
        assert(r != -1);
        static_cast<void>(r);                        // keep NDEBUG builds free of "unused variable"
    }
    /// Converts a string from the source encoding to the target encoding.
    std::string convert(const std::string&);
};

class convert
{
    std::string m_str;
    std::string m_from;
    std::string m_to;

public:
    convert(const std::string& str) : m_str(str) {}
    convert& from(const std::string& from_enc)
    {
        m_from = from_enc;
        return *this;
    }
    convert& to(const std::string& to_enc)
    {
        m_to = to_enc;
        return *this;
    }
    operator std::string() const
    {
        if (m_from.empty() || m_to.empty())
            throw std::runtime_error("Bad use of conversion helper class");
        return charset_converter(m_from, m_to).convert(m_str);
    }
};

/// Converts \a str from the source encoding to the target encoding.
///
/// The input is fed to iconv() in as many passes as needed, draining a
/// fixed-size output buffer after each pass; afterwards the pending shift
/// sequence (if any) is flushed and the descriptor is reset to its initial
/// state, so the object can be reused for the next string.
///
/// \param str bytes in the source encoding; an empty string is allowed
/// \return the converted bytes in the target encoding
/// \throw std::runtime_error "invalid byte sequence" on EILSEQ,
///        "incomplete byte sequence" on EINVAL, "Bad use" on any other failure
std::string charset_converter::convert(const std::string& str)
{
    const size_t MAX_BUFFER_SIZE = 4096;
    const size_t ICONV_FAILED = static_cast<size_t>(-1);

    // iconv() takes `char**` for historical reasons; it only advances the
    // pointer and does not write through it, so casting away const is safe.
    char* in_buffer = const_cast<char*>(str.data());
    size_t in_bytes_left = str.size();

    std::string result;
    char out_buffer[MAX_BUFFER_SIZE];

    // Phase 1 converts the input; phase 2 (flushing) passes a null input so
    // that iconv() emits the sequence returning to the initial shift state.
    // Empty input goes straight to phase 2.
    bool flushing = (in_bytes_left == 0);
    for (;;)
    {
        char* out_ptr = out_buffer;
        size_t out_bytes_left = MAX_BUFFER_SIZE;

        const size_t rc = flushing
          ? iconv(m_cd, 0, 0, &out_ptr, &out_bytes_left)
          : iconv(m_cd, &in_buffer, &in_bytes_left, &out_ptr, &out_bytes_left);
        // Capture errno immediately, before any other call can overwrite it.
        const int error = (rc == ICONV_FAILED) ? errno : 0;

        const size_t produced = MAX_BUFFER_SIZE - out_bytes_left;
        result.append(out_buffer, produced);

        if (error == 0)
        {
            if (flushing)
                break;
            if (in_bytes_left == 0)
                flushing = true;
            continue;
        }
        // Output buffer filled up: it has been drained above, go on with a
        // fresh one. No progress at all with an empty buffer is a hard error.
        if (error == E2BIG && produced != 0)
            continue;

        // Leave the descriptor in its initial state before reporting.
        iconv(m_cd, 0, 0, 0, 0);
        switch (error)
        {
            case EILSEQ:
                throw std::runtime_error("invalid byte sequence");
            case EINVAL:
                throw std::runtime_error("incomplete byte sequence");
            default:
                throw std::runtime_error("Bad use");
        }
    }

    // Reset the conversion descriptor for the next call.
    iconv(m_cd, 0, 0, 0, 0);
    return result;
}

/// Demo entry point: converts every command-line argument from UTF-8 to
/// CP1251 and prints the result.
///
/// \return 0 on success, -1 if the locale cannot be set or a conversion fails
int main(int argc, char* argv[])
{
    if (std::setlocale(LC_ALL, "") == 0)
    {
        std::cerr << "Unable to setup default locale.\n";
        return -1;
    }
    try
    {
        // With no arguments the loop body simply never runs.
        for (int i = 1; i < argc; ++i)
        {
            const std::string result_1251 = convert(argv[i]).from("UTF-8").to("CP1251");
            std::cout << "arg[" << i << "] = \"" << result_1251 << "\"\n";
        }
    }
    catch (const std::exception& e)
    {
        // Without this handler a bad argument would end in std::terminate().
        std::cerr << "Conversion failed: " << e.what() << '\n';
        return -1;
    }
    return 0;
}


У меня вроде нормально работает:
zaufi tests # ./iconv_test "Превед" | hexdump
0000000 7261 5b67 5d31 3d20 2220 f0cf e2e5 e4e5
0000010 0a22
0000012
zaufi tests # locale
LANG=en_US.UTF-8
LC_CTYPE=ru_RU.UTF-8
LC_NUMERIC=POSIX
LC_TIME=en_US.UTF-8
LC_COLLATE=ru_RU.UTF-8
LC_MONETARY=ru_RU.UTF-8
LC_MESSAGES=en_US.UTF-8
LC_PAPER=ru_RU.UTF-8
LC_NAME=ru_RU.UTF-8
LC_ADDRESS=ru_RU.UTF-8
LC_TELEPHONE=ru_RU.UTF-8
LC_MEASUREMENT=ru_RU.UTF-8
LC_IDENTIFICATION=ru_RU.UTF-8
LC_ALL=
 
Подождите ...
Wait...
Пока на собственное сообщение не было ответов, его можно удалить.