Line data Source code
1 : // tinygettext - A gettext replacement that works directly on .po files
2 : // Copyright (c) 2009 Ingo Ruhnke <grumbel@gmail.com>
3 : //
4 : // This software is provided 'as-is', without any express or implied
5 : // warranty. In no event will the authors be held liable for any damages
6 : // arising from the use of this software.
7 : //
8 : // Permission is granted to anyone to use this software for any purpose,
9 : // including commercial applications, and to alter it and redistribute it
10 : // freely, subject to the following restrictions:
11 : //
12 : // 1. The origin of this software must not be misrepresented; you must not
13 : // claim that you wrote the original software. If you use this software
14 : // in a product, an acknowledgement in the product documentation would be
15 : // appreciated but is not required.
16 : // 2. Altered source versions must be plainly marked as such, and must not be
17 : // misrepresented as being the original software.
18 : // 3. This notice may not be removed or altered from any source distribution.
19 :
20 : #include "precompiled.h"
21 :
22 : #include <ctype.h>
23 : #include <assert.h>
24 : #include <sstream>
25 : #include <errno.h>
26 : #include <stdexcept>
27 : #include <string.h>
28 : #include <stdlib.h>
29 :
30 : #include "tinygettext/iconv.hpp"
31 : #include "tinygettext/log_stream.hpp"
32 :
33 : namespace tinygettext {
34 :
35 0 : IConv::IConv()
36 : : to_charset(),
37 : from_charset(),
38 0 : cd(nullptr)
39 0 : {}
40 :
41 0 : IConv::IConv(const std::string& from_charset_, const std::string& to_charset_)
42 : : to_charset(),
43 : from_charset(),
44 0 : cd(nullptr)
45 : {
46 0 : set_charsets(from_charset_, to_charset_);
47 0 : }
48 :
49 0 : IConv::~IConv()
50 : {
51 0 : if (cd)
52 0 : iconv_close(cd);
53 0 : }
54 :
55 : void
56 0 : IConv::set_charsets(const std::string& from_charset_, const std::string& to_charset_)
57 : {
58 0 : if (cd)
59 0 : iconv_close(cd);
60 :
61 0 : from_charset = from_charset_;
62 0 : to_charset = to_charset_;
63 :
64 0 : for(std::string::iterator i = to_charset.begin(); i != to_charset.end(); ++i)
65 0 : *i = static_cast<char>(toupper(*i));
66 :
67 0 : for(std::string::iterator i = from_charset.begin(); i != from_charset.end(); ++i)
68 0 : *i = static_cast<char>(toupper(*i));
69 :
70 0 : if (to_charset == from_charset)
71 : {
72 0 : cd = nullptr;
73 : }
74 : else
75 : {
76 0 : cd = iconv_open(to_charset.c_str(), from_charset.c_str());
77 0 : if (cd == reinterpret_cast<iconv_t>(-1))
78 : {
79 0 : if(errno == EINVAL)
80 : {
81 0 : std::ostringstream str;
82 : str << "IConv construction failed: conversion from '" << from_charset
83 0 : << "' to '" << to_charset << "' not available";
84 0 : throw std::runtime_error(str.str());
85 : }
86 : else
87 : {
88 0 : std::ostringstream str;
89 0 : str << "IConv: construction failed: " << strerror(errno);
90 0 : throw std::runtime_error(str.str());
91 : }
92 : }
93 : }
94 0 : }
95 :
96 : /// Convert a string from encoding to another.
97 : std::string
98 0 : IConv::convert(const std::string& text)
99 : {
100 0 : if (!cd)
101 : {
102 0 : return text;
103 : }
104 : else
105 : {
106 0 : size_t inbytesleft = text.size();
107 0 : size_t outbytesleft = 4*inbytesleft; // Worst case scenario: ASCII -> UTF-32?
108 :
109 : // We try to avoid to much copying around, so we write directly into
110 : // a std::string
111 0 : const char* inbuf = &text[0];
112 0 : std::string result(outbytesleft, 'X');
113 0 : char* outbuf = &result[0];
114 :
115 : // Try to convert the text.
116 0 : size_t ret = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
117 0 : if (ret == static_cast<size_t>(-1))
118 : {
119 0 : if (errno == EILSEQ || errno == EINVAL)
120 : { // invalid multibyte sequence
121 0 : iconv(cd, nullptr, nullptr, nullptr, nullptr); // reset state
122 :
123 : // FIXME: Could try to skip the invalid byte and continue
124 0 : log_error << "error: tinygettext:iconv: invalid multibyte sequence in: \"" << text << "\"" << std::endl;
125 : }
126 0 : else if (errno == E2BIG)
127 : { // output buffer to small
128 0 : assert(false && "tinygettext/iconv.cpp: E2BIG: This should never be reached");
129 : }
130 0 : else if (errno == EBADF)
131 : {
132 0 : assert(false && "tinygettext/iconv.cpp: EBADF: This should never be reached");
133 : }
134 : else
135 : {
136 0 : assert(false && "tinygettext/iconv.cpp: <unknown>: This should never be reached");
137 : }
138 : }
139 :
140 0 : result.resize(4*text.size() - outbytesleft);
141 :
142 0 : return result;
143 : }
144 : }
145 :
146 3 : } // namespace tinygettext
147 :
148 : /* EOF */
|