locale_conv.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549
  1. // wstring_convert implementation -*- C++ -*-
  2. // Copyright (C) 2015-2018 Free Software Foundation, Inc.
  3. //
  4. // This file is part of the GNU ISO C++ Library. This library is free
  5. // software; you can redistribute it and/or modify it under the
  6. // terms of the GNU General Public License as published by the
  7. // Free Software Foundation; either version 3, or (at your option)
  8. // any later version.
  9. // This library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. // Under Section 7 of GPL version 3, you are granted additional
  14. // permissions described in the GCC Runtime Library Exception, version
  15. // 3.1, as published by the Free Software Foundation.
  16. // You should have received a copy of the GNU General Public License and
  17. // a copy of the GCC Runtime Library Exception along with this program;
  18. // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. // <http://www.gnu.org/licenses/>.
  20. /** @file bits/locale_conv.h
  21. * This is an internal header file, included by other library headers.
  22. * Do not attempt to use it directly. @headername{locale}
  23. */
  24. #ifndef _LOCALE_CONV_H
  25. #define _LOCALE_CONV_H 1
  26. #if __cplusplus < 201103L
  27. # include <bits/c++0x_warning.h>
  28. #else
  29. #include <streambuf>
  30. #include <bits/stringfwd.h>
  31. #include <bits/allocator.h>
  32. #include <bits/codecvt.h>
  33. #include <bits/unique_ptr.h>
  34. namespace std _GLIBCXX_VISIBILITY(default)
  35. {
  36. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  37. /**
  38. * @addtogroup locales
  39. * @{
  40. */
  /**
   *  @brief  Convert a character range into a string via a codecvt member.
   *
   *  Invokes `(__cvt.*__fn)` on [__first, __last) and stores the converted
   *  characters in @p __outstr.  The output string is used as scratch
   *  space: it is enlarged before every call and the facet is called again
   *  while it reports codecvt_base::partial, input remains, and fewer than
   *  `max_length() + 1` output characters are still free.
   *
   *  @param __first   Start of the input characters.
   *  @param __last    End of the input characters.
   *  @param __outstr  Receives the result; its previous contents are replaced.
   *  @param __cvt     The facet doing the conversion.
   *  @param __state   Conversion state handed to (and updated by) the facet.
   *  @param __count   Set to the number of input characters consumed.
   *  @param __fn      Pointer to the facet member to call (in or out).
   *  @return  False if the facet reported codecvt_base::error, otherwise
   *           true (including when the conversion stopped as partial;
   *           inspect @p __count to see how much input was consumed).
   *
   *  On error @p __outstr holds exactly the characters converted before the
   *  failure, never the unused scratch space.
   */
  template<typename _OutStr, typename _InChar, typename _Codecvt,
           typename _State, typename _Fn>
    bool
    __do_str_codecvt(const _InChar* __first, const _InChar* __last,
                     _OutStr& __outstr, const _Codecvt& __cvt, _State& __state,
                     size_t& __count, _Fn __fn)
    {
      if (__first == __last)
        {
          __outstr.clear();
          __count = 0;
          return true;
        }

      size_t __outchars = 0;
      auto __next = __first;
      // One spare character per input element beyond the facet's maximum.
      const auto __maxlen = __cvt.max_length() + 1;

      codecvt_base::result __result;
      do
        {
          // Grow by the worst case for the input that is still unconverted.
          __outstr.resize(__outstr.size() + (__last - __next) * __maxlen);
          auto __outnext = &__outstr.front() + __outchars;
          auto const __outlast = &__outstr.back() + 1;
          __result = (__cvt.*__fn)(__state, __next, __last, __next,
                                   __outnext, __outlast, __outnext);
          __outchars = __outnext - &__outstr.front();
        }
      while (__result == codecvt_base::partial && __next != __last
             && (__outstr.size() - __outchars) < __maxlen);

      if (__result == codecvt_base::noconv)
        {
          // The facet did nothing, so the input is already the output.
          __outstr.assign(__first, __last);
          __count = __last - __first;
          return true;
        }

      // Drop the unused scratch space on every remaining path, so that a
      // failed conversion does not leave padding characters behind.
      __outstr.resize(__outchars);
      __count = __next - __first;
      return __result != codecvt_base::error;
    }
  // Convert narrow character string to wide.
  //
  // Decodes the bytes [__first, __last) with __cvt.in() into __outstr,
  // updating __state and storing the number of bytes consumed in __count.
  // Returns whatever __do_str_codecvt returns.
  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
    inline bool
    __str_codecvt_in(const char* __first, const char* __last,
                     basic_string<_CharT, _Traits, _Alloc>& __outstr,
                     const codecvt<_CharT, char, _State>& __cvt,
                     _State& __state, size_t& __count)
    {
      typedef codecvt<_CharT, char, _State> _Facet;

      // Spell out the full member signature of the decoding function.
      typedef codecvt_base::result
        (_Facet::*_InFn)(_State&, const char*, const char*, const char*&,
                         _CharT*, _CharT*, _CharT*&) const;

      const _InFn __decode = &_Facet::in;
      return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
                              __count, __decode);
    }
  // Convenience overload of __str_codecvt_in: starts from a
  // value-initialized conversion state and discards both the final state
  // and the count of consumed bytes.
  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
    inline bool
    __str_codecvt_in(const char* __first, const char* __last,
                     basic_string<_CharT, _Traits, _Alloc>& __outstr,
                     const codecvt<_CharT, char, _State>& __cvt)
    {
      size_t __consumed;        // written by the callee, unused here
      _State __initial = {};
      return __str_codecvt_in(__first, __last, __outstr, __cvt,
                              __initial, __consumed);
    }
  // Convert wide character string to narrow.
  //
  // Encodes the characters [__first, __last) with __cvt.out() into
  // __outstr, updating __state and storing the number of characters
  // consumed in __count.  Returns whatever __do_str_codecvt returns.
  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
    inline bool
    __str_codecvt_out(const _CharT* __first, const _CharT* __last,
                      basic_string<char, _Traits, _Alloc>& __outstr,
                      const codecvt<_CharT, char, _State>& __cvt,
                      _State& __state, size_t& __count)
    {
      typedef codecvt<_CharT, char, _State> _Facet;

      // Spell out the full member signature of the encoding function.
      typedef codecvt_base::result
        (_Facet::*_OutFn)(_State&, const _CharT*, const _CharT*,
                          const _CharT*&, char*, char*, char*&) const;

      const _OutFn __encode = &_Facet::out;
      return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
                              __count, __encode);
    }
  // Convenience overload of __str_codecvt_out: starts from a
  // value-initialized conversion state and discards both the final state
  // and the count of consumed characters.
  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
    inline bool
    __str_codecvt_out(const _CharT* __first, const _CharT* __last,
                      basic_string<char, _Traits, _Alloc>& __outstr,
                      const codecvt<_CharT, char, _State>& __cvt)
    {
      size_t __consumed;        // written by the callee, unused here
      _State __initial = {};
      return __str_codecvt_out(__first, __last, __outstr, __cvt,
                               __initial, __consumed);
    }
  140. #ifdef _GLIBCXX_USE_WCHAR_T
  141. _GLIBCXX_BEGIN_NAMESPACE_CXX11
  /// String conversions
  template<typename _Codecvt, typename _Elem = wchar_t,
           typename _Wide_alloc = allocator<_Elem>,
           typename _Byte_alloc = allocator<char>>
    class wstring_convert
    {
    public:
      using byte_string = basic_string<char, char_traits<char>, _Byte_alloc>;
      using wide_string
        = basic_string<_Elem, char_traits<_Elem>, _Wide_alloc>;
      using state_type = typename _Codecvt::state_type;
      using int_type = typename wide_string::traits_type::int_type;

      /** Default constructor.
       *
       * @param  __pcvt The facet to use for conversions.
       *
       * Takes ownership of @p __pcvt and will delete it in the destructor.
       * Throws std::logic_error (via __throw_logic_error) if @p __pcvt is
       * null.
       */
      explicit
      wstring_convert(_Codecvt* __pcvt = new _Codecvt()) : _M_cvt(__pcvt)
      {
        if (!_M_cvt)
          __throw_logic_error("wstring_convert");
      }

      /** Construct with an initial conversion state.
       *
       * @param  __pcvt The facet to use for conversions.
       * @param  __state Initial conversion state.
       *
       * Takes ownership of @p __pcvt and will delete it in the destructor.
       * The object's conversion state will persist between conversions.
       */
      wstring_convert(_Codecvt* __pcvt, state_type __state)
      : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true)
      {
        if (!_M_cvt)
          __throw_logic_error("wstring_convert");
      }

      /** Construct with error strings.
       *
       * @param  __byte_err A string to return on failed conversions.
       * @param  __wide_err A wide string to return on failed conversions.
       *
       * A new facet is allocated for the conversions.
       */
      explicit
      wstring_convert(const byte_string& __byte_err,
                      const wide_string& __wide_err = wide_string())
      : _M_cvt(new _Codecvt),
        _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err),
        _M_with_strings(true)
      {
        if (!_M_cvt)
          __throw_logic_error("wstring_convert");
      }

      ~wstring_convert() = default;

      // _GLIBCXX_RESOLVE_LIB_DEFECTS
      // 2176. Special members for wstring_convert and wbuffer_convert
      wstring_convert(const wstring_convert&) = delete;
      wstring_convert& operator=(const wstring_convert&) = delete;

      /// @{ Convert from bytes.

      /// Convert the single byte @p __byte.
      wide_string
      from_bytes(char __byte)
      {
        char __single[2] = { __byte };
        return from_bytes(__single, __single + 1);
      }

      /// Convert the null-terminated byte string at @p __ptr.
      wide_string
      from_bytes(const char* __ptr)
      {
        const char* const __end = __ptr + char_traits<char>::length(__ptr);
        return from_bytes(__ptr, __end);
      }

      /// Convert every byte of @p __str.
      wide_string
      from_bytes(const byte_string& __str)
      {
        auto __begin = __str.data();
        return from_bytes(__begin, __begin + __str.size());
      }

      /// Convert the bytes [__first, __last).  On failure returns the wide
      /// error string if one was supplied, otherwise throws range_error.
      wide_string
      from_bytes(const char* __first, const char* __last)
      {
        // Without a user-supplied state every conversion starts afresh.
        if (!_M_with_cvtstate)
          _M_state = state_type();

        wide_string __out{ _M_wide_err_string.get_allocator() };
        if (!__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state,
                              _M_count))
          {
            if (!_M_with_strings)
              __throw_range_error("wstring_convert::from_bytes");
            return _M_wide_err_string;
          }
        return __out;
      }
      /// @}

      /// @{ Convert to bytes.

      /// Convert the single character @p __wchar.
      byte_string
      to_bytes(_Elem __wchar)
      {
        _Elem __single[2] = { __wchar };
        return to_bytes(__single, __single + 1);
      }

      /// Convert the null-terminated wide string at @p __ptr.
      byte_string
      to_bytes(const _Elem* __ptr)
      {
        const _Elem* const __end
          = __ptr + wide_string::traits_type::length(__ptr);
        return to_bytes(__ptr, __end);
      }

      /// Convert every character of @p __wstr.
      byte_string
      to_bytes(const wide_string& __wstr)
      {
        auto __begin = __wstr.data();
        return to_bytes(__begin, __begin + __wstr.size());
      }

      /// Convert the characters [__first, __last).  On failure returns the
      /// byte error string if one was supplied, otherwise throws range_error.
      byte_string
      to_bytes(const _Elem* __first, const _Elem* __last)
      {
        // Without a user-supplied state every conversion starts afresh.
        if (!_M_with_cvtstate)
          _M_state = state_type();

        byte_string __out{ _M_byte_err_string.get_allocator() };
        if (!__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state,
                               _M_count))
          {
            if (!_M_with_strings)
              __throw_range_error("wstring_convert::to_bytes");
            return _M_byte_err_string;
          }
        return __out;
      }
      /// @}

      // _GLIBCXX_RESOLVE_LIB_DEFECTS
      // 2174. wstring_convert::converted() should be noexcept
      /// The number of elements successfully converted in the last conversion.
      size_t converted() const noexcept { return _M_count; }

      /// The final conversion state of the last conversion.
      state_type state() const { return _M_state; }

    private:
      unique_ptr<_Codecvt>      _M_cvt;                 // owned facet
      byte_string               _M_byte_err_string;     // to_bytes fallback
      wide_string               _M_wide_err_string;     // from_bytes fallback
      state_type                _M_state = state_type();
      size_t                    _M_count = 0;
      bool                      _M_with_cvtstate = false; // keep state between calls
      bool                      _M_with_strings = false;  // error strings supplied
    };
  276. _GLIBCXX_END_NAMESPACE_CXX11
  /// Buffer conversions
  ///
  /// A stream buffer of @c _Elem characters layered on top of a byte
  /// stream buffer.  Characters written to it are converted with
  /// @c _Codecvt::out and forwarded to the byte buffer; bytes read from the
  /// byte buffer are converted with @c _Codecvt::in.
  template<typename _Codecvt, typename _Elem = wchar_t,
           typename _Tr = char_traits<_Elem>>
    class wbuffer_convert : public basic_streambuf<_Elem, _Tr>
    {
      typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf;

    public:
      typedef typename _Codecvt::state_type state_type;

      /** Default constructor.
       *
       * @param  __bytebuf The underlying byte stream buffer.
       * @param  __pcvt    The facet to use for conversions.
       * @param  __state   Initial conversion state.
       *
       * Takes ownership of @p __pcvt and will delete it in the destructor.
       * Throws std::logic_error (via __throw_logic_error) if @p __pcvt is
       * null.
       */
      explicit
      wbuffer_convert(streambuf* __bytebuf = 0, _Codecvt* __pcvt = new _Codecvt,
                      state_type __state = state_type())
      : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state)
      {
        if (!_M_cvt)
          __throw_logic_error("wbuffer_convert");

        _M_always_noconv = _M_cvt->always_noconv();

        if (_M_buf)
          _M_init_areas();
      }

      ~wbuffer_convert() = default;

      // _GLIBCXX_RESOLVE_LIB_DEFECTS
      // 2176. Special members for wstring_convert and wbuffer_convert
      wbuffer_convert(const wbuffer_convert&) = delete;
      wbuffer_convert& operator=(const wbuffer_convert&) = delete;

      /// The underlying byte stream buffer (may be null).
      streambuf* rdbuf() const noexcept { return _M_buf; }

      /// Replace the underlying byte stream buffer and return the old one.
      streambuf*
      rdbuf(streambuf *__bytebuf) noexcept
      {
        auto __prev = _M_buf;
        _M_buf = __bytebuf;
        // An object constructed without a byte buffer never had its put
        // and get areas set up; without a put area nothing could ever be
        // buffered for output, so set them up on first attachment.
        if (_M_buf && !this->pbase())
          _M_init_areas();
        return __prev;
      }

      /// The conversion state following the last conversion.
      state_type state() const noexcept { return _M_state; }

    protected:
      /// Convert and write any pending output, then sync the byte buffer.
      /// Returns 0 on success and -1 on failure or if there is no byte buffer.
      int
      sync()
      {
        if (!_M_buf)
          return -1;
        // An empty put area is not a failure: only convert when there is
        // something pending (_M_conv_put reports "no progress" as false).
        if (this->pptr() != this->pbase() && !_M_conv_put())
          return -1;
        return _M_buf->pubsync() ? -1 : 0;
      }

      /// Flush the put area to make room, then store @p __out unless it is
      /// eof.  Returns eof if there is no byte buffer or the flush failed.
      typename _Wide_streambuf::int_type
      overflow(typename _Wide_streambuf::int_type __out)
      {
        if (!_M_buf || !_M_conv_put())
          return _Tr::eof();
        else if (!_Tr::eq_int_type(__out, _Tr::eof()))
          return this->sputc(__out);
        return _Tr::not_eof(__out);
      }

      /// Return the next readable character without consuming it, refilling
      /// the get area from the byte buffer when it is exhausted.
      typename _Wide_streambuf::int_type
      underflow()
      {
        if (!_M_buf)
          return _Tr::eof();

        if (this->gptr() < this->egptr() || _M_conv_get())
          return _Tr::to_int_type(*this->gptr());
        return _Tr::eof();
      }

      /// Copy @p __n characters from @p __s into the put area, flushing
      /// whenever it fills up.  Returns the number of characters accepted.
      streamsize
      xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n)
      {
        if (!_M_buf || __n == 0)
          return 0;
        streamsize __done = 0;
        do
          {
            auto __nn = std::min<streamsize>(this->epptr() - this->pptr(),
                                             __n - __done);
            _Tr::copy(this->pptr(), __s + __done, __nn);
            this->pbump(__nn);
            __done += __nn;
          } while (__done < __n && _M_conv_put());
        return __done;
      }

    private:
      // point the put area at _M_put_area and set up an empty get area
      // that starts after the space reserved for putback
      void
      _M_init_areas()
      {
        this->setp(_M_put_area, _M_put_area + _S_buffer_length);
        this->setg(_M_get_area + _S_putback_length,
                   _M_get_area + _S_putback_length,
                   _M_get_area + _S_putback_length);
      }

      // fill the get area from converted contents of the byte stream buffer
      //
      // NOTE(review): this returns true whenever the facet did not report
      // an error, even if no character was produced (e.g. only part of a
      // multibyte sequence was available); underflow() then reads *gptr()
      // with gptr() == egptr().  Confirm whether a retry loop is wanted.
      bool
      _M_conv_get()
      {
        // preserve up to _S_putback_length already-read characters
        const streamsize __pb1 = this->gptr() - this->eback();
        const streamsize __pb2 = _S_putback_length;
        const streamsize __npb = std::min(__pb1, __pb2);

        _Tr::move(_M_get_area + _S_putback_length - __npb,
                  this->gptr() - __npb, __npb);

        // read at least one byte, appended after any unconverted leftovers
        streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv;
        __nbytes = std::min(__nbytes, _M_buf->in_avail());
        if (__nbytes < 1)
          __nbytes = 1;
        __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
        if (__nbytes < 1)
          return false;
        __nbytes += _M_unconv;

        // convert _M_get_buf into _M_get_area

        _Elem* __outbuf = _M_get_area + _S_putback_length;
        _Elem* __outnext = __outbuf;
        const char* __bnext = _M_get_buf;

        codecvt_base::result __result;
        if (_M_always_noconv)
          __result = codecvt_base::noconv;
        else
          {
            _Elem* __outend = _M_get_area + _S_buffer_length;

            __result = _M_cvt->in(_M_state,
                                  __bnext, __bnext + __nbytes, __bnext,
                                  __outbuf, __outend, __outnext);
          }

        if (__result == codecvt_base::noconv)
          {
            // cast is safe because noconv means _Elem is same type as char
            auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
            _Tr::copy(__outbuf, __get_buf, __nbytes);
            _M_unconv = 0;
            // expose the copied characters; without this the get area
            // stays empty and only the first character is ever seen
            this->setg(__outbuf, __outbuf, __outbuf + __nbytes);
            return true;
          }

        // keep bytes the facet did not consume for the next refill
        if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
          char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);

        this->setg(__outbuf, __outbuf, __outnext);

        return __result != codecvt_base::error;
      }

      // unused
      // (selected when _Elem is not char, where noconv cannot apply)
      bool
      _M_put(...)
      { return false; }

      // write __n bytes to the byte stream buffer; false on a short write
      bool
      _M_put(const char* __p, streamsize __n)
      {
        if (_M_buf->sputn(__p, __n) < __n)
          return false;
        return true;
      }

      // write the unconverted characters [__from, pptr()) to the byte
      // stream buffer and, on success, mark the whole put area as empty
      bool
      _M_put_noconv(const _Elem* __from)
      {
        if (!_M_put(__from, static_cast<streamsize>(this->pptr() - __from)))
          return false;
        // everything before pptr() has now been written; rewind so the
        // same characters are not written again and there is room for more
        this->pbump(this->pbase() - this->pptr());
        return true;
      }

      // convert the put area and write to the byte stream buffer
      //
      // In the converting case the result is true only if at least one
      // pending character was consumed.
      bool
      _M_conv_put()
      {
        _Elem* const __first = this->pbase();
        const _Elem* const __last = this->pptr();

        if (_M_always_noconv)
          return _M_put_noconv(__first);

        char __outbuf[2 * _S_buffer_length];

        const _Elem* __next = __first;
        const _Elem* __start;
        do
          {
            __start = __next;
            char* __outnext = __outbuf;
            char* const __outlast = __outbuf + sizeof(__outbuf);
            auto __result = _M_cvt->out(_M_state, __next, __last, __next,
                                        __outnext, __outlast, __outnext);
            if (__result == codecvt_base::error)
              return false;
            else if (__result == codecvt_base::noconv)
              // only [__next, __last) is still unwritten at this point
              return _M_put_noconv(__next);

            if (!_M_put(__outbuf, __outnext - __outbuf))
              return false;
          }
        while (__next != __last && __next != __start);

        // slide any characters the facet could not consume to the front
        if (__next != __last)
          _Tr::move(__first, __next, __last - __next);

        this->pbump(__first - __next);
        return __next != __first;
      }

      streambuf*                _M_buf;         // underlying byte buffer
      unique_ptr<_Codecvt>      _M_cvt;         // owned conversion facet
      state_type                _M_state;       // current conversion state

      static const streamsize   _S_buffer_length = 32;
      static const streamsize   _S_putback_length = 3;
      _Elem                     _M_put_area[_S_buffer_length];
      _Elem                     _M_get_area[_S_buffer_length];
      streamsize                _M_unconv = 0;  // bytes left in _M_get_buf
      char                      _M_get_buf[_S_buffer_length-_S_putback_length];
      bool                      _M_always_noconv;
    };
  462. #endif // _GLIBCXX_USE_WCHAR_T
  463. /// @} group locales
  464. _GLIBCXX_END_NAMESPACE_VERSION
  465. } // namespace
  466. #endif // __cplusplus
  467. #endif /* _LOCALE_CONV_H */