The Tcpdump Group git mirrors - libpcap/blob - charconv.c
Add support for UTF-8 strings on Windows.
[libpcap] / charconv.c
1 /* -*- Mode: c; tab-width: 8; indent-tabs-mode: 1; c-basic-offset: 8; -*- */
2 /*
3 * Copyright (c) 1993, 1994, 1995, 1996, 1997
4 * The Regents of the University of California. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by the Computer Systems
17 * Engineering Group at Lawrence Berkeley Laboratory.
18 * 4. Neither the name of the University nor of the Laboratory may be used
19 * to endorse or promote products derived from this software without
20 * specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #ifdef _WIN32
36 #include <windows.h>
37
38 #include "charconv.h"
39
40 wchar_t *
41 cp_to_utf_16le(UINT codepage, const char *cp_string, DWORD flags)
42 {
43 int utf16le_len;
44 wchar_t *utf16le_string;
45
46 /*
47 * Map from the specified code page to UTF-16LE.
48 * First, find out how big a buffer we'll need.
49 */
50 utf16le_len = MultiByteToWideChar(codepage, flags, cp_string, -1,
51 NULL, 0);
52 if (utf16le_len == 0) {
53 /*
54 * Error. Fail with EINVAL.
55 */
56 errno = EINVAL;
57 return (NULL);
58 }
59
60 /*
61 * Now attempt to allocate a buffer for that.
62 */
63 utf16le_string = malloc(utf16le_len * sizeof (wchar_t));
64 if (utf16le_string == NULL) {
65 /*
66 * Not enough memory; assume errno has been
67 * set, and fail.
68 */
69 return (NULL);
70 }
71
72 /*
73 * Now convert.
74 */
75 utf16le_len = MultiByteToWideChar(codepage, flags, cp_string, -1,
76 utf16le_string, utf16le_len);
77 if (utf16le_len == 0) {
78 /*
79 * Error. Fail with EINVAL.
80 * XXX - should this ever happen, given that
81 * we already ran the string through
82 * MultiByteToWideChar() to find out how big
83 * a buffer we needed?
84 */
85 free(utf16le_string);
86 errno = EINVAL;
87 return (NULL);
88 }
89 return (utf16le_string);
90 }
91
92 char *
93 utf_16le_to_cp(UINT codepage, const wchar_t *utf16le_string)
94 {
95 int cp_len;
96 char *cp_string;
97
98 /*
99 * Map from UTF-16LE to the specified code page.
100 * First, find out how big a buffer we'll need.
101 * We convert composite characters to precomposed characters,
102 * as that's what Windows expects.
103 */
104 cp_len = WideCharToMultiByte(codepage, WC_COMPOSITECHECK,
105 utf16le_string, -1, NULL, 0, NULL, NULL);
106 if (cp_len == 0) {
107 /*
108 * Error. Fail with EINVAL.
109 */
110 errno = EINVAL;
111 return (NULL);
112 }
113
114 /*
115 * Now attempt to allocate a buffer for that.
116 */
117 cp_string = malloc(cp_len * sizeof (char));
118 if (cp_string == NULL) {
119 /*
120 * Not enough memory; assume errno has been
121 * set, and fail.
122 */
123 return (NULL);
124 }
125
126 /*
127 * Now convert.
128 */
129 cp_len = WideCharToMultiByte(codepage, WC_COMPOSITECHECK,
130 utf16le_string, -1, cp_string, cp_len, NULL, NULL);
131 if (cp_len == 0) {
132 /*
133 * Error. Fail with EINVAL.
134 * XXX - should this ever happen, given that
135 * we already ran the string through
136 * WideCharToMultiByte() to find out how big
137 * a buffer we needed?
138 */
139 free(cp_string);
140 errno = EINVAL;
141 return (NULL);
142 }
143 return (cp_string);
144 }
145
146 /*
147 * Convert an error message string from UTF-8 to the local code page, as
148 * best we can.
149 *
150 * The buffer is assumed to be PCAP_ERRBUF_SIZE bytes long; we truncate
151 * if it doesn't fit.
152 */
153 void
154 utf_8_to_acp_truncated(char *errbuf)
155 {
156 wchar_t *utf_16_errbuf;
157 int retval;
158 DWORD err;
159
160 /*
161 * Do this by converting to UTF-16LE and then to the local
162 * code page. That means we get to use Microsoft's
163 * conversion routines, rather than having to understand
164 * all the code pages ourselves, *and* that this routine
165 * can convert in place.
166 */
167
168 /*
169 * Map from UTF-8 to UTF-16LE.
170 * First, find out how big a buffer we'll need.
171 * Convert any invalid characters to REPLACEMENT CHARACTER.
172 */
173 utf_16_errbuf = cp_to_utf_16le(CP_UTF8, errbuf, 0);
174 if (utf_16_errbuf == NULL) {
175 /*
176 * Error. Give up.
177 */
178 snprintf(errbuf, PCAP_ERRBUF_SIZE,
179 "Can't convert error string to the local code page");
180 return;
181 }
182
183 /*
184 * Now, convert that to the local code page.
185 * Use the current thread's code page. For unconvertable
186 * characters, let it pick the "best fit" character.
187 *
188 * XXX - we'd like some way to do what utf_16le_to_utf_8_truncated()
189 * does if the buffer isn't big enough, but we don't want to have
190 * to handle all local code pages ourselves; doing so requires
191 * knowledge of all those code pages, including knowledge of how
192 * characters are formed in thoe code pages so that we can avoid
193 * cutting a multi-byte character into pieces.
194 *
195 * Converting to an un-truncated string using Windows APIs, and
196 * then copying to the buffer, still requires knowledge of how
197 * characters are formed in the target code page.
198 */
199 retval = WideCharToMultiByte(CP_THREAD_ACP, 0, utf_16_errbuf, -1,
200 errbuf, PCAP_ERRBUF_SIZE, NULL, NULL);
201 if (retval == 0) {
202 err = GetLastError();
203 free(utf_16_errbuf);
204 if (err == ERROR_INSUFFICIENT_BUFFER)
205 snprintf(errbuf, PCAP_ERRBUF_SIZE,
206 "The error string, in the local code page, didn't fit in the buffer");
207 else
208 snprintf(errbuf, PCAP_ERRBUF_SIZE,
209 "Can't convert error string to the local code page");
210 return;
211 }
212 free(utf_16_errbuf);
213 }
214 #endif