2 * Copyright (c) 1993, 1994, 1995, 1996, 1997, 1998
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the Computer Systems
16 * Engineering Group at Lawrence Berkeley Laboratory.
17 * 4. Neither the name of the University nor of the Laboratory may be used
18 * to endorse or promote products derived from this software without
19 * specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * Utilities for message formatting used both by libpcap and rpcapd.
52 #include "portability.h"
65 * True if we should use UTF-8.
70 pcap_fmt_set_encoding(unsigned int opts
)
72 if (opts
== PCAP_CHAR_ENC_UTF_8
)
77 pcap_fmt_set_encoding(unsigned int opts _U_
)
87 * Convert a null-terminated UTF-16LE string to UTF-8, putting it into
88 * a buffer starting at the specified location and stopping if we go
89 * past the specified size. This will only put out complete UTF-8
92 * We do this ourselves because Microsoft doesn't offer a "convert and
93 * stop at a UTF-8 character boundary if we run out of space" routine.
#define IS_LEADING_SURROGATE(c) \
	((c) >= 0xd800 && (c) < 0xdc00)
#define IS_TRAILING_SURROGATE(c) \
	((c) >= 0xdc00 && (c) < 0xe000)
#define SURROGATE_VALUE(leading, trailing) \
	(((((leading) - 0xd800) << 10) | ((trailing) - 0xdc00)) + 0x10000)
#define REPLACEMENT_CHARACTER	0x0FFFD

/*
 * Convert the null-terminated UTF-16 string "utf_16" to UTF-8, storing
 * the result in "utf_8", a buffer of "utf_8_len" bytes.
 *
 * Only complete UTF-8 sequences are stored: if the next character,
 * plus the terminating '\0', doesn't fit in what's left of the buffer,
 * conversion stops at that point.  Unpaired surrogates are converted
 * to U+FFFD REPLACEMENT CHARACTER.
 *
 * If "utf_8_len" is 0, nothing is stored, not even a '\0', and "utf_8"
 * is returned.  Otherwise the output is always null-terminated, and a
 * pointer to the terminating '\0' is returned, in case the caller
 * wants to drop something in after that.
 */
static char *
utf_16le_to_utf_8_truncated(const wchar_t *utf_16, char *utf_8,
    size_t utf_8_len)
{
	wchar_t c, c2;
	unsigned long uc;	/* needs to hold values up to 0x10FFFF */

	if (utf_8_len == 0) {
		/*
		 * Not even enough room for a trailing '\0'.
		 * Don't put anything into the buffer.
		 */
		return (utf_8);
	}

	while ((c = *utf_16++) != '\0') {
		if (IS_LEADING_SURROGATE(c)) {
			/*
			 * Leading surrogate.  Must be followed by
			 * a trailing surrogate.
			 */
			c2 = *utf_16;
			if (c2 == '\0') {
				/*
				 * Oops, string ends with a lead
				 * surrogate.  Try to drop in
				 * a REPLACEMENT CHARACTER, and
				 * don't move the string pointer,
				 * so on the next trip through
				 * the loop we grab the terminating
				 * '\0' and quit.
				 */
				uc = REPLACEMENT_CHARACTER;
			} else {
				/*
				 * OK, we can consume this 2-octet
				 * value.
				 */
				utf_16++;
				if (IS_TRAILING_SURROGATE(c2)) {
					/*
					 * Trailing surrogate.
					 * This calculation will,
					 * for c being a leading
					 * surrogate and c2 being
					 * a trailing surrogate,
					 * produce a value between
					 * 0x10000 and 0x10ffff,
					 * so it's always going to be
					 * a valid Unicode code point.
					 */
					uc = (unsigned long)SURROGATE_VALUE(c, c2);
				} else {
					/*
					 * Not a trailing surrogate;
					 * try to drop in a
					 * REPLACEMENT CHARACTER.
					 */
					uc = REPLACEMENT_CHARACTER;
				}
			}
		} else {
			/*
			 * Not a leading surrogate.
			 */
			if (IS_TRAILING_SURROGATE(c)) {
				/*
				 * Trailing surrogate without
				 * a preceding leading surrogate.
				 * Try to drop in a REPLACEMENT
				 * CHARACTER.
				 */
				uc = REPLACEMENT_CHARACTER;
			} else {
				/*
				 * This is a valid BMP character;
				 * drop it in.
				 */
				uc = (unsigned long)c;
			}
		}

		/*
		 * OK, uc is a valid Unicode character; how
		 * many bytes worth of UTF-8 does it require?
		 */
		if (uc < 0x0080) {
			/* 1 byte. */
			if (utf_8_len < 2) {
				/*
				 * Not enough room for that byte
				 * plus a trailing '\0'.
				 */
				break;
			}
			*utf_8++ = (char)uc;
			utf_8_len -= 1;
		} else if (uc < 0x0800) {
			/* 2 bytes. */
			if (utf_8_len < 3) {
				/*
				 * Not enough room for those bytes
				 * plus a trailing '\0'.
				 */
				break;
			}
			*utf_8++ = (char)(((uc >> 6) & 0x1F) | 0xC0);
			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
			utf_8_len -= 2;
		} else if (uc < 0x010000) {
			/* 3 bytes. */
			if (utf_8_len < 4) {
				/*
				 * Not enough room for those bytes
				 * plus a trailing '\0'.
				 */
				break;
			}
			*utf_8++ = (char)(((uc >> 12) & 0x0F) | 0xE0);
			*utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
			utf_8_len -= 3;
		} else {
			/* 4 bytes. */
			if (utf_8_len < 5) {
				/*
				 * Not enough room for those bytes
				 * plus a trailing '\0'.
				 */
				break;
			}
			/*
			 * The lead byte of a 4-byte sequence carries
			 * the top *three* bits of the code point; for
			 * U+100000 through U+10FFFF, uc >> 18 is 4,
			 * so a 2-bit mask would lose that bit and
			 * produce a 0xF0 lead byte rather than 0xF4.
			 */
			*utf_8++ = (char)(((uc >> 18) & 0x07) | 0xF0);
			*utf_8++ = (char)(((uc >> 12) & 0x3F) | 0x80);
			*utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
			utf_8_len -= 4;
		}
	}

	/*
	 * OK, we have enough room for (at least) a trailing '\0'.
	 * (We started out with enough room, thanks to the test
	 * for a zero-length buffer at the beginning, and if
	 * there wasn't enough room for any character we wanted
	 * to put into the buffer *plus* a trailing '\0',
	 * we'd have quit before putting it into the buffer,
	 * and thus would have left enough room for the trailing
	 * '\0'.)
	 */
	*utf_8 = '\0';

	/*
	 * Return a pointer to the terminating '\0', in case we
	 * want to drop something in after that.
	 */
	return (utf_8);
}
265 * Generate an error message based on a format, arguments, and an
266 * errno, with a message for the errno after the formatted output.
269 pcap_fmt_errmsg_for_errno(char *errbuf
, size_t errbuflen
, int errnum
,
270 const char *fmt
, ...)
275 pcap_vfmt_errmsg_for_errno(errbuf
, errbuflen
, errnum
, fmt
, ap
);
280 pcap_vfmt_errmsg_for_errno(char *errbuf
, size_t errbuflen
, int errnum
,
281 const char *fmt
, va_list ap
)
285 size_t errbuflen_remaining
;
287 (void)vsnprintf(errbuf
, errbuflen
, fmt
, ap
);
288 msglen
= strlen(errbuf
);
291 * Do we have enough space to append ": "?
292 * Including the terminating '\0', that's 3 bytes.
294 if (msglen
+ 3 > errbuflen
) {
295 /* No - just give them what we've produced. */
299 errbuflen_remaining
= errbuflen
- msglen
;
303 errbuflen_remaining
-= 2;
306 * Now append the string for the error code.
308 #if defined(HAVE__WCSERROR_S)
310 * We have a Windows-style _wcserror_s().
311 * Generate a UTF-16LE error message.
313 wchar_t utf_16_errbuf
[PCAP_ERRBUF_SIZE
];
314 errno_t err
= _wcserror_s(utf_16_errbuf
, PCAP_ERRBUF_SIZE
, errnum
);
317 * It doesn't appear to be documented anywhere obvious
318 * what error values _wcserror_s() can return.
320 snprintf(p
, errbuflen_remaining
, "Error %d", errnum
);
325 * Now convert it from UTF-16LE to UTF-8, dropping it in the
326 * remaining space in the buffer, and truncating it - cleanly,
327 * on a UTF-8 character boundary - if it doesn't fit.
329 utf_16le_to_utf_8_truncated(utf_16_errbuf
, p
, errbuflen_remaining
);
332 * Now, if we're not in UTF-8 mode, convert errbuf to the
336 utf_8_to_acp_truncated(errbuf
);
337 #elif defined(HAVE_GNU_STRERROR_R)
339 * We have a GNU-style strerror_r(), which is *not* guaranteed to
340 * do anything to the buffer handed to it, and which returns a
341 * pointer to the error string, which may or may not be in
344 * It is, however, guaranteed to succeed.
346 char strerror_buf
[PCAP_ERRBUF_SIZE
];
347 char *errstring
= strerror_r(errnum
, strerror_buf
, PCAP_ERRBUF_SIZE
);
348 snprintf(p
, errbuflen_remaining
, "%s", errstring
);
349 #elif defined(HAVE_POSIX_STRERROR_R)
351 * We have a POSIX-style strerror_r(), which is guaranteed to fill
352 * in the buffer, but is not guaranteed to succeed.
354 int err
= strerror_r(errnum
, p
, errbuflen_remaining
);
357 * UNIX 03 says this isn't guaranteed to produce a
358 * fallback error message.
360 snprintf(p
, errbuflen_remaining
, "Unknown error: %d",
362 } else if (err
== ERANGE
) {
364 * UNIX 03 says this isn't guaranteed to produce a
365 * fallback error message.
367 snprintf(p
, errbuflen_remaining
,
368 "Message for error %d is too long", errnum
);
372 * We have neither _wcserror_s() nor strerror_r(), so we're
373 * stuck with using pcap_strerror().
375 snprintf(p
, errbuflen_remaining
, "%s", pcap_strerror(errnum
));
381 * Generate an error message based on a format, arguments, and a
382 * Win32 error, with a message for the Win32 error after the formatted output.
385 pcap_fmt_errmsg_for_win32_err(char *errbuf
, size_t errbuflen
, DWORD errnum
,
386 const char *fmt
, ...)
391 pcap_vfmt_errmsg_for_win32_err(errbuf
, errbuflen
, errnum
, fmt
, ap
);
396 pcap_vfmt_errmsg_for_win32_err(char *errbuf
, size_t errbuflen
, DWORD errnum
,
397 const char *fmt
, va_list ap
)
401 size_t errbuflen_remaining
;
403 wchar_t utf_16_errbuf
[PCAP_ERRBUF_SIZE
];
406 vsnprintf(errbuf
, errbuflen
, fmt
, ap
);
407 msglen
= strlen(errbuf
);
410 * Do we have enough space to append ": "?
411 * Including the terminating '\0', that's 3 bytes.
413 if (msglen
+ 3 > errbuflen
) {
414 /* No - just give them what we've produced. */
418 errbuflen_remaining
= errbuflen
- msglen
;
423 errbuflen_remaining
-= 2;
426 * Now append the string for the error code.
428 * XXX - what language ID to use?
430 * For UN*Xes, pcap_strerror() may or may not return localized
433 * We currently don't have localized messages for libpcap, but
434 * we might want to do so. On the other hand, if most of these
435 * messages are going to be read by libpcap developers and
436 * perhaps by developers of libpcap-based applications, English
437 * might be a better choice, so the developer doesn't have to
438 * get the message translated if it's in a language they don't
439 * happen to understand.
441 retval
= FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM
|FORMAT_MESSAGE_IGNORE_INSERTS
|FORMAT_MESSAGE_MAX_WIDTH_MASK
,
442 NULL
, errnum
, MAKELANGID(LANG_NEUTRAL
, SUBLANG_DEFAULT
),
443 utf_16_errbuf
, PCAP_ERRBUF_SIZE
, NULL
);
448 snprintf(p
, errbuflen_remaining
,
449 "Couldn't get error message for error (%lu)", errnum
);
454 * Now convert it from UTF-16LE to UTF-8.
456 p
= utf_16le_to_utf_8_truncated(utf_16_errbuf
, p
, errbuflen_remaining
);
459 * Now append the error number, if it fits.
461 utf_8_len
= p
- errbuf
;
462 errbuflen_remaining
-= utf_8_len
;
463 if (utf_8_len
== 0) {
464 /* The message was empty. */
465 snprintf(p
, errbuflen_remaining
, "(%lu)", errnum
);
467 snprintf(p
, errbuflen_remaining
, " (%lu)", errnum
);
470 * Now, if we're not in UTF-8 mode, convert errbuf to the
474 utf_8_to_acp_truncated(errbuf
);