The Tcpdump Group git mirrors - libpcap/blob - fmtutils.c

   1 /*
   2  * Copyright (c) 1993, 1994, 1995, 1996, 1997, 1998
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 3. All advertising materials mentioning features or use of this software
  14  *    must display the following acknowledgement:
  15  *      This product includes software developed by the Computer Systems
  16  *      Engineering Group at Lawrence Berkeley Laboratory.
  17  * 4. Neither the name of the University nor of the Laboratory may be used
  18  *    to endorse or promote products derived from this software without
  19  *    specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  31  * SUCH DAMAGE.
  32  */
  33
  34 /*
  35  * Utilities for message formatting used both by libpcap and rpcapd.
  36  */
  37
  38 #ifdef HAVE_CONFIG_H
  39 #include <config.h>
  40 #endif
  41
  42 #include "ftmacros.h"
  43
  44 #include <stddef.h>
  45 #include <stdarg.h>
  46 #include <stdio.h>
  47 #include <string.h>
  48 #include <errno.h>
  49
  50 #include "pcap-int.h"
  51
  52 #include "portability.h"
  53
  54 #include "fmtutils.h"
  55
  56 #ifdef _WIN32
  57 #include "charconv.h"
  58 #endif
  59
  60 /*
  61  * Set the encoding.
  62  */
  63 #ifdef _WIN32
  64 /*
  65  * True if we shouold use UTF-8.
  66  */
  67 static int use_utf_8;
  68
  69 void
  70 pcap_fmt_set_encoding(unsigned int opts)
  71 {
  72         if (opts == PCAP_CHAR_ENC_UTF_8)
  73                 use_utf_8 = 1;
  74 }
  75 #else
  76 void
  77 pcap_fmt_set_encoding(unsigned int opts _U_)
  78 {
  79         /*
  80          * Nothing to do here.
  81          */
  82 }
  83 #endif
  84
  85 #ifdef _WIN32
  86 /*
  87  * Convert a null-terminated UTF-16LE string to UTF-8, putting it into
  88  * a buffer starting at the specified location and stopping if we go
  89  * past the specified size.  This will only put out complete UTF-8
  90  * sequences.
  91  *
  92  * We do this ourselves because Microsoft doesn't offer a "convert and
  93  * stop at a UTF-8 character boundary if we run out of space" routine.
  94  */
  95 #define IS_LEADING_SURROGATE(c) \
  96         ((c) >= 0xd800 && (c) < 0xdc00)
  97 #define IS_TRAILING_SURROGATE(c) \
  98         ((c) >= 0xdc00 && (c) < 0xe000)
  99 #define SURROGATE_VALUE(leading, trailing) \
 100         (((((leading) - 0xd800) << 10) | ((trailing) - 0xdc00)) + 0x10000)
 101 #define REPLACEMENT_CHARACTER   0x0FFFD
 102
 103 static char *
 104 utf_16le_to_utf_8_truncated(const wchar_t *utf_16, char *utf_8,
 105     size_t utf_8_len)
 106 {
 107         wchar_t c, c2;
 108         uint32_t uc;
 109
 110         if (utf_8_len == 0) {
 111                 /*
 112                  * Not even enough room for a trailing '\0'.
 113                  * Don't put anything into the buffer.
 114                  */
 115                 return (utf_8);
 116         }
 117
 118         while ((c = *utf_16++) != '\0') {
 119                 if (IS_LEADING_SURROGATE(c)) {
 120                         /*
 121                          * Leading surrogate.  Must be followed by
 122                          * a trailing surrogate.
 123                          */
 124                         c2 = *utf_16;
 125                         if (c2 == '\0') {
 126                                 /*
 127                                  * Oops, string ends with a lead
 128                                  * surrogate.  Try to drop in
 129                                  * a REPLACEMENT CHARACTER, and
 130                                  * don't move the string pointer,
 131                                  * so on the next trip through
 132                                  * the loop we grab the terminating
 133                                  * '\0' and quit.
 134                                  */
 135                                 uc = REPLACEMENT_CHARACTER;
 136                         } else {
 137                                 /*
 138                                  * OK, we can consume this 2-octet
 139                                  * value.
 140                                  */
 141                                 utf_16++;
 142                                 if (IS_TRAILING_SURROGATE(c2)) {
 143                                         /*
 144                                          * Trailing surrogate.
 145                                          * This calculation will,
 146                                          * for c being a leading
 147                                          * surrogate and c2 being
 148                                          * a trailing surrogate,
 149                                          * produce a value between
 150                                          * 0x100000 and 0x10ffff,
 151                                          * so it's always going to be
 152                                          * a valid Unicode code point.
 153                                          */
 154                                         uc = SURROGATE_VALUE(c, c2);
 155                                 } else {
 156                                         /*
 157                                          * Not a trailing surroage;
 158                                          * try to drop in a
 159                                          * REPLACEMENT CHARACTER.
 160                                          */
 161                                         uc = REPLACEMENT_CHARACTER;
 162                                 }
 163                         }
 164                 } else {
 165                         /*
 166                          * Not a leading surrogate.
 167                          */
 168                         if (IS_TRAILING_SURROGATE(c)) {
 169                                 /*
 170                                  * Trailing surrogate without
 171                                  * a preceding leading surrogate.
 172                                  * Try to drop in a REPLACEMENT
 173                                  * CHARACTER.
 174                                  */
 175                                 uc = REPLACEMENT_CHARACTER;
 176                         } else {
 177                                 /*
 178                                  * This is a valid BMP character;
 179                                  * drop it in.
 180                                  */
 181                                 uc = c;
 182                         }
 183                 }
 184
 185                 /*
 186                  * OK, uc is a valid Unicode character; how
 187                  * many bytes worth of UTF-8 does it require?
 188                  */
 189                 if (uc < 0x0080) {
 190                         /* 1 byte. */
 191                         if (utf_8_len < 2) {
 192                                 /*
 193                                  * Not enough room for that byte
 194                                  * plus a trailing '\0'.
 195                                  */
 196                                 break;
 197                         }
 198                         *utf_8++ = (char)uc;
 199                         utf_8_len--;
 200                 } else if (uc < 0x0800) {
 201                         /* 2 bytes. */
 202                         if (utf_8_len < 3) {
 203                                 /*
 204                                  * Not enough room for those bytes
 205                                  * plus a trailing '\0'.
 206                                  */
 207                                 break;
 208                         }
 209                         *utf_8++ = ((uc >> 6) & 0x3F) | 0xC0;
 210                         *utf_8++ = ((uc >> 0) & 0x3F) | 0x80;
 211                         utf_8_len -= 2;
 212                 } else if (uc < 0x010000) {
 213                         /* 3 bytes. */
 214                         if (utf_8_len < 4) {
 215                                 /*
 216                                  * Not enough room for those bytes
 217                                  * plus a trailing '\0'.
 218                                  */
 219                                 break;
 220                         }
 221                         *utf_8++ = ((uc >> 12) & 0x0F) | 0xE0;
 222                         *utf_8++ = ((uc >> 6) & 0x3F) | 0x80;
 223                         *utf_8++ = ((uc >> 0) & 0x3F) | 0x80;
 224                         utf_8_len -= 3;
 225                 } else {
 226                         /* 4 bytes. */
 227                         if (utf_8_len < 5) {
 228                                 /*
 229                                  * Not enough room for those bytes
 230                                  * plus a trailing '\0'.
 231                                  */
 232                                 break;
 233                         }
 234                         *utf_8++ = ((uc >> 18) & 0x03) | 0xF0;
 235                         *utf_8++ = ((uc >> 12) & 0x3F) | 0x80;
 236                         *utf_8++ = ((uc >> 6) & 0x3F) | 0x80;
 237                         *utf_8++ = ((uc >> 0) & 0x3F) | 0x80;
 238                         utf_8_len -= 3;
 239                 }
 240         }
 241
 242         /*
 243          * OK, we have enough room for (at least) a trailing '\0'.
 244          * (We started out with enough room, thanks to the test
 245          * for a zero-length buffer at the beginning, and if
 246          * there wasn't enough room for any character we wanted
 247          * to put into the buffer *plus* a trailing '\0',
 248          * we'd have quit before putting it into the buffer,
 249          * and thus would have left enough room for the trailing
 250          * '\0'.)
 251          *
 252          * Drop it in.
 253          */
 254         *utf_8 = '\0';
 255
 256         /*
 257          * Return a pointer to the terminating '\0', in case we
 258          * want to drop something in after that.
 259          */
 260         return (utf_8);
 261 }
 262 #endif /* _WIN32 */
 263
 264 /*
 265  * Generate an error message based on a format, arguments, and an
 266  * errno, with a message for the errno after the formatted output.
 267  */
 268 void
 269 pcap_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
 270     const char *fmt, ...)
 271 {
 272         va_list ap;
 273
 274         va_start(ap, fmt);
 275         pcap_vfmt_errmsg_for_errno(errbuf, errbuflen, errnum, fmt, ap);
 276         va_end(ap);
 277 }
 278
 279 void
 280 pcap_vfmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
 281     const char *fmt, va_list ap)
 282 {
 283         size_t msglen;
 284         char *p;
 285         size_t errbuflen_remaining;
 286
 287         (void)vsnprintf(errbuf, errbuflen, fmt, ap);
 288         msglen = strlen(errbuf);
 289
 290         /*
 291          * Do we have enough space to append ": "?
 292          * Including the terminating '\0', that's 3 bytes.
 293          */
 294         if (msglen + 3 > errbuflen) {
 295                 /* No - just give them what we've produced. */
 296                 return;
 297         }
 298         p = errbuf + msglen;
 299         errbuflen_remaining = errbuflen - msglen;
 300         *p++ = ':';
 301         *p++ = ' ';
 302         *p = '\0';
 303         errbuflen_remaining -= 2;
 304
 305         /*
 306          * Now append the string for the error code.
 307          */
 308 #if defined(HAVE__WCSERROR_S)
 309         /*
 310          * We have a Windows-style _wcserror_s().
 311          * Generate a UTF-16LE error message.
 312          */
 313         wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
 314         errno_t err = _wcserror_s(utf_16_errbuf, PCAP_ERRBUF_SIZE, errnum);
 315         if (err != 0) {
 316                 /*
 317                  * It doesn't appear to be documented anywhere obvious
 318                  * what the error returns from _wcserror_s().
 319                  */
 320                 snprintf(p, errbuflen_remaining, "Error %d", errnum);
 321                 return;
 322         }
 323
 324         /*
 325          * Now convert it from UTF-16LE to UTF-8, dropping it in the
 326          * remaining space in the buffer, and truncating it - cleanly,
 327          * on a UTF-8 character boundary - if it doesn't fit.
 328          */
 329         utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);
 330
 331         /*
 332          * Now, if we're not in UTF-8 mode, convert errbuf to the
 333          * local code page.
 334          */
 335         if (!use_utf_8)
 336                 utf_8_to_acp_truncated(errbuf);
 337 #elif defined(HAVE_GNU_STRERROR_R)
 338         /*
 339          * We have a GNU-style strerror_r(), which is *not* guaranteed to
 340          * do anything to the buffer handed to it, and which returns a
 341          * pointer to the error string, which may or may not be in
 342          * the buffer.
 343          *
 344          * It is, however, guaranteed to succeed.
 345          */
 346         char strerror_buf[PCAP_ERRBUF_SIZE];
 347         char *errstring = strerror_r(errnum, strerror_buf, PCAP_ERRBUF_SIZE);
 348         snprintf(p, errbuflen_remaining, "%s", errstring);
 349 #elif defined(HAVE_POSIX_STRERROR_R)
 350         /*
 351          * We have a POSIX-style strerror_r(), which is guaranteed to fill
 352          * in the buffer, but is not guaranteed to succeed.
 353          */
 354         int err = strerror_r(errnum, p, errbuflen_remaining);
 355         if (err == EINVAL) {
 356                 /*
 357                  * UNIX 03 says this isn't guaranteed to produce a
 358                  * fallback error message.
 359                  */
 360                 snprintf(p, errbuflen_remaining, "Unknown error: %d",
 361                     errnum);
 362         } else if (err == ERANGE) {
 363                 /*
 364                  * UNIX 03 says this isn't guaranteed to produce a
 365                  * fallback error message.
 366                  */
 367                 snprintf(p, errbuflen_remaining,
 368                     "Message for error %d is too long", errnum);
 369         }
 370 #else
 371         /*
 372          * We have neither _wcserror_s() nor strerror_r(), so we're
 373          * stuck with using pcap_strerror().
 374          */
 375         snprintf(p, errbuflen_remaining, "%s", pcap_strerror(errnum));
 376 #endif
 377 }
 378
 379 #ifdef _WIN32
 380 /*
 381  * Generate an error message based on a format, arguments, and a
 382  * Win32 error, with a message for the Win32 error after the formatted output.
 383  */
 384 void
 385 pcap_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
 386     const char *fmt, ...)
 387 {
 388         va_list ap;
 389
 390         va_start(ap, fmt);
 391         pcap_vfmt_errmsg_for_win32_err(errbuf, errbuflen, errnum, fmt, ap);
 392         va_end(ap);
 393 }
 394
 395 void
 396 pcap_vfmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
 397     const char *fmt, va_list ap)
 398 {
 399         size_t msglen;
 400         char *p;
 401         size_t errbuflen_remaining;
 402         DWORD retval;
 403         wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
 404         size_t utf_8_len;
 405
 406         vsnprintf(errbuf, errbuflen, fmt, ap);
 407         msglen = strlen(errbuf);
 408
 409         /*
 410          * Do we have enough space to append ": "?
 411          * Including the terminating '\0', that's 3 bytes.
 412          */
 413         if (msglen + 3 > errbuflen) {
 414                 /* No - just give them what we've produced. */
 415                 return;
 416         }
 417         p = errbuf + msglen;
 418         errbuflen_remaining = errbuflen - msglen;
 419         *p++ = ':';
 420         *p++ = ' ';
 421         *p = '\0';
 422         msglen += 2;
 423         errbuflen_remaining -= 2;
 424
 425         /*
 426          * Now append the string for the error code.
 427          *
 428          * XXX - what language ID to use?
 429          *
 430          * For UN*Xes, pcap_strerror() may or may not return localized
 431          * strings.
 432          *
 433          * We currently don't have localized messages for libpcap, but
 434          * we might want to do so.  On the other hand, if most of these
 435          * messages are going to be read by libpcap developers and
 436          * perhaps by developers of libpcap-based applications, English
 437          * might be a better choice, so the developer doesn't have to
 438          * get the message translated if it's in a language they don't
 439          * happen to understand.
 440          */
 441         retval = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_MAX_WIDTH_MASK,
 442             NULL, errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
 443             utf_16_errbuf, PCAP_ERRBUF_SIZE, NULL);
 444         if (retval == 0) {
 445                 /*
 446                  * Failed.
 447                  */
 448                 snprintf(p, errbuflen_remaining,
 449                     "Couldn't get error message for error (%lu)", errnum);
 450                 return;
 451         }
 452
 453         /*
 454          * Now convert it from UTF-16LE to UTF-8.
 455          */
 456         p = utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);
 457
 458         /*
 459          * Now append the error number, if it fits.
 460          */
 461         utf_8_len = p - errbuf;
 462         errbuflen_remaining -= utf_8_len;
 463         if (utf_8_len == 0) {
 464                 /* The message was empty. */
 465                 snprintf(p, errbuflen_remaining, "(%lu)", errnum);
 466         } else
 467                 snprintf(p, errbuflen_remaining, " (%lu)", errnum);
 468
 469         /*
 470          * Now, if we're not in UTF-8 mode, convert errbuf to the
 471          * local code page.
 472          */
 473         if (!use_utf_8)
 474                 utf_8_to_acp_truncated(errbuf);
 475 }
 476 #endif