The Tcpdump Group git mirrors - libpcap/blob - fmtutils.c

   1 /*
   2  * Copyright (c) 1993, 1994, 1995, 1996, 1997, 1998
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 3. All advertising materials mentioning features or use of this software
  14  *    must display the following acknowledgement:
  15  *      This product includes software developed by the Computer Systems
  16  *      Engineering Group at Lawrence Berkeley Laboratory.
  17  * 4. Neither the name of the University nor of the Laboratory may be used
  18  *    to endorse or promote products derived from this software without
  19  *    specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  31  * SUCH DAMAGE.
  32  */
  33
  34 /*
  35  * Utilities for message formatting used both by libpcap and rpcapd.
  36  */
  37
  38 #include <config.h>
  39
  40 #include "ftmacros.h"
  41
  42 #include <stddef.h>
  43 #include <stdarg.h>
  44 #include <stdio.h>
  45 #include <string.h>
  46 #include <errno.h>
  47
  48 #include "pcap-int.h"
  49
  50 #include "portability.h"
  51
  52 #include "fmtutils.h"
  53
  54 #ifdef _WIN32
  55 #include "charconv.h"
  56 #endif
  57
  58 /*
  59  * Set the encoding.
  60  */
  61 #ifdef _WIN32
  62 /*
  63  * True if we should use UTF-8.
  64  */
  65 static int use_utf_8;
  66
  67 void
  68 pcapint_fmt_set_encoding(unsigned int opts)
  69 {
  70         if (opts == PCAP_CHAR_ENC_UTF_8)
  71                 use_utf_8 = 1;
  72 }
  73 #else
  74 void
  75 pcapint_fmt_set_encoding(unsigned int opts _U_)
  76 {
  77         /*
  78          * Nothing to do here.
  79          */
  80 }
  81 #endif
  82
  83 #ifdef _WIN32
  84 /*
  85  * Convert a null-terminated UTF-16LE string to UTF-8, putting it into
  86  * a buffer starting at the specified location and stopping if we go
  87  * past the specified size.  This will only put out complete UTF-8
  88  * sequences.
  89  *
  90  * We do this ourselves because Microsoft doesn't offer a "convert and
  91  * stop at a UTF-8 character boundary if we run out of space" routine.
  92  */
  93 #define IS_LEADING_SURROGATE(c) \
  94         ((c) >= 0xd800 && (c) < 0xdc00)
  95 #define IS_TRAILING_SURROGATE(c) \
  96         ((c) >= 0xdc00 && (c) < 0xe000)
  97 #define SURROGATE_VALUE(leading, trailing) \
  98         (((((leading) - 0xd800) << 10) | ((trailing) - 0xdc00)) + 0x10000)
  99 #define REPLACEMENT_CHARACTER   0x0FFFD
 100
 101 static char *
 102 utf_16le_to_utf_8_truncated(const wchar_t *utf_16, char *utf_8,
 103     size_t utf_8_len)
 104 {
 105         wchar_t c, c2;
 106         uint32_t uc;
 107
 108         if (utf_8_len == 0) {
 109                 /*
 110                  * Not even enough room for a trailing '\0'.
 111                  * Don't put anything into the buffer.
 112                  */
 113                 return (utf_8);
 114         }
 115
 116         while ((c = *utf_16++) != '\0') {
 117                 if (IS_LEADING_SURROGATE(c)) {
 118                         /*
 119                          * Leading surrogate.  Must be followed by
 120                          * a trailing surrogate.
 121                          */
 122                         c2 = *utf_16;
 123                         if (c2 == '\0') {
 124                                 /*
 125                                  * Oops, string ends with a lead
 126                                  * surrogate.  Try to drop in
 127                                  * a REPLACEMENT CHARACTER, and
 128                                  * don't move the string pointer,
 129                                  * so on the next trip through
 130                                  * the loop we grab the terminating
 131                                  * '\0' and quit.
 132                                  */
 133                                 uc = REPLACEMENT_CHARACTER;
 134                         } else {
 135                                 /*
 136                                  * OK, we can consume this 2-octet
 137                                  * value.
 138                                  */
 139                                 utf_16++;
 140                                 if (IS_TRAILING_SURROGATE(c2)) {
 141                                         /*
 142                                          * Trailing surrogate.
 143                                          * This calculation will,
 144                                          * for c being a leading
 145                                          * surrogate and c2 being
 146                                          * a trailing surrogate,
 147                                          * produce a value between
 148                                          * 0x100000 and 0x10ffff,
 149                                          * so it's always going to be
 150                                          * a valid Unicode code point.
 151                                          */
 152                                         uc = SURROGATE_VALUE(c, c2);
 153                                 } else {
 154                                         /*
 155                                          * Not a trailing surrogate;
 156                                          * try to drop in a
 157                                          * REPLACEMENT CHARACTER.
 158                                          */
 159                                         uc = REPLACEMENT_CHARACTER;
 160                                 }
 161                         }
 162                 } else {
 163                         /*
 164                          * Not a leading surrogate.
 165                          */
 166                         if (IS_TRAILING_SURROGATE(c)) {
 167                                 /*
 168                                  * Trailing surrogate without
 169                                  * a preceding leading surrogate.
 170                                  * Try to drop in a REPLACEMENT
 171                                  * CHARACTER.
 172                                  */
 173                                 uc = REPLACEMENT_CHARACTER;
 174                         } else {
 175                                 /*
 176                                  * This is a valid BMP character;
 177                                  * drop it in.
 178                                  */
 179                                 uc = c;
 180                         }
 181                 }
 182
 183                 /*
 184                  * OK, uc is a valid Unicode character; how
 185                  * many bytes worth of UTF-8 does it require?
 186                  */
 187                 if (uc < 0x0080) {
 188                         /* 1 byte. */
 189                         if (utf_8_len < 2) {
 190                                 /*
 191                                  * Not enough room for that byte
 192                                  * plus a trailing '\0'.
 193                                  */
 194                                 break;
 195                         }
 196                         *utf_8++ = (char)uc;
 197                         utf_8_len--;
 198                 } else if (uc < 0x0800) {
 199                         /* 2 bytes. */
 200                         if (utf_8_len < 3) {
 201                                 /*
 202                                  * Not enough room for those bytes
 203                                  * plus a trailing '\0'.
 204                                  */
 205                                 break;
 206                         }
 207                         *utf_8++ = ((uc >> 6) & 0x3F) | 0xC0;
 208                         *utf_8++ = ((uc >> 0) & 0x3F) | 0x80;
 209                         utf_8_len -= 2;
 210                 } else if (uc < 0x010000) {
 211                         /* 3 bytes. */
 212                         if (utf_8_len < 4) {
 213                                 /*
 214                                  * Not enough room for those bytes
 215                                  * plus a trailing '\0'.
 216                                  */
 217                                 break;
 218                         }
 219                         *utf_8++ = ((uc >> 12) & 0x0F) | 0xE0;
 220                         *utf_8++ = ((uc >> 6) & 0x3F) | 0x80;
 221                         *utf_8++ = ((uc >> 0) & 0x3F) | 0x80;
 222                         utf_8_len -= 3;
 223                 } else {
 224                         /* 4 bytes. */
 225                         if (utf_8_len < 5) {
 226                                 /*
 227                                  * Not enough room for those bytes
 228                                  * plus a trailing '\0'.
 229                                  */
 230                                 break;
 231                         }
 232                         *utf_8++ = ((uc >> 18) & 0x03) | 0xF0;
 233                         *utf_8++ = ((uc >> 12) & 0x3F) | 0x80;
 234                         *utf_8++ = ((uc >> 6) & 0x3F) | 0x80;
 235                         *utf_8++ = ((uc >> 0) & 0x3F) | 0x80;
 236                         utf_8_len -= 3;
 237                 }
 238         }
 239
 240         /*
 241          * OK, we have enough room for (at least) a trailing '\0'.
 242          * (We started out with enough room, thanks to the test
 243          * for a zero-length buffer at the beginning, and if
 244          * there wasn't enough room for any character we wanted
 245          * to put into the buffer *plus* a trailing '\0',
 246          * we'd have quit before putting it into the buffer,
 247          * and thus would have left enough room for the trailing
 248          * '\0'.)
 249          *
 250          * Drop it in.
 251          */
 252         *utf_8 = '\0';
 253
 254         /*
 255          * Return a pointer to the terminating '\0', in case we
 256          * want to drop something in after that.
 257          */
 258         return (utf_8);
 259 }
 260 #endif /* _WIN32 */
 261
 262 /*
 263  * Generate an error message based on a format, arguments, and an
 264  * errno, with a message for the errno after the formatted output.
 265  */
 266 void
 267 pcapint_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
 268     const char *fmt, ...)
 269 {
 270         va_list ap;
 271
 272         va_start(ap, fmt);
 273         pcapint_vfmt_errmsg_for_errno(errbuf, errbuflen, errnum, fmt, ap);
 274         va_end(ap);
 275 }
 276
 277 void
 278 pcapint_vfmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
 279     const char *fmt, va_list ap)
 280 {
 281         size_t msglen;
 282         char *p;
 283         size_t errbuflen_remaining;
 284
 285         (void)vsnprintf(errbuf, errbuflen, fmt, ap);
 286         msglen = strlen(errbuf);
 287
 288         /*
 289          * Do we have enough space to append ": "?
 290          * Including the terminating '\0', that's 3 bytes.
 291          */
 292         if (msglen + 3 > errbuflen) {
 293                 /* No - just give them what we've produced. */
 294                 return;
 295         }
 296         p = errbuf + msglen;
 297         errbuflen_remaining = errbuflen - msglen;
 298         *p++ = ':';
 299         *p++ = ' ';
 300         *p = '\0';
 301         errbuflen_remaining -= 2;
 302
 303         /*
 304          * Now append the string for the error code.
 305          */
 306 #if defined(HAVE__WCSERROR_S)
 307         /*
 308          * We have a Windows-style _wcserror_s().
 309          * Generate a UTF-16LE error message.
 310          */
 311         wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
 312         errno_t err = _wcserror_s(utf_16_errbuf, PCAP_ERRBUF_SIZE, errnum);
 313         if (err != 0) {
 314                 /*
 315                  * It doesn't appear to be documented anywhere obvious
 316                  * what the error returns from _wcserror_s().
 317                  */
 318                 snprintf(p, errbuflen_remaining, "Error %d", errnum);
 319                 return;
 320         }
 321
 322         /*
 323          * Now convert it from UTF-16LE to UTF-8, dropping it in the
 324          * remaining space in the buffer, and truncating it - cleanly,
 325          * on a UTF-8 character boundary - if it doesn't fit.
 326          */
 327         utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);
 328
 329         /*
 330          * Now, if we're not in UTF-8 mode, convert errbuf to the
 331          * local code page.
 332          */
 333         if (!use_utf_8)
 334                 utf_8_to_acp_truncated(errbuf);
 335 #else
 336         /*
 337          * Either Windows without _wcserror_s() or not Windows.  Let pcap_strerror()
 338          * solve the non-UTF-16 part of this problem space.
 339          */
 340         snprintf(p, errbuflen_remaining, "%s", pcap_strerror(errnum));
 341 #endif
 342 }
 343
 344 #ifdef _WIN32
 345 /*
 346  * Generate an error message based on a format, arguments, and a
 347  * Win32 error, with a message for the Win32 error after the formatted output.
 348  */
 349 void
 350 pcapint_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
 351     const char *fmt, ...)
 352 {
 353         va_list ap;
 354
 355         va_start(ap, fmt);
 356         pcapint_vfmt_errmsg_for_win32_err(errbuf, errbuflen, errnum, fmt, ap);
 357         va_end(ap);
 358 }
 359
 360 void
 361 pcapint_vfmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
 362     const char *fmt, va_list ap)
 363 {
 364         size_t msglen;
 365         char *p;
 366         size_t errbuflen_remaining;
 367         DWORD retval;
 368         wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
 369         size_t utf_8_len;
 370
 371         vsnprintf(errbuf, errbuflen, fmt, ap);
 372         msglen = strlen(errbuf);
 373
 374         /*
 375          * Do we have enough space to append ": "?
 376          * Including the terminating '\0', that's 3 bytes.
 377          */
 378         if (msglen + 3 > errbuflen) {
 379                 /* No - just give them what we've produced. */
 380                 return;
 381         }
 382         p = errbuf + msglen;
 383         errbuflen_remaining = errbuflen - msglen;
 384         *p++ = ':';
 385         *p++ = ' ';
 386         *p = '\0';
 387         msglen += 2;
 388         errbuflen_remaining -= 2;
 389
 390         /*
 391          * Now append the string for the error code.
 392          *
 393          * XXX - what language ID to use?
 394          *
 395          * For UN*Xes, pcap_strerror() may or may not return localized
 396          * strings.
 397          *
 398          * We currently don't have localized messages for libpcap, but
 399          * we might want to do so.  On the other hand, if most of these
 400          * messages are going to be read by libpcap developers and
 401          * perhaps by developers of libpcap-based applications, English
 402          * might be a better choice, so the developer doesn't have to
 403          * get the message translated if it's in a language they don't
 404          * happen to understand.
 405          */
 406         retval = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_MAX_WIDTH_MASK,
 407             NULL, errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
 408             utf_16_errbuf, PCAP_ERRBUF_SIZE, NULL);
 409         if (retval == 0) {
 410                 /*
 411                  * Failed.
 412                  */
 413                 snprintf(p, errbuflen_remaining,
 414                     "Couldn't get error message for error (%lu)", errnum);
 415                 return;
 416         }
 417
 418         /*
 419          * Now convert it from UTF-16LE to UTF-8.
 420          */
 421         p = utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);
 422
 423         /*
 424          * Now append the error number, if it fits.
 425          */
 426         utf_8_len = p - errbuf;
 427         errbuflen_remaining -= utf_8_len;
 428         if (utf_8_len == 0) {
 429                 /* The message was empty. */
 430                 snprintf(p, errbuflen_remaining, "(%lu)", errnum);
 431         } else
 432                 snprintf(p, errbuflen_remaining, " (%lu)", errnum);
 433
 434         /*
 435          * Now, if we're not in UTF-8 mode, convert errbuf to the
 436          * local code page.
 437          */
 438         if (!use_utf_8)
 439                 utf_8_to_acp_truncated(errbuf);
 440 }
 441 #endif