]> The Tcpdump Group git mirrors - libpcap/blobdiff - fmtutils.c
CI: Call print_so_deps() on rpcapd in remote enabled build
[libpcap] / fmtutils.c
index f1a8907327deb5e34083110d9ec0a838e2d0150a..a5a4fe62f34e2c241f9042bb91a540b6e23f5508 100644 (file)
@@ -35,9 +35,7 @@
  * Utilities for message formatting used both by libpcap and rpcapd.
  */
 
-#ifdef HAVE_CONFIG_H
 #include <config.h>
-#endif
 
 #include "ftmacros.h"
 
 #include <string.h>
 #include <errno.h>
 
-#include <pcap/pcap.h>
+#include "pcap-int.h"
 
 #include "portability.h"
 
 #include "fmtutils.h"
 
+#ifdef _WIN32
+#include "charconv.h"
+#endif
+
+/*
+ * Set the encoding.
+ */
+#ifdef _WIN32
+/*
+ * True if we should use UTF-8.
+ */
+static int use_utf_8;
+
+void
+pcapint_fmt_set_encoding(unsigned int opts)
+{
+       /*
+        * Any value other than PCAP_CHAR_ENC_UTF_8 leaves the
+        * current setting untouched.
+        */
+       if (opts != PCAP_CHAR_ENC_UTF_8)
+               return;
+
+       use_utf_8 = 1;
+}
+#else
+void
+pcapint_fmt_set_encoding(unsigned int opts _U_)
+{
+       /* No encoding state is kept in non-Windows builds. */
+}
+#endif
+
+#ifdef _WIN32
+/*
+ * Convert a null-terminated UTF-16LE string to UTF-8, putting it into
+ * a buffer starting at the specified location and stopping if we go
+ * past the specified size.  This will only put out complete UTF-8
+ * sequences.
+ *
+ * We do this ourselves because Microsoft doesn't offer a "convert and
+ * stop at a UTF-8 character boundary if we run out of space" routine.
+ */
+/*
+ * Classification helpers for UTF-16 code units, plus the computation
+ * of the code point encoded by a leading/trailing surrogate pair.
+ */
+#define IS_LEADING_SURROGATE(c) \
+       ((c) >= 0xd800 && (c) < 0xdc00)
+#define IS_TRAILING_SURROGATE(c) \
+       ((c) >= 0xdc00 && (c) < 0xe000)
+#define SURROGATE_VALUE(leading, trailing) \
+       (((((leading) - 0xd800) << 10) | ((trailing) - 0xdc00)) + 0x10000)
+
+/* U+FFFD, substituted for unpaired surrogates. */
+#define REPLACEMENT_CHARACTER  0x0FFFD
+
+/*
+ * utf_16 is the null-terminated input string; utf_8 is the output
+ * buffer and utf_8_len is its size in bytes, including the room
+ * needed for the terminating '\0'.
+ *
+ * Unpaired surrogates are replaced with U+FFFD.  Output stops before
+ * the first character that would not fit along with a terminating
+ * '\0', so the buffer never ends in a partial UTF-8 sequence.
+ *
+ * Returns a pointer to the terminating '\0' written to the buffer,
+ * or utf_8 itself, with nothing written, if utf_8_len is 0.
+ */
+static char *
+utf_16le_to_utf_8_truncated(const wchar_t *utf_16, char *utf_8,
+    size_t utf_8_len)
+{
+       wchar_t c, c2;
+       uint32_t uc;
+
+       if (utf_8_len == 0) {
+               /*
+                * Not even enough room for a trailing '\0'.
+                * Don't put anything into the buffer.
+                */
+               return (utf_8);
+       }
+
+       while ((c = *utf_16++) != '\0') {
+               if (IS_LEADING_SURROGATE(c)) {
+                       /*
+                        * Leading surrogate.  Must be followed by
+                        * a trailing surrogate.
+                        */
+                       c2 = *utf_16;
+                       if (c2 == '\0') {
+                               /*
+                                * Oops, string ends with a lead
+                                * surrogate.  Try to drop in
+                                * a REPLACEMENT CHARACTER, and
+                                * don't move the string pointer,
+                                * so on the next trip through
+                                * the loop we grab the terminating
+                                * '\0' and quit.
+                                */
+                               uc = REPLACEMENT_CHARACTER;
+                       } else {
+                               /*
+                                * OK, we can consume this 2-octet
+                                * value.
+                                */
+                               utf_16++;
+                               if (IS_TRAILING_SURROGATE(c2)) {
+                                       /*
+                                        * Trailing surrogate.
+                                        * This calculation will,
+                                        * for c being a leading
+                                        * surrogate and c2 being
+                                        * a trailing surrogate,
+                                        * produce a value between
+                                        * 0x10000 and 0x10ffff,
+                                        * so it's always going to be
+                                        * a valid Unicode code point.
+                                        */
+                                       uc = SURROGATE_VALUE(c, c2);
+                               } else {
+                                       /*
+                                        * Not a trailing surrogate;
+                                        * try to drop in a
+                                        * REPLACEMENT CHARACTER.
+                                        */
+                                       uc = REPLACEMENT_CHARACTER;
+                               }
+                       }
+               } else {
+                       /*
+                        * Not a leading surrogate.
+                        */
+                       if (IS_TRAILING_SURROGATE(c)) {
+                               /*
+                                * Trailing surrogate without
+                                * a preceding leading surrogate.
+                                * Try to drop in a REPLACEMENT
+                                * CHARACTER.
+                                */
+                               uc = REPLACEMENT_CHARACTER;
+                       } else {
+                               /*
+                                * This is a valid BMP character;
+                                * drop it in.
+                                */
+                               uc = c;
+                       }
+               }
+
+               /*
+                * OK, uc is a valid Unicode character; how
+                * many bytes worth of UTF-8 does it require?
+                */
+               if (uc < 0x0080) {
+                       /* 1 byte. */
+                       if (utf_8_len < 2) {
+                               /*
+                                * Not enough room for that byte
+                                * plus a trailing '\0'.
+                                */
+                               break;
+                       }
+                       *utf_8++ = (char)uc;
+                       utf_8_len--;
+               } else if (uc < 0x0800) {
+                       /* 2 bytes. */
+                       if (utf_8_len < 3) {
+                               /*
+                                * Not enough room for those bytes
+                                * plus a trailing '\0'.
+                                */
+                               break;
+                       }
+                       *utf_8++ = (char)(((uc >> 6) & 0x3F) | 0xC0);
+                       *utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
+                       utf_8_len -= 2;
+               } else if (uc < 0x010000) {
+                       /* 3 bytes. */
+                       if (utf_8_len < 4) {
+                               /*
+                                * Not enough room for those bytes
+                                * plus a trailing '\0'.
+                                */
+                               break;
+                       }
+                       *utf_8++ = (char)(((uc >> 12) & 0x0F) | 0xE0);
+                       *utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
+                       *utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
+                       utf_8_len -= 3;
+               } else {
+                       /* 4 bytes. */
+                       if (utf_8_len < 5) {
+                               /*
+                                * Not enough room for those bytes
+                                * plus a trailing '\0'.
+                                */
+                               break;
+                       }
+                       /*
+                        * The lead byte of a 4-byte sequence carries
+                        * 3 bits of the code point; code points at
+                        * or above 0x100000 need the top one.
+                        */
+                       *utf_8++ = (char)(((uc >> 18) & 0x07) | 0xF0);
+                       *utf_8++ = (char)(((uc >> 12) & 0x3F) | 0x80);
+                       *utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
+                       *utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
+                       /*
+                        * Four bytes were written, so account for
+                        * all four; otherwise the space check for
+                        * later characters and for the trailing
+                        * '\0' would be too generous.
+                        */
+                       utf_8_len -= 4;
+               }
+       }
+
+       /*
+        * OK, we have enough room for (at least) a trailing '\0'.
+        * (We started out with enough room, thanks to the test
+        * for a zero-length buffer at the beginning, and if
+        * there wasn't enough room for any character we wanted
+        * to put into the buffer *plus* a trailing '\0',
+        * we'd have quit before putting it into the buffer,
+        * and thus would have left enough room for the trailing
+        * '\0'.)
+        *
+        * Drop it in.
+        */
+       *utf_8 = '\0';
+
+       /*
+        * Return a pointer to the terminating '\0', in case we
+        * want to drop something in after that.
+        */
+       return (utf_8);
+}
+#endif /* _WIN32 */
+
 /*
  * Generate an error message based on a format, arguments, and an
  * errno, with a message for the errno after the formatted output.
  */
 void
-pcap_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
+pcapint_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
     const char *fmt, ...)
 {
        va_list ap;
+
+       /*
+        * Hand the variable arguments to the va_list-based variant,
+        * which does the actual formatting.
+        */
+       va_start(ap, fmt);
+       pcapint_vfmt_errmsg_for_errno(errbuf, errbuflen, errnum, fmt, ap);
+       va_end(ap);
+}
+
+void
+pcapint_vfmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
+    const char *fmt, va_list ap)
+{
        size_t msglen;
        char *p;
        size_t errbuflen_remaining;
-#if defined(HAVE_STRERROR_S)
-       errno_t err;
-#elif defined(HAVE_STRERROR_R)
-       int err;
-#endif
 
-       va_start(ap, fmt);
-       pcap_vsnprintf(errbuf, errbuflen, fmt, ap);
-       va_end(ap);
+       (void)vsnprintf(errbuf, errbuflen, fmt, ap);
        msglen = strlen(errbuf);
 
        /*
@@ -89,43 +298,144 @@ pcap_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
        *p++ = ':';
        *p++ = ' ';
        *p = '\0';
-       msglen += 2;
        errbuflen_remaining -= 2;
 
        /*
         * Now append the string for the error code.
         */
-#if defined(HAVE_STRERROR_S)
-       err = strerror_s(p, errbuflen_remaining, errnum);
+#if defined(HAVE__WCSERROR_S)
+       /*
+        * We have a Windows-style _wcserror_s().
+        * Generate a UTF-16LE error message.
+        */
+       wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
+       errno_t err = _wcserror_s(utf_16_errbuf, PCAP_ERRBUF_SIZE, errnum);
        if (err != 0) {
                /*
                 * It doesn't appear to be documented anywhere obvious
-                * what the error returns from strerror_s().
+                * what the error returns from _wcserror_s().
                 */
-               pcap_snprintf(p, errbuflen_remaining, "Error %d", errnum);
+               snprintf(p, errbuflen_remaining, "Error %d", errnum);
+               return;
        }
-#elif defined(HAVE_STRERROR_R)
-       err = strerror_r(errnum, p, errbuflen_remaining);
-       if (err == EINVAL) {
-               /*
-                * UNIX 03 says this isn't guaranteed to produce a
-                * fallback error message.
-                */
-               pcap_snprintf(p, errbuflen_remaining, "Unknown error: %d",
-                   errnum);
-       } else if (err == ERANGE) {
+
+       /*
+        * Now convert it from UTF-16LE to UTF-8, dropping it in the
+        * remaining space in the buffer, and truncating it - cleanly,
+        * on a UTF-8 character boundary - if it doesn't fit.
+        */
+       utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);
+
+       /*
+        * Now, if we're not in UTF-8 mode, convert errbuf to the
+        * local code page.
+        */
+       if (!use_utf_8)
+               utf_8_to_acp_truncated(errbuf);
+#else
+       /*
+        * Either Windows without _wcserror_s() or not Windows.  Let pcap_strerror()
+        * solve the non-UTF-16 part of this problem space.
+        */
+       snprintf(p, errbuflen_remaining, "%s", pcap_strerror(errnum));
+#endif
+}
+
+#ifdef _WIN32
+/*
+ * Generate an error message based on a format, arguments, and a
+ * Win32 error, with a message for the Win32 error after the formatted output.
+ */
+void
+pcapint_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
+    const char *fmt, ...)
+{
+       va_list args;
+
+       /*
+        * Collect the variable arguments and let the va_list-based
+        * variant build the message.
+        */
+       va_start(args, fmt);
+       pcapint_vfmt_errmsg_for_win32_err(errbuf, errbuflen, errnum, fmt, args);
+       va_end(args);
+}
+
+/*
+ * va_list variant of pcapint_fmt_errmsg_for_win32_err().
+ *
+ * Formats fmt/ap into errbuf (at most errbuflen bytes, including the
+ * terminating '\0'), then, if there is room, appends ": ", the system
+ * message text for errnum converted to UTF-8, and the error number in
+ * parentheses.  If UTF-8 mode has not been selected, the whole buffer
+ * is finally converted to the local code page.
+ */
+void
+pcapint_vfmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
+    const char *fmt, va_list ap)
+{
+       size_t msglen;
+       char *p;
+       size_t errbuflen_remaining;
+       DWORD retval;
+       wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
+       size_t utf_8_len;
+
+       vsnprintf(errbuf, errbuflen, fmt, ap);
+       msglen = strlen(errbuf);
+
+       /*
+        * Do we have enough space to append ": "?
+        * Including the terminating '\0', that's 3 bytes.
+        */
+       if (msglen + 3 > errbuflen) {
+               /* No - just give them what we've produced. */
+               return;
+       }
+       p = errbuf + msglen;
+       errbuflen_remaining = errbuflen - msglen;
+       *p++ = ':';
+       *p++ = ' ';
+       *p = '\0';
+       msglen += 2;
+       errbuflen_remaining -= 2;
+
+       /*
+        * Now append the string for the error code.
+        *
+        * XXX - what language ID to use?
+        *
+        * For UN*Xes, pcap_strerror() may or may not return localized
+        * strings.
+        *
+        * We currently don't have localized messages for libpcap, but
+        * we might want to do so.  On the other hand, if most of these
+        * messages are going to be read by libpcap developers and
+        * perhaps by developers of libpcap-based applications, English
+        * might be a better choice, so the developer doesn't have to
+        * get the message translated if it's in a language they don't
+        * happen to understand.
+        */
+       retval = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_MAX_WIDTH_MASK,
+           NULL, errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
+           utf_16_errbuf, PCAP_ERRBUF_SIZE, NULL);
+       if (retval == 0) {
                /*
-                * UNIX 03 says this isn't guaranteed to produce a
-                * fallback error message.
+                * Failed.
                 */
-               pcap_snprintf(p, errbuflen_remaining,
-                   "Message for error %d is too long", errnum);
+               snprintf(p, errbuflen_remaining,
+                   "Couldn't get error message for error (%lu)", errnum);
+               return;
        }
-#else
+
        /*
-        * We have neither strerror_s() nor strerror_r(), so we're
-        * stuck with using pcap_strerror().
+        * Now convert it from UTF-16LE to UTF-8.
         */
-       pcap_snprintf(p, errbuflen_remaining, "%s", pcap_strerror(errnum));
-#endif
+       p = utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);
+
+       /*
+        * Now append the error number, if it fits.
+        *
+        * The converted message starts at errbuf + msglen (msglen
+        * already includes the ": " separator) and p now points at
+        * its terminating '\0', so the difference is the number of
+        * UTF-8 bytes the conversion produced.  Measuring from
+        * errbuf instead would subtract the prefix from the
+        * remaining space a second time.
+        */
+       utf_8_len = (size_t)(p - (errbuf + msglen));
+       errbuflen_remaining -= utf_8_len;
+       if (utf_8_len == 0) {
+               /* The message was empty. */
+               snprintf(p, errbuflen_remaining, "(%lu)", errnum);
+       } else
+               snprintf(p, errbuflen_remaining, " (%lu)", errnum);
+
+       /*
+        * Now, if we're not in UTF-8 mode, convert errbuf to the
+        * local code page.
+        */
+       if (!use_utf_8)
+               utf_8_to_acp_truncated(errbuf);
 }
+#endif