Add support for UTF-8 strings on Windows.

author Guy Harris <[email protected]>

Sat, 11 Apr 2020 21:40:01 +0000 (14:40 -0700)

committer Guy Harris <[email protected]>

Sat, 11 Apr 2020 21:40:01 +0000 (14:40 -0700)
author Guy Harris <[email protected]>
Sat, 11 Apr 2020 21:40:01 +0000 (14:40 -0700)
committer Guy Harris <[email protected]>
Sat, 11 Apr 2020 21:40:01 +0000 (14:40 -0700)
diff --git a/CMakeLists.txt b/CMakeLists.txt

index eb531994a6cece561699c3696f217021a713b7de..c6f5f1cb78f255bf15d3ee7fdbf9fe1e977f422d 100644 (file)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -427,9 +427,9 @@ main(void)
      endif(NOT HAVE_GNU_STRERROR_R)
  else(HAVE_STRERROR_R)
      #
-    # We don't have strerror_r; do we have strerror_s?
+    # We don't have strerror_r; do we have _wcserror_s?
      #
-    check_function_exists(strerror_s HAVE_STRERROR_S)
+    check_function_exists(_wcserror_s HAVE__WCSERROR_S)
  endif(HAVE_STRERROR_R)
  
  #
@@ -1026,12 +1026,15 @@ set(PROJECT_SOURCE_LIST_C
  )
  
  if(WIN32)
+    #
+    # We add the character set conversion routines; they're Windows-only
+    # for now.
      #
      # We assume we don't have asprintf(), and provide an implementation
      # that uses _vscprintf() to determine how big the string needs to be.
      #
      set(PROJECT_SOURCE_LIST_C ${PROJECT_SOURCE_LIST_C}
-        missing/win_asprintf.c)
+        charconv.c missing/win_asprintf.c)
  else()
      if(NOT HAVE_ASPRINTF)
          set(PROJECT_SOURCE_LIST_C ${PROJECT_SOURCE_LIST_C} missing/asprintf.c)
@@ -1204,7 +1207,7 @@ message(STATUS "Packet capture mechanism type: ${PCAP_TYPE}")
  if(WIN32)
      if(PCAP_TYPE STREQUAL "npf")
          #
-        # Link with packet.dll before WinSock2.
+        # Link with packet.dll before Winsock2.
          #
          set(PCAP_LINK_LIBRARIES ${PACKET_LIBRARIES} ${PCAP_LINK_LIBRARIES})
      elseif(PCAP_TYPE STREQUAL "null")
@@ -2486,6 +2489,7 @@ set(MAN3PCAP_NOEXPAND
      pcap_get_required_select_timeout.3pcap
      pcap_get_selectable_fd.3pcap
      pcap_geterr.3pcap
+    pcap_init.3pcap
      pcap_inject.3pcap
      pcap_is_swapped.3pcap
      pcap_lib_version.3pcap
diff --git a/Makefile.in b/Makefile.in

index 9826db205fcf6dc09f985421d8872b08657ceb98..a67d6943321f09e091b6442f63fad28b3092efad 100644 (file)
--- a/Makefile.in
+++ b/Makefile.in
@@ -203,6 +203,7 @@ MAN3PCAP_NOEXPAND = \
         pcap_get_required_select_timeout.3pcap \
         pcap_get_selectable_fd.3pcap \
         pcap_geterr.3pcap \
+       pcap_init.3pcap \
         pcap_inject.3pcap \
         pcap_is_swapped.3pcap \
         pcap_lib_version.3pcap \
diff --git a/charconv.c b/charconv.c

new file mode 100644 (file)

index 0000000..ac6ddf1
--- /dev/null
+++ b/charconv.c
@@ -0,0 +1,214 @@
+/* -*- Mode: c; tab-width: 8; indent-tabs-mode: 1; c-basic-offset: 8; -*- */
+/*
+ * Copyright (c) 1993, 1994, 1995, 1996, 1997
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the Computer Systems
+ *     Engineering Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ *    to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef _WIN32
+#include <windows.h>
+
+#include "charconv.h"
+
+wchar_t *
+cp_to_utf_16le(UINT codepage, const char *cp_string, DWORD flags)
+{
+       int utf16le_len;
+       wchar_t *utf16le_string;
+
+       /*
+        * Map from the specified code page to UTF-16LE.
+        * First, find out how big a buffer we'll need.
+        */
+       utf16le_len = MultiByteToWideChar(codepage, flags, cp_string, -1,
+           NULL, 0);
+       if (utf16le_len == 0) {
+               /*
+                * Error.  Fail with EINVAL.
+                */
+               errno = EINVAL;
+               return (NULL);
+       }
+
+       /*
+        * Now attempt to allocate a buffer for that.
+        */
+       utf16le_string = malloc(utf16le_len * sizeof (wchar_t));
+       if (utf16le_string == NULL) {
+               /*
+                * Not enough memory; assume errno has been
+                * set, and fail.
+                */
+               return (NULL);
+       }
+
+       /*
+        * Now convert.
+        */
+       utf16le_len = MultiByteToWideChar(codepage, flags, cp_string, -1,
+           utf16le_string, utf16le_len);
+       if (utf16le_len == 0) {
+               /*
+                * Error.  Fail with EINVAL.
+                * XXX - should this ever happen, given that
+                * we already ran the string through
+                * MultiByteToWideChar() to find out how big
+                * a buffer we needed?
+                */
+               free(utf16le_string);
+               errno = EINVAL;
+               return (NULL);
+       }
+       return (utf16le_string);
+}
+
+char *
+utf_16le_to_cp(UINT codepage, const wchar_t *utf16le_string)
+{
+       int cp_len;
+       char *cp_string;
+
+       /*
+        * Map from UTF-16LE to the specified code page.
+        * First, find out how big a buffer we'll need.
+        * We convert composite characters to precomposed characters,
+        * as that's what Windows expects.
+        */
+       cp_len = WideCharToMultiByte(codepage, WC_COMPOSITECHECK,
+           utf16le_string, -1, NULL, 0, NULL, NULL);
+       if (cp_len == 0) {
+               /*
+                * Error.  Fail with EINVAL.
+                */
+               errno = EINVAL;
+               return (NULL);
+       }
+
+       /*
+        * Now attempt to allocate a buffer for that.
+        */
+       cp_string = malloc(cp_len * sizeof (char));
+       if (cp_string == NULL) {
+               /*
+                * Not enough memory; assume errno has been
+                * set, and fail.
+                */
+               return (NULL);
+       }
+
+       /*
+        * Now convert.
+        */
+       cp_len = WideCharToMultiByte(codepage, WC_COMPOSITECHECK,
+           utf16le_string, -1, cp_string, cp_len, NULL, NULL);
+       if (cp_len == 0) {
+               /*
+                * Error.  Fail with EINVAL.
+                * XXX - should this ever happen, given that
+                * we already ran the string through
+                * WideCharToMultiByte() to find out how big
+                * a buffer we needed?
+                */
+               free(cp_string);
+               errno = EINVAL;
+               return (NULL);
+       }
+       return (cp_string);
+}
+
+/*
+ * Convert an error message string from UTF-8 to the local code page, as
+ * best we can.
+ *
+ * The buffer is assumed to be PCAP_ERRBUF_SIZE bytes long; we truncate
+ * if it doesn't fit.
+ */
+void
+utf_8_to_acp_truncated(char *errbuf)
+{
+       wchar_t *utf_16_errbuf;
+       int retval;
+       DWORD err;
+
+       /*
+        * Do this by converting to UTF-16LE and then to the local
+        * code page.  That means we get to use Microsoft's
+        * conversion routines, rather than having to understand
+        * all the code pages ourselves, *and* that this routine
+        * can convert in place.
+        */
+
+       /*
+        * Map from UTF-8 to UTF-16LE.
+        * First, find out how big a buffer we'll need.
+        * Convert any invalid characters to REPLACEMENT CHARACTER.
+        */
+       utf_16_errbuf = cp_to_utf_16le(CP_UTF8, errbuf, 0);
+       if (utf_16_errbuf == NULL) {
+               /*
+                * Error.  Give up.
+                */
+               snprintf(errbuf, PCAP_ERRBUF_SIZE,
+                   "Can't convert error string to the local code page");
+               return;
+       }
+
+       /*
+        * Now, convert that to the local code page.
+        * Use the current thread's code page.  For unconvertible
+        * characters, let it pick the "best fit" character.
+        *
+        * XXX - we'd like some way to do what utf_16le_to_utf_8_truncated()
+        * does if the buffer isn't big enough, but we don't want to have
+        * to handle all local code pages ourselves; doing so requires
+        * knowledge of all those code pages, including knowledge of how
+        * characters are formed in those code pages so that we can avoid
+        * cutting a multi-byte character into pieces.
+        *
+        * Converting to an un-truncated string using Windows APIs, and
+        * then copying to the buffer, still requires knowledge of how
+        * characters are formed in the target code page.
+        */
+       retval = WideCharToMultiByte(CP_THREAD_ACP, 0, utf_16_errbuf, -1,
+           errbuf, PCAP_ERRBUF_SIZE, NULL, NULL);
+       if (retval == 0) {
+               err = GetLastError();
+               free(utf_16_errbuf);
+               if (err == ERROR_INSUFFICIENT_BUFFER)
+                       snprintf(errbuf, PCAP_ERRBUF_SIZE,
+                           "The error string, in the local code page, didn't fit in the buffer");
+               else
+                       snprintf(errbuf, PCAP_ERRBUF_SIZE,
+                           "Can't convert error string to the local code page");
+               return;
+       }
+       free(utf_16_errbuf);
+}
+#endif
diff --git a/charconv.h b/charconv.h

new file mode 100644 (file)

index 0000000..a37d424
--- /dev/null
+++ b/charconv.h
@@ -0,0 +1,44 @@
+/* -*- Mode: c; tab-width: 8; indent-tabs-mode: 1; c-basic-offset: 8; -*- */
+/*
+ * Copyright (c) 1993, 1994, 1995, 1996, 1997
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the Computer Systems
+ *     Engineering Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ *    to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef charonv_h
+#define charonv_h
+
+#ifdef _WIN32
+extern wchar_t *cp_to_utf_16le(UINT codepage, const char *cp_string, DWORD flags);
+extern char *utf_16le_to_cp(UINT codepage, const wchar_t *utf16le_string);
+extern void utf_8_to_acp_truncated(char *);
+#endif
+
+#endif
diff --git a/cmakeconfig.h.in b/cmakeconfig.h.in

index 73be7aa4eb478d78413ea44304d945196c35b72b..84fb42aee94d8dd3eb7488c463ccf35e31b26dda 100644 (file)
--- a/cmakeconfig.h.in
+++ b/cmakeconfig.h.in
@@ -180,9 +180,6 @@
  /* Define to 1 if you have the `strerror' function. */
  #cmakedefine HAVE_STRERROR 1
  
-/* Define to 1 if you have the `strerror_s' function. */
-#cmakedefine HAVE_STRERROR_S 1
-
  /* Define to 1 if you have the <strings.h> header file. */
  #cmakedefine HAVE_STRINGS_H 1
  
@@ -265,6 +262,9 @@
  /* Define to 1 if you have the `vsyslog' function. */
  #cmakedefine HAVE_VSYSLOG 1
  
+/* Define to 1 if you have the `_wcserror_s' function. */
+#cmakedefine HAVE__WCSERROR_S 1
+
  /* Define to 1 if you have the `PacketIsLoopbackAdapter' function. */
  #cmakedefine HAVE_PACKET_IS_LOOPBACK_ADAPTER 1
  
diff --git a/config.h.in b/config.h.in

index cbec492801f5f26873e2bf351b49192eebcc7ba7..93be6269ac28117f9dae5b6ce255433ac62fbd7e 100644 (file)
--- a/config.h.in
+++ b/config.h.in
@@ -192,9 +192,6 @@
  /* Define to 1 if you have the `strerror' function. */
  #undef HAVE_STRERROR
  
-/* Define to 1 if you have the `strerror_s' function. */
-#undef HAVE_STRERROR_S
-
  /* Define to 1 if you have the <strings.h> header file. */
  #undef HAVE_STRINGS_H
  
@@ -274,6 +271,9 @@
  /* Define to 1 if you have the `vsyslog' function. */
  #undef HAVE_VSYSLOG
  
+/* Define to 1 if you have the `_wcserror_s' function. */
+#undef HAVE__WCSERROR_S
+
  /* IPv6 */
  #undef INET6
  
diff --git a/configure b/configure

index a5b1084c8a0164e7d7ac7c93f9daf268c7380c5b..1cb1df1f1510cd8b290c969802ecb8a319b1bd53 100755 (executable)
--- a/configure
+++ b/configure
@@ -5358,14 +5358,14 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
  else
  
         #
-       # We don't have strerror_r; do we have strerror_s?
+       # We don't have strerror_r; do we have _wcserror_s?
         #
-       for ac_func in strerror_s
+       for ac_func in _wcserror_s
  do :
-  ac_fn_c_check_func "$LINENO" "strerror_s" "ac_cv_func_strerror_s"
-if test "x$ac_cv_func_strerror_s" = xyes; then :
+  ac_fn_c_check_func "$LINENO" "_wcserror_s" "ac_cv_func__wcserror_s"
+if test "x$ac_cv_func__wcserror_s" = xyes; then :
    cat >>confdefs.h <<_ACEOF
-#define HAVE_STRERROR_S 1
+#define HAVE__WCSERROR_S 1
  _ACEOF
  
  fi
diff --git a/configure.ac b/configure.ac

index d63ccc7a2b51e62894fcb9464ea7a1fe007cb21b..a396263c923eed88508dd48d9985a663fe0f9e87 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -133,9 +133,9 @@ main(void)
      ],
      [
         #
-       # We don't have strerror_r; do we have strerror_s?
+       # We don't have strerror_r; do we have _wcserror_s?
         #
-       AC_CHECK_FUNCS(strerror_s)
+       AC_CHECK_FUNCS(_wcserror_s)
      ])
  
  #
diff --git a/fmtutils.c b/fmtutils.c

index a4f59c2b2529840fa7d3b9793803691c661f6509..8f6921fbc0467ce7dfdb5c7c116e3f0c6a2037c9 100644 (file)
--- a/fmtutils.c
+++ b/fmtutils.c
@@ -47,12 +47,219 @@
  #include <string.h>
  #include <errno.h>
  
-#include <pcap/pcap.h>
+#include "pcap-int.h"
  
  #include "portability.h"
  
  #include "fmtutils.h"
  
+#ifdef _WIN32
+#include "charconv.h"
+#endif
+
+/*
+ * Set the encoding.
+ */
+#ifdef _WIN32
+/*
+ * True if we should use UTF-8.
+ */
+static int use_utf_8;
+
+void
+pcap_fmt_set_encoding(unsigned int opts)
+{
+       if (opts == PCAP_CHAR_ENC_UTF_8)
+               use_utf_8 = 1;
+}
+#else
+void
+pcap_fmt_set_encoding(unsigned int opts _U_)
+{
+       /*
+        * Nothing to do here.
+        */
+}
+#endif
+
+#ifdef _WIN32
+/*
+ * Convert a null-terminated UTF-16LE string to UTF-8, putting it into
+ * a buffer starting at the specified location and stopping if we go
+ * past the specified size.  This will only put out complete UTF-8
+ * sequences.
+ *
+ * We do this ourselves because Microsoft doesn't offer a "convert and
+ * stop at a UTF-8 character boundary if we run out of space" routine.
+ */
+#define IS_LEADING_SURROGATE(c) \
+       ((c) >= 0xd800 && (c) < 0xdc00)
+#define IS_TRAILING_SURROGATE(c) \
+       ((c) >= 0xdc00 && (c) < 0xe000)
+#define SURROGATE_VALUE(leading, trailing) \
+       (((((leading) - 0xd800) << 10) | ((trailing) - 0xdc00)) + 0x10000)
+#define REPLACEMENT_CHARACTER  0x0FFFD
+
+static char *
+utf_16le_to_utf_8_truncated(wchar_t *utf_16, char *utf_8, size_t utf_8_len)
+{
+       wchar_t c, c2;
+       uint32_t uc;
+
+       if (utf_8_len == 0) {
+               /*
+                * Not even enough room for a trailing '\0'.
+                * Don't put anything into the buffer.
+                */
+               return (utf_8);
+       }
+
+       while ((c = *utf_16++) != '\0') {
+               if (IS_LEADING_SURROGATE(c)) {
+                       /*
+                        * Leading surrogate.  Must be followed by
+                        * a trailing surrogate.
+                        */
+                       c2 = *utf_16;
+                       if (c2 == '\0') {
+                               /*
+                                * Oops, string ends with a lead
+                                * surrogate.  Try to drop in
+                                * a REPLACEMENT CHARACTER, and
+                                * don't move the string pointer,
+                                * so on the next trip through
+                                * the loop we grab the terminating
+                                * '\0' and quit.
+                                */
+                               uc = REPLACEMENT_CHARACTER;
+                       } else {
+                               /*
+                                * OK, we can consume this 2-octet
+                                * value.
+                                */
+                               utf_16++;
+                               if (IS_TRAILING_SURROGATE(c2)) {
+                                       /*
+                                        * Trailing surrogate.
+                                        * This calculation will,
+                                        * for c being a leading
+                                        * surrogate and c2 being
+                                        * a trailing surrogate,
+                                        * produce a value between
+                                        * 0x10000 and 0x10ffff,
+                                        * so it's always going to be
+                                        * a valid Unicode code point.
+                                        */
+                                       uc = SURROGATE_VALUE(c, c2);
+                               } else {
+                                       /*
+                                        * Not a trailing surrogate;
+                                        * try to drop in a
+                                        * REPLACEMENT CHARACTER.
+                                        */
+                                       uc = REPLACEMENT_CHARACTER;
+                               }
+                       }
+               } else {
+                       /*
+                        * Not a leading surrogate.
+                        */
+                       if (IS_TRAILING_SURROGATE(c)) {
+                               /*
+                                * Trailing surrogate without
+                                * a preceding leading surrogate.
+                                * Try to drop in a REPLACEMENT
+                                * CHARACTER.
+                                */
+                               uc = REPLACEMENT_CHARACTER;
+                       } else {
+                               /*
+                                * This is a valid BMP character;
+                                * drop it in.
+                                */
+                               uc = c;
+                       }
+               }
+
+               /*
+                * OK, uc is a valid Unicode character; how
+                * many bytes worth of UTF-8 does it require?
+                */
+               if (uc < 0x0080) {
+                       /* 1 byte. */
+                       if (utf_8_len < 2) {
+                               /*
+                                * Not enough room for that byte
+                                * plus a trailing '\0'.
+                                */
+                               break;
+                       }
+                       *utf_8++ = (char)uc;
+                       utf_8_len--;
+               } else if (uc < 0x0800) {
+                       /* 2 bytes. */
+                       if (utf_8_len < 3) {
+                               /*
+                                * Not enough room for those bytes
+                                * plus a trailing '\0'.
+                                */
+                               break;
+                       }
+                       *utf_8++ = ((uc >> 6) & 0x3F) | 0xC0;
+                       *utf_8++ = ((uc >> 0) & 0x3F) | 0x80;
+                       utf_8_len -= 2;
+               } else if (uc < 0x010000) {
+                       /* 3 bytes. */
+                       if (utf_8_len < 4) {
+                               /*
+                                * Not enough room for those bytes
+                                * plus a trailing '\0'.
+                                */
+                               break;
+                       }
+                       *utf_8++ = ((uc >> 12) & 0x0F) | 0xE0;
+                       *utf_8++ = ((uc >> 6) & 0x3F) | 0x80;
+                       *utf_8++ = ((uc >> 0) & 0x3F) | 0x80;
+                       utf_8_len -= 3;
+               } else {
+                       /* 4 bytes; the lead byte carries the top 3 bits. */
+                       if (utf_8_len < 5) {
+                               /*
+                                * Not enough room for those bytes
+                                * plus a trailing '\0'.
+                                */
+                               break;
+                       }
+                       *utf_8++ = ((uc >> 18) & 0x07) | 0xF0;
+                       *utf_8++ = ((uc >> 12) & 0x3F) | 0x80;
+                       *utf_8++ = ((uc >> 6) & 0x3F) | 0x80;
+                       *utf_8++ = ((uc >> 0) & 0x3F) | 0x80;
+                       utf_8_len -= 4;
+               }
+       }
+
+       /*
+        * OK, we have enough room for (at least) a trailing '\0'.
+        * (We started out with enough room, thanks to the test
+        * for a zero-length buffer at the beginning, and if
+        * there wasn't enough room for any character we wanted
+        * to put into the buffer *plus* a trailing '\0',
+        * we'd have quit before putting it into the buffer,
+        * and thus would have left enough room for the trailing
+        * '\0'.)
+        *
+        * Drop it in.
+        */
+       *utf_8 = '\0';
+
+       /*
+        * Return a pointer to the terminating '\0', in case we
+        * want to drop something in after that.
+        */
+       return (utf_8);
+}
+#endif /* _WIN32 */
+
  /*
   * Generate an error message based on a format, arguments, and an
   * errno, with a message for the errno after the formatted output.
@@ -89,18 +296,35 @@ pcap_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
         /*
          * Now append the string for the error code.
          */
-#if defined(HAVE_STRERROR_S)
+#if defined(HAVE__WCSERROR_S)
         /*
-        * We have a Windows-style strerror_s().
+        * We have a Windows-style _wcserror_s().
+        * Generate a UTF-16LE error message.
          */
-       errno_t err = strerror_s(p, errbuflen_remaining, errnum);
+       wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
+       errno_t err = _wcserror_s(utf_16_errbuf, sizeof (utf_16_errbuf), errnum);
         if (err != 0) {
                 /*
                  * It doesn't appear to be documented anywhere obvious
-                * what the error returns from strerror_s().
+                * what the error returns from _wcserror_s().
                  */
                 snprintf(p, errbuflen_remaining, "Error %d", errnum);
+               return;
         }
+
+       /*
+        * Now convert it from UTF-16LE to UTF-8, dropping it in the
+        * remaining space in the buffer, and truncating it - cleanly,
+        * on a UTF-8 character boundary - if it doesn't fit.
+        */
+       utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);
+
+       /*
+        * Now, if we're not in UTF-8 mode, convert errbuf to the
+        * local code page.
+        */
+       if (!use_utf_8)
+               utf_8_to_acp_truncated(errbuf);
  #elif defined(HAVE_GNU_STRERROR_R)
         /*
          * We have a GNU-style strerror_r(), which is *not* guaranteed to
@@ -136,7 +360,7 @@ pcap_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
         }
  #else
         /*
-        * We have neither strerror_s() nor strerror_r(), so we're
+        * We have neither _wcserror_s() nor strerror_r(), so we're
          * stuck with using pcap_strerror().
          */
         snprintf(p, errbuflen_remaining, "%s", pcap_strerror(errnum));
@@ -157,7 +381,8 @@ pcap_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
         char *p;
         size_t errbuflen_remaining;
         DWORD retval;
-       char win32_errbuf[PCAP_ERRBUF_SIZE+1];
+       wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
+       size_t utf_8_len;
  
         va_start(ap, fmt);
         vsnprintf(errbuf, errbuflen, fmt, ap);
@@ -196,9 +421,9 @@ pcap_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
          * get the message translated if it's in a language they don't
          * happen to understand.
          */
-       retval = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_MAX_WIDTH_MASK,
+       retval = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_MAX_WIDTH_MASK,
             NULL, errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
-           win32_errbuf, PCAP_ERRBUF_SIZE, NULL);
+           utf_16_errbuf, PCAP_ERRBUF_SIZE, NULL);
         if (retval == 0) {
                 /*
                  * Failed.
@@ -208,6 +433,27 @@ pcap_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
                 return;
         }
  
-       snprintf(p, errbuflen_remaining, "%s (%lu)", win32_errbuf, errnum);
+       /*
+        * Now convert it from UTF-16LE to UTF-8.
+        */
+       p = utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);
+
+       /*
+        * Now append the error number, if it fits.
+        */
+       utf_8_len = p - errbuf;
+       errbuflen_remaining -= utf_8_len;
+       if (utf_8_len == 0) {
+               /* The message was empty. */
+               snprintf(p, errbuflen_remaining, "(%lu)", errnum);
+       } else
+               snprintf(p, errbuflen_remaining, " (%lu)", errnum);
+
+       /*
+        * Now, if we're not in UTF-8 mode, convert errbuf to the
+        * local code page.
+        */
+       if (!use_utf_8)
+               utf_8_to_acp_truncated(errbuf);
  }
  #endif
diff --git a/fmtutils.h b/fmtutils.h

index 838948bcd3d6f6c9a7b39359df068c214f3683cc..ba0f66ca06846bbed90514b30022d63c76619762 100644 (file)
--- a/fmtutils.h
+++ b/fmtutils.h
@@ -40,6 +40,8 @@
  extern "C" {
  #endif
  
+void   pcap_fmt_set_encoding(unsigned int);
+
  void   pcap_fmt_errmsg_for_errno(char *, size_t, int,
      PCAP_FORMAT_STRING(const char *), ...) PCAP_PRINTFLIKE(4, 5);
  
diff --git a/pcap-int.h b/pcap-int.h

index 43c948fd6632898b17ef7ed6c41e1e03dcaac741..99ede45b70adbde9b29d60e032ba646ef6b1a02a 100644 (file)
--- a/pcap-int.h
+++ b/pcap-int.h
@@ -51,6 +51,33 @@
  extern "C" {
  #endif
  
+/*
+ * If pcap_new_api is set, we disable pcap_lookupdev(), because:
+ *
+ *    it's not thread-safe, and is marked as deprecated, on all
+ *    platforms;
+ *
+ *    on Windows, it may return UTF-16LE strings, which the program
+ *    might then pass to pcap_create() (or to pcap_open_live(), which
+ *    then passes them to pcap_create()), requiring pcap_create() to
+ *    check for UTF-16LE strings using a hack, and that hack 1)
+ *    *cannot* be 100% reliable and 2) runs the risk of going past the
+ *    end of the string.
+ *
+ * We keep it around in legacy mode for compatibility.
+ *
+ * We also disable the aforementioned hack in pcap_create().
+ */
+extern int pcap_new_api;
+
+/*
+ * If pcap_utf_8_mode is set, on Windows we treat strings as UTF-8.
+ *
+ * On UN*Xes, we assume all strings are and should be in UTF-8, regardless
+ * of the setting of this flag.
+ */
+extern int pcap_utf_8_mode;
+
  #ifdef MSDOS
    #include <fcntl.h>
    #include <io.h>
@@ -260,6 +287,9 @@ struct pcap {
         struct bpf_program fcode;
  
         char errbuf[PCAP_ERRBUF_SIZE + 1];
+#ifdef _WIN32
+       char acp_errbuf[PCAP_ERRBUF_SIZE + 1];  /* buffer for local code page error strings */
+#endif
         int dlt_count;
         u_int *dlt_list;
         int tstamp_type_count;
@@ -486,7 +516,8 @@ int add_addr_to_if(pcap_if_list_t *, const char *, bpf_u_int32,
  #endif
  
  /*
- * Internal interfaces for "pcap_open_offline()".
+ * Internal interfaces for "pcap_open_offline()" and other savefile
+ * I/O routines.
   *
   * "pcap_open_offline_common()" allocates and fills in a pcap_t, for use
   * by pcap_open_offline routines.
@@ -497,10 +528,22 @@ int       add_addr_to_if(pcap_if_list_t *, const char *, bpf_u_int32,
   * "sf_cleanup()" closes the file handle associated with a pcap_t, if
   * appropriate, and frees all data common to all modules for handling
   * savefile types.
+ *
+ * "charset_fopen()", in UTF-8 mode on Windows, does an fopen() that
+ * treats the pathname as being in UTF-8, rather than as being in the
+ * local code page.
   */
  pcap_t *pcap_open_offline_common(char *ebuf, size_t size);
  bpf_u_int32 pcap_adjust_snapshot(bpf_u_int32 linktype, bpf_u_int32 snaplen);
  void   sf_cleanup(pcap_t *p);
+#ifdef _WIN32
+FILE   *charset_fopen(const char *path, const char *mode);
+#else
+/*
+ * On UN*X, just use Boring Old fopen().
+ */
+#define charset_fopen(path, mode)      fopen((path), (mode))
+#endif
  
  /*
   * Internal interfaces for doing user-mode filtering of packets and
diff --git a/pcap-npf.c b/pcap-npf.c

index 1c615c04d13502438117bf76ab3247aa08533761..0cc4ac8ae24cbb4c72edb45f998c5a79bc0bbc97 100644 (file)
--- a/pcap-npf.c
+++ b/pcap-npf.c
@@ -937,7 +937,7 @@ pcap_activate_npf(pcap_t *p)
                 }
         }
  
-       /* Init WinSock */
+       /* Init Winsock if it hasn't already been initialized */
         pcap_wsockinit();
  
         pw->adapter = PacketOpenAdapter(p->opt.device);
@@ -1898,6 +1898,22 @@ pcap_lookupdev(char *errbuf)
         DWORD dwVersion;
         DWORD dwWindowsMajorVersion;
  
+       /*
+        * We disable this in "new API" mode, because 1) in WinPcap/Npcap,
+        * it may return UTF-16 strings, for backwards-compatibility
+        * reasons, and we're also disabling the hack to make that work,
+        * for not-going-past-the-end-of-a-string reasons, and 2) we
+        * want its behavior to be consistent.
+        *
+        * In addition, it's not thread-safe, so we've marked it as
+        * deprecated.
+        */
+       if (pcap_new_api) {
+               snprintf(errbuf, PCAP_ERRBUF_SIZE,
+                   "pcap_lookupdev() is deprecated and is not supported in programs calling pcap_init()");
+               return (NULL);
+       }
+
  /* disable MSVC's GetVersion() deprecated warning here */
  DIAG_OFF_DEPRECATION
         dwVersion = GetVersion();       /* get the OS version */
diff --git a/pcap-rpcap.c b/pcap-rpcap.c

index f7999fe0219982dbd1ac8423f1b6d604eddffde6..836681a10c19c3878be6b5120bd4704e2ef70049 100644 (file)
--- a/pcap-rpcap.c
+++ b/pcap-rpcap.c
@@ -46,6 +46,10 @@
  #include "rpcap-protocol.h"
  #include "pcap-rpcap.h"
  
+#ifdef _WIN32
+#include "charconv.h"          /* for utf_8_to_acp_truncated() */
+#endif
+
  #ifdef HAVE_OPENSSL
  #include "sslutils.h"
  #endif
@@ -2161,7 +2165,7 @@ rpcap_setup_session(const char *source, struct pcap_rmtauth *auth,
         }
  
         /* Warning: this call can be the first one called by the user. */
-       /* For this reason, we have to initialize the WinSock support. */
+       /* For this reason, we have to initialize the Winsock support. */
         if (sock_init(errbuf, PCAP_ERRBUF_SIZE) == -1)
                 return -1;
  
@@ -2797,7 +2801,7 @@ SOCKET pcap_remoteact_accept_ex(const char *address, const char *port, const cha
         hints.ai_socktype = SOCK_STREAM;
  
         /* Warning: this call can be the first one called by the user. */
-       /* For this reason, we have to initialize the WinSock support. */
+       /* For this reason, we have to initialize the Winsock support. */
         if (sock_init(errbuf, PCAP_ERRBUF_SIZE) == -1)
                 return (SOCKET)-1;
  
@@ -3353,6 +3357,15 @@ static void rpcap_msg_err(SOCKET sockctrl, SSL *ssl, uint32 plen, char *remote_e
                  */
                 remote_errbuf[PCAP_ERRBUF_SIZE - 1] = '\0';
  
+#ifdef _WIN32
+               /*
+                * If we're not in UTF-8 mode, convert it to the local
+                * code page.
+                */
+               if (!pcap_utf_8_mode)
+                       utf_8_to_acp_truncated(remote_errbuf);
+#endif
+
                 /*
                  * Throw away the rest.
                  */
diff --git a/pcap.3pcap.in b/pcap.3pcap.in

index cfe2288a1ccbbbc2e3d2eb02bb354661b1e0d27b..4dff262f62a6526147a74ab53d5b49438261bd82 100644 (file)
--- a/pcap.3pcap.in
+++ b/pcap.3pcap.in
@@ -35,6 +35,52 @@ on the network, even those destined for other hosts, are accessible
  through this mechanism.
  It also supports saving captured packets to a ``savefile'', and reading
  packets from a ``savefile''.
+.SS Initializing
+.BR pcap_init ()
+initializes the library.  It takes an argument giving options;
+currently, the options are:
+.TP
+.B PCAP_CHAR_ENC_LOCAL
+Treat all strings supplied as arguments, and return all strings to the
+caller, as being in the local character encoding.
+.TP
+.B PCAP_CHAR_ENC_UTF_8
+Treat all strings supplied as arguments, and return all strings to the
+caller, as being in UTF-8.
+.PP
+On UNIX-like systems, the local character encoding is assumed to be
+UTF-8, so no character encoding transformations are done.
+.PP
+On Windows, the local character encoding is the local ANSI code page.
+.PP
+If
+.BR pcap_init ()
+is called, the deprecated
+.BR pcap_lookupdev ()
+routine always fails, so it should not be used, and, on Windows,
+.BR pcap_create ()
+does not attempt to handle UTF-16LE strings.
+.PP
+If
+.BR pcap_init ()
+is not called, strings are treated as being in the local ANSI code page
+on Windows,
+.BR pcap_lookupdev ()
+will succeed if there is a device on which to capture, and
+.BR pcap_create ()
+makes an attempt to check whether the string passed as an argument is a
+UTF-16LE string - note that this attempt is unsafe, as it may run past
+the end of the string - to handle
+.BR pcap_lookupdev ()
+returning a UTF-16LE string. Programs that don't call
+.BR pcap_init ()
+should, on Windows, call
+.BR pcap_wsockinit ()
+to initialize Winsock; this is not necessary if
+.BR pcap_init ()
+is called, as
+.BR pcap_init ()
+will initialize Winsock itself on Windows.
  .SS Opening a capture handle for reading
  To open a handle for a live capture, given the name of the network or
  other interface on which the capture should be done, call
diff --git a/pcap.c b/pcap.c

index 1d684cc3cf4068b6aa120aa26d2d70d4bf1a56df..19670ab01bbd5c0ef620842c60fe433cccae54e5 100644 (file)
--- a/pcap.c
+++ b/pcap.c
@@ -126,6 +126,23 @@ struct rtentry;            /* declarations in <net/if.h> */
  #ifdef _WIN32
  /*
   * DllMain(), required when built as a Windows DLL.
+ *
+ * To quote the WSAStartup() documentation:
+ *
+ *   The WSAStartup function typically leads to protocol-specific helper
+ *   DLLs being loaded. As a result, the WSAStartup function should not
+ *   be called from the DllMain function in a application DLL. This can
+ *   potentially cause deadlocks.
+ *
+ * and the WSACleanup() documentation:
+ *
+ *   The WSACleanup function typically leads to protocol-specific helper
+ *   DLLs being unloaded. As a result, the WSACleanup function should not
+ *   be called from the DllMain function in a application DLL. This can
+ *   potentially cause deadlocks.
+ *
+ * So we don't actually do anything here.  pcap_init() should be called
+ * to initialize pcap on both UN*X and Windows.
   */
  BOOL WINAPI DllMain(
    HANDLE hinstDLL _U_,
@@ -137,41 +154,159 @@ BOOL WINAPI DllMain(
  }
  
  /*
- * Start WinSock.
- * Exported in case some applications using WinPcap/Npcap called it,
- * even though it wasn't exported.
+ * Start Winsock.
+ * Internal routine.
   */
-int
-wsockinit(void)
+static int
+internal_wsockinit(char *errbuf)
  {
         WORD wVersionRequested;
         WSADATA wsaData;
         static int err = -1;
         static int done = 0;
+       int status;
  
         if (done)
                 return (err);
  
-       wVersionRequested = MAKEWORD( 1, 1);
-       err = WSAStartup( wVersionRequested, &wsaData );
-       atexit ((void(*)(void))WSACleanup);
+       /*
+        * Versions of Windows that don't support Winsock 2.2 are
+        * too old for us.
+        */
+       wVersionRequested = MAKEWORD(2, 2);
+       status = WSAStartup(wVersionRequested, &wsaData);
         done = 1;
-
-       if ( err != 0 )
-               err = -1;
+       if (status != 0) {
+               if (errbuf != NULL) {
+                       pcap_fmt_errmsg_for_win32_err(errbuf, PCAP_ERRBUF_SIZE,
+                           status, "WSAStartup() failed");
+               }
+               return (err);
+       }
+       atexit ((void(*)(void))WSACleanup);
+       err = 0;
         return (err);
  }
  
+/*
+ * Exported in case some applications using WinPcap/Npcap called it,
+ * even though it wasn't exported.
+ *
+ * Thin wrapper around internal_wsockinit(); it passes a null error
+ * buffer, so no error message is produced, and the result of
+ * internal_wsockinit() (0 on success, -1 on failure) is returned as is.
+ */
+int
+wsockinit(void)
+{
+       return (internal_wsockinit(NULL));
+}
+
  /*
   * This is the exported function; new programs should call this.
+ * *Newer* programs should call pcap_init().
   */
  int
  pcap_wsockinit(void)
  {
-       return (wsockinit());
+       return (internal_wsockinit(NULL));
  }
  #endif /* _WIN32 */
  
+/*
+ * Do whatever initialization is needed for libpcap.
+ *
+ * The argument specifies whether we use the local code page or UTF-8
+ * for strings; on UN*X, we just assume UTF-8 in places where the encoding
+ * would matter, whereas, on Windows, we use the local code page for
+ * PCAP_CHAR_ENC_LOCAL and UTF-8 for PCAP_CHAR_ENC_UTF_8.
+ *
+ * On Windows, we also disable the hack in pcap_create() to deal with
+ * being handed UTF-16 strings, because if the user calls this they're
+ * explicitly declaring that they will either be passing local code
+ * page strings or UTF-8 strings, so we don't need to allow UTF-16LE
+ * strings to be passed.  For good measure, on Windows *and* UN*X,
+ * we disable pcap_lookupdev(), to prevent anybody from even
+ * *trying* to pass the result of pcap_lookupdev() - which might be
+ * UTF-16LE on Windows, for ugly compatibility reasons - to pcap_create()
+ * or pcap_open_live() or pcap_open().
+ *
+ * Returns 0 on success, -1 on error.
+ */
+int pcap_new_api;              /* pcap_lookupdev() always fails */
+int pcap_utf_8_mode;           /* Strings should be in UTF-8. */
+
+/*
+ * opts must be exactly PCAP_CHAR_ENC_LOCAL or PCAP_CHAR_ENC_UTF_8; any
+ * other value is rejected.  The error paths in this routine format a
+ * message into errbuf with a limit of PCAP_ERRBUF_SIZE, so errbuf must
+ * be non-null and at least that large.
+ */
+int
+pcap_init(unsigned int opts, char *errbuf)
+{
+       /* Set once a call to this routine has completed successfully. */
+       static int initialized;
+
+       /*
+        * Don't allow multiple calls that set different modes; that
+        * may mean a library is initializing pcap in one mode and
+        * a program using that library, or another library used by
+        * that program, is initializing it in another mode.
+        */
+       switch (opts) {
+
+       case PCAP_CHAR_ENC_LOCAL:
+               /* Leave "UTF-8 mode" off. */
+               if (initialized) {
+                       if (pcap_utf_8_mode) {
+                               snprintf(errbuf, PCAP_ERRBUF_SIZE,
+                                   "Multiple pcap_init calls with different character encodings");
+                               return (-1);
+                       }
+               }
+               break;
+
+       case PCAP_CHAR_ENC_UTF_8:
+               /* Turn on "UTF-8 mode". */
+               if (initialized) {
+                       if (!pcap_utf_8_mode) {
+                               snprintf(errbuf, PCAP_ERRBUF_SIZE,
+                                   "Multiple pcap_init calls with different character encodings");
+                               return (-1);
+                       }
+               }
+               /*
+                * NOTE(review): this is set before Winsock is set up
+                * below, so it stays set if that setup fails, even
+                * though "initialized" is then left clear - confirm
+                * that's intended.
+                */
+               pcap_utf_8_mode = 1;
+               break;
+
+       default:
+               snprintf(errbuf, PCAP_ERRBUF_SIZE, "Unknown options specified");
+               return (-1);
+       }
+
+       /*
+        * Turn the appropriate mode on for error messages; those routines
+        * are also used in rpcapd, which has no access to pcap's internal
+        * UTF-8 mode flag, so we have to call a routine to set its
+        * UTF-8 mode flag.
+        *
+        * This is done on every call that gets past the checks above,
+        * including repeated calls with the same mode, and before
+        * Winsock is set up - presumably so that a Winsock startup
+        * error message is generated in the requested encoding.
+        */
+       pcap_fmt_set_encoding(opts);
+
+       if (initialized) {
+               /*
+                * Nothing more to do; for example, on Windows, we've
+                * already initialized Winsock.
+                */
+               return (0);
+       }
+
+#ifdef _WIN32
+       /*
+        * Now set up Winsock.
+        */
+       if (internal_wsockinit(errbuf) == -1) {
+               /* Failed. */
+               return (-1);
+       }
+#endif
+
+       /*
+        * We're done.  Setting pcap_new_api makes pcap_lookupdev() fail
+        * from now on and turns off the UTF-16LE check in pcap_create().
+        */
+       initialized = 1;
+       pcap_new_api = 1;
+       return (0);
+}
+
  /*
   * String containing the library version.
   * Not explicitly exported via a header file - the right API to use
@@ -1365,6 +1500,22 @@ pcap_lookupdev(char *errbuf)
         static char device[IF_NAMESIZE + 1];
         char *ret;
  
+       /*
+        * We disable this in "new API" mode, because 1) in WinPcap/Npcap,
+        * it may return UTF-16 strings, for backwards-compatibility
+        * reasons, and we're also disabling the hack to make that work,
+        * for not-going-past-the-end-of-a-string reasons, and 2) we
+        * want its behavior to be consistent.
+        *
+        * In addition, it's not thread-safe, so we've marked it as
+        * deprecated.
+        */
+       if (pcap_new_api) {
+               snprintf(errbuf, PCAP_ERRBUF_SIZE,
+                   "pcap_lookupdev() is deprecated and is not supported in programs calling pcap_init()");
+               return (NULL);
+       }
+
         if (pcap_findalldevs(&alldevs, errbuf) == -1)
                 return (NULL);
  
@@ -2121,15 +2272,27 @@ pcap_create(const char *device, char *errbuf)
                  * so, convert it back to the local code page's
                  * extended ASCII.
                  *
-                * XXX - you *cannot* reliably detect whether a
-                * string is UTF-16LE or not; "a" could either
-                * be a one-character ASCII string or the first
-                * character of a UTF-16LE string.  This particular
-                * version of this heuristic dates back to WinPcap
-                * 4.1.1; PacketOpenAdapter() does uses the same
-                * heuristic, with the exact same vulnerability.
+                * We disable that check in "new API" mode, because:
+                *
+                *   1) You *cannot* reliably detect whether a
+                *   string is UTF-16LE or not; "a" could either
+                *   be a one-character ASCII string or the first
+                *   character of a UTF-16LE string.
+                *
+                *   2) Doing that test can run past the end of
+                *   the string, if it's a 1-character ASCII
+                *   string
+                *
+                * This particular version of this heuristic dates
+                * back to WinPcap 4.1.1; PacketOpenAdapter() uses
+                * the same heuristic, with the exact same
+                * vulnerability.
+                *
+                * That's why we disable this in "new API" mode.
+                * We keep it around in legacy mode for backwards
+                * compatibility.
                  */
-               if (device[0] != '\0' && device[1] == '\0') {
+               if (!pcap_new_api && device[0] != '\0' && device[1] == '\0') {
                         size_t length;
  
                         length = wcslen((wchar_t *)device);
diff --git a/pcap/pcap.h b/pcap/pcap.h

index 60f3dbef0fb97050f0b5e484298993faf6df5b8a..c38a303c5c0ee0416cae1433bdf8b0d1da708d62 100644 (file)
--- a/pcap/pcap.h
+++ b/pcap/pcap.h
@@ -364,6 +364,26 @@ typedef void (*pcap_handler)(u_char *, const struct pcap_pkthdr *,
   */
  #define PCAP_NETMASK_UNKNOWN   0xffffffff
  
+/*
+ * Initialize pcap.  If this isn't called, pcap is initialized to
+ * a mode source-compatible and binary-compatible with older versions
+ * that lack this routine.
+ */
+
+/*
+ * Initialization options.
+ * All bits not listed here are reserved for expansion.
+ *
+ * On UNIX-like systems, the local character encoding is assumed to be
+ * UTF-8, so no character encoding transformations are done.
+ *
+ * On Windows, the local character encoding is the local ANSI code page.
+ */
+#define PCAP_CHAR_ENC_LOCAL    0x00000000U     /* strings are in the local character encoding */
+#define PCAP_CHAR_ENC_UTF_8    0x00000001U     /* strings are in UTF-8 */
+
+PCAP_API int   pcap_init(unsigned int, char *);
+
  /*
   * We're deprecating pcap_lookupdev() for various reasons (not
   * thread-safe, can behave weirdly with WinPcap).  Callers
diff --git a/pcap_init.3pcap b/pcap_init.3pcap

new file mode 100644 (file)

index 0000000..05fbbd7
--- /dev/null
+++ b/pcap_init.3pcap
@@ -0,0 +1,89 @@
+.\" Copyright (c) 1994, 1996, 1997
+.\"    The Regents of the University of California.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that: (1) source code distributions
+.\" retain the above copyright notice and this paragraph in its entirety, (2)
+.\" distributions including binary code include the above copyright notice and
+.\" this paragraph in its entirety in the documentation or other materials
+.\" provided with the distribution, and (3) all advertising materials mentioning
+.\" features or use of this software display the following acknowledgement:
+.\" ``This product includes software developed by the University of California,
+.\" Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
+.\" the University nor the names of its contributors may be used to endorse
+.\" or promote products derived from this software without specific prior
+.\" written permission.
+.\" THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
+.\" WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+.\"
+.TH PCAP_INIT 3PCAP "11 April 2020"
+.SH NAME
+pcap_init \- initialize the library
+.SH SYNOPSIS
+.nf
+.ft B
+#include <pcap/pcap.h>
+.ft
+.LP
+.nf
+.ft B
+char errbuf[PCAP_ERRBUF_SIZE];
+.ft
+.LP
+.ft B
+int pcap_init(unsigned int opts, char *errbuf);
+.ft
+.fi
+.SH DESCRIPTION
+.BR pcap_init ()
+is used to initialize the Packet Capture library.
+.I opts
+specifies options for the library;
+currently, the options are:
+.TP
+.B PCAP_CHAR_ENC_LOCAL
+Treat all strings supplied as arguments, and return all strings to the
+caller, as being in the local character encoding.
+.TP
+.B PCAP_CHAR_ENC_UTF_8
+Treat all strings supplied as arguments, and return all strings to the
+caller, as being in UTF-8.
+.PP
+On UNIX-like systems, the local character encoding is assumed to be
+UTF-8, so no character encoding transformations are done.
+.PP
+On Windows, the local character encoding is the local ANSI code page.
+.PP
+If
+.BR pcap_init ()
+is not called, strings are treated as being in the local ANSI code page
+on Windows,
+.BR pcap_lookupdev (3PCAP)
+will succeed if there is a device on which to capture, and
+.BR pcap_create (3PCAP)
+makes an attempt to check whether the string passed as an argument is a
+UTF-16LE string - note that this attempt is unsafe, as it may run past
+the end of the string - to handle
+.BR pcap_lookupdev ()
+returning a UTF-16LE string. Programs that don't call
+.BR pcap_init ()
+should, on Windows, call
+.BR pcap_wsockinit ()
+to initialize Winsock; this is not necessary if
+.BR pcap_init ()
+is called, as
+.BR pcap_init ()
+will initialize Winsock itself on Windows.
+.SH RETURN VALUE
+.BR pcap_init ()
+returns 0 on success and \-1 on failure.
+If \-1 is returned,
+.I errbuf
+is filled in with an appropriate error message.
+.I errbuf
+is assumed to be able to hold at least
+.B PCAP_ERRBUF_SIZE
+chars.
+.SH SEE ALSO
+.BR pcap (3PCAP)
diff --git a/pcap_lookupdev.3pcap b/pcap_lookupdev.3pcap

index a3807de6585c8086e73898fab1729a37f6696bce..57b7658564451d725196a8fc2fe6796c272bc34c 100644 (file)
--- a/pcap_lookupdev.3pcap
+++ b/pcap_lookupdev.3pcap
@@ -44,6 +44,11 @@ and, if the list it returns is not empty, use the first device in the
  list.  (If the list is empty, there are no devices on which capture is
  possible.)
  .LP
+.B If
+.BR pcap_init (3PCAP)
+.B has been called, this interface always returns
+.BR NULL .
+.LP
  .BR pcap_lookupdev ()
  returns a pointer to a string giving the name of a network device
  suitable for use with
@@ -55,6 +60,9 @@ or with
  and with
  .BR pcap_lookupnet (3PCAP).
  If there is an error,
+or if
+.BR pcap_init (3PCAP)
+has been called,
  .B NULL
  is returned and
  .I errbuf
diff --git a/rpcapd/CMakeLists.txt b/rpcapd/CMakeLists.txt

index 6b4a7d9b2fc56b13d7785b6fd1a1c1d274196dd7..1ee14bbe73d6384478f8688871b2342bd9e8c9e9 100644 (file)
--- a/rpcapd/CMakeLists.txt
+++ b/rpcapd/CMakeLists.txt
@@ -50,6 +50,7 @@ if(WIN32 OR ((CMAKE_USE_PTHREADS_INIT OR PTHREADS_FOUND) AND HAVE_CRYPT))
    if(WIN32)
      set(RPCAPD_EXTRA_SOURCES
          win32-svc.c
+        ${pcap_SOURCE_DIR}/charconv.c
          ${pcap_SOURCE_DIR}/missing/getopt.c
          rpcapd.rc)
      include_directories(${pcap_SOURCE_DIR}/rpcapd ${pcap_SOURCE_DIR}/missing)
diff --git a/rpcapd/rpcapd.c b/rpcapd/rpcapd.c

index bfb6e6a0981e8b4322733b0547b9a02b2af6b3d8..64f91eadec230574c8e82a2745dc0ed1e7bde6a7 100644 (file)
--- a/rpcapd/rpcapd.c
+++ b/rpcapd/rpcapd.c
@@ -325,6 +325,16 @@ int main(int argc, char *argv[])
         }
  #endif
  
+       //
+       // We want UTF-8 error messages.
+       //
+       if (pcap_init(PCAP_CHAR_ENC_UTF_8, errbuf) == -1)
+       {
+               rpcapd_log(LOGPRIO_ERROR, "%s", errbuf);
+               exit(-1);
+       }
+       pcap_fmt_set_encoding(PCAP_CHAR_ENC_UTF_8);
+
         if (sock_init(errbuf, PCAP_ERRBUF_SIZE) == -1)
         {
                 rpcapd_log(LOGPRIO_ERROR, "%s", errbuf);
diff --git a/savefile.c b/savefile.c

index aef9fb14c2405dfcbb0c48890dbafb63eeb30889..aa35bd3cf5bd279ff0f0defe6322682926818a97 100644 (file)
--- a/savefile.c
+++ b/savefile.c
@@ -54,6 +54,7 @@
  #include "sf-pcap.h"
  #include "sf-pcapng.h"
  #include "pcap-common.h"
+#include "charconv.h"
  
  #ifdef _WIN32
  /*
@@ -246,6 +247,102 @@ sf_cleanup(pcap_t *p)
         pcap_freecode(&p->fcode);
  }
  
+#ifdef _WIN32
+/*
+ * Wrapper for fopen() and _wfopen().
+ *
+ * If we're in UTF-8 mode, map the pathname from UTF-8 to UTF-16LE and
+ * call _wfopen().
+ *
+ * If we're not, just use fopen(); that'll treat it as being in the
+ * local code page.
+ *
+ * In UTF-8 mode the mode string must be pure ASCII and at most
+ * MAX_MODE_LEN characters long; otherwise this fails with errno set
+ * to EINVAL.
+ *
+ * Returns the FILE * for the opened file, or NULL on failure.
+ */
+FILE *
+charset_fopen(const char *path, const char *mode)
+{
+       wchar_t *utf16_path;
+#define MAX_MODE_LEN   16
+       wchar_t utf16_mode[MAX_MODE_LEN+1];
+       int i;
+       unsigned char c;
+       FILE *fp;
+       int save_errno;
+
+       if (pcap_utf_8_mode) {
+               /*
+                * Map from UTF-8 to UTF-16LE.
+                * Fail if there are invalid characters in the input
+                * string, rather than converting them to REPLACEMENT
+                * CHARACTER; the latter is appropriate for strings
+                * to be displayed to the user, but for file names
+                * you just want the attempt to open the file to fail.
+                */
+               utf16_path = cp_to_utf_16le(CP_UTF8, path,
+                   MB_ERR_INVALID_CHARS);
+               if (utf16_path == NULL) {
+                       /*
+                        * Error.  Assume errno has been set.
+                        *
+                        * XXX - what about Windows errors?
+                        */
+                       return (NULL);
+               }
+
+               /*
+                * Now convert the mode to UTF-16LE as well.
+                * We assume the mode is ASCII, and that
+                * it's short, so that's easy.
+                *
+                * Fetch each byte as an unsigned char: plain char
+                * may be signed, in which case bytes with the 8th
+                * bit set are negative, a "> 0x7F" test on them
+                * never succeeds, and they'd be sign-extended when
+                * stored in the wchar_t array.
+                */
+               for (i = 0; (c = (unsigned char)*mode) != '\0'; i++, mode++) {
+                       if (c > 0x7F) {
+                               /* Not an ASCII character; fail with EINVAL. */
+                               free(utf16_path);
+                               errno = EINVAL;
+                               return (NULL);
+                       }
+                       if (i >= MAX_MODE_LEN) {
+                               /* The mode string is longer than we allow. */
+                               free(utf16_path);
+                               errno = EINVAL;
+                               return (NULL);
+                       }
+                       utf16_mode[i] = c;
+               }
+               utf16_mode[i] = '\0';
+
+               /*
+                * OK, we have UTF-16LE strings; hand them to
+                * _wfopen().
+                */
+               fp = _wfopen(utf16_path, utf16_mode);
+
+               /*
+                * Make sure freeing the UTF-16LE string doesn't
+                * overwrite the error code we got from _wfopen().
+                */
+               save_errno = errno;
+               free(utf16_path);
+               errno = save_errno;
+
+               return (fp);
+       } else {
+               /*
+                * This takes strings in the local code page as an
+                * argument.
+                */
+               return (fopen(path, mode));
+       }
+}
+#else
+/*
+ * On other OSes, just use Boring Old fopen().
+ *
+ * "b" is supported as of C90, so *all* UN*Xes should support it, even
+ * though it does nothing.  For MS-DOS, we again need it.
+ *
+ * This has to be named charset_fopen(), as that's the name the
+ * savefile-opening code calls; the definition is identical to the
+ * UN*X one in pcap-int.h, so defining it again here is harmless.
+ */
+#define charset_fopen(path, mode)      fopen((path), (mode))
+#endif
+
  pcap_t *
  pcap_open_offline_with_tstamp_precision(const char *fname, u_int precision,
                                         char *errbuf)
@@ -276,12 +373,16 @@ pcap_open_offline_with_tstamp_precision(const char *fname, u_int precision,
         }
         else {
                 /*
+                * Use charset_fopen(); on Windows, it tests whether we're
+                * in "local code page" or "UTF-8" mode, and treats the
+                * pathname appropriately, and on other platforms, it just
+                * wraps fopen().
+                *
                  * "b" is supported as of C90, so *all* UN*Xes should
-                * support it, even though it does nothing.  It's
-                * required on Windows, as the file is a binary file
-                * and must be read in binary mode.
+                * support it, even though it does nothing.  For MS-DOS,
+                * we again need it.
                  */
-               fp = fopen(fname, "rb");
+               fp = charset_fopen(fname, "rb");
                 if (fp == NULL) {
                         pcap_fmt_errmsg_for_errno(errbuf, PCAP_ERRBUF_SIZE,
                             errno, "%s", fname);
diff --git a/sf-pcap.c b/sf-pcap.c

index ab51d7cabbe201e7a0c2e5960ee7a68e1df129e9..5c416b0cef32593f800796922ad4036d2f00c931 100644 (file)
--- a/sf-pcap.c
+++ b/sf-pcap.c
@@ -831,7 +831,7 @@ pcap_dump_open(pcap_t *p, const char *fname)
                  * required on Windows, as the file is a binary file
                  * and must be written in binary mode.
                  */
-               f = fopen(fname, "wb");
+               f = charset_fopen(fname, "wb");
                 if (f == NULL) {
                         pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
                             errno, "%s", fname);
@@ -931,7 +931,7 @@ pcap_dump_open_append(pcap_t *p, const char *fname)
          * even though it does nothing.  It's required on Windows, as the
          * file is a binary file and must be read in binary mode.
          */
-       f = fopen(fname, "ab+");
+       f = charset_fopen(fname, "ab+");
         if (f == NULL) {
                 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
                     errno, "%s", fname);
diff --git a/sockutils.c b/sockutils.c

index bd3d6cc2b8723bfb5cb9a4a6631be09915c15ec1..629e46e833bc43a8dbe8dff80ec0ab97c0f7bd2e 100644 (file)
--- a/sockutils.c
+++ b/sockutils.c
@@ -67,7 +67,7 @@
    /*
     * Winsock initialization.
     *
-   * Ask for WinSock 2.2.
+   * Ask for Winsock 2.2.
     */
    #define WINSOCK_MAJOR_VERSION 2
    #define WINSOCK_MINOR_VERSION 2
@@ -121,7 +121,7 @@ static int sock_ismcastaddr(const struct sockaddr *saddr);
   ****************************************************/
  
  /*
- * Format an error message given an errno value (UN*X) or a WinSock error
+ * Format an error message given an errno value (UN*X) or a Winsock error
   * (Windows).
   */
  void sock_fmterror(const char *caller, int errcode, char *errbuf, int errbuflen)
author	Guy Harris <[email protected]>
	Sat, 11 Apr 2020 21:40:01 +0000 (14:40 -0700)
committer	Guy Harris <[email protected]>
	Sat, 11 Apr 2020 21:40:01 +0000 (14:40 -0700)
CMakeLists.txt		patch \| blob \| history
Makefile.in		patch \| blob \| history
charconv.c	[new file with mode: 0644]	patch \| blob
charconv.h	[new file with mode: 0644]	patch \| blob
cmakeconfig.h.in		patch \| blob \| history
config.h.in		patch \| blob \| history
configure		patch \| blob \| history
configure.ac		patch \| blob \| history
fmtutils.c		patch \| blob \| history
fmtutils.h		patch \| blob \| history
pcap-int.h		patch \| blob \| history
pcap-npf.c		patch \| blob \| history
pcap-rpcap.c		patch \| blob \| history
pcap.3pcap.in		patch \| blob \| history
pcap.c		patch \| blob \| history
pcap/pcap.h		patch \| blob \| history
pcap_init.3pcap	[new file with mode: 0644]	patch \| blob
pcap_lookupdev.3pcap		patch \| blob \| history
rpcapd/CMakeLists.txt		patch \| blob \| history
rpcapd/rpcapd.c		patch \| blob \| history
savefile.c		patch \| blob \| history
sf-pcap.c		patch \| blob \| history
sockutils.c		patch \| blob \| history