X-Git-Url: https://git.tcpdump.org/libpcap/blobdiff_plain/cd5ff0253ffd1a9968af485245ea3b76c1605713..HEAD:/fmtutils.c

diff --git a/fmtutils.c b/fmtutils.c
index a4f59c2b..a5a4fe62 100644
--- a/fmtutils.c
+++ b/fmtutils.c
@@ -35,9 +35,7 @@
  * Utilities for message formatting used both by libpcap and rpcapd.
  */
 
-#ifdef HAVE_CONFIG_H
 #include <config.h>
-#endif
 
 #include "ftmacros.h"
 
@@ -47,28 +45,244 @@
 #include <string.h>
 #include <errno.h>
 
-#include <pcap/pcap.h>
+#include "pcap-int.h"
 
 #include "portability.h"
 
 #include "fmtutils.h"
 
+#ifdef _WIN32
+#include "charconv.h"
+#endif
+
+/*
+ * Set the encoding.
+ */
+#ifdef _WIN32
+/*
+ * True if we should use UTF-8.
+ */
+static int use_utf_8;
+
+void
+pcapint_fmt_set_encoding(unsigned int opts)
+{
+	/* One-way switch: this routine can set the flag but never clears it. */
+	use_utf_8 |= (opts == PCAP_CHAR_ENC_UTF_8);
+}
+#else
+void
+pcapint_fmt_set_encoding(unsigned int opts _U_)
+{
+	/*
+	 * Nothing to do here: only the _WIN32 variant records the option.
+	 */
+}
+#endif
+
+#ifdef _WIN32
+/*
+ * Convert a null-terminated UTF-16LE string to UTF-8, putting it into
+ * a buffer starting at the specified location and stopping if we go
+ * past the specified size.  This will only put out complete UTF-8
+ * sequences.
+ *
+ * We do this ourselves because Microsoft doesn't offer a "convert and
+ * stop at a UTF-8 character boundary if we run out of space" routine.
+ */
+#define IS_LEADING_SURROGATE(c) \
+	((c) >= 0xd800 && (c) < 0xdc00)
+#define IS_TRAILING_SURROGATE(c) \
+	((c) >= 0xdc00 && (c) < 0xe000)
+#define SURROGATE_VALUE(leading, trailing) \
+	(((((leading) - 0xd800) << 10) | ((trailing) - 0xdc00)) + 0x10000)
+#define REPLACEMENT_CHARACTER	0x0FFFD
+
+static char *
+utf_16le_to_utf_8_truncated(const wchar_t *utf_16, char *utf_8,
+    size_t utf_8_len)
+{
+	wchar_t c, c2;	/* current UTF-16 code unit and the one following it */
+	uint32_t uc;	/* code point to be written out as UTF-8 */
+
+	if (utf_8_len == 0) {
+		/*
+		 * Not even enough room for a trailing '\0'.
+		 * Don't put anything into the buffer.
+		 */
+		return (utf_8);
+	}
+
+	while ((c = *utf_16++) != '\0') {
+		if (IS_LEADING_SURROGATE(c)) {
+			/*
+			 * Leading surrogate.  Must be followed by
+			 * a trailing surrogate.
+			 */
+			c2 = *utf_16;
+			if (c2 == '\0') {
+				/*
+				 * Oops, string ends with a lead
+				 * surrogate.  Try to drop in
+				 * a REPLACEMENT CHARACTER, and
+				 * don't move the string pointer,
+				 * so on the next trip through
+				 * the loop we grab the terminating
+				 * '\0' and quit.
+				 */
+				uc = REPLACEMENT_CHARACTER;
+			} else {
+				/*
+				 * OK, we can consume this 2-octet
+				 * value.
+				 */
+				utf_16++;
+				if (IS_TRAILING_SURROGATE(c2)) {
+					/*
+					 * Trailing surrogate.
+					 * This calculation will,
+					 * for c being a leading
+					 * surrogate and c2 being
+					 * a trailing surrogate,
+					 * produce a value between
+					 * 0x10000 and 0x10ffff,
+					 * so it's always going to be
+					 * a valid Unicode code point.
+					 */
+					uc = SURROGATE_VALUE(c, c2);
+				} else {
+					/*
+					 * Not a trailing surrogate;
+					 * try to drop in a
+					 * REPLACEMENT CHARACTER.
+					 */
+					uc = REPLACEMENT_CHARACTER;
+				}
+			}
+		} else {
+			/*
+			 * Not a leading surrogate.
+			 */
+			if (IS_TRAILING_SURROGATE(c)) {
+				/*
+				 * Trailing surrogate without
+				 * a preceding leading surrogate.
+				 * Try to drop in a REPLACEMENT
+				 * CHARACTER.
+				 */
+				uc = REPLACEMENT_CHARACTER;
+			} else {
+				/*
+				 * This is a valid BMP character;
+				 * drop it in.
+				 */
+				uc = c;
+			}
+		}
+
+		/*
+		 * OK, uc is a valid Unicode character; how
+		 * many bytes worth of UTF-8 does it require?
+		 */
+		if (uc < 0x0080) {
+			/* 1 byte. */
+			if (utf_8_len < 2) {
+				/*
+				 * Not enough room for that byte
+				 * plus a trailing '\0'.
+				 */
+				break;
+			}
+			*utf_8++ = (char)uc;
+			utf_8_len--;
+		} else if (uc < 0x0800) {
+			/* 2 bytes. */
+			if (utf_8_len < 3) {
+				/*
+				 * Not enough room for those bytes
+				 * plus a trailing '\0'.
+				 */
+				break;
+			}
+			*utf_8++ = (char)(((uc >> 6) & 0x3F) | 0xC0);
+			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
+			utf_8_len -= 2;
+		} else if (uc < 0x010000) {
+			/* 3 bytes. */
+			if (utf_8_len < 4) {
+				/*
+				 * Not enough room for those bytes
+				 * plus a trailing '\0'.
+				 */
+				break;
+			}
+			*utf_8++ = (char)(((uc >> 12) & 0x0F) | 0xE0);
+			*utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
+			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
+			utf_8_len -= 3;
+		} else {
+			/* 4 bytes: 3 bits in the lead byte, 6 in each of the other 3. */
+			if (utf_8_len < 5) {
+				/*
+				 * Not enough room for those bytes
+				 * plus a trailing '\0'.
+				 */
+				break;
+			}
+			*utf_8++ = (char)(((uc >> 18) & 0x07) | 0xF0);
+			*utf_8++ = (char)(((uc >> 12) & 0x3F) | 0x80);
+			*utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
+			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
+			utf_8_len -= 4;	/* account for all 4 bytes just stored */
+		}
+	}
+
+	/*
+	 * OK, we have enough room for (at least) a trailing '\0'.
+	 * (We started out with enough room, thanks to the test
+	 * for a zero-length buffer at the beginning, and if
+	 * there wasn't enough room for any character we wanted
+	 * to put into the buffer *plus* a trailing '\0',
+	 * we'd have quit before putting it into the buffer,
+	 * and thus would have left enough room for the trailing
+	 * '\0'.)
+	 *
+	 * Drop it in.
+	 */
+	*utf_8 = '\0';
+
+	/*
+	 * Return a pointer to the terminating '\0', in case we
+	 * want to drop something in after that.
+	 */
+	return (utf_8);
+}
+#endif /* _WIN32 */
+
 /*
  * Generate an error message based on a format, arguments, and an
  * errno, with a message for the errno after the formatted output.
  */
 void
-pcap_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
+pcapint_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
     const char *fmt, ...)
 {
 	va_list ap;
+
+	va_start(ap, fmt);	/* gather the "..." arguments */
+	pcapint_vfmt_errmsg_for_errno(errbuf, errbuflen, errnum, fmt, ap);
+	va_end(ap);	/* the va_list variant has done all the formatting */
+}
+
+void
+pcapint_vfmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
+    const char *fmt, va_list ap)
+{
 	size_t msglen;
 	char *p;
 	size_t errbuflen_remaining;
 
-	va_start(ap, fmt);
-	vsnprintf(errbuf, errbuflen, fmt, ap);
-	va_end(ap);
+	(void)vsnprintf(errbuf, errbuflen, fmt, ap);
 	msglen = strlen(errbuf);
 
 	/*
@@ -89,55 +303,39 @@ pcap_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
 	/*
 	 * Now append the string for the error code.
 	 */
-#if defined(HAVE_STRERROR_S)
+#if defined(HAVE__WCSERROR_S)
 	/*
-	 * We have a Windows-style strerror_s().
+	 * We have a Windows-style _wcserror_s().
+	 * Generate a UTF-16LE error message.
 	 */
-	errno_t err = strerror_s(p, errbuflen_remaining, errnum);
+	wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
+	errno_t err = _wcserror_s(utf_16_errbuf, PCAP_ERRBUF_SIZE, errnum);
 	if (err != 0) {
 		/*
 		 * It doesn't appear to be documented anywhere obvious
-		 * what the error returns from strerror_s().
+		 * what the error returns from _wcserror_s().
 		 */
 		snprintf(p, errbuflen_remaining, "Error %d", errnum);
+		return;
 	}
-#elif defined(HAVE_GNU_STRERROR_R)
+
 	/*
-	 * We have a GNU-style strerror_r(), which is *not* guaranteed to
-	 * do anything to the buffer handed to it, and which returns a
-	 * pointer to the error string, which may or may not be in
-	 * the buffer.
-	 *
-	 * It is, however, guaranteed to succeed.
+	 * Now convert it from UTF-16LE to UTF-8, dropping it in the
+	 * remaining space in the buffer, and truncating it - cleanly,
+	 * on a UTF-8 character boundary - if it doesn't fit.
 	 */
-	char strerror_buf[PCAP_ERRBUF_SIZE];
-	char *errstring = strerror_r(errnum, strerror_buf, PCAP_ERRBUF_SIZE);
-	snprintf(p, errbuflen_remaining, "%s", errstring);
-#elif defined(HAVE_POSIX_STRERROR_R)
+	utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);
+
 	/*
-	 * We have a POSIX-style strerror_r(), which is guaranteed to fill
-	 * in the buffer, but is not guaranteed to succeed.
+	 * Now, if we're not in UTF-8 mode, convert errbuf to the
+	 * local code page.
 	 */
-	int err = strerror_r(errnum, p, errbuflen_remaining);
-	if (err == EINVAL) {
-		/*
-		 * UNIX 03 says this isn't guaranteed to produce a
-		 * fallback error message.
-		 */
-		snprintf(p, errbuflen_remaining, "Unknown error: %d",
-		    errnum);
-	} else if (err == ERANGE) {
-		/*
-		 * UNIX 03 says this isn't guaranteed to produce a
-		 * fallback error message.
-		 */
-		snprintf(p, errbuflen_remaining,
-		    "Message for error %d is too long", errnum);
-	}
+	if (!use_utf_8)
+		utf_8_to_acp_truncated(errbuf);
 #else
 	/*
-	 * We have neither strerror_s() nor strerror_r(), so we're
-	 * stuck with using pcap_strerror().
+	 * Either Windows without _wcserror_s() or not Windows.  Let pcap_strerror()
+	 * solve the non-UTF-16 part of this problem space.
 	 */
 	snprintf(p, errbuflen_remaining, "%s", pcap_strerror(errnum));
 #endif
@@ -149,19 +347,28 @@ pcap_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
  * Win32 error, with a message for the Win32 error after the formatted output.
  */
 void
-pcap_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
+pcapint_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
     const char *fmt, ...)
 {
 	va_list ap;
+
+	va_start(ap, fmt);	/* gather the "..." arguments */
+	pcapint_vfmt_errmsg_for_win32_err(errbuf, errbuflen, errnum, fmt, ap);
+	va_end(ap);	/* the va_list variant has done all the formatting */
+}
+
+void
+pcapint_vfmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
+    const char *fmt, va_list ap)
+{
 	size_t msglen;
 	char *p;
 	size_t errbuflen_remaining;
 	DWORD retval;
-	char win32_errbuf[PCAP_ERRBUF_SIZE+1];
+	wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
+	size_t utf_8_len;
 
-	va_start(ap, fmt);
 	vsnprintf(errbuf, errbuflen, fmt, ap);
-	va_end(ap);
 	msglen = strlen(errbuf);
 
 	/*
@@ -196,9 +403,9 @@ pcap_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
 	 * get the message translated if it's in a language they don't
 	 * happen to understand.
 	 */
-	retval = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_MAX_WIDTH_MASK,
+	retval = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_MAX_WIDTH_MASK,
 	    NULL, errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
-	    win32_errbuf, PCAP_ERRBUF_SIZE, NULL);
+	    utf_16_errbuf, PCAP_ERRBUF_SIZE, NULL);
 	if (retval == 0) {
 		/*
 		 * Failed.
@@ -208,6 +415,27 @@ pcap_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
 		return;
 	}
 
-	snprintf(p, errbuflen_remaining, "%s (%lu)", win32_errbuf, errnum);
+	/*
+	 * Now convert it from UTF-16LE to UTF-8.
+	 */
+	p = utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);
+
+	/*
+	 * Now append the error number, if it fits.
+	 */
+	utf_8_len = p - errbuf;
+	errbuflen_remaining -= utf_8_len;
+	if (utf_8_len == 0) {
+		/* The message was empty. */
+		snprintf(p, errbuflen_remaining, "(%lu)", errnum);
+	} else
+		snprintf(p, errbuflen_remaining, " (%lu)", errnum);
+
+	/*
+	 * Now, if we're not in UTF-8 mode, convert errbuf to the
+	 * local code page.
+	 */
+	if (!use_utf_8)
+		utf_8_to_acp_truncated(errbuf);
 }
 #endif