More UNALIGNED_MEM{CPY,CMP} on IP addresses.

[tcpdump] / extract.h
diff --git a/extract.h b/extract.h

index 6aa21e87ee521a035bacb18ecc47a91833af2407..2af90d07f3c5cae35f83d8b2649056b072b80eb9 100644 (file)
--- a/extract.h
+++ b/extract.h
@@ -17,41 +17,137 @@
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
   * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
   * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+/*
+ * Macros to extract possibly-unaligned big-endian integral values.
+ */
+#ifdef LBL_ALIGN
+/*
+ * The processor doesn't natively handle unaligned loads.
+ */
+#ifdef HAVE___ATTRIBUTE__
+/*
+ * We have __attribute__; we assume that means we have __attribute__((packed)).
+ * Declare packed structures containing a u_int16_t and a u_int32_t,
+ * cast the pointer to point to one of those, and fetch through it;
+ * the GCC manual doesn't appear to explicitly say that
+ * __attribute__((packed)) causes the compiler to generate unaligned-safe
+ * code, but it appears to do so.
   *
- * @(#) $Header: /tcpdump/master/tcpdump/extract.h,v 1.16 2000-10-03 02:54:55 itojun Exp $ (LBL)
+ * We do this in case the compiler can generate, for this instruction set,
+ * better code to do an unaligned load and pass stuff to "ntohs()" or
+ * "ntohl()" than the code to fetch the bytes one at a time and
+ * assemble them.  (That might not be the case on a little-endian platform,
+ * where "ntohs()" and "ntohl()" might not be done inline.)
   */
+typedef struct {
+       u_int16_t       val;
+} __attribute__((packed)) unaligned_u_int16_t;
  
-/* Network to host order macros */
+typedef struct {
+       u_int32_t       val;
+} __attribute__((packed)) unaligned_u_int32_t;
  
-#ifdef LBL_ALIGN
-#define EXTRACT_16BITS(p) \
-       ((u_int16_t)*((u_int8_t *)(p) + 0) << 8 | \
-       (u_int16_t)*((u_int8_t *)(p) + 1))
-#define EXTRACT_32BITS(p) \
-       ((u_int32_t)*((u_int8_t *)(p) + 0) << 24 | \
-       (u_int32_t)*((u_int8_t *)(p) + 1) << 16 | \
-       (u_int32_t)*((u_int8_t *)(p) + 2) << 8 | \
-       (u_int32_t)*((u_int8_t *)(p) + 3))
-#else
+static inline u_int16_t
+EXTRACT_16BITS(const void *p)
+{
+       return ((u_int16_t)ntohs(((const unaligned_u_int16_t *)(p))->val));
+}
+
+static inline u_int32_t
+EXTRACT_32BITS(const void *p)
+{
+       return ((u_int32_t)ntohl(((const unaligned_u_int32_t *)(p))->val));
+}
+
+static inline u_int64_t
+EXTRACT_64BITS(const void *p)
+{
+       return ((u_int64_t)(((u_int64_t)ntohl(((const unaligned_u_int32_t *)(p) + 0)->val)) << 32 | \
+               ((u_int64_t)ntohl(((const unaligned_u_int32_t *)(p) + 1)->val)) << 0));
+
+}
+
+#else /* HAVE___ATTRIBUTE__ */
+/*
+ * We don't have __attribute__, so do unaligned loads of big-endian
+ * quantities the hard way - fetch the bytes one at a time and
+ * assemble them.
+ */
  #define EXTRACT_16BITS(p) \
-       ((u_int16_t)ntohs(*(u_int16_t *)(p)))
+       ((u_int16_t)((u_int16_t)*((const u_int8_t *)(p) + 0) << 8 | \
+                    (u_int16_t)*((const u_int8_t *)(p) + 1)))
  #define EXTRACT_32BITS(p) \
-       ((u_int32_t)ntohl(*(u_int32_t *)(p)))
-#endif
+       ((u_int32_t)((u_int32_t)*((const u_int8_t *)(p) + 0) << 24 | \
+                    (u_int32_t)*((const u_int8_t *)(p) + 1) << 16 | \
+                    (u_int32_t)*((const u_int8_t *)(p) + 2) << 8 | \
+                    (u_int32_t)*((const u_int8_t *)(p) + 3)))
+#define EXTRACT_64BITS(p) \
+       ((u_int64_t)((u_int64_t)*((const u_int8_t *)(p) + 0) << 56 | \
+                    (u_int64_t)*((const u_int8_t *)(p) + 1) << 48 | \
+                    (u_int64_t)*((const u_int8_t *)(p) + 2) << 40 | \
+                    (u_int64_t)*((const u_int8_t *)(p) + 3) << 32 | \
+                    (u_int64_t)*((const u_int8_t *)(p) + 4) << 24 | \
+                    (u_int64_t)*((const u_int8_t *)(p) + 5) << 16 | \
+                    (u_int64_t)*((const u_int8_t *)(p) + 6) << 8 | \
+                    (u_int64_t)*((const u_int8_t *)(p) + 7)))
+#endif /* HAVE___ATTRIBUTE__ */
+#else /* LBL_ALIGN */
+/*
+ * The processor natively handles unaligned loads, so we can just
+ * cast the pointer and fetch through it.
+ */
+static inline u_int16_t
+EXTRACT_16BITS(const void *p)
+{
+       return ((u_int16_t)ntohs(*(const u_int16_t *)(p)));
+}
  
-#define EXTRACT_24BITS(p) \
-       ((u_int32_t)*((u_int8_t *)(p) + 0) << 16 | \
-       (u_int32_t)*((u_int8_t *)(p) + 1) << 8 | \
-       (u_int32_t)*((u_int8_t *)(p) + 2))
+static inline u_int32_t
+EXTRACT_32BITS(const void *p)
+{
+       return ((u_int32_t)ntohl(*(const u_int32_t *)(p)));
+}
  
-/* Little endian protocol host order macros */
+static inline u_int64_t
+EXTRACT_64BITS(const void *p)
+{
+       return ((u_int64_t)(((u_int64_t)ntohl(*((const u_int32_t *)(p) + 0))) << 32 | \
+               ((u_int64_t)ntohl(*((const u_int32_t *)(p) + 1))) << 0));
  
+}
+
+#endif /* LBL_ALIGN */
+
+#define EXTRACT_24BITS(p) \
+       ((u_int32_t)((u_int32_t)*((const u_int8_t *)(p) + 0) << 16 | \
+                    (u_int32_t)*((const u_int8_t *)(p) + 1) << 8 | \
+                    (u_int32_t)*((const u_int8_t *)(p) + 2)))
+
+/*
+ * Macros to extract possibly-unaligned little-endian integral values.
+ * XXX - do loads on little-endian machines that support unaligned loads?
+ */
  #define EXTRACT_LE_8BITS(p) (*(p))
  #define EXTRACT_LE_16BITS(p) \
-       ((u_int16_t)*((u_int8_t *)(p) + 1) << 8 | \
-       (u_int16_t)*((u_int8_t *)(p) + 0))
+       ((u_int16_t)((u_int16_t)*((const u_int8_t *)(p) + 1) << 8 | \
+                    (u_int16_t)*((const u_int8_t *)(p) + 0)))
  #define EXTRACT_LE_32BITS(p) \
-       ((u_int32_t)*((u_int8_t *)(p) + 3) << 24 | \
-       (u_int32_t)*((u_int8_t *)(p) + 2) << 16 | \
-       (u_int32_t)*((u_int8_t *)(p) + 1) << 8 | \
-       (u_int32_t)*((u_int8_t *)(p) + 0))
+       ((u_int32_t)((u_int32_t)*((const u_int8_t *)(p) + 3) << 24 | \
+                    (u_int32_t)*((const u_int8_t *)(p) + 2) << 16 | \
+                    (u_int32_t)*((const u_int8_t *)(p) + 1) << 8 | \
+                    (u_int32_t)*((const u_int8_t *)(p) + 0)))
+#define EXTRACT_LE_24BITS(p) \
+       ((u_int32_t)((u_int32_t)*((const u_int8_t *)(p) + 2) << 16 | \
+                    (u_int32_t)*((const u_int8_t *)(p) + 1) << 8 | \
+                    (u_int32_t)*((const u_int8_t *)(p) + 0)))
+#define EXTRACT_LE_64BITS(p) \
+       ((u_int64_t)((u_int64_t)*((const u_int8_t *)(p) + 7) << 56 | \
+                    (u_int64_t)*((const u_int8_t *)(p) + 6) << 48 | \
+                    (u_int64_t)*((const u_int8_t *)(p) + 5) << 40 | \
+                    (u_int64_t)*((const u_int8_t *)(p) + 4) << 32 | \
+                    (u_int64_t)*((const u_int8_t *)(p) + 3) << 24 | \
+                    (u_int64_t)*((const u_int8_t *)(p) + 2) << 16 | \
+                    (u_int64_t)*((const u_int8_t *)(p) + 1) << 8 | \
+                    (u_int64_t)*((const u_int8_t *)(p) + 0)))