2 * Copyright (c) 1991, 1992, 1993, 1995, 1996, 1999
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
23 * search.c - supports fast searching through pcap files for timestamps
28 #include <sys/types.h>
37 #ifdef HAVE_OS_PROTO_H
44 * We directly read a pcap file, so declare the per-packet header
45 * ourselves. It's not platform-dependent or version-dependent;
46 * if the per-packet header doesn't look *exactly* like this, it's
47 * not a valid pcap file.
51 * This is a timeval as stored in a savefile.
52 * It has to use the same types everywhere, independent of the actual
53 * `struct timeval'; `struct timeval' has 32-bit tv_sec values on some
54 * platforms and 64-bit tv_sec values on other platforms, and writing
55 * out native `struct timeval' values would mean files could only be
56 * read on systems with the same tv_sec size as the system on which
57 * the file was written.
60 bpf_int32 tv_sec
; /* seconds */
61 bpf_int32 tv_usec
; /* microseconds */
65 * This is a `pcap_pkthdr' as actually stored in a savefile.
67 struct pcap_sf_pkthdr
{
68 struct pcap_timeval ts
; /* time stamp */
69 bpf_u_int32 caplen
; /* length of portion present */
70 bpf_u_int32 len
; /* length this packet (off wire) */
73 /* Maximum number of seconds that we can conceive of a dump file spanning. */
74 #define MAX_REASONABLE_FILE_SPAN (3600*24*366) /* one year */
76 /* Maximum packet length we ever expect to see. */
77 #define MAX_REASONABLE_PACKET_LENGTH 262144
79 /* Size of a packet header in bytes; easier than typing the sizeof() all
82 #define PACKET_HDR_LEN (sizeof( struct pcap_sf_pkthdr ))
86 /* The maximum size of a packet, including its header. */
87 #define MAX_PACKET_SIZE (PACKET_HDR_LEN + snaplen)
89 /* Number of contiguous bytes from a dumpfile in which there's guaranteed
90 * to be enough information to find a "definite" header if one exists
91 * therein. This takes 3 full packets - the first to be just misaligned
92 * (one byte short of a full packet), missing its timestamp; the second
93 * to have the legitimate timestamp; and the third to provide confirmation
94 * that the second is legit, making it a "definite" header. We could
95 * scrimp a bit here since not the entire third packet is required, but
96 * it doesn't seem worth it
98 #define MAX_BYTES_FOR_DEFINITE_HEADER (3 * MAX_PACKET_SIZE)
100 /* Maximum number of seconds that might reasonably separate two headers. */
101 #define MAX_REASONABLE_HDR_SEPARATION (3600 * 24 * 7) /* one week */
103 /* When searching a file for a packet, if we think we're within this many
104 * bytes of the packet we just search linearly. Since linear searches are
105 * probably much faster than random ones (random ones require searching for
106 * the beginning of the packet, which may be unaligned in memory), we make
107 * this value pretty hefty.
109 #define STRAIGHT_SCAN_THRESHOLD (100 * MAX_PACKET_SIZE)
112 /* Given a header and an acceptable first and last time stamp, returns non-zero
113 * if the header looks reasonable and zero otherwise.
116 reasonable_header( const struct pcap_pkthdr
*hdr
, const time_t first_time
, time_t last_time
)
118 if ( last_time
== 0 )
119 last_time
= first_time
+ MAX_REASONABLE_FILE_SPAN
;
121 return hdr
->ts
.tv_sec
>= first_time
&&
122 hdr
->ts
.tv_sec
<= last_time
&&
124 hdr
->len
<= MAX_REASONABLE_PACKET_LENGTH
&&
126 hdr
->caplen
<= MAX_REASONABLE_PACKET_LENGTH
;
129 #define SWAPLONG(y) \
130 ((((y)&0xff)<<24) | (((y)&0xff00)<<8) | (((y)&0xff0000)>>8) | (((y)>>24)&0xff))
132 /* Given a buffer, extracts a (properly aligned) packet header from it. */
134 extract_header( pcap_t
*p
, const u_char
*buf
, struct pcap_pkthdr
*hdr
)
136 struct pcap_sf_pkthdr sfhdr
;
138 memcpy(&sfhdr
, buf
, sizeof(sfhdr
));
140 hdr
->ts
.tv_sec
= sfhdr
.ts
.tv_sec
;
141 hdr
->ts
.tv_usec
= sfhdr
.ts
.tv_usec
;
142 hdr
->caplen
= sfhdr
.caplen
;
143 hdr
->len
= sfhdr
.len
;
145 if ( pcap_is_swapped( p
) )
147 hdr
->ts
.tv_sec
= SWAPLONG(hdr
->ts
.tv_sec
);
148 hdr
->ts
.tv_usec
= SWAPLONG(hdr
->ts
.tv_usec
);
149 hdr
->len
= SWAPLONG(hdr
->len
);
150 hdr
->caplen
= SWAPLONG(hdr
->caplen
);
154 * From bpf/libpcap/savefile.c:
156 * We interchanged the caplen and len fields at version 2.3,
157 * in order to match the bpf header layout. But unfortunately
158 * some files were written with version 2.3 in their headers
159 * but without the interchanged fields.
161 if ( pcap_minor_version( p
) < 3 ||
162 (pcap_minor_version( p
) == 3 && hdr
->caplen
> hdr
->len
) )
165 hdr
->caplen
= hdr
->len
;
170 /* Search a buffer to locate the first header within it. Return values
171 * are HEADER_NONE, HEADER_CLASH, HEADER_PERHAPS, and HEADER_DEFINITELY.
172 * The first indicates that no evidence of a header was found; the second
173 * that two or more possible headers were found, neither more convincing
174 * than the other(s); the third that exactly one "possible" header was
175 * found; and the fourth that exactly one "definite" header was found.
177 * Headers are detected by looking for positions in the buffer which have
178 * reasonable timestamps and lengths. If there is enough room in the buffer
179 * for another header to follow a candidate header, a check is made for
180 * that following header. If it is present then the header is *definite*
181 * (unless another "perhaps" or "definite" header is found); if not, then
182 * the header is discarded. If there is not enough room in the buffer for
183 * another header then the candidate is *perhaps* (unless another header
184 * is subsequently found). A "tie" between a "definite" header and a
185 * "perhaps" header is resolved in favor of the definite header. Any
186 * other tie leads to HEADER_CLASH.
188 * The buffer position of the header is returned in hdrpos_addr and
189 * for convenience the corresponding header in return_hdr.
191 * first_time is the earliest possible acceptable timestamp in the
192 * header. last_time, if non-zero, is the last such timestamp. If
193 * zero, then up to MAX_REASONABLE_FILE_SPAN seconds after first_time
197 #define HEADER_NONE 0
198 #define HEADER_CLASH 1
199 #define HEADER_PERHAPS 2
200 #define HEADER_DEFINITELY 3
203 find_header( pcap_t
*p
, u_char
*buf
, const int buf_len
,
204 const time_t first_time
, const time_t last_time
,
205 u_char
**hdrpos_addr
, struct pcap_pkthdr
*return_hdr
)
207 u_char
*bufptr
, *bufend
, *last_pos_to_try
;
208 struct pcap_pkthdr hdr
, hdr2
;
209 int status
= HEADER_NONE
;
210 int saw_PERHAPS_clash
= 0;
212 /* Initially, try each buffer position to see whether it looks like
213 * a valid packet header. We may later restrict the positions we look
214 * at to avoid seeing a sequence of legitimate headers as conflicting
217 bufend
= buf
+ buf_len
;
218 last_pos_to_try
= bufend
- PACKET_HDR_LEN
;
220 for ( bufptr
= buf
; bufptr
< last_pos_to_try
; ++bufptr
)
222 extract_header( p
, bufptr
, &hdr
);
224 if ( reasonable_header( &hdr
, first_time
, last_time
) )
226 u_char
*next_header
= bufptr
+ PACKET_HDR_LEN
+ hdr
.caplen
;
228 if ( next_header
+ PACKET_HDR_LEN
< bufend
)
229 { /* check for another good header */
230 extract_header( p
, next_header
, &hdr2
);
232 if ( reasonable_header( &hdr2
, hdr
.ts
.tv_sec
,
233 hdr
.ts
.tv_sec
+ MAX_REASONABLE_HDR_SEPARATION
) )
234 { /* a confirmed header */
239 status
= HEADER_DEFINITELY
;
240 *hdrpos_addr
= bufptr
;
243 /* Make sure we don't demote this "definite"
244 * to a "clash" if we stumble across its
247 last_pos_to_try
= next_header
- PACKET_HDR_LEN
;
250 case HEADER_DEFINITELY
:
254 error( "bad status in %s()", __func__
);
258 /* ... else the header is bogus - we've verified that it's
259 * not followed by a reasonable header.
263 { /* can't check for another good header */
267 status
= HEADER_PERHAPS
;
268 *hdrpos_addr
= bufptr
;
273 /* We don't immediately turn this into a
274 * clash because perhaps we'll later see a
275 * "definite" which will save us ...
277 saw_PERHAPS_clash
= 1;
280 case HEADER_DEFINITELY
:
281 /* Keep the definite in preference to this one. */
285 error( "bad status in %s()", __func__
);
291 if ( status
== HEADER_PERHAPS
&& saw_PERHAPS_clash
)
292 status
= HEADER_CLASH
;
297 /* Positions the sf_readfile stream such that the next sf_read() will
298 * read the final full packet in the file. Returns non-zero if
299 * successful, zero if unsuccessful. If successful, returns the
300 * timestamp of the last packet in last_timestamp.
302 * Note that this routine is a special case of sf_find_packet(). In
303 * order to use sf_find_packet(), one first must use this routine in
304 * order to give sf_find_packet() an upper bound on the timestamps
305 * present in the dump file.
308 sf_find_end( pcap_t
*p
, const struct timeval
*first_timestamp
,
309 struct timeval
*last_timestamp
)
311 time_t first_time
= first_timestamp
->tv_sec
;
314 u_char
*buf
, *bufpos
, *bufend
;
316 struct pcap_pkthdr hdr
, successor_hdr
;
319 /* Find the length of the file. */
320 if ( fseek64( pcap_file (p
), (int64_t) 0, SEEK_END
) < 0 )
323 len_file
= ftell64( pcap_file (p
) );
326 /* Casting a non-negative int64_t to uint64_t always works as expected. */
327 if ( (uint64_t) len_file
< MAX_BYTES_FOR_DEFINITE_HEADER
)
328 num_bytes
= len_file
;
330 num_bytes
= MAX_BYTES_FOR_DEFINITE_HEADER
;
332 /* Allow enough room for at least two full (untruncated) packets,
333 * perhaps followed by a truncated packet, so we have a shot at
334 * finding a "definite" header and following its chain to the
337 if ( fseek64( pcap_file( p
), -(int64_t) num_bytes
, SEEK_END
) < 0 )
340 buf
= (u_char
*)malloc((u_int
) num_bytes
);
346 bufend
= buf
+ num_bytes
;
348 if ( fread( (char *) bufpos
, num_bytes
, 1, pcap_file( p
) ) != 1 )
351 if ( find_header( p
, bufpos
, num_bytes
,
352 first_time
, 0L, &hdrpos
, &hdr
) != HEADER_DEFINITELY
)
355 /* Okay, we have a definite header in our hands. Follow its
356 * chain till we find the last valid packet in the file ...
360 /* move to the next header position */
361 bufpos
= hdrpos
+ PACKET_HDR_LEN
+ hdr
.caplen
;
363 /* bufpos now points to a candidate packet, which if valid
364 * should replace the current packet pointed to by hdrpos as
365 * the last valid packet ...
367 if ( bufpos
>= bufend
- PACKET_HDR_LEN
)
368 /* not enough room for another header */
371 extract_header( p
, bufpos
, &successor_hdr
);
373 first_time
= hdr
.ts
.tv_sec
;
374 if ( ! reasonable_header( &successor_hdr
, first_time
, 0L ) )
375 /* this bodes ill - it means bufpos is perhaps a
376 * bogus packet header after all ...
380 /* Note that the following test is for whether the next
381 * packet starts at a position > bufend, *not* for a
382 * position >= bufend. If this is the last packet in the
383 * file and there isn't a subsequent partial packet, then
384 * we expect the first buffer position beyond this packet
385 * to be just beyond the end of the buffer, i.e., at bufend
388 if ( bufpos
+ PACKET_HDR_LEN
+ successor_hdr
.caplen
> bufend
)
389 /* the packet is truncated */
392 /* Accept this packet as fully legit. */
397 /* Success! Last valid packet is at hdrpos. */
398 TIMEVAL_FROM_PKTHDR_TS(*last_timestamp
, hdr
.ts
);
401 /* Seek so that the next read will start at last valid packet. */
402 if ( fseek64( pcap_file( p
), -(int64_t) (bufend
- hdrpos
), SEEK_END
) < 0 )
403 error( "final fseek64() failed in %s()", __func__
);
406 free( (char *) buf
);
411 /* Takes two timeval's and returns the difference, tv2 - tv1, as a double. */
413 timeval_diff( const struct timeval
*tv1
, const struct timeval
*tv2
)
415 double result
= (tv2
->tv_sec
- tv1
->tv_sec
);
416 result
+= (tv2
->tv_usec
- tv1
->tv_usec
) / 1000000.0;
421 /* Returns true if timestamp t1 is chronologically less than timestamp t2. */
423 sf_timestamp_less_than( const struct timeval
*t1
, const struct timeval
*t2
)
425 return t1
->tv_sec
< t2
->tv_sec
||
426 (t1
->tv_sec
== t2
->tv_sec
&&
427 t1
->tv_usec
< t2
->tv_usec
);
430 /* Given two timestamps on either side of desired_time and their positions,
431 * returns the interpolated position of the desired_time packet. Returns a
432 * negative value if the desired_time is outside the given range.
435 interpolated_position( const struct timeval
*min_time
, const int64_t min_pos
,
436 const struct timeval
*max_time
, const int64_t max_pos
,
437 const struct timeval
*desired_time
)
439 double full_span
= timeval_diff( max_time
, min_time
);
440 double desired_span
= timeval_diff( desired_time
, min_time
);
441 int64_t full_span_pos
= max_pos
- min_pos
;
442 double fractional_offset
= desired_span
/ full_span
;
444 if ( fractional_offset
< 0.0 || fractional_offset
> 1.0 )
447 return min_pos
+ (int64_t) (fractional_offset
* (double) full_span_pos
);
450 /* Reads packets linearly until one with a time >= the given desired time
451 * is found; positions the dump file so that the next read will start
452 * at the given packet. Returns non-zero on success, 0 if an EOF was
456 read_up_to( pcap_t
*p
, const struct timeval
*desired_time
)
458 struct pcap_pkthdr hdr
;
464 struct timeval tvbuf
;
466 pos
= ftell64( pcap_file( p
) );
468 if ( pcap_next( p
, &hdr
) == NULL
)
470 if ( feof( pcap_file( p
) ) )
473 clearerr( pcap_file( p
) );
477 error( "bad status in %s()", __func__
);
480 TIMEVAL_FROM_PKTHDR_TS(tvbuf
, hdr
.ts
);
482 if ( ! sf_timestamp_less_than( &tvbuf
, desired_time
) )
489 if ( fseek64( pcap_file( p
), pos
, SEEK_SET
) < 0 )
490 error( "fseek64() failed in %s()", __func__
);
495 /* Positions the sf_readfile stream so that the next sf_read() will
496 * return the first packet with a time greater than or equal to
497 * desired_time. desired_time must be greater than min_time and less
498 * than max_time, which should correspond to actual packets in the
499 * file. min_pos is the file position (byte offset) corresponding to
500 * the min_time packet and max_pos is the same for the max_time packet.
502 * Returns non-zero on success, 0 if the given position is beyond max_pos.
504 * NOTE: when calling this routine, the sf_readfile stream *must* be
505 * already aligned so that the next call to sf_next_packet() will yield
509 sf_find_packet( pcap_t
*p
,
510 struct timeval
*min_time
, int64_t min_pos
,
511 struct timeval
*max_time
, int64_t max_pos
,
512 const struct timeval
*desired_time
)
515 struct timeval min_time_copy
, max_time_copy
;
516 u_int num_bytes
= MAX_BYTES_FOR_DEFINITE_HEADER
;
517 u_char
*buf
, *hdrpos
;
518 struct pcap_pkthdr hdr
;
520 buf
= (u_char
*) malloc( num_bytes
);
522 error( "malloc() failed in %s()", __func__
);
524 min_time_copy
= *min_time
;
525 min_time
= &min_time_copy
;
527 max_time_copy
= *max_time
;
528 max_time
= &max_time_copy
;
530 for ( ; ; ) /* loop until positioned correctly */
532 int64_t desired_pos
=
533 interpolated_position( min_time
, min_pos
,
536 struct timeval tvbuf
;
538 if ( desired_pos
< 0 )
544 int64_t present_pos
= ftell64( pcap_file( p
) );
545 if ( present_pos
< 0 )
546 error ( "ftell64() failed in %s()", __func__
);
548 if ( present_pos
<= desired_pos
&&
549 (uint64_t) (desired_pos
- present_pos
) < STRAIGHT_SCAN_THRESHOLD
)
550 { /* we're close enough to just blindly read ahead */
551 status
= read_up_to( p
, desired_time
);
555 /* Undershoot the target a little bit - it's much easier to
556 * then scan straight forward than to try to read backwards ...
558 desired_pos
-= STRAIGHT_SCAN_THRESHOLD
/ 2;
559 if ( desired_pos
< min_pos
)
560 desired_pos
= min_pos
;
562 if ( fseek64( pcap_file( p
), desired_pos
, SEEK_SET
) < 0 )
563 error( "fseek64() failed in %s()", __func__
);
566 fread( (char *) buf
, 1, num_bytes
, pcap_file( p
) );
568 if ( num_bytes_read
== 0 )
569 /* This shouldn't ever happen because we try to
570 * undershoot, unless the dump file has only a
571 * couple packets in it ...
573 error( "fread() failed in %s()", __func__
);
575 if ( find_header( p
, buf
, num_bytes
, min_time
->tv_sec
,
576 max_time
->tv_sec
, &hdrpos
, &hdr
) !=
578 error( "can't find header at position %ld in dump file",
581 /* Correct desired_pos to reflect beginning of packet. */
582 desired_pos
+= (hdrpos
- buf
);
584 /* Seek to the beginning of the header. */
585 if ( fseek64( pcap_file( p
), desired_pos
, SEEK_SET
) < 0 )
586 error( "fseek64() failed in %s()", __func__
);
588 TIMEVAL_FROM_PKTHDR_TS(tvbuf
, hdr
.ts
);
589 if ( sf_timestamp_less_than( &tvbuf
, desired_time
) )
590 { /* too early in the file */
592 min_pos
= desired_pos
;
595 else if ( sf_timestamp_less_than( desired_time
, &tvbuf
) )
596 { /* too late in the file */
598 max_pos
= desired_pos
;
606 free( (char *) buf
);