Skip to content

Commit f7dcb89

Browse files
committed
add url utility function.
1 parent a4e72b1 commit f7dcb89

File tree

1 file changed

+190
-0
lines changed

1 file changed

+190
-0
lines changed

utils.py

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
import urllib
2+
import sys
3+
import time
4+
import socket
5+
import struct
6+
import datetime
7+
8+
def xor(input, key):
    """
    Xor every character of an input string with a single-value key.

    Args:
        input: string whose characters are XORed one at a time.
        key: the XOR key, either an integer or a one-character string
            (which is converted with ord()).

    Returns:
        A new string, the same length as ``input``, where each character is
        chr(ord(c) ^ key).
    """
    # The summary promises a "character key", but `ord(c) ^ key` only works
    # with integers. Normalise a one-character key; anything ord() rejects
    # (already an integer, or an invalid key) is left untouched so it behaves
    # exactly as it did before.
    try:
        key = ord(key)
    except TypeError:
        pass
    return ''.join([chr(ord(c) ^ key) for c in input])
14+
15+
# decode_base64 - decodes Base64 text with (optional) custom alphabet
16+
#
17+
def decode_base64(intext, alphabet='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/', padchar='=', debug=False):
    """
    Decode Base64 text, optionally using a custom alphabet.

    Args:
        intext: the encoded text. Trailing CR/LF characters are removed
            before decoding. Only complete four-character groups are
            decoded; an incomplete trailing group is ignored.
        alphabet: the symbols of the encoding, in value order. A repeated
            symbol prints a message and terminates via sys.exit(-1).
        padchar: the padding symbol. It decodes as value 0 and, when it
            appears at the end of a group, the bytes it pads are omitted
            from the output.
        debug: when true, a hex dump of the offending group and of the whole
            input is written to stderr before an error is raised.

    Returns:
        The decoded data as a string with one character per decoded byte.

    Raises:
        ValueError: if a group contains a symbol that is neither in the
            alphabet nor the pad character.
    """
    # Build the symbol -> value table from the alphabet.
    b64DictDec = {}
    for index, symbol in enumerate(alphabet):
        if symbol in b64DictDec:
            # NOTE(review): exiting the interpreter from a helper is harsh;
            # kept so existing callers observe the same behaviour.
            sys.stdout.write('%c already exists in alphabet\n' % (symbol))
            sys.exit(-1)
        b64DictDec[symbol] = index

    # Padding carries no data bits.
    b64DictDec[padchar] = 0

    # support DOS and Unix line endings
    intext = intext.rstrip('\r\n')

    decoded = []
    for start in range(0, len(intext) - 3, 4):
        chunk = intext[start:start + 4]
        if any(symbol not in b64DictDec for symbol in chunk):
            if debug:
                sys.stderr.write(
                    "Non-alphabet character found in chunk: %s\n" % (hexPlusAscii(chunk)))
            if debug:
                sys.stderr.write("Input: %s" % hexPlusAscii(intext))
            raise ValueError('Non-alphabet character found in Base64 input')

        # Pack the four symbol values into one integer (64 is the radix).
        val = 0
        for symbol in chunk:
            val = val * 64 + b64DictDec[symbol]

        # Split the integer into three bytes; divmod keeps this in integer
        # arithmetic on every Python version.
        triple = []
        for factor in (65536, 256, 1):
            byte, val = divmod(val, factor)
            triple.append(chr(byte))

        # Each trailing pad symbol stands for one byte that was never part of
        # the original data, so drop it instead of emitting a NUL.
        if len(padchar) == 1:
            pad_count = len(chunk) - len(chunk.rstrip(padchar))
        else:
            pad_count = 0
        decoded.extend(triple[:max(0, 3 - pad_count)])

    return ''.join(decoded)
56+
57+
58+
# printableUnicode - returns unicode text minus control characters
59+
#
60+
# Author: amm
61+
# Input: intext (unicode string)
62+
# onlyText (bool)
63+
# False = print tab and line-feed chars
64+
# True = Don't print tab and line-feed chars
65+
# Output: unicode string
66+
#
67+
# Reference: http://en.wikipedia.org/wiki/Unicode_control_characters
68+
#
69+
# Characters removed by printableUnicode: code points 0-8, 11, 12, 14-0x1f,
# 0x7f and 0x80-0x9f. Tab (9), line feed (10) and carriage return (13) are
# deliberately absent from this list.
# list(range(...)) and u'%c' are used instead of bare range() concatenation
# and unichr() so the expression evaluates on both Python 2 and Python 3.
UNICODE_CONTROL_CHARS = [
    u'%c' % x
    for x in (list(range(0, 9)) + [11, 12] + list(range(14, 0x20)) +
              [0x7f] + list(range(0x80, 0xA0)))
]
71+
72+
73+
def printableUnicode(intext, onlyText=False):
    """
    Return intext as unicode with control characters removed.

    Args:
        intext: unicode string, or a value that can be converted to one.
        onlyText: when False, tab, line feed and carriage return are kept;
            when True they are removed along with the other control
            characters.

    Returns:
        The filtered text.
    """
    if not isinstance(intext, unicode):
        # Attempt to cast it
        try:
            intext = unicode(intext)
        except Exception:
            try:
                intext = unicode(intext, 'utf-8')
            except Exception:
                # NOTE(review): printableText is not defined in the visible
                # part of this module; unless it is provided elsewhere this
                # fallback raises NameError. Confirm where it should come from.
                return unicode(printableText(intext, onlyText))

    # Build the exclusion set once per call rather than once per character.
    excluded = set(UNICODE_CONTROL_CHARS)
    if onlyText:
        excluded.update([u'\t', u'\n', u'\r'])
    return ''.join([x for x in intext if x not in excluded])
87+
88+
# hexPlusAscii - returns two-column hex/ascii display text for binary input
89+
#
90+
# Author: amm
91+
# Input: indata (string/binary)
92+
# width (optional, bytes of hex to display per line)
93+
# offset (optional, byte offset for display)
94+
# Output: string
95+
#
96+
97+
98+
def hexPlusAscii(data, width=16, offset=0):
    """
    Return a two-column hex/ASCII display of binary string data.

    Args:
        data: the string to display.
        width: number of bytes shown per output line.
        offset: value added to the position printed at the start of each
            line.

    Returns:
        A string with one newline-terminated line per ``width`` bytes of
        input: an 8-digit hexadecimal position, the bytes in hexadecimal, and
        the same bytes as text with non-displayable characters shown as '.'.
        Empty input yields an empty string.
    """
    # 256-entry translation table: a character maps to itself when its repr()
    # is just the character in quotes (length 3), otherwise to '.'.
    FILTER_hex_display = ''.join(
        [chr(x) if len(repr(chr(x))) == 3 else '.' for x in range(256)])

    # Pad the hex column according to the requested width (three characters
    # per byte plus one). The previous fixed 16 * 3 + 1 misaligned the text
    # column whenever width was larger than 16.
    hex_column = width * 3 + 1

    lines = []
    for i in range(0, len(data), width):
        s = data[i:i + width]
        hexa = ' '.join(["%02X" % ord(x) for x in s])
        printable = s.translate(FILTER_hex_display)
        lines.append("%08X %-*s %s\n" % (i + offset, hex_column, hexa, printable))
    return ''.join(lines)
110+
111+
# URLDataToParameterDict - parses URL format string (i.e. the stuff after
112+
# the question mark) and returns dictionary
113+
# of parameters
114+
#
115+
# Author: amm
116+
# Input: urldata (string)
117+
# Output: dictionary, indexed by parameter names
118+
# Requires: urllib
119+
#
120+
121+
122+
def URLDataToParameterDict(data):
    """
    Parse URL parameter data (the part after the question mark) into a dict.

    Args:
        data: string of '&'-separated key=value parameters.

    Returns:
        A dictionary mapping each unquoted parameter name to its unquoted
        value, or None when data contains a space character.
    """
    if ' ' in data:
        # Data containing a space is not parsed at all.
        return None
    # Only the key=value tokens are of interest; bare tokens are discarded.
    _, pairs = strtok(data, sep='&')
    return dict(
        (urllib.unquote(name), urllib.unquote(value))
        for name, value in pairs.items())
126+
127+
# strtok - string tokenizer a lot like C strtok
128+
# Author: twp
129+
# Input: a string, optionally a param sep and a key/value sep, as_list will force a list even if 0/1 params
130+
# Output: tuple of: None or string or list of params, dictionary indexed by key=value names of k/v params
131+
# Example : a,b,c=d,e=f returns ([a,b],{c:d,e:f})
132+
133+
134+
def strtok(data, sep=',', kvsep='=', as_list=False):
    """
    Split data into bare parameters and key/value parameters.

    Args:
        data: the string to tokenize.
        sep: separator between parameters.
        kvsep: separator between a key and its value; only the first
            occurrence inside a token is used.
        as_list: when true, the bare parameters are always returned as a
            list, even when there are none or only one.

    Returns:
        A (params, kwparams) tuple. kwparams is a dictionary of the key/value
        tokens. params holds the bare tokens: None when there are none, the
        token itself when there is exactly one, otherwise a list (always a
        list when as_list is true). Keys, values and bare tokens are stripped
        of surrounding whitespace.

    Example: a,b,c=d,e=f returns ([a,b],{c:d,e:f})
    """
    positional = []
    keyed = {}
    for token in data.split(sep):
        if kvsep not in token:
            positional.append(token.strip())
            continue
        name, _, value = token.partition(kvsep)
        keyed[name.strip()] = value.strip()

    if as_list:
        return positional, keyed
    if not positional:
        return None, keyed
    if len(positional) == 1:
        return positional[0], keyed
    return positional, keyed
149+
150+
# mktime: if given a python datetime object, convert it back to a POSIX timestamp
151+
# utctime: return UTC POSIX timestamp
152+
# Author tparker
153+
154+
155+
def mktime(ts):
    """
    Convert a datetime object to a timestamp; pass anything else through.

    Args:
        ts: a datetime.datetime (or subclass) instance, or any other value.

    Returns:
        time.mktime() of the datetime's time tuple when ts is a datetime,
        otherwise ts unchanged.
    """
    # isinstance rather than an exact type comparison, so datetime subclasses
    # are converted too instead of being returned as-is.
    if isinstance(ts, datetime.datetime):
        return time.mktime(ts.timetuple())
    return ts
159+
160+
161+
def utctime():
    """
    Return time.mktime() applied to the current time.gmtime() tuple.

    NOTE(review): time.mktime() is documented as taking a local-time tuple,
    so on hosts whose local zone is not UTC this value presumably differs
    from time.time() by the zone offset. Confirm that callers expect this
    scale before changing it.
    """
    utc_fields = time.gmtime()
    return time.mktime(utc_fields)
163+
164+
# xorStringDecode(key, data) - XOR data against a repeating key string
165+
166+
167+
def xorStringDecode(key=None, data=None):
    """
    XOR each character of data with the matching character of a repeating key.

    Args:
        key: non-empty key string; it is reused cyclically when shorter than
            data.
        data: the string to decode. None or an empty string gives ''.

    Returns:
        The decoded string, the same length as data.

    Raises:
        ValueError: if data is non-empty but key is None or empty.
    """
    if not data:
        # Nothing to decode (also covers the data=None default).
        return ''
    if not key:
        # Previously surfaced as an obscure TypeError / ZeroDivisionError.
        raise ValueError('xorStringDecode requires a non-empty key')
    keylen = len(key)
    return ''.join(
        [chr(ord(ch) ^ ord(key[pos % keylen])) for pos, ch in enumerate(data)])
172+
173+
174+
def iptoint(ip):
    """
    Convert a dotted IPv4 address string to an integer.

    The address is packed with socket.inet_aton() and the four packed bytes
    are read back as one unsigned 32-bit value in network byte order.
    """
    packed = socket.inet_aton(ip)
    (value,) = struct.unpack('!L', packed)
    return value
175+
176+
# getHeader - Extracts header information from dpkt HTTP request or response
177+
# objects.
178+
179+
def getHeader(request_or_response, header_name):
    """
    Return the value of one header from a request or response object.

    Args:
        request_or_response: object exposing a ``headers`` mapping.
        header_name: key to look up in that mapping.

    Returns:
        The header value when it is a string. When it is a list, the distinct
        values joined by ', ' in order of first appearance. An empty string
        when the lookup fails or the value has any other type.
    """
    try:
        httpHdr = request_or_response.headers[header_name]
    except Exception:
        # Missing header, no headers attribute, unusable object: no value.
        return ''
    if isinstance(httpHdr, str):
        return httpHdr
    if isinstance(httpHdr, list):
        # De-duplicate while preserving order; joining a set() gave an
        # arbitrary ordering of the values.
        seen = set()
        unique = []
        for value in httpHdr:
            if value not in seen:
                seen.add(value)
                unique.append(value)
        return ', '.join(unique)
    return ''

0 commit comments

Comments
 (0)