- Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathcryptolog.py
executable file
·204 lines (167 loc) · 6.56 KB
/
cryptolog.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
#!/usr/bin/env python
import hashlib
import re
from argparse import ArgumentParser
from base64 import b64encode
from hmac import HMAC
from os import urandom
from subprocess import Popen, PIPE
from sys import stdin
from syslog import syslog, LOG_CRIT
from time import localtime
from types import NoneType
# State shared with salt(): the random salt currently in use and the
# (year, day-of-year) pair it was generated for.  Both start out unset.
salt_data = salt_day = None

# Number of random bytes requested for each newly generated salt.
salt_size = 16

# Per-entity sizes keyed by entity name.
# NOTE(review): nothing in the visible code reads this table; presumably it
# is meant to give the truncated hash length for each entity -- confirm.
entities_to_hashed_sizes = {
    'IP': 6,
    'UA': 22,
    'TIMESTAMP': 22,
    'TARGET_URL': 6,
}
def salt():
    """Return the module-wide random salt, renewing it when the local day changes.

    The salt is salt_size bytes from urandom().  It is cached in the
    module globals salt_data / salt_day and regenerated the first time this
    function is called on a (year, day-of-year) different from the cached one.
    """
    global salt_data, salt_day
    current = localtime()
    today = (current.tm_year, current.tm_yday)
    if salt_day == today:
        # Still the same local day: keep handing out the cached salt.
        return salt_data
    salt_data = urandom(salt_size)
    salt_day = today
    return salt_data
def hash_entity(entity, hashed_size, salt_param=None):
    """Return a truncated, base64-encoded HMAC-MD5 of entity.

    Args:
      entity: value to hash (e.g. an IP address); text is encoded as UTF-8.
      hashed_size: number of leading base64 characters to keep.
      salt_param: HMAC key; when empty or None the salt returned by
          salt() is used instead.  Text is encoded as UTF-8.

    Returns:
      The first hashed_size characters of the base64 encoding of the
      digest, in whatever string type b64encode() returns.
    """
    salt_var = salt_param or salt()
    # HMAC works on bytes; encode text so both kinds of input are accepted.
    if not isinstance(salt_var, bytes):
        salt_var = salt_var.encode('utf-8')
    if not isinstance(entity, bytes):
        entity = entity.encode('utf-8')
    # Name the digest explicitly: the implicit MD5 default was removed in
    # Python 3.8, and spelling it out keeps the output unchanged.
    digest = HMAC(salt_var, entity, hashlib.md5).digest()
    return b64encode(digest)[:hashed_size]
class LogParseError(Exception):
    """Raised when a log line does not match the configured regex."""
class UninitializedCryptoFilter(Exception):
    """Raised when a CryptoFilter is used before its regex and fields are set."""
class CryptoFilter(object):
    """Class to control cryptographic logging.

    A compiled regex with named groups splits each log line into fields.
    Fields named in the field list are replaced by a truncated salted hash,
    fields named in the delete list are replaced by '-', and the named
    fields are then joined with single spaces.
    """

    # Number of hash characters kept for every encrypted field.
    _HASHED_SIZE = 6

    def __init__(self, regex=None, field_list=None, delete_list=None):
        """
        Args:
          regex: re.compile(r'(?P<A>)(?P<B>)') object, with
              named groups
          field_list: what to encrypt that matches named groups
              above, e.g. ["IP", "UA"]
          delete_list: names of the groups whose value is replaced
              by '-'; None means no field is blanked
        """
        # Give every attribute a value up front so IsInitialized() and
        # Reset() work on a filter that was built without arguments.
        self._salt = None
        self._regex = None
        self._named = None
        self._field_list = None
        self._delete_list = []
        if regex:
            self.SetRegex(regex)
        if field_list:
            self.SetFields(field_list, delete_list)

    def SetRegex(self, regex):
        """Store the compiled regex and the ordered names of its groups."""
        self._regex = regex
        # groupindex maps group name -> group number; sorting on the number
        # yields the names in the order the groups appear in the pattern.
        ordered = sorted(regex.groupindex.items(), key=lambda item: item[1])
        self._named = [name for name, _ in ordered]

    def SetFields(self, field_list, delete_list=None):
        """Set the group names to hash and the group names to blank out."""
        self._field_list = field_list
        self._delete_list = [] if delete_list is None else delete_list

    def IsInitialized(self):
        """Return True once both a regex and a non-empty field list are set."""
        return bool(self._regex and self._field_list)

    def Reset(self):
        """Forget the regex and field lists; the salt is left untouched."""
        self._regex = None
        self._named = None
        self._field_list = None
        self._delete_list = []

    def _FieldPosition(self, name):
        """Return the position of group `name` among the named groups.

        Raises IndexError for an unknown name, the same exception type the
        match object raises for a group that does not exist.
        """
        try:
            return self._named.index(name)
        except ValueError:
            raise IndexError('no such group: %r' % (name,))

    def EncryptSingleLogEntry(self, log_entry):
        """From self._regex, picks out relevant fields from
        self._field_list and replaces them with crypt hashes.

        Args:
          log_entry: one log line.

        Returns:
          crypto_log_entry: the values of the named groups, in pattern
          order, joined by single spaces and terminated by a newline.
          Groups that did not take part in the match are left out.

        Raises:
          UninitializedCryptoFilter: no regex or no field list is set.
          LogParseError: log_entry does not match the regex.
          IndexError: a field or delete name is not a group of the regex.
        """
        # Make sure we are initialized
        if not self.IsInitialized():
            raise UninitializedCryptoFilter("Not initialized")
        results = self._regex.search(log_entry)
        if not results:
            raise LogParseError("Log format does not match regex.")

        # One slot per named group, in pattern order.  Slots are addressed by
        # the group's position rather than by searching for its value, so two
        # groups that captured the same text cannot be confused.
        split_log = [results.group(name) for name in self._named]

        for field in self._field_list:
            res = results.group(field)
            if not res:
                # TODO(dtauerbach): Figure out the convention here
                # probably a warning is all that we want since fields
                # could legitimately be empty
                continue
            split_log[self._FieldPosition(field)] = self.EncryptField(
                res, self._HASHED_SIZE)

        for field in self._delete_list:
            split_log[self._FieldPosition(field)] = '-'

        # Groups that did not participate in the match are None; drop them.
        kept = [value for value in split_log if value is not None]
        return '%s\n' % (' '.join(kept))

    def SetSaltfile(self, salt_file):
        """Use the full contents of salt_file as the salt for hashing."""
        with open(salt_file, 'r') as handle:
            self._salt = handle.read()

    def EncryptField(self, field, hashed_size):
        """Encrypt relevant field (e.g. IP) using salted hash.

        Args:
          field: the value to hash.
          hashed_size: number of hash characters to keep.

        Returns:
          The result of hash_entity() for this field, keyed with the salt
          loaded by SetSaltfile() if any (None otherwise).
        """
        return hash_entity(field, hashed_size, self._salt)
if __name__ == "__main__":
    # Command-line entry point: read log lines from stdin, run each one
    # through a CryptoFilter, and write the result to a file (-w) and/or to
    # the stdin of an external command (-c).
    parser = ArgumentParser(description='A program to encrypt the IP addresses in web server logs, to be used within an Apache CustomLog line. It assumes that the IP address is the first space-separated field in the log line. Input comes in the form of log lines from stdin.')
    parser.add_argument('-r',
                        dest='regex',
                        help='file providing regex for log format')
    parser.add_argument('-w',
                        dest='write',
                        help='filename to write logs to')
    parser.add_argument('-c',
                        dest='command',
                        help='pipe logs to this external program')
    parser.add_argument('-e',
                        dest='entities',
                        default='IP',
                        help='comma-separated list of entities to filter')
    # -s switches to the built-in Apache-style regex and blanks its OTHER group.
    parser.add_argument('-s',
                        action='store_true',
                        dest='strip_uas_and_refs',
                        default=False)
    args = parser.parse_args()

    log_file = None
    p = None
    try:
        if args.write is not None:
            log_file = open(args.write, 'ab')
        if args.command is not None:
            # The command string comes from the -c option and is handed to
            # the shell as given.
            p = Popen(args.command, stdin=PIPE, shell=True)

        entities = args.entities.split(',')

        # Raw strings so the regex backslashes reach re.compile() untouched.
        ipv6_exp = r'([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])'
        ipv4_exp = r'\d\d?\d?\.\d\d?\d?\.\d\d?\d?\.\d\d?\d?'

        if args.regex:
            # The regex file may use *IPV6* / *IPV4* as placeholders for the
            # address patterns above.
            with open(args.regex, 'r') as regex_file:
                expression = regex_file.read().replace('*IPV6*', ipv6_exp).replace('*IPV4*', ipv4_exp)
            regex = re.compile(expression)
        else:
            regex = re.compile(r'(?P<IP>' + ipv4_exp + '|' + ipv6_exp + ')( )(?P<OTHER>.*)')

        delete_list = []
        # hack for pound logs
        if args.strip_uas_and_refs:
            # todo:dta improve this regex for common log format
            regex = re.compile(r'(?P<IP>' + ipv4_exp + '|' + ipv6_exp + r') (?P<SAVE1>-) (?P<SAVE2>-) (?P<DATETIME>\[.*\]) (?P<REQUEST>".*") (?P<SAVE3>\d*|\-) (?P<SAVE4>\d*|\-) (?P<OTHER>.*)')
            delete_list = ['OTHER']

        cryptor = CryptoFilter(regex, entities, delete_list)

        # Read one line per readline() call until it returns '' at end of
        # input; every filtered line is flushed to its outputs immediately.
        for log in iter(stdin.readline, ''):
            crypted_log = cryptor.EncryptSingleLogEntry(log)
            if log_file is not None:
                log_file.write(crypted_log)
                log_file.flush()
            if p is not None:
                p.stdin.write(crypted_log)
                p.stdin.flush()
    finally:
        # Release the outputs even when a line fails to parse or a write
        # fails, so the file is closed and the child process is reaped.
        if log_file is not None:
            log_file.close()
        if p is not None:
            p.stdin.close()
            p.wait()