Skip to content

Commit e557883

Browse files
committed
Add filtering functionality to cryptolog to strip UA/referer
1 parent 16040dd commit e557883

File tree

1 file changed

+21
-5
lines changed

1 file changed

+21
-5
lines changed

cryptolog.py

+21-5
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class UninitializedCryptoFilter(Exception):
4545
classCryptoFilter(object):
4646
"""Class to control cryptographic logging."""
4747

48-
def__init__(self, regex=None, field_list=None):
48+
def__init__(self, regex=None, field_list=None, delete_list=None):
4949
"""
5050
Args:
5151
regex: re.compile(r'(?P<A>)(?P<B>)') object, with
@@ -56,13 +56,14 @@ def __init__(self, regex=None, field_list=None):
5656
ifregex:
5757
self.SetRegex(regex)
5858
iffield_list:
59-
self.SetFields(field_list)
59+
self.SetFields(field_list, delete_list)
6060

6161
defSetRegex(self, regex):
6262
self._regex=regex
6363

64-
defSetFields(self, field_list):
64+
defSetFields(self, field_list, delete_list):
6565
self._field_list=field_list
66+
self._delete_list=delete_list
6667

6768
defIsInitialized(self):
6869
returnself._regexandself._field_list
@@ -87,6 +88,7 @@ def EncryptSingleLogEntry(self, log_entry):
8788
ifnotresults:
8889
raiseLogParseError("Log format does not match regex.")
8990
split_log=list(results.groups())
91+
9092
# TODO(dtauerbach): this is inefficient but regex
9193
# doesn't seem quite powerful enough to avoid it
9294
# by being able to bulk replace named groups.
@@ -105,6 +107,9 @@ def EncryptSingleLogEntry(self, log_entry):
105107
# could legitimately be empty
106108
continue
107109
split_log[split_log.index(res)] =self.EncryptField(res, 6)
110+
forfieldinself._delete_list:
111+
res=results.group(field)
112+
split_log[split_log.index(res)] =''
108113
return'%s\n'% (''.join(split_log))
109114

110115
defEncryptField(self, field, hashed_size):
@@ -124,6 +129,10 @@ def EncryptField(self, field, hashed_size):
124129
dest='entities',
125130
default='IP',
126131
help='comma-separated list of entities to filter')
132+
parser.add_argument('-s',
133+
action='store_true',
134+
dest='strip_uas_and_refs',
135+
default=False)
127136
args=parser.parse_args()
128137

129138
log_file=None
@@ -135,9 +144,16 @@ def EncryptField(self, field, hashed_size):
135144
p=Popen(args.command, stdin=PIPE, shell=True)
136145

137146
entities=args.entities.split(',')
138-
139147
regex=re.compile(r'(?P<IP>\d\d?\d?\.\d\d?\d?\.\d\d?\d?\.\d\d?\d?)( )(?P<OTHER>.*)')
140-
cryptor=CryptoFilter(regex, entities)
148+
apache_regex=re.compile(r'(?P<IP>\d\d?\d?\.\d\d?\d?\.\d\d?\d?\.\d\d?\d?)(?P<SAVE> - - \[.*\] ".*" \d* \d* )(?P<OTHER>.*)')
149+
delete_list= []
150+
151+
# hack for pound logs
152+
ifargs.strip_uas_and_refs:
153+
regex=apache_regex
154+
delete_list= ['OTHER']
155+
156+
cryptor=CryptoFilter(regex, entities, delete_list)
141157

142158
log=stdin.readline()
143159
while(log):

0 commit comments

Comments
 (0)
close