Skip to content

Commit 28727e9

Browse files
committed
Allowing user to specify file containing regex
1 parent f698b36 commit 28727e9

File tree

2 files changed

+24
-1
lines changed

2 files changed

+24
-1
lines changed

README.md

+14
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,20 @@ Here are some example CustomLog lines for your Apache config files:
2222

2323
Notice that if you're using the `-c` option, you need to escape spaces in the command you're running with three backslashes.
2424

25+
## Custom Regex File
26+
27+
You can use the `-r` option to specify a file containing a regular expression that describes the log entry format. The tokens `*IPV6*` and `*IPV4*` can be used as substitutes for the full regular expressions matching those respective address formats. The group `IP` will be anonymized, so for instance if the file contains the following:
28+
29+
(?P<IP>*IPV6*)(, )(?P<OTHER>.*)
30+
31+
Then the entry
32+
33+
::ffff:8.8.8.8, 10.10.10.10 - - [14/Oct/2015:17:32:51 -0700] "GET /some/url HTTP/1.1" 200 13160
34+
35+
will be anonymized as
36+
37+
d68qCQ 10.10.10.10 - - [14/Oct/2015:17:32:51 -0700] "GET /some/url HTTP/1.1" 200 13160
38+
2539
## Requirements
2640

2741
- Python 2.7

cryptolog.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ def EncryptField(self, field, hashed_size):
133133

134134
if__name__=="__main__":
135135
parser=ArgumentParser(description='A program to encrypt the IP addresses in web server logs, to be used within an Apache CustomLog line. It assumes that the IP address is the first space-separated field in the log line. Input comes in the form of log lines from stdin.')
136+
parser.add_argument('-r',
137+
dest='regex',
138+
help='file providing regex for log format')
136139
parser.add_argument('-w',
137140
dest='write',
138141
help='filename to write logs to')
@@ -160,7 +163,13 @@ def EncryptField(self, field, hashed_size):
160163
entities=args.entities.split(',')
161164
ipv6_exp='([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])'
162165
ipv4_exp='\d\d?\d?\.\d\d?\d?\.\d\d?\d?\.\d\d?\d?'
163-
regex=re.compile(r'(?P<IP>'+ipv4_exp+'|'+ipv6_exp+')( )(?P<OTHER>.*)')
166+
ifargs.regex:
167+
withopen(args.regex, 'r') asregex_file:
168+
expression=regex_file.read().replace('*IPV6*', ipv6_exp).replace('*IPV4*', ipv4_exp)
169+
regex=re.compile(expression)
170+
else:
171+
regex=re.compile(r'(?P<IP>'+ipv4_exp+'|'+ipv6_exp+')( )(?P<OTHER>.*)')
172+
164173
# todo:dta improve this regex for common log format
165174
apache_regex=re.compile(r'(?P<IP>'+ipv4_exp+'|'+ipv6_exp+') (?P<SAVE1>-) (?P<SAVE2>-) (?P<DATETIME>\[.*\]) (?P<REQUEST>".*") (?P<SAVE3>\d*|\-) (?P<SAVE4>\d*|\-) (?P<OTHER>.*)')
166175
delete_list= []

0 commit comments

Comments
 (0)
close