forked from TheAlgorithms/Python
- Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaho_corasick.py
96 lines (85 loc) · 3.5 KB
/
aho_corasick.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
from __future__ importannotations
fromcollectionsimportdeque
classAutomaton:
def__init__(self, keywords: list[str]):
self.adlist: list[dict] = []
self.adlist.append(
{"value": "", "next_states": [], "fail_state": 0, "output": []}
)
forkeywordinkeywords:
self.add_keyword(keyword)
self.set_fail_transitions()
deffind_next_state(self, current_state: int, char: str) ->int|None:
forstateinself.adlist[current_state]["next_states"]:
ifchar==self.adlist[state]["value"]:
returnstate
returnNone
defadd_keyword(self, keyword: str) ->None:
current_state=0
forcharacterinkeyword:
next_state=self.find_next_state(current_state, character)
ifnext_stateisNone:
self.adlist.append(
{
"value": character,
"next_states": [],
"fail_state": 0,
"output": [],
}
)
self.adlist[current_state]["next_states"].append(len(self.adlist) -1)
current_state=len(self.adlist) -1
else:
current_state=next_state
self.adlist[current_state]["output"].append(keyword)
defset_fail_transitions(self) ->None:
q: deque=deque()
fornodeinself.adlist[0]["next_states"]:
q.append(node)
self.adlist[node]["fail_state"] =0
whileq:
r=q.popleft()
forchildinself.adlist[r]["next_states"]:
q.append(child)
state=self.adlist[r]["fail_state"]
while (
self.find_next_state(state, self.adlist[child]["value"]) isNone
andstate!=0
):
state=self.adlist[state]["fail_state"]
self.adlist[child]["fail_state"] =self.find_next_state(
state, self.adlist[child]["value"]
)
ifself.adlist[child]["fail_state"] isNone:
self.adlist[child]["fail_state"] =0
self.adlist[child]["output"] = (
self.adlist[child]["output"]
+self.adlist[self.adlist[child]["fail_state"]]["output"]
)
defsearch_in(self, string: str) ->dict[str, list[int]]:
"""
>>> A = Automaton(["what", "hat", "ver", "er"])
>>> A.search_in("whatever, err ... , wherever")
{'what': [0], 'hat': [1], 'ver': [5, 25], 'er': [6, 10, 22, 26]}
"""
result: dict= {} # returns a dict with keywords and list of its occurrences
current_state=0
foriinrange(len(string)):
while (
self.find_next_state(current_state, string[i]) isNone
andcurrent_state!=0
):
current_state=self.adlist[current_state]["fail_state"]
next_state=self.find_next_state(current_state, string[i])
ifnext_stateisNone:
current_state=0
else:
current_state=next_state
forkeyinself.adlist[current_state]["output"]:
ifkeynotinresult:
result[key] = []
result[key].append(i-len(key) +1)
returnresult
if__name__=="__main__":
importdoctest
doctest.testmod()