- Notifications
You must be signed in to change notification settings - Fork 46.7k
/
Copy pathrabin_karp.py
91 lines (75 loc) · 2.55 KB
/
rabin_karp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# Numbers of alphabet which we call base
alphabet_size=256
# Modulus to hash a string
modulus=1000003
defrabin_karp(pattern: str, text: str) ->bool:
"""
The Rabin-Karp Algorithm for finding a pattern within a piece of text
with complexity O(nm), most efficient when it is used with multiple patterns
as it is able to check if any of a set of patterns match a section of text in o(1)
given the precomputed hashes.
This will be the simple version which only assumes one pattern is being searched
for but it's not hard to modify
1) Calculate pattern hash
2) Step through the text one character at a time passing a window with the same
length as the pattern
calculating the hash of the text within the window compare it with the hash
of the pattern. Only testing equality if the hashes match
"""
p_len=len(pattern)
t_len=len(text)
ifp_len>t_len:
returnFalse
p_hash=0
text_hash=0
modulus_power=1
# Calculating the hash of pattern and substring of text
foriinrange(p_len):
p_hash= (ord(pattern[i]) +p_hash*alphabet_size) %modulus
text_hash= (ord(text[i]) +text_hash*alphabet_size) %modulus
ifi==p_len-1:
continue
modulus_power= (modulus_power*alphabet_size) %modulus
foriinrange(t_len-p_len+1):
iftext_hash==p_hashandtext[i : i+p_len] ==pattern:
returnTrue
ifi==t_len-p_len:
continue
# Calculate the https://en.wikipedia.org/wiki/Rolling_hash
text_hash= (
(text_hash-ord(text[i]) *modulus_power) *alphabet_size
+ord(text[i+p_len])
) %modulus
returnFalse
deftest_rabin_karp() ->None:
"""
>>> test_rabin_karp()
Success.
"""
# Test 1)
pattern="abc1abc12"
text1="alskfjaldsabc1abc1abc12k23adsfabcabc"
text2="alskfjaldsk23adsfabcabc"
assertrabin_karp(pattern, text1)
assertnotrabin_karp(pattern, text2)
# Test 2)
pattern="ABABX"
text="ABABZABABYABABX"
assertrabin_karp(pattern, text)
# Test 3)
pattern="AAAB"
text="ABAAAAAB"
assertrabin_karp(pattern, text)
# Test 4)
pattern="abcdabcy"
text="abcxabcdabxabcdabcdabcy"
assertrabin_karp(pattern, text)
# Test 5)
pattern="Lü"
text="Lüsai"
assertrabin_karp(pattern, text)
pattern="Lue"
assertnotrabin_karp(pattern, text)
print("Success.")
if__name__=="__main__":
test_rabin_karp()