- Notifications
You must be signed in to change notification settings - Fork 1.6k
/
Copy pathRabinKarp.cs
81 lines (71 loc) · 3.05 KB
/
RabinKarp.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
usingSystem;
usingSystem.Collections.Generic;
namespaceAlgorithms.Strings.PatternMatching;
/// <summary>
/// The idea: You calculate the hash for the pattern <c>p</c> and the hash values for all the prefixes of the text
/// <c>t</c>.
/// Now, you can compare a substring in constant time using the calculated hashes.
/// time complexity: O(p + t),
/// space complexity: O(t),
/// where t - text length
/// p - pattern length.
/// </summary>
publicstaticclassRabinKarp
{
/// <summary>
/// Finds the index of all occurrences of the pattern <c>p</c> int <c>t</c>.
/// </summary>
/// <returns>List of starting indices of the pattern in the text.</returns>
publicstaticList<int>FindAllOccurrences(stringtext,stringpattern)
{
// Prime number
constulongp=65537;
// Modulo coefficient
constulongm=(ulong)1e9+7;
// p_pow[i] = P^i mod M
ulong[]pPow=newulong[Math.Max(pattern.Length,text.Length)];
pPow[0]=1;
for(vari=1;i<pPow.Length;i++)
{
pPow[i]=pPow[i-1]*p%m;
}
// hash_t[i] is the sum of the previous hash values of the letters (t[0], t[1], ..., t[i-1]) and the hash value of t[i] itself (mod M).
// The hash value of a letter t[i] is equal to the product of t[i] and p_pow[i] (mod M).
ulong[]hashT=newulong[text.Length+1];
for(vari=0;i<text.Length;i++)
{
hashT[i+1]=(hashT[i]+text[i]*pPow[i])%m;
}
// hash_s is equal to sum of the hash values of the pattern (mod M).
ulonghashS=0;
for(vari=0;i<pattern.Length;i++)
{
hashS=(hashS+pattern[i]*pPow[i])%m;
}
// In the next step you iterate over the text with the pattern.
List<int>occurrences=new();
for(vari=0;i+pattern.Length-1<text.Length;i++)
{
// In each step you calculate the hash value of the substring to be tested.
// By storing the hash values of the letters as a prefixes you can do this in constant time.
varcurrentHash=(hashT[i+pattern.Length]+m-hashT[i])%m;
// Now you can compare the hash value of the substring with the product of the hash value of the pattern and p_pow[i].
if(currentHash==hashS*pPow[i]%m)
{
// If the hash values are identical, do a double-check in case a hash collision occurs.
varj=0;
while(j<pattern.Length&&text[i+j]==pattern[j])
{
++j;
}
if(j==pattern.Length)
{
// If the hash values are identical and the double-check passes, a substring was found that matches the pattern.
// In this case you add the index i to the list of occurences.
occurrences.Add(i);
}
}
}
returnoccurrences;
}
}