- Notifications
You must be signed in to change notification settings - Fork 1.6k
/
Copy pathCountMinSketch.cs
77 lines (68 loc) · 2.52 KB
/
CountMinSketch.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
usingSystem;
namespaceDataStructures.Probabilistic;
/// <summary>
/// A Count-Min Sketch: a fixed-size table of counters used to estimate how many times
/// an item has been inserted. Each item is mapped to one counter per row; a query returns
/// the smallest of those counters, so collisions with other items can only inflate the
/// estimate, never reduce it (as long as no counter wraps around).
/// </summary>
/// <typeparam name="T">Type of the items being counted; hashed via <see cref="object.GetHashCode"/>.</typeparam>
public class CountMinSketch<T> where T : notnull
{
    // One counter row per hash function; every row has the same width.
    private readonly int[][] sketch;
    private readonly int numHashes;

    /// <summary>
    /// Initializes a new instance of the <see cref="CountMinSketch{T}"/> class using the dimensions
    /// supplied by the caller.
    /// </summary>
    /// <param name="width">The width of the sketch (number of counters per row).</param>
    /// <param name="numHashes">The number of hashes (rows) to use in the sketch.</param>
    public CountMinSketch(int width, int numHashes)
    {
        this.numHashes = numHashes;
        sketch = new int[numHashes][];
        for (var row = 0; row < numHashes; row++)
        {
            sketch[row] = new int[width];
        }
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="CountMinSketch{T}"/> class, deriving its
    /// dimensions from the desired error bounds:
    /// width = ceil(e / errorRate) and numHashes = ceil(ln(1.0 / errorProb)).
    /// </summary>
    /// <param name="errorRate">The amount of acceptable over counting for the sketch.</param>
    /// <param name="errorProb">The probability that an item will be over counted.</param>
    public CountMinSketch(double errorRate, double errorProb)
        : this(
            (int)Math.Ceiling(Math.E / errorRate),
            (int)Math.Ceiling(Math.Log(1.0 / errorProb)))
    {
    }

    /// <summary>
    /// Inserts the provided item into the sketch by incrementing its counter in every row.
    /// </summary>
    /// <param name="item">Item to insert.</param>
    public void Insert(T item)
    {
        var initialHash = item.GetHashCode();
        for (var row = 0; row < numHashes; row++)
        {
            sketch[row][GetSlot(row, initialHash)]++;
        }
    }

    /// <summary>
    /// Queries the estimated number of times the given item has been inserted into the sketch.
    /// </summary>
    /// <param name="item">Item whose count is looked up.</param>
    /// <returns>
    /// The smallest counter associated with the item across all rows. If the sketch has no rows,
    /// <see cref="int.MaxValue"/> is returned.
    /// </returns>
    public int Query(T item)
    {
        var initialHash = item.GetHashCode();
        var min = int.MaxValue;
        for (var row = 0; row < numHashes; row++)
        {
            min = Math.Min(min, sketch[row][GetSlot(row, initialHash)]);
        }

        return min;
    }

    /// <summary>
    /// Maps an item's hash code to a counter index within row <paramref name="i"/>.
    /// The per-row hash is (i + 1) * initialHash with 32-bit wrap-around.
    /// </summary>
    /// <param name="i">Zero-based row index.</param>
    /// <param name="initialHash">Hash code of the item.</param>
    /// <returns>A non-negative index smaller than the row width.</returns>
    private int GetSlot(int i, int initialHash)
    {
        // Wrap-around on overflow is intentional; make it explicit so the result does not
        // depend on the project's checked/unchecked compiler setting.
        var rowHash = unchecked((i + 1) * initialHash);

        // Math.Abs(int) throws OverflowException for int.MinValue, so take the absolute value
        // in 64-bit. For every other value the resulting slot is the same as with 32-bit Abs.
        return (int)(Math.Abs((long)rowHash) % sketch[0].Length);
    }
}