- Notifications
You must be signed in to change notification settings - Fork 1.6k
/
Copy pathInvertedIndex.cs
81 lines (72 loc) · 2.42 KB
/
InvertedIndex.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
usingSystem.Collections.Generic;
usingSystem.Linq;
namespaceDataStructures;
/// <summary>
/// Inverted index is the simplest form of document indexing,
/// allowing boolean queries to be performed on text data.
///
/// This implementation is deliberately simplified to make the indexing process
/// easy to follow: it works on plain string inputs, splits content on single
/// space characters and matches terms exactly (case-sensitively).
/// </summary>
public class InvertedIndex
{
    // Maps each term to the names of the sources containing it, in the order the sources were indexed.
    private readonly Dictionary<string, List<string>> invertedIndex = new();

    /// <summary>
    /// Adds a source to the inverted index: every distinct word of the content
    /// is associated with the name of the source.
    /// </summary>
    /// <param name="sourceName">Name of the source.</param>
    /// <param name="sourceContent">Content of the source; words are separated by single spaces.</param>
    public void AddToIndex(string sourceName, string sourceContent)
    {
        // Consecutive, leading or trailing spaces make Split produce empty tokens;
        // they are not words, so they must not end up in the index.
        var words = sourceContent
            .Split(' ')
            .Where(word => word.Length > 0)
            .Distinct();

        foreach (var word in words)
        {
            // A single TryGetValue replaces the ContainsKey + indexer double lookup.
            if (invertedIndex.TryGetValue(word, out var sources))
            {
                sources.Add(sourceName);
            }
            else
            {
                invertedIndex.Add(word, new List<string> { sourceName });
            }
        }
    }

    /// <summary>
    /// Returns the names of the sources that contain ALL of the given terms.
    /// </summary>
    /// <param name="terms">List of terms.</param>
    /// <returns>
    /// Source names. The result is empty when <paramref name="terms"/> is empty
    /// or when at least one term is not present in the index.
    /// </returns>
    public IEnumerable<string> And(IEnumerable<string> terms)
    {
        var postings = new List<List<string>>();

        foreach (var term in terms)
        {
            // A null or unknown term matches no source, so the whole conjunction is empty.
            if (term is null || !invertedIndex.TryGetValue(term, out var sources))
            {
                return Enumerable.Empty<string>();
            }

            postings.Add(sources);
        }

        // No terms at all: there is nothing to intersect (previously this case threw).
        if (postings.Count == 0)
        {
            return Enumerable.Empty<string>();
        }

        // Start from a copy of the first posting list so the index itself is never mutated.
        var intersection = new HashSet<string>(postings[0]);
        foreach (var other in postings.Skip(1))
        {
            intersection.IntersectWith(other);
        }

        return intersection;
    }

    /// <summary>
    /// Returns the names of the sources that contain AT LEAST ONE of the given terms.
    /// </summary>
    /// <param name="terms">List of terms.</param>
    /// <returns>Source names, without duplicates.</returns>
    public IEnumerable<string> Or(IEnumerable<string> terms)
    {
        var sources = new List<string>();

        foreach (var term in terms)
        {
            // Direct dictionary lookup; a null or unknown term simply contributes nothing.
            if (term is not null && invertedIndex.TryGetValue(term, out var matches))
            {
                sources.AddRange(matches);
            }
        }

        return sources.Distinct();
    }
}