forked from TheAlgorithms/Python
- Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlevenshtein_distance.py
125 lines (101 loc) · 4.18 KB
/
levenshtein_distance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
fromcollections.abcimportCallable
def levenshtein_distance(first_word: str, second_word: str) -> int:
    """
    Implementation of the Levenshtein distance in Python.

    The table of edit costs is built row by row; only the previous row and
    the row currently being filled are kept.

    :param first_word: the first word to measure the difference.
    :param second_word: the second word to measure the difference.
    :return: the levenshtein distance between the two words.

    Examples:
    >>> levenshtein_distance("planet", "planetary")
    3
    >>> levenshtein_distance("", "test")
    4
    >>> levenshtein_distance("book", "back")
    2
    >>> levenshtein_distance("book", "book")
    0
    >>> levenshtein_distance("test", "")
    4
    >>> levenshtein_distance("", "")
    0
    >>> levenshtein_distance("orchestration", "container")
    10
    """
    # The longer word should come first, so every row has len(shorter) + 1
    # cells. Swapping locally avoids an extra recursive call.
    if len(first_word) < len(second_word):
        first_word, second_word = second_word, first_word

    # Against an empty word the distance is the length of the other word.
    if not second_word:
        return len(first_word)

    # Row 0: cost of building each prefix of second_word from the empty string.
    previous_row = list(range(len(second_word) + 1))

    for i, c1 in enumerate(first_word):
        # First cell: cost of reducing the first i + 1 characters to nothing.
        current_row = [i + 1]

        for j, c2 in enumerate(second_word):
            # Calculate insertions, deletions, and substitutions
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            # (c1 != c2) is a bool, adding 1 only when the characters differ
            substitutions = previous_row[j] + (c1 != c2)

            # Get the minimum to append to the current row
            current_row.append(min(insertions, deletions, substitutions))

        # Store the previous row
        previous_row = current_row

    # Returns the last element (distance)
    return previous_row[-1]
def levenshtein_distance_optimized(first_word: str, second_word: str) -> int:
    """
    Compute the Levenshtein distance between two words (strings).

    The function is optimized for efficiency by modifying rows in place:
    two row buffers are allocated once and swapped after each character of
    the longer word, so no new list is created inside the loops.

    :param first_word: the first word to measure the difference.
    :param second_word: the second word to measure the difference.
    :return: the Levenshtein distance between the two words.

    Examples:
    >>> levenshtein_distance_optimized("planet", "planetary")
    3
    >>> levenshtein_distance_optimized("", "test")
    4
    >>> levenshtein_distance_optimized("book", "back")
    2
    >>> levenshtein_distance_optimized("book", "book")
    0
    >>> levenshtein_distance_optimized("test", "")
    4
    >>> levenshtein_distance_optimized("", "")
    0
    >>> levenshtein_distance_optimized("orchestration", "container")
    10
    """
    # Keep the longer word first so the row buffers are sized by the shorter
    # word. Swapping locally avoids an extra recursive call.
    if len(first_word) < len(second_word):
        first_word, second_word = second_word, first_word

    # Against an empty word the distance is the length of the other word.
    if not second_word:
        return len(first_word)

    row_length = len(second_word) + 1
    # Row 0: cost of building each prefix of second_word from the empty string.
    previous_row = list(range(row_length))
    # Scratch buffer; every cell is overwritten before it is read.
    current_row = [0] * row_length

    for i, c1 in enumerate(first_word, start=1):
        # First cell: cost of reducing the first i characters to nothing.
        current_row[0] = i

        for j, c2 in enumerate(second_word):
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            # (c1 != c2) is a bool, adding 1 only when the characters differ
            substitutions = previous_row[j] + (c1 != c2)
            current_row[j + 1] = min(insertions, deletions, substitutions)

        # The finished row becomes the previous one; the old previous row is
        # recycled as the buffer for the next iteration.
        previous_row, current_row = current_row, previous_row

    return previous_row[-1]
def benchmark_levenshtein_distance(func: Callable, number: int = 25_000) -> None:
    """
    Benchmark the Levenshtein distance function.

    Calls ``func('sitting', 'kitten')`` ``number`` times under ``timeit`` and
    prints one summary line with the function name, the run count and the
    elapsed seconds.

    :param func: The function to be benchmarked.
    :param number: How many times ``func`` is called (default 25,000).
    """
    from timeit import timeit

    # Fall back to repr() for callables that carry no __name__.
    name = getattr(func, "__name__", repr(func))

    # Pass the callable to timeit through ``globals`` instead of importing it
    # by name from __main__: that import only succeeds when this file is run
    # as a script and the function is defined there.
    result = timeit(
        stmt="func('sitting', 'kitten')",
        globals={"func": func},
        number=number,
    )
    print(f"{name:<30} finished {number:,} runs in {result:.5f} seconds")
if __name__ == "__main__":
    # Read the two words to compare, trimming surrounding whitespace
    first_word = input("Enter the first word for Levenshtein distance:\n").strip()
    second_word = input("Enter the second word for Levenshtein distance:\n").strip()

    # Print each implementation's result; the "=" f-string form echoes the
    # call expression text ahead of its value
    print(f"{levenshtein_distance(first_word, second_word) =}")
    print(f"{levenshtein_distance_optimized(first_word, second_word) =}")

    # Time both implementations, plain version first
    for implementation in (levenshtein_distance, levenshtein_distance_optimized):
        benchmark_levenshtein_distance(implementation)