The Levenstein distance of two strings is the nuber of changes get from one string to the other.
See: https://en.wikipedia.org/wiki/Levenshtein_distance
Here are two possible implementations in C++, one using recursion, one non-resurvie using the full matrix method, aka. Wagner-Fischer algorithm.
int levenstein_distance_r(std::string_view s1, std::string_view s2)
{
if (s1.empty())
return s2.size();
if (s2.empty())
return s1.size();
if (s1.front() == s2.front())
return levenstein_distance_r(s1.data() + 1, s2.data() + 1);
return 1
+ std::min({
levenstein_distance_r(s1.data() + 1, s2), // insertion
levenstein_distance_r(s1, s2.data() + 1), // deletion
levenstein_distance_r(s1.data() + 1, s2.data() + 1) // substitution
});
}
Uses std::vector<std::vector<int>>
as matrix. There are more performant implementations but optimization is not the goal here.
int levenstein_distance(std::string_view s1, std::string_view s2)
{
auto d = std::vector<std::vector<int>>(s2.size() + 1u, std::vector<int>(s1.size() + 1u, 0));
for (std::size_t i = 0; i <= s1.size(); ++i)
d[0][i] = i;
for (std::size_t j = 0; j <= s2.size(); ++j)
d[j][0] = j;
for (std::size_t j = 1u; j <= s2.size(); ++j) {
for (std::size_t i = 1u; i <= s1.size(); ++i) {
d[j][i] = std::min({
d[j - 1u][i] + 1, // deletion
d[j][i - 1u] + 1, // insertion
d[j - 1u][i - 1u] + !(s1[i] == s2[j]) // substitution
});
}
}
return d[s2.size()][s1.size()];
}