1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
|
/********************* */
/*! \file didyoumean.cpp
** \verbatim
** Original author: Kshitij Bansal
** Major contributors: none
** Minor contributors (to current version): none
** This file is part of the CVC4 project.
** Copyright (c) 2009-2014 New York University and The University of Iowa
** See the file COPYING in the top-level source directory for licensing
** information.\endverbatim
**
** \brief did-you-mean style suggestions
**
** ``What do you mean? I don't understand.'' An attempt to be more
** helpful than that. Similar to one in git.
**
** There are no dependencies on CVC4 (except namespace).
**/
#include "didyoumean.h"
#include <iostream>
#include <sstream>
using namespace std;
using namespace CVC4;
vector<string> DidYouMean::getMatch(string input) {
/** Magic numbers */
const int similarityThreshold = 7;
const unsigned numMatchesThreshold = 10;
set< pair<int, string> > scores;
vector<string> ret;
for(typeof(d_words.begin()) it = d_words.begin(); it != d_words.end(); ++it) {
string s = (*it);
if( s == input ) {
// if input matches AS-IS just return that
ret.push_back(s);
return ret;
}
int score;
if(s.compare(0, input.size(), input) == 0) {
score = 0;
} else {
score = editDistance(input, s) + 1;
}
scores.insert( make_pair(score, s) );
}
int min_score = scores.begin()->first;
for(typeof(scores.begin()) i = scores.begin();
i != scores.end(); ++i) {
// add if score is overall not too big, and also not much close to
// the score of the best suggestion
if(i->first < similarityThreshold && i->first <= min_score + 1) {
ret.push_back(i->second);
#ifdef DIDYOUMEAN_DEBUG
cout << i->second << ": " << i->first << std::endl;
#endif
}
}
if(ret.size() > numMatchesThreshold ) ret.resize(numMatchesThreshold);;
return ret;
}
int DidYouMean::editDistance(const std::string& a, const std::string& b)
{
// input string: a
// desired string: b
const int swapCost = 0;
const int substituteCost = 2;
const int addCost = 1;
const int deleteCost = 3;
const int switchCaseCost = 0;
int len1 = a.size();
int len2 = b.size();
int C[3][len2+1]; // cost
for(int j = 0; j <= len2; ++j) {
C[0][j] = j * addCost;
}
for(int i = 1; i <= len1; ++i) {
int cur = i%3;
int prv = (i+2)%3;
int pr2 = (i+1)%3;
C[cur][0] = i * deleteCost;
for(int j = 1; j <= len2; ++j) {
C[cur][j] = 100000000; // INF
if(a[i-1] == b[j-1]) {
// match
C[cur][j] = std::min(C[cur][j], C[prv][j-1]);
} else if(tolower(a[i-1]) == tolower(b[j-1])){
// switch case
C[cur][j] = std::min(C[cur][j], C[prv][j-1] + switchCaseCost);
} else {
// substitute
C[cur][j] = std::min(C[cur][j], C[prv][j-1] + substituteCost);
}
// swap
if(i >= 2 && j >= 2 && a[i-1] == b[j-2] && a[i-2] == b[j-1]) {
C[cur][j] = std::min(C[cur][j], C[pr2][j-2] + swapCost);
}
// add
C[cur][j] = std::min(C[cur][j], C[cur][j-1] + addCost);
// delete
C[cur][j] = std::min(C[cur][j], C[prv][j] + deleteCost);
#ifdef DIDYOUMEAN_DEBUG1
std::cout << "C[" << cur << "][" << 0 << "] = " << C[cur][0] << std::endl;
#endif
}
}
return C[len1%3][len2];
}
string DidYouMean::getMatchAsString(string input, int prefixNewLines, int suffixNewLines) {
vector<string> matches = getMatch(input);
ostringstream oss;
if(matches.size() > 0) {
while(prefixNewLines --> 0) { oss << endl; }
if(matches.size() == 1) {
oss << "Did you mean this?";
} else {
oss << "Did you mean any of these?";
}
for(unsigned i = 0; i < matches.size(); ++i) {
oss << "\n " << matches[i];
}
while(suffixNewLines --> 0) { oss << endl; }
}
return oss.str();
}
|