spellcheck.h 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. /* Find near-matches for strings and identifiers.
  2. Copyright (C) 2015-2018 Free Software Foundation, Inc.
  3. This file is part of GCC.
  4. GCC is free software; you can redistribute it and/or modify it under
  5. the terms of the GNU General Public License as published by the Free
  6. Software Foundation; either version 3, or (at your option) any later
  7. version.
  8. GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  9. WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10. FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  11. for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with GCC; see the file COPYING3. If not see
  14. <http://www.gnu.org/licenses/>. */
  15. #ifndef GCC_SPELLCHECK_H
  16. #define GCC_SPELLCHECK_H
  17. typedef unsigned int edit_distance_t;
  18. const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX;
  19. /* spellcheck.c */
  20. extern edit_distance_t
  21. levenshtein_distance (const char *s, int len_s,
  22. const char *t, int len_t);
  23. extern edit_distance_t
  24. levenshtein_distance (const char *s, const char *t);
  25. extern const char *
  26. find_closest_string (const char *target,
  27. const auto_vec<const char *> *candidates);
  28. /* A traits class for describing a string-like type usable by
  29. class best_match.
  30. Specializations should provide the implementations of the following:
  31. static size_t get_length (TYPE);
  32. static const char *get_string (TYPE);
  33. get_string should return a non-NULL ptr, which does not need to be
  34. 0-terminated. */
  35. template <typename TYPE>
  36. struct edit_distance_traits {};
  37. /* Specialization of edit_distance_traits for C-style strings. */
  38. template <>
  39. struct edit_distance_traits<const char *>
  40. {
  41. static size_t get_length (const char *str)
  42. {
  43. gcc_assert (str);
  44. return strlen (str);
  45. }
  46. static const char *get_string (const char *str)
  47. {
  48. gcc_assert (str);
  49. return str;
  50. }
  51. };
  52. /* A type for use when determining the best match against a string,
  53. expressed as a template so that we can match against various
  54. string-like types (const char *, frontend identifiers, and preprocessor
  55. macros).
  56. This type accumulates the best possible match against GOAL_TYPE for
  57. a sequence of elements of CANDIDATE_TYPE, whilst minimizing the
  58. number of calls to levenshtein_distance and to
  59. edit_distance_traits<T>::get_length. */
  60. template <typename GOAL_TYPE, typename CANDIDATE_TYPE>
  61. class best_match
  62. {
  63. public:
  64. typedef GOAL_TYPE goal_t;
  65. typedef CANDIDATE_TYPE candidate_t;
  66. typedef edit_distance_traits<goal_t> goal_traits;
  67. typedef edit_distance_traits<candidate_t> candidate_traits;
  68. /* Constructor. */
  69. best_match (GOAL_TYPE goal,
  70. edit_distance_t best_distance_so_far = MAX_EDIT_DISTANCE)
  71. : m_goal (goal_traits::get_string (goal)),
  72. m_goal_len (goal_traits::get_length (goal)),
  73. m_best_candidate (NULL),
  74. m_best_distance (best_distance_so_far)
  75. {}
  76. /* Compare the edit distance between CANDIDATE and m_goal,
  77. and if it's the best so far, record it. */
  78. void consider (candidate_t candidate)
  79. {
  80. size_t candidate_len = candidate_traits::get_length (candidate);
  81. /* Calculate a lower bound on the candidate's distance to the goal,
  82. based on the difference in lengths; it will require at least
  83. this many insertions/deletions. */
  84. edit_distance_t min_candidate_distance
  85. = abs ((ssize_t)candidate_len - (ssize_t)m_goal_len);
  86. /* If the candidate's length is sufficiently different to that
  87. of the goal string, then the number of insertions/deletions
  88. may be >= the best distance so far. If so, we can reject
  89. the candidate immediately without needing to compute
  90. the exact distance, since it won't be an improvement. */
  91. if (min_candidate_distance >= m_best_distance)
  92. return;
  93. /* If the candidate will be unable to beat the criterion in
  94. get_best_meaningful_candidate, reject it without computing
  95. the exact distance. */
  96. unsigned int cutoff = MAX (m_goal_len, candidate_len) / 2;
  97. if (min_candidate_distance > cutoff)
  98. return;
  99. /* Otherwise, compute the distance and see if the candidate
  100. has beaten the previous best value. */
  101. edit_distance_t dist
  102. = levenshtein_distance (m_goal, m_goal_len,
  103. candidate_traits::get_string (candidate),
  104. candidate_len);
  105. if (dist < m_best_distance)
  106. {
  107. m_best_distance = dist;
  108. m_best_candidate = candidate;
  109. m_best_candidate_len = candidate_len;
  110. }
  111. }
  112. /* Assuming that BEST_CANDIDATE is known to be better than
  113. m_best_candidate, update (without recomputing the edit distance to
  114. the goal). */
  115. void set_best_so_far (CANDIDATE_TYPE best_candidate,
  116. edit_distance_t best_distance,
  117. size_t best_candidate_len)
  118. {
  119. gcc_assert (best_distance < m_best_distance);
  120. m_best_candidate = best_candidate;
  121. m_best_distance = best_distance;
  122. m_best_candidate_len = best_candidate_len;
  123. }
  124. /* Get the best candidate so far, but applying a filter to ensure
  125. that we return NULL if none of the candidates are close to the goal,
  126. to avoid offering nonsensical suggestions to the user. */
  127. candidate_t get_best_meaningful_candidate () const
  128. {
  129. /* If more than half of the letters were misspelled, the suggestion is
  130. likely to be meaningless. */
  131. if (m_best_candidate)
  132. {
  133. unsigned int cutoff = MAX (m_goal_len, m_best_candidate_len) / 2;
  134. if (m_best_distance > cutoff)
  135. return NULL;
  136. }
  137. /* If the goal string somehow makes it into the candidate list, offering
  138. it as a suggestion will be nonsensical e.g.
  139. 'constexpr' does not name a type; did you mean 'constexpr'?
  140. Ultimately such suggestions are due to bugs in constructing the
  141. candidate list, but as a band-aid, do not offer suggestions for
  142. distance == 0 (where candidate == goal). */
  143. if (m_best_distance == 0)
  144. return NULL;
  145. return m_best_candidate;
  146. }
  147. /* Get the closest candidate so far, without applying any filtering. */
  148. candidate_t blithely_get_best_candidate () const
  149. {
  150. return m_best_candidate;
  151. }
  152. edit_distance_t get_best_distance () const { return m_best_distance; }
  153. size_t get_best_candidate_length () const { return m_best_candidate_len; }
  154. private:
  155. const char *m_goal;
  156. size_t m_goal_len;
  157. candidate_t m_best_candidate;
  158. edit_distance_t m_best_distance;
  159. size_t m_best_candidate_len;
  160. };
  161. #endif /* GCC_SPELLCHECK_H */