autocorrection.c 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. // Copyright 2021-2022 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //
  15. //
  16. // For full documentation, see
  17. // https://getreuer.info/posts/keyboards/autocorrection
  18. #include "autocorrection.h"
  19. #include <string.h>
  20. #include "autocorrection_data.h"
  21. #if AUTOCORRECTION_MIN_LENGTH < 4
  22. // Odd output or hard locks on the board have been observed when the min typo
  23. // length is 3 or lower (https://github.com/getreuer/qmk-keymap/issues/2).
  24. // Additionally, autocorrection entries for short typos are more likely to false
  25. // trigger, so it is suggested that typos be at least 5 characters.
  26. #error "Min typo length is less than 4. Autocorrection may behave poorly."
  27. #endif
  28. bool process_autocorrection(uint16_t keycode, keyrecord_t* record) {
  29. if (user_config.autocorrect) {
  30. static uint8_t typo_buffer[AUTOCORRECTION_MAX_LENGTH] = {0};
  31. static uint8_t typo_buffer_size = 0;
  32. // Ignore key release; we only process key presses.
  33. if (!record->event.pressed) { return true; }
  34. #ifndef NO_ACTION_ONESHOT
  35. const uint8_t mods = get_mods() | get_oneshot_mods();
  36. #else
  37. const uint8_t mods = get_mods();
  38. #endif // NO_ACTION_ONESHOT
  39. // Disable autocorrection while a mod other than shift is active.
  40. if ((mods & ~MOD_MASK_SHIFT) != 0) {
  41. typo_buffer_size = 0;
  42. return true;
  43. }
  44. // The following switch cases address various kinds of keycodes. This logic is
  45. // split over two switches rather than merged into one. The first switch may
  46. // extract a basic keycode which is then further handled by the second switch,
  47. // e.g. a layer-tap key with Caps Lock `LT(layer, KC_CAPS)`.
  48. switch (keycode) {
  49. #ifndef NO_ACTION_TAPPING
  50. case QK_MOD_TAP ... QK_MOD_TAP_MAX: // Tap-hold keys.
  51. #ifndef NO_ACTION_LAYER
  52. case QK_LAYER_TAP ... QK_LAYER_TAP_MAX:
  53. #endif // NO_ACTION_LAYER
  54. // Ignore when tap-hold keys are held.
  55. if (record->tap.count == 0) { return true; }
  56. // Otherwise when tapped, get the basic keycode.
  57. // Fallthrough intended.
  58. #endif // NO_ACTION_TAPPING
  59. // Handle shifted keys, e.g. symbols like KC_EXLM = S(KC_1).
  60. case QK_LSFT ... QK_LSFT + 255:
  61. case QK_RSFT ... QK_RSFT + 255:
  62. keycode &= 0xff; // Get the basic keycode.
  63. break;
  64. // NOTE: Space Cadet keys expose no info to check whether they are being
  65. // tapped vs. held. This makes autocorrection ambiguous, e.g. SC_LCPO might
  66. // be '(', which we would treat as a word break, or it might be shift, which
  67. // we would treat as having no effect. To behave cautiously, we allow Space
  68. // Cadet keycodes to fall to the logic below and clear autocorrection state.
  69. }
  70. switch (keycode) {
  71. // Ignore shifts, Caps Lock, one-shot mods, and layer switch keys.
  72. case KC_NO:
  73. case KC_LSFT:
  74. case KC_RSFT:
  75. case KC_CAPS:
  76. case QK_ONE_SHOT_MOD ... QK_ONE_SHOT_MOD_MAX:
  77. case QK_TO ... QK_TO_MAX:
  78. case QK_MOMENTARY ... QK_MOMENTARY_MAX:
  79. case QK_DEF_LAYER ... QK_DEF_LAYER_MAX:
  80. case QK_TOGGLE_LAYER ... QK_TOGGLE_LAYER_MAX:
  81. case QK_ONE_SHOT_LAYER ... QK_ONE_SHOT_LAYER_MAX:
  82. case QK_LAYER_TAP_TOGGLE ... QK_LAYER_TAP_TOGGLE_MAX:
  83. case QK_LAYER_MOD ... QK_LAYER_MOD_MAX:
  84. return true; // Ignore these keys.
  85. }
  86. if (keycode == KC_QUOT) {
  87. // Treat " (shifted ') as a word boundary.
  88. if ((mods & MOD_MASK_SHIFT) != 0) { keycode = KC_SPC; }
  89. } else if (!(KC_A <= keycode && keycode <= KC_Z)) {
  90. if (keycode == KC_BSPC) {
  91. // Remove last character from the buffer.
  92. if (typo_buffer_size > 0) { --typo_buffer_size; }
  93. return true;
  94. } else if (KC_1 <= keycode && keycode <= KC_SLSH && keycode != KC_ESC) {
  95. // Set a word boundary if space, period, digit, etc. is pressed.
  96. // Behave more conservatively for the enter key. Reset, so that enter
  97. // can't be used on a word ending.
  98. if (keycode == KC_ENT) { typo_buffer_size = 0; }
  99. keycode = KC_SPC;
  100. } else {
  101. // Clear state if some other non-alpha key is pressed.
  102. typo_buffer_size = 0;
  103. return true;
  104. }
  105. }
  106. // If the buffer is full, rotate it to discard the oldest character.
  107. if (typo_buffer_size >= AUTOCORRECTION_MAX_LENGTH) {
  108. memmove(typo_buffer, typo_buffer + 1, AUTOCORRECTION_MAX_LENGTH - 1);
  109. typo_buffer_size = AUTOCORRECTION_MAX_LENGTH - 1;
  110. }
  111. // Append `keycode` to the buffer.
  112. // NOTE: `keycode` must be a basic keycode (0-255) by this point.
  113. typo_buffer[typo_buffer_size++] = (uint8_t) keycode;
  114. // Early return if not many characters have been buffered so far.
  115. if (typo_buffer_size < AUTOCORRECTION_MIN_LENGTH) { return true; }
  116. // Check whether the buffer ends in a typo. This is done using a trie
  117. // stored in `autocorrection_data`.
  118. uint16_t state = 0;
  119. uint8_t code = pgm_read_byte(autocorrection_data + state);
  120. for (int i = typo_buffer_size - 1; i >= 0; --i) {
  121. const uint8_t key_i = typo_buffer[i];
  122. if (code & 64) { // Check for match in node with multiple children.
  123. code &= 63;
  124. for (; code != key_i;
  125. code = pgm_read_byte(autocorrection_data + (state += 3))) {
  126. if (!code) { return true; }
  127. }
  128. // Follow link to child node.
  129. state = (uint16_t)(
  130. (uint_fast16_t)pgm_read_byte(autocorrection_data + state + 1)
  131. | (uint_fast16_t)pgm_read_byte(autocorrection_data + state + 2) << 8);
  132. // Otherwise check for match in node with a single child.
  133. } else if (code != key_i) {
  134. return true;
  135. } else if (!(code = pgm_read_byte(autocorrection_data + (++state)))) {
  136. ++state;
  137. }
  138. // Stop if `state` becomes an invalid index. This should not normally
  139. // happen, it is a safeguard in case of a bug, data corruption, etc.
  140. if (state >= sizeof(autocorrection_data)) {
  141. return true;
  142. }
  143. // Read first byte of the next node.
  144. code = pgm_read_byte(autocorrection_data + state);
  145. if (code & 128) { // A typo was found! Apply autocorrection.
  146. const int backspaces = code & 63;
  147. for (int i = 0; i < backspaces; ++i) { tap_code(KC_BSPC); }
  148. send_string_P((char const*)(autocorrection_data + state + 1));
  149. if (keycode == KC_SPC) {
  150. typo_buffer[0] = KC_SPC;
  151. typo_buffer_size = 1;
  152. return true;
  153. } else {
  154. typo_buffer_size = 0;
  155. return false;
  156. }
  157. }
  158. }
  159. return true;
  160. }
  161. return true;
  162. }