From b9325253682f082ad6616ffeb0a4cc3357c0ab1e Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Fri, 6 Dec 2019 17:51:48 -0800 Subject: [PATCH] people: Use Unicode normalization for diacritic removal. Fixes #13481. Signed-off-by: Anders Kaseorg --- .eslintrc.json | 1 + babel.config.js | 1 + package.json | 1 + static/js/people.js | 26 +++----------------------- version.py | 2 +- yarn.lock | 5 +++++ 6 files changed, 12 insertions(+), 24 deletions(-) diff --git a/.eslintrc.json b/.eslintrc.json index e844682ccc..6731885d51 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -4,6 +4,7 @@ "es6": true }, "parserOptions": { + "ecmaVersion": 2019, "warnOnUnsupportedTypeScriptVersion": false, "sourceType": "module" }, diff --git a/babel.config.js b/babel.config.js index 9c54ae91b6..f3315bbd44 100644 --- a/babel.config.js +++ b/babel.config.js @@ -11,6 +11,7 @@ module.exports = { ], plugins: [ "@babel/proposal-class-properties", + ["@babel/plugin-proposal-unicode-property-regex", { useUnicodeFlag: false }], ], sourceType: "unambiguous", }; diff --git a/package.json b/package.json index 10fdf14147..65fbb60df9 100644 --- a/package.json +++ b/package.json @@ -59,6 +59,7 @@ "terser-webpack-plugin": "^2.1.0", "turndown": "^5.0.3", "underscore": "^1.9.1", + "unorm": "^1.6.0", "webfonts-loader": "^5.0.0", "webpack": "^4.33.0", "webpack-cli": "^3.3.2", diff --git a/static/js/people.js b/static/js/people.js index e635731f6f..a0e29d3854 100644 --- a/static/js/people.js +++ b/static/js/people.js @@ -1,3 +1,4 @@ +require("unorm"); // String.prototype.normalize polyfill for IE11 const Dict = require('./dict').Dict; let people_dict; @@ -753,31 +754,10 @@ exports.incr_recipient_count = function (user_id) { pm_recipient_count_dict.set(user_id, old_count + 1); }; -// Diacritic removal from: -// https://stackoverflow.com/questions/18236208/perform-a-find-match-with-javascript-ignoring-special-language-characters-acce -const diacritic_regexes = { - letters_only: /^[a-z]+$/, - a: /[áàãâä]/g, - e: /[éèëê]/g, - i: /[íìïî]/g, - o: /[óòöôõ]/g, - u: /[úùüû]/g, - c: /[ç]/g, - n: /[ñ]/g, -}; +const unicode_marks = /\p{M}/gu; exports.remove_diacritics = function (s) { - if (diacritic_regexes.letters_only.test(s)) { - return s; - } - - return s.replace(diacritic_regexes.a, "a") - .replace(diacritic_regexes.e, "e") - .replace(diacritic_regexes.i, "i") - .replace(diacritic_regexes.o, "o") - .replace(diacritic_regexes.u, "u") - .replace(diacritic_regexes.c, "c") - .replace(diacritic_regexes.n, "n"); + return s.normalize("NFKD").replace(unicode_marks, ""); }; exports.person_matches_query = function (user, query) { diff --git a/version.py b/version.py index 402d14642f..ad1a0a19bd 100644 --- a/version.py +++ b/version.py @@ -26,4 +26,4 @@ LATEST_RELEASE_ANNOUNCEMENT = "https://blog.zulip.org/2019/03/01/zulip-2-0-relea # historical commits sharing the same major version, in which case a # minor version bump suffices. -PROVISION_VERSION = '66.1' +PROVISION_VERSION = '66.2' diff --git a/yarn.lock b/yarn.lock index 3e8ba416c7..0a591bdb5f 100644 --- a/yarn.lock +++ b/yarn.lock @@ -11790,6 +11790,11 @@ unist-util-visit@^1.1.0: dependencies: unist-util-visit-parents "^2.0.0" +unorm@^1.6.0: + version "1.6.0" + resolved "https://registry.yarnpkg.com/unorm/-/unorm-1.6.0.tgz#029b289661fba714f1a9af439eb51d9b16c205af" + integrity sha512-b2/KCUlYZUeA7JFUuRJZPUtr4gZvBh7tavtv4fvk4+KV9pfGiR6CQAQAWl49ZpR3ts2dk4FYkP7EIgDJoiOLDA== + unpipe@1.0.0, unpipe@~1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/unpipe/-/unpipe-1.0.0.tgz#b2bf4ee8514aae6165b4817829d21b2ef49904ec"