From e28258010182b56f27cfbd3f9f9a58fd9cd8870d Mon Sep 17 00:00:00 2001 From: Nolan Lawson Date: Mon, 3 Jul 2017 02:02:36 -0700 Subject: [PATCH] Faster emojify() by avoiding str.replace() entirely (#4049) --- app/javascript/mastodon/emoji.js | 69 ++++++++++------------ package.json | 1 + spec/javascript/components/emojify.test.js | 34 +++++++++++ yarn.lock | 4 ++ 4 files changed, 71 insertions(+), 37 deletions(-) diff --git a/app/javascript/mastodon/emoji.js b/app/javascript/mastodon/emoji.js index d0df71ea3..7043d5f3a 100644 --- a/app/javascript/mastodon/emoji.js +++ b/app/javascript/mastodon/emoji.js @@ -1,60 +1,55 @@ import emojione from 'emojione'; +import Trie from 'substring-trie'; -const toImage = str => shortnameToImage(unicodeToImage(str)); +const mappedUnicode = emojione.mapUnicodeToShort(); +const trie = new Trie(Object.keys(emojione.jsEscapeMap)); -const unicodeToImage = str => { - const mappedUnicode = emojione.mapUnicodeToShort(); - - return str.replace(emojione.regUnicode, unicodeChar => { - if (typeof unicodeChar === 'undefined' || unicodeChar === '' || !(unicodeChar in emojione.jsEscapeMap)) { - return unicodeChar; - } - - const unicode = emojione.jsEscapeMap[unicodeChar]; - const short = mappedUnicode[unicode]; - const filename = emojione.emojioneList[short].fname; - const alt = emojione.convert(unicode.toUpperCase()); - - return `${alt}`; - }); -}; - -const shortnameToImage = str => { - // This walks through the string from end to start, ignoring any tags (

,
, etc.) - // and replacing valid shortnames like :smile: and :wink: that _aren't_ within - // tags with an version. - // The goal is to be the same as an emojione.regShortNames replacement, but faster. - // The reason we go backwards is because then we can replace substrings as we go. - let i = str.length; +function emojify(str) { + // This walks through the string from start to end, ignoring any tags (

,
, etc.) + // and replacing valid shortnames like :smile: and :wink: as well as unicode strings + // that _aren't_ within tags with an version. + // The goal is to be the same as an emojione.regShortNames/regUnicode replacement, but faster. + let i = -1; let insideTag = false; let insideShortname = false; - let shortnameEndIndex = -1; - while (i--) { + let shortnameStartIndex = -1; + let match; + while (++i < str.length) { const char = str.charAt(i); if (insideShortname && char === ':') { - const shortname = str.substring(i, shortnameEndIndex + 1); + const shortname = str.substring(shortnameStartIndex, i + 1); if (shortname in emojione.emojioneList) { const unicode = emojione.emojioneList[shortname].unicode[emojione.emojioneList[shortname].unicode.length - 1]; const alt = emojione.convert(unicode.toUpperCase()); const replacement = `${alt}`; - str = str.substring(0, i) + replacement + str.substring(shortnameEndIndex + 1); + str = str.substring(0, shortnameStartIndex) + replacement + str.substring(i + 1); + i += (replacement.length - shortname.length - 1); // jump ahead the length we've added to the string } else { - i++; // stray colon, try again + i--; // stray colon, try again } insideShortname = false; - } else if (insideTag && char === '<') { + } else if (insideTag && char === '>') { insideTag = false; - } else if (char === '>') { + } else if (char === '<') { insideTag = true; insideShortname = false; } else if (!insideTag && char === ':') { insideShortname = true; - shortnameEndIndex = i; + shortnameStartIndex = i; + } else if (!insideTag && (match = trie.search(str.substring(i)))) { + const unicodeStr = match; + if (unicodeStr in emojione.jsEscapeMap) { + const unicode = emojione.jsEscapeMap[unicodeStr]; + const short = mappedUnicode[unicode]; + const filename = emojione.emojioneList[short].fname; + const alt = emojione.convert(unicode.toUpperCase()); + const replacement = `${alt}`; + str = str.substring(0, i) + replacement + str.substring(i + unicodeStr.length); + i += (replacement.length - unicodeStr.length); // jump ahead the length we've added to the string + } } } return str; -}; +} -export default function emojify(text) { - return toImage(text); -}; +export default emojify; diff --git a/package.json b/package.json index 7fa80a0c2..d5c05dae3 100644 --- a/package.json +++ b/package.json @@ -102,6 +102,7 @@ "sass-loader": "^6.0.6", "stringz": "^0.2.2", "style-loader": "^0.18.2", + "substring-trie": "^1.0.0", "throng": "^4.0.0", "tiny-queue": "^0.2.1", "uuid": "^3.1.0", diff --git a/spec/javascript/components/emojify.test.js b/spec/javascript/components/emojify.test.js index 7a496623e..3e8b25af9 100644 --- a/spec/javascript/components/emojify.test.js +++ b/spec/javascript/components/emojify.test.js @@ -46,4 +46,38 @@ describe('emojify', () => { expect(emojify(':smile')).to.equal(':smile'); }); + it('does two emoji next to each other', () => { + expect(emojify(':smile::wink:')).to.equal( + '😄😉'); + }); + + it('does unicode', () => { + expect(emojify('\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC66\u200D\uD83D\uDC66')).to.equal( + '👩‍👩‍👦‍👦'); + expect(emojify('\uD83D\uDC68\uD83D\uDC69\uD83D\uDC67\uD83D\uDC67')).to.equal( + '👨👩👧👧'); + expect(emojify('\uD83D\uDC69\uD83D\uDC69\uD83D\uDC66')).to.equal('👩👩👦'); + expect(emojify('\u2757')).to.equal( + '❗'); + }); + + it('does multiple unicode', () => { + expect(emojify('\u2757 #\uFE0F\u20E3')).to.equal( + '❗ #️⃣'); + expect(emojify('\u2757#\uFE0F\u20E3')).to.equal( + '❗#️⃣'); + expect(emojify('\u2757 #\uFE0F\u20E3 \u2757')).to.equal( + '❗ #️⃣ ❗'); + expect(emojify('foo \u2757 #\uFE0F\u20E3 bar')).to.equal( + 'foo ❗ #️⃣ bar'); + }); + + it('does mixed unicode and shortnames', () => { + expect(emojify(':smile:#\uFE0F\u20E3:wink:\u2757')).to.equal('😄#️⃣😉❗'); + }); + + it('ignores unicode inside of tags', () => { + expect(emojify('

')).to.equal('

'); + }); + }); diff --git a/yarn.lock b/yarn.lock index adabca08d..609f256c9 100644 --- a/yarn.lock +++ b/yarn.lock @@ -6819,6 +6819,10 @@ style-loader@^0.18.2: loader-utils "^1.0.2" schema-utils "^0.3.0" +substring-trie@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/substring-trie/-/substring-trie-1.0.0.tgz#5a7ecb83aefcca7b3720f7897cf69e97023be143" + sugarss@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/sugarss/-/sugarss-1.0.0.tgz#65e51b3958432fb70d5451a68bb33e32d0cf1ef7"