From fd7f0732fe26554c51218c4f67955e8050590d2c Mon Sep 17 00:00:00 2001 From: Nolan Lawson Date: Thu, 5 Oct 2017 18:42:34 -0700 Subject: [PATCH] Compress and combine emoji data (#5229) --- app/javascript/mastodon/actions/compose.js | 2 +- .../mastodon/components/autosuggest_emoji.js | 4 +- .../mastodon/emoji_data_compressed.js | 22 ----- app/javascript/mastodon/emoji_data_light.js | 16 ---- app/javascript/mastodon/emojione_light.js | 38 -------- .../components/emoji_picker_dropdown.js | 2 +- .../mastodon/{ => features/emoji}/emoji.js | 7 +- .../features/emoji/emoji_compressed.js | 90 +++++++++++++++++++ .../{ => features/emoji}/emoji_map.json | 0 .../features/emoji/emoji_mart_data_light.js | 41 +++++++++ .../emoji/emoji_mart_search_light.js} | 2 +- .../emoji/emoji_unicode_mapping_light.js | 35 ++++++++ .../{ => features/emoji}/emoji_utils.js | 2 +- .../features/emoji/unicode_to_filename.js | 26 ++++++ .../features/emoji/unicode_to_unified_name.js | 17 ++++ app/javascript/mastodon/reducers/accounts.js | 2 +- .../mastodon/reducers/custom_emojis.js | 4 +- app/javascript/mastodon/reducers/statuses.js | 2 +- app/javascript/packs/public.js | 2 +- lib/tasks/emojis.rake | 2 +- .../javascript/components/emoji_index.test.js | 20 ++++- spec/javascript/components/emojify.test.js | 11 ++- 22 files changed, 254 insertions(+), 93 deletions(-) delete mode 100644 app/javascript/mastodon/emoji_data_compressed.js delete mode 100644 app/javascript/mastodon/emoji_data_light.js delete mode 100644 app/javascript/mastodon/emojione_light.js rename app/javascript/mastodon/{ => features/emoji}/emoji.js (89%) create mode 100644 app/javascript/mastodon/features/emoji/emoji_compressed.js rename app/javascript/mastodon/{ => features/emoji}/emoji_map.json (100%) create mode 100644 app/javascript/mastodon/features/emoji/emoji_mart_data_light.js rename app/javascript/mastodon/{emoji_index_light.js => features/emoji/emoji_mart_search_light.js} (98%) create mode 100644 app/javascript/mastodon/features/emoji/emoji_unicode_mapping_light.js rename app/javascript/mastodon/{ => features/emoji}/emoji_utils.js (98%) create mode 100644 app/javascript/mastodon/features/emoji/unicode_to_filename.js create mode 100644 app/javascript/mastodon/features/emoji/unicode_to_unified_name.js diff --git a/app/javascript/mastodon/actions/compose.js b/app/javascript/mastodon/actions/compose.js index ed4837ebd..560c00720 100644 --- a/app/javascript/mastodon/actions/compose.js +++ b/app/javascript/mastodon/actions/compose.js @@ -1,6 +1,6 @@ import api from '../api'; import { throttle } from 'lodash'; -import { search as emojiSearch } from '../emoji_index_light'; +import { search as emojiSearch } from '../features/emoji/emoji_mart_search_light'; import { updateTimeline, diff --git a/app/javascript/mastodon/components/autosuggest_emoji.js b/app/javascript/mastodon/components/autosuggest_emoji.js index 31dc1dbb1..ce4383a60 100644 --- a/app/javascript/mastodon/components/autosuggest_emoji.js +++ b/app/javascript/mastodon/components/autosuggest_emoji.js @@ -1,6 +1,6 @@ import React from 'react'; import PropTypes from 'prop-types'; -import { unicodeMapping } from '../emojione_light'; +import unicodeMapping from '../features/emoji/emoji_unicode_mapping_light'; const assetHost = process.env.CDN_HOST || ''; @@ -23,7 +23,7 @@ export default class AutosuggestEmoji extends React.PureComponent { return null; } - url = `${assetHost}/emoji/${mapping[0]}.svg`; + url = `${assetHost}/emoji/${mapping.filename}.svg`; } return ( diff --git a/app/javascript/mastodon/emoji_data_compressed.js b/app/javascript/mastodon/emoji_data_compressed.js deleted file mode 100644 index f69a3e46a..000000000 --- a/app/javascript/mastodon/emoji_data_compressed.js +++ /dev/null @@ -1,22 +0,0 @@ -// @preval -const data = require('emoji-mart/dist/data').default; -const pick = require('lodash/pick'); -const values = require('lodash/values'); - -const condensedEmojis = Object.keys(data.emojis).map(key => { - if (!data.emojis[key].short_names[0] === key) { - throw new Error('The condenser expects the first short_code to be the ' + - 'key. It may need to be rewritten if the emoji change such that this ' + - 'is no longer the case.'); - } - return values(pick(data.emojis[key], ['short_names', 'unified', 'search'])); -}); - -// JSON.parse/stringify is to emulate what @preval is doing and avoid any -// inconsistent behavior in dev mode -module.exports = JSON.parse(JSON.stringify({ - emojis: condensedEmojis, - skins: data.skins, - categories: data.categories, - short_names: data.short_names, -})); diff --git a/app/javascript/mastodon/emoji_data_light.js b/app/javascript/mastodon/emoji_data_light.js deleted file mode 100644 index f91ee592e..000000000 --- a/app/javascript/mastodon/emoji_data_light.js +++ /dev/null @@ -1,16 +0,0 @@ -const data = require('./emoji_data_compressed'); - -// decompress -const emojis = {}; -data.emojis.forEach(compressedEmoji => { - const [ short_names, unified, search ] = compressedEmoji; - emojis[short_names[0]] = { - short_names, - unified, - search, - }; -}); - -data.emojis = emojis; - -module.exports = data; diff --git a/app/javascript/mastodon/emojione_light.js b/app/javascript/mastodon/emojione_light.js deleted file mode 100644 index 2296497b0..000000000 --- a/app/javascript/mastodon/emojione_light.js +++ /dev/null @@ -1,38 +0,0 @@ -// @preval -// http://www.unicode.org/Public/emoji/5.0/emoji-test.txt - -const emojis = require('./emoji_map.json'); -const { emojiIndex } = require('emoji-mart'); -const excluded = ['®', '©', '™']; -const skins = ['🏻', '🏼', '🏽', '🏾', '🏿']; -const shortcodeMap = {}; - -Object.keys(emojiIndex.emojis).forEach(key => { - shortcodeMap[emojiIndex.emojis[key].native] = emojiIndex.emojis[key].id; -}); - -const stripModifiers = unicode => { - skins.forEach(tone => { - unicode = unicode.replace(tone, ''); - }); - - return unicode; -}; - -Object.keys(emojis).forEach(key => { - if (excluded.includes(key)) { - delete emojis[key]; - return; - } - - const normalizedKey = stripModifiers(key); - let shortcode = shortcodeMap[normalizedKey]; - - if (!shortcode) { - shortcode = shortcodeMap[normalizedKey + '\uFE0F']; - } - - emojis[key] = [emojis[key], shortcode]; -}); - -module.exports.unicodeMapping = emojis; diff --git a/app/javascript/mastodon/features/compose/components/emoji_picker_dropdown.js b/app/javascript/mastodon/features/compose/components/emoji_picker_dropdown.js index bbc6b7a16..2bea5e2b1 100644 --- a/app/javascript/mastodon/features/compose/components/emoji_picker_dropdown.js +++ b/app/javascript/mastodon/features/compose/components/emoji_picker_dropdown.js @@ -6,7 +6,7 @@ import Overlay from 'react-overlays/lib/Overlay'; import classNames from 'classnames'; import ImmutablePropTypes from 'react-immutable-proptypes'; import detectPassiveEvents from 'detect-passive-events'; -import { buildCustomEmojis } from '../../../emoji'; +import { buildCustomEmojis } from '../../emoji/emoji'; const messages = defineMessages({ emoji: { id: 'emoji_button.label', defaultMessage: 'Insert emoji' }, diff --git a/app/javascript/mastodon/emoji.js b/app/javascript/mastodon/features/emoji/emoji.js similarity index 89% rename from app/javascript/mastodon/emoji.js rename to app/javascript/mastodon/features/emoji/emoji.js index cf0077958..998cb0a06 100644 --- a/app/javascript/mastodon/emoji.js +++ b/app/javascript/mastodon/features/emoji/emoji.js @@ -1,4 +1,4 @@ -import { unicodeMapping } from './emojione_light'; +import unicodeMapping from './emoji_unicode_mapping_light'; import Trie from 'substring-trie'; const trie = new Trie(Object.keys(unicodeMapping)); @@ -35,8 +35,9 @@ const emojify = (str, customEmojis = {}) => { if (!rend) break; i = rend; } else { // matched to unicode emoji - const [filename, shortCode] = unicodeMapping[match]; - replacement = `${match}`; + const { filename, shortCode } = unicodeMapping[match]; + const title = shortCode ? `:${shortCode}:` : ''; + replacement = `${match}`; rend = i + match.length; } rtn += str.slice(0, i) + replacement; diff --git a/app/javascript/mastodon/features/emoji/emoji_compressed.js b/app/javascript/mastodon/features/emoji/emoji_compressed.js new file mode 100644 index 000000000..3ed4dc82b --- /dev/null +++ b/app/javascript/mastodon/features/emoji/emoji_compressed.js @@ -0,0 +1,90 @@ +// @preval +// http://www.unicode.org/Public/emoji/5.0/emoji-test.txt +// This file contains the compressed version of the emoji data from +// both emoji_map.json and from emoji-mart's emojiIndex and data objects. +// It's designed to be emitted in an array format to take up less space +// over the wire. + +const { unicodeToFilename } = require('./unicode_to_filename'); +const { unicodeToUnifiedName } = require('./unicode_to_unified_name'); +const emojiMap = require('./emoji_map.json'); +const { emojiIndex } = require('emoji-mart'); +const emojiMartData = require('emoji-mart/dist/data').default; +const excluded = ['®', '©', '™']; +const skins = ['🏻', '🏼', '🏽', '🏾', '🏿']; +const shortcodeMap = {}; + +const shortCodesToEmojiData = {}; +const emojisWithoutShortCodes = []; + +Object.keys(emojiIndex.emojis).forEach(key => { + shortcodeMap[emojiIndex.emojis[key].native] = emojiIndex.emojis[key].id; +}); + +const stripModifiers = unicode => { + skins.forEach(tone => { + unicode = unicode.replace(tone, ''); + }); + + return unicode; +}; + +Object.keys(emojiMap).forEach(key => { + if (excluded.includes(key)) { + delete emojiMap[key]; + return; + } + + const normalizedKey = stripModifiers(key); + let shortcode = shortcodeMap[normalizedKey]; + + if (!shortcode) { + shortcode = shortcodeMap[normalizedKey + '\uFE0F']; + } + + const filename = emojiMap[key]; + + const filenameData = [key]; + + if (unicodeToFilename(key) !== filename) { + // filename can't be derived using unicodeToFilename + filenameData.push(filename); + } + + if (typeof shortcode === 'undefined') { + emojisWithoutShortCodes.push(filenameData); + } else { + shortCodesToEmojiData[shortcode] = shortCodesToEmojiData[shortcode] || [[]]; + shortCodesToEmojiData[shortcode][0].push(filenameData); + } +}); + +Object.keys(emojiIndex.emojis).forEach(key => { + const { native } = emojiIndex.emojis[key]; + const { short_names, search, unified } = emojiMartData.emojis[key]; + if (short_names[0] !== key) { + throw new Error('The compresser expects the first short_code to be the ' + + 'key. It may need to be rewritten if the emoji change such that this ' + + 'is no longer the case.'); + } + + short_names.splice(0, 1); // first short name can be inferred from the key + + const searchData = [native, short_names, search]; + if (unicodeToUnifiedName(native) !== unified) { + // unified name can't be derived from unicodeToUnifiedName + searchData.push(unified); + } + + shortCodesToEmojiData[key].push(searchData); +}); + +// JSON.parse/stringify is to emulate what @preval is doing and avoid any +// inconsistent behavior in dev mode +module.exports = JSON.parse(JSON.stringify([ + shortCodesToEmojiData, + emojiMartData.skins, + emojiMartData.categories, + emojiMartData.short_names, + emojisWithoutShortCodes, +])); diff --git a/app/javascript/mastodon/emoji_map.json b/app/javascript/mastodon/features/emoji/emoji_map.json similarity index 100% rename from app/javascript/mastodon/emoji_map.json rename to app/javascript/mastodon/features/emoji/emoji_map.json diff --git a/app/javascript/mastodon/features/emoji/emoji_mart_data_light.js b/app/javascript/mastodon/features/emoji/emoji_mart_data_light.js new file mode 100644 index 000000000..45086fc4c --- /dev/null +++ b/app/javascript/mastodon/features/emoji/emoji_mart_data_light.js @@ -0,0 +1,41 @@ +// The output of this module is designed to mimic emoji-mart's +// "data" object, such that we can use it for a light version of emoji-mart's +// emojiIndex.search functionality. +const { unicodeToUnifiedName } = require('./unicode_to_unified_name'); +const [ shortCodesToEmojiData, skins, categories, short_names ] = require('./emoji_compressed'); + +const emojis = {}; + +// decompress +Object.keys(shortCodesToEmojiData).forEach((shortCode) => { + let [ + filenameData, // eslint-disable-line no-unused-vars + searchData, + ] = shortCodesToEmojiData[shortCode]; + let [ + native, + short_names, + search, + unified, + ] = searchData; + + if (!unified) { + // unified name can be derived from unicodeToUnifiedName + unified = unicodeToUnifiedName(native); + } + + short_names = [shortCode].concat(short_names); + emojis[shortCode] = { + native, + search, + short_names, + unified, + }; +}); + +module.exports = { + emojis, + skins, + categories, + short_names, +}; diff --git a/app/javascript/mastodon/emoji_index_light.js b/app/javascript/mastodon/features/emoji/emoji_mart_search_light.js similarity index 98% rename from app/javascript/mastodon/emoji_index_light.js rename to app/javascript/mastodon/features/emoji/emoji_mart_search_light.js index 0719eda5e..5da8de1cf 100644 --- a/app/javascript/mastodon/emoji_index_light.js +++ b/app/javascript/mastodon/features/emoji/emoji_mart_search_light.js @@ -1,7 +1,7 @@ // This code is largely borrowed from: // https://github.com/missive/emoji-mart/blob/bbd4fbe/src/utils/emoji-index.js -import data from './emoji_data_light'; +import data from './emoji_mart_data_light'; import { getData, getSanitizedData, intersect } from './emoji_utils'; let index = {}; diff --git a/app/javascript/mastodon/features/emoji/emoji_unicode_mapping_light.js b/app/javascript/mastodon/features/emoji/emoji_unicode_mapping_light.js new file mode 100644 index 000000000..918684c31 --- /dev/null +++ b/app/javascript/mastodon/features/emoji/emoji_unicode_mapping_light.js @@ -0,0 +1,35 @@ +// A mapping of unicode strings to an object containing the filename +// (i.e. the svg filename) and a shortCode intended to be shown +// as a "title" attribute in an HTML element (aka tooltip). + +const [ + shortCodesToEmojiData, + skins, // eslint-disable-line no-unused-vars + categories, // eslint-disable-line no-unused-vars + short_names, // eslint-disable-line no-unused-vars + emojisWithoutShortCodes, +] = require('./emoji_compressed'); +const { unicodeToFilename } = require('./unicode_to_filename'); + +// decompress +const unicodeMapping = {}; + +function processEmojiMapData(emojiMapData, shortCode) { + let [ native, filename ] = emojiMapData; + if (!filename) { + // filename name can be derived from unicodeToFilename + filename = unicodeToFilename(native); + } + unicodeMapping[native] = { + shortCode: shortCode, + filename: filename, + }; +} + +Object.keys(shortCodesToEmojiData).forEach((shortCode) => { + let [ filenameData ] = shortCodesToEmojiData[shortCode]; + filenameData.forEach(emojiMapData => processEmojiMapData(emojiMapData, shortCode)); +}); +emojisWithoutShortCodes.forEach(emojiMapData => processEmojiMapData(emojiMapData)); + +module.exports = unicodeMapping; diff --git a/app/javascript/mastodon/emoji_utils.js b/app/javascript/mastodon/features/emoji/emoji_utils.js similarity index 98% rename from app/javascript/mastodon/emoji_utils.js rename to app/javascript/mastodon/features/emoji/emoji_utils.js index 6475df571..6ef2785d9 100644 --- a/app/javascript/mastodon/emoji_utils.js +++ b/app/javascript/mastodon/features/emoji/emoji_utils.js @@ -1,7 +1,7 @@ // This code is largely borrowed from: // https://github.com/missive/emoji-mart/blob/bbd4fbe/src/utils/index.js -import data from './emoji_data_light'; +import data from './emoji_mart_data_light'; const COLONS_REGEX = /^(?:\:([^\:]+)\:)(?:\:skin-tone-(\d)\:)?$/; diff --git a/app/javascript/mastodon/features/emoji/unicode_to_filename.js b/app/javascript/mastodon/features/emoji/unicode_to_filename.js new file mode 100644 index 000000000..c75c4cd7d --- /dev/null +++ b/app/javascript/mastodon/features/emoji/unicode_to_filename.js @@ -0,0 +1,26 @@ +// taken from: +// https://github.com/twitter/twemoji/blob/47732c7/twemoji-generator.js#L848-L866 +exports.unicodeToFilename = (str) => { + let result = ''; + let charCode = 0; + let p = 0; + let i = 0; + while (i < str.length) { + charCode = str.charCodeAt(i++); + if (p) { + if (result.length > 0) { + result += '-'; + } + result += (0x10000 + ((p - 0xD800) << 10) + (charCode - 0xDC00)).toString(16); + p = 0; + } else if (0xD800 <= charCode && charCode <= 0xDBFF) { + p = charCode; + } else { + if (result.length > 0) { + result += '-'; + } + result += charCode.toString(16); + } + } + return result; +}; diff --git a/app/javascript/mastodon/features/emoji/unicode_to_unified_name.js b/app/javascript/mastodon/features/emoji/unicode_to_unified_name.js new file mode 100644 index 000000000..808ac197e --- /dev/null +++ b/app/javascript/mastodon/features/emoji/unicode_to_unified_name.js @@ -0,0 +1,17 @@ +function padLeft(str, num) { + while (str.length < num) { + str = '0' + str; + } + return str; +} + +exports.unicodeToUnifiedName = (str) => { + let output = ''; + for (let i = 0; i < str.length; i += 2) { + if (i > 0) { + output += '-'; + } + output += padLeft(str.codePointAt(i).toString(16).toUpperCase(), 4); + } + return output; +}; diff --git a/app/javascript/mastodon/reducers/accounts.js b/app/javascript/mastodon/reducers/accounts.js index 5391a93ae..8a4d69f26 100644 --- a/app/javascript/mastodon/reducers/accounts.js +++ b/app/javascript/mastodon/reducers/accounts.js @@ -44,7 +44,7 @@ import { FAVOURITED_STATUSES_EXPAND_SUCCESS, } from '../actions/favourites'; import { STORE_HYDRATE } from '../actions/store'; -import emojify from '../emoji'; +import emojify from '../features/emoji/emoji'; import { Map as ImmutableMap, fromJS } from 'immutable'; import escapeTextContentForBrowser from 'escape-html'; diff --git a/app/javascript/mastodon/reducers/custom_emojis.js b/app/javascript/mastodon/reducers/custom_emojis.js index b7c9b1d7c..307bcc7dc 100644 --- a/app/javascript/mastodon/reducers/custom_emojis.js +++ b/app/javascript/mastodon/reducers/custom_emojis.js @@ -1,7 +1,7 @@ import { List as ImmutableList } from 'immutable'; import { STORE_HYDRATE } from '../actions/store'; -import { search as emojiSearch } from '../emoji_index_light'; -import { buildCustomEmojis } from '../emoji'; +import { search as emojiSearch } from '../features/emoji/emoji_mart_search_light'; +import { buildCustomEmojis } from '../features/emoji/emoji'; const initialState = ImmutableList(); diff --git a/app/javascript/mastodon/reducers/statuses.js b/app/javascript/mastodon/reducers/statuses.js index ed16e016f..32772fff7 100644 --- a/app/javascript/mastodon/reducers/statuses.js +++ b/app/javascript/mastodon/reducers/statuses.js @@ -39,7 +39,7 @@ import { PINNED_STATUSES_FETCH_SUCCESS, } from '../actions/pin_statuses'; import { SEARCH_FETCH_SUCCESS } from '../actions/search'; -import emojify from '../emoji'; +import emojify from '../features/emoji/emoji'; import { Map as ImmutableMap, fromJS } from 'immutable'; import escapeTextContentForBrowser from 'escape-html'; diff --git a/app/javascript/packs/public.js b/app/javascript/packs/public.js index 6f72a8050..a47fc2830 100644 --- a/app/javascript/packs/public.js +++ b/app/javascript/packs/public.js @@ -21,7 +21,7 @@ function main() { const { length } = require('stringz'); const IntlRelativeFormat = require('intl-relativeformat').default; const { delegate } = require('rails-ujs'); - const emojify = require('../mastodon/emoji').default; + const emojify = require('../mastodon/features/emoji/emoji').default; const { getLocale } = require('../mastodon/locales'); const { localeData } = getLocale(); const VideoContainer = require('../mastodon/containers/video_container').default; diff --git a/lib/tasks/emojis.rake b/lib/tasks/emojis.rake index cd5e30e96..625a6e55d 100644 --- a/lib/tasks/emojis.rake +++ b/lib/tasks/emojis.rake @@ -17,7 +17,7 @@ namespace :emojis do task :generate do source = 'http://www.unicode.org/Public/emoji/5.0/emoji-test.txt' codes = [] - dest = Rails.root.join('app', 'javascript', 'mastodon', 'emoji_map.json') + dest = Rails.root.join('app', 'javascript', 'mastodon', 'features', 'emoji', 'emoji_map.json') puts "Downloading emojos from source... (#{source})" diff --git a/spec/javascript/components/emoji_index.test.js b/spec/javascript/components/emoji_index.test.js index 8c6d2cedb..4bff79265 100644 --- a/spec/javascript/components/emoji_index.test.js +++ b/spec/javascript/components/emoji_index.test.js @@ -1,5 +1,5 @@ import { expect } from 'chai'; -import { search } from '../../../app/javascript/mastodon/emoji_index_light'; +import { search } from '../../../app/javascript/mastodon/features/emoji/emoji_mart_search_light'; import { emojiIndex } from 'emoji-mart'; import { pick } from 'lodash'; @@ -78,4 +78,22 @@ describe('emoji_index', () => { expect(emojiIndex.search('flag', { include: ['people'] })) .to.deep.equal([]); }); + + it('does an emoji whose unified name is irregular', () => { + let expected = [{ + 'id': 'water_polo', + 'unified': '1f93d', + 'native': '🤽', + }, { + 'id': 'man-playing-water-polo', + 'unified': '1f93d-200d-2642-fe0f', + 'native': '🤽‍♂️', + }, { + 'id': 'woman-playing-water-polo', + 'unified': '1f93d-200d-2640-fe0f', + 'native': '🤽‍♀️', + }]; + expect(search('polo').map(trimEmojis)).to.deep.equal(expected); + expect(emojiIndex.search('polo').map(trimEmojis)).to.deep.equal(expected); + }); }); diff --git a/spec/javascript/components/emojify.test.js b/spec/javascript/components/emojify.test.js index 4202e52e1..3105c8e3f 100644 --- a/spec/javascript/components/emojify.test.js +++ b/spec/javascript/components/emojify.test.js @@ -1,5 +1,5 @@ import { expect } from 'chai'; -import emojify from '../../../app/javascript/mastodon/emoji'; +import emojify from '../../../app/javascript/mastodon/features/emoji/emoji'; describe('emojify', () => { it('ignores unknown shortcodes', () => { @@ -49,4 +49,13 @@ describe('emojify', () => { expect(emojify('👌🌈💕')).to.equal('👌🌈💕'); expect(emojify('👌 🌈 💕')).to.equal('👌 🌈 💕'); }); + + it('does an emoji that has no shortcode', () => { + expect(emojify('🕉️')).to.equal('🕉️'); + }); + + it('does an emoji whose filename is irregular', () => { + expect(emojify('↙️')).to.equal('↙️'); + }); + });