diff options
Diffstat (limited to 'wp-admin/js/word-count.js')
-rw-r--r-- | wp-admin/js/word-count.js | 220 |
1 files changed, 220 insertions, 0 deletions
diff --git a/wp-admin/js/word-count.js b/wp-admin/js/word-count.js new file mode 100644 index 0000000..066fc58 --- /dev/null +++ b/wp-admin/js/word-count.js @@ -0,0 +1,220 @@ +/** + * Word or character counting functionality. Count words or characters in a + * provided text string. + * + * @namespace wp.utils + * + * @since 2.6.0 + * @output wp-admin/js/word-count.js + */ + +( function() { + /** + * Word counting utility + * + * @namespace wp.utils.wordcounter + * @memberof wp.utils + * + * @class + * + * @param {Object} settings Optional. Key-value object containing overrides for + * settings. + * @param {RegExp} settings.HTMLRegExp Optional. Regular expression to find HTML elements. + * @param {RegExp} settings.HTMLcommentRegExp Optional. Regular expression to find HTML comments. + * @param {RegExp} settings.spaceRegExp Optional. Regular expression to find irregular space + * characters. + * @param {RegExp} settings.HTMLEntityRegExp Optional. Regular expression to find HTML entities. + * @param {RegExp} settings.connectorRegExp Optional. Regular expression to find connectors that + * split words. + * @param {RegExp} settings.removeRegExp Optional. Regular expression to find remove unwanted + * characters to reduce false-positives. + * @param {RegExp} settings.astralRegExp Optional. Regular expression to find unwanted + * characters when searching for non-words. + * @param {RegExp} settings.wordsRegExp Optional. Regular expression to find words by spaces. + * @param {RegExp} settings.characters_excluding_spacesRegExp Optional. Regular expression to find characters which + * are non-spaces. + * @param {RegExp} settings.characters_including_spacesRegExp Optional. Regular expression to find characters + * including spaces. + * @param {RegExp} settings.shortcodesRegExp Optional. Regular expression to find shortcodes. + * @param {Object} settings.l10n Optional. Localization object containing specific + * configuration for the current localization. + * @param {string} settings.l10n.type Optional. Method of finding words to count. + * @param {Array} settings.l10n.shortcodes Optional. Array of shortcodes that should be removed + * from the text. + * + * @return {void} + */ + function WordCounter( settings ) { + var key, + shortcodes; + + // Apply provided settings to object settings. + if ( settings ) { + for ( key in settings ) { + + // Only apply valid settings. + if ( settings.hasOwnProperty( key ) ) { + this.settings[ key ] = settings[ key ]; + } + } + } + + shortcodes = this.settings.l10n.shortcodes; + + // If there are any localization shortcodes, add this as type in the settings. + if ( shortcodes && shortcodes.length ) { + this.settings.shortcodesRegExp = new RegExp( '\\[\\/?(?:' + shortcodes.join( '|' ) + ')[^\\]]*?\\]', 'g' ); + } + } + + // Default settings. + WordCounter.prototype.settings = { + HTMLRegExp: /<\/?[a-z][^>]*?>/gi, + HTMLcommentRegExp: /<!--[\s\S]*?-->/g, + spaceRegExp: / | /gi, + HTMLEntityRegExp: /&\S+?;/g, + + // \u2014 = em-dash. + connectorRegExp: /--|\u2014/g, + + // Characters to be removed from input text. + removeRegExp: new RegExp( [ + '[', + + // Basic Latin (extract). + '\u0021-\u0040\u005B-\u0060\u007B-\u007E', + + // Latin-1 Supplement (extract). + '\u0080-\u00BF\u00D7\u00F7', + + /* + * The following range consists of: + * General Punctuation + * Superscripts and Subscripts + * Currency Symbols + * Combining Diacritical Marks for Symbols + * Letterlike Symbols + * Number Forms + * Arrows + * Mathematical Operators + * Miscellaneous Technical + * Control Pictures + * Optical Character Recognition + * Enclosed Alphanumerics + * Box Drawing + * Block Elements + * Geometric Shapes + * Miscellaneous Symbols + * Dingbats + * Miscellaneous Mathematical Symbols-A + * Supplemental Arrows-A + * Braille Patterns + * Supplemental Arrows-B + * Miscellaneous Mathematical Symbols-B + * Supplemental Mathematical Operators + * Miscellaneous Symbols and Arrows + */ + '\u2000-\u2BFF', + + // Supplemental Punctuation. + '\u2E00-\u2E7F', + ']' + ].join( '' ), 'g' ), + + // Remove UTF-16 surrogate points, see https://en.wikipedia.org/wiki/UTF-16#U.2BD800_to_U.2BDFFF + astralRegExp: /[\uD800-\uDBFF][\uDC00-\uDFFF]/g, + wordsRegExp: /\S\s+/g, + characters_excluding_spacesRegExp: /\S/g, + + /* + * Match anything that is not a formatting character, excluding: + * \f = form feed + * \n = new line + * \r = carriage return + * \t = tab + * \v = vertical tab + * \u00AD = soft hyphen + * \u2028 = line separator + * \u2029 = paragraph separator + */ + characters_including_spacesRegExp: /[^\f\n\r\t\v\u00AD\u2028\u2029]/g, + l10n: window.wordCountL10n || {} + }; + + /** + * Counts the number of words (or other specified type) in the specified text. + * + * @since 2.6.0 + * + * @memberof wp.utils.wordcounter + * + * @param {string} text Text to count elements in. + * @param {string} type Optional. Specify type to use. + * + * @return {number} The number of items counted. + */ + WordCounter.prototype.count = function( text, type ) { + var count = 0; + + // Use default type if none was provided. + type = type || this.settings.l10n.type; + + // Sanitize type to one of three possibilities: 'words', 'characters_excluding_spaces' or 'characters_including_spaces'. + if ( type !== 'characters_excluding_spaces' && type !== 'characters_including_spaces' ) { + type = 'words'; + } + + // If we have any text at all. + if ( text ) { + text = text + '\n'; + + // Replace all HTML with a new-line. + text = text.replace( this.settings.HTMLRegExp, '\n' ); + + // Remove all HTML comments. + text = text.replace( this.settings.HTMLcommentRegExp, '' ); + + // If a shortcode regular expression has been provided use it to remove shortcodes. + if ( this.settings.shortcodesRegExp ) { + text = text.replace( this.settings.shortcodesRegExp, '\n' ); + } + + // Normalize non-breaking space to a normal space. + text = text.replace( this.settings.spaceRegExp, ' ' ); + + if ( type === 'words' ) { + + // Remove HTML Entities. + text = text.replace( this.settings.HTMLEntityRegExp, '' ); + + // Convert connectors to spaces to count attached text as words. + text = text.replace( this.settings.connectorRegExp, ' ' ); + + // Remove unwanted characters. + text = text.replace( this.settings.removeRegExp, '' ); + } else { + + // Convert HTML Entities to "a". + text = text.replace( this.settings.HTMLEntityRegExp, 'a' ); + + // Remove surrogate points. + text = text.replace( this.settings.astralRegExp, 'a' ); + } + + // Match with the selected type regular expression to count the items. + text = text.match( this.settings[ type + 'RegExp' ] ); + + // If we have any matches, set the count to the number of items found. + if ( text ) { + count = text.length; + } + } + + return count; + }; + + // Add the WordCounter to the WP Utils. + window.wp = window.wp || {}; + window.wp.utils = window.wp.utils || {}; + window.wp.utils.WordCounter = WordCounter; +} )(); |