summaryrefslogtreecommitdiffstats
path: root/wp-admin/js/word-count.js
diff options
context:
space:
mode:
Diffstat (limited to 'wp-admin/js/word-count.js')
-rw-r--r--wp-admin/js/word-count.js220
1 files changed, 220 insertions, 0 deletions
diff --git a/wp-admin/js/word-count.js b/wp-admin/js/word-count.js
new file mode 100644
index 0000000..066fc58
--- /dev/null
+++ b/wp-admin/js/word-count.js
@@ -0,0 +1,220 @@
+/**
+ * Word or character counting functionality. Count words or characters in a
+ * provided text string.
+ *
+ * @namespace wp.utils
+ *
+ * @since 2.6.0
+ * @output wp-admin/js/word-count.js
+ */
+
+( function() {
/**
 * Word counting utility.
 *
 * Every instance gets its own settings object that inherits from the shared
 * defaults on `WordCounter.prototype.settings`. Overrides passed to the
 * constructor therefore only affect the instance being created.
 *
 * @namespace wp.utils.wordcounter
 * @memberof  wp.utils
 *
 * @class
 *
 * @param {Object} settings                                   Optional. Key-value object containing overrides for
 *                                                            settings.
 * @param {RegExp} settings.HTMLRegExp                        Optional. Regular expression to find HTML elements.
 * @param {RegExp} settings.HTMLcommentRegExp                 Optional. Regular expression to find HTML comments.
 * @param {RegExp} settings.spaceRegExp                       Optional. Regular expression to find irregular space
 *                                                            characters.
 * @param {RegExp} settings.HTMLEntityRegExp                  Optional. Regular expression to find HTML entities.
 * @param {RegExp} settings.connectorRegExp                   Optional. Regular expression to find connectors that
 *                                                            split words.
 * @param {RegExp} settings.removeRegExp                      Optional. Regular expression to find unwanted
 *                                                            characters to remove, reducing false-positives.
 * @param {RegExp} settings.astralRegExp                      Optional. Regular expression to find unwanted
 *                                                            characters when searching for non-words.
 * @param {RegExp} settings.wordsRegExp                       Optional. Regular expression to find words by spaces.
 * @param {RegExp} settings.characters_excluding_spacesRegExp Optional. Regular expression to find characters which
 *                                                            are non-spaces.
 * @param {RegExp} settings.characters_including_spacesRegExp Optional. Regular expression to find characters
 *                                                            including spaces.
 * @param {RegExp} settings.shortcodesRegExp                  Optional. Regular expression to find shortcodes.
 * @param {Object} settings.l10n                              Optional. Localization object containing specific
 *                                                            configuration for the current localization.
 * @param {string} settings.l10n.type                         Optional. Method of finding words to count.
 * @param {Array}  settings.l10n.shortcodes                   Optional. Array of shortcodes that should be removed
 *                                                            from the text.
 *
 * @return {void}
 */
function WordCounter( settings ) {
	var hasOwn = Object.prototype.hasOwnProperty,
		key,
		shortcodes;

	/*
	 * Create a per-instance settings object that inherits from the defaults.
	 * Writing overrides directly onto the inherited object would modify the
	 * prototype's defaults and leak into every other instance.
	 */
	this.settings = Object.create( this.settings );

	// Apply provided settings to the instance settings.
	if ( settings ) {
		for ( key in settings ) {

			// Only apply own properties; also works for prototype-less objects.
			if ( hasOwn.call( settings, key ) ) {
				this.settings[ key ] = settings[ key ];
			}
		}
	}

	shortcodes = this.settings.l10n.shortcodes;

	// If there are any localization shortcodes, build the expression that matches their tags.
	if ( shortcodes && shortcodes.length ) {
		this.settings.shortcodesRegExp = new RegExp(
			'\\[\\/?(?:' +
			shortcodes.map( function( tag ) {

				// Escape regular expression metacharacters so tags are matched literally.
				return String( tag ).replace( /[.*+?^${}()|[\]\\]/g, '\\$&' );
			} ).join( '|' ) +
			')[^\\]]*?\\]',
			'g'
		);
	}
}
+
// Default settings, shared through the prototype.
WordCounter.prototype.settings = {

	// Opening and closing HTML tags.
	HTMLRegExp: /<\/?[a-z][^>]*?>/gi,

	// HTML comments, including multi-line ones.
	HTMLcommentRegExp: /<!--[\s\S]*?-->/g,

	// Non-breaking space entities (named and numeric).
	spaceRegExp: /&nbsp;|&#160;/gi,

	// An ampersand, a run of non-whitespace characters, then a semicolon.
	HTMLEntityRegExp: /&\S+?;/g,

	// Double hyphen or em-dash (\u2014).
	connectorRegExp: /--|\u2014/g,

	// Characters to be removed from input text, expressed as one character class.
	removeRegExp: new RegExp(
		'[' +

			// Basic Latin (extract).
			'\u0021-\u0040\u005B-\u0060\u007B-\u007E' +

			// Latin-1 Supplement (extract).
			'\u0080-\u00BF\u00D7\u00F7' +

			/*
			 * One contiguous range covering these blocks:
			 *     General Punctuation
			 *     Superscripts and Subscripts
			 *     Currency Symbols
			 *     Combining Diacritical Marks for Symbols
			 *     Letterlike Symbols
			 *     Number Forms
			 *     Arrows
			 *     Mathematical Operators
			 *     Miscellaneous Technical
			 *     Control Pictures
			 *     Optical Character Recognition
			 *     Enclosed Alphanumerics
			 *     Box Drawing
			 *     Block Elements
			 *     Geometric Shapes
			 *     Miscellaneous Symbols
			 *     Dingbats
			 *     Miscellaneous Mathematical Symbols-A
			 *     Supplemental Arrows-A
			 *     Braille Patterns
			 *     Supplemental Arrows-B
			 *     Miscellaneous Mathematical Symbols-B
			 *     Supplemental Mathematical Operators
			 *     Miscellaneous Symbols and Arrows
			 */
			'\u2000-\u2BFF' +

			// Supplemental Punctuation.
			'\u2E00-\u2E7F' +
		']',
		'g'
	),

	// UTF-16 surrogate pairs, see https://en.wikipedia.org/wiki/UTF-16#U.2BD800_to_U.2BDFFF
	astralRegExp: /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,

	// A non-whitespace character followed by whitespace, i.e. the end of a word.
	wordsRegExp: /\S\s+/g,

	// Any non-whitespace character.
	characters_excluding_spacesRegExp: /\S/g,

	/*
	 * Any character except these formatting characters:
	 *     \f     = form feed
	 *     \n     = new line
	 *     \r     = carriage return
	 *     \t     = tab
	 *     \v     = vertical tab
	 *     \u00AD = soft hyphen
	 *     \u2028 = line separator
	 *     \u2029 = paragraph separator
	 */
	characters_including_spacesRegExp: /[^\f\n\r\t\v\u00AD\u2028\u2029]/g,

	// Localization data from the global `wordCountL10n`, or an empty object when it is not set.
	l10n: window.wordCountL10n || {}
};
+
/**
 * Counts the words, or the characters with or without spaces, found in a text.
 *
 * HTML tags, HTML comments and (when configured) shortcodes are stripped before
 * counting. Any type other than 'characters_excluding_spaces' or
 * 'characters_including_spaces' is treated as 'words'.
 *
 * @since 2.6.0
 *
 * @memberof wp.utils.wordcounter
 *
 * @param {string} text Text to count elements in.
 * @param {string} type Optional. Specify type to use. Falls back to the type
 *                      configured in the localization settings.
 *
 * @return {number} The number of items counted; 0 when the text is empty.
 */
WordCounter.prototype.count = function( text, type ) {
	var options = this.settings,
		mode = type || options.l10n.type,
		countsWords,
		found;

	// Anything that is not one of the two character modes counts words.
	countsWords = ! ( mode === 'characters_excluding_spaces' || mode === 'characters_including_spaces' );

	if ( countsWords ) {
		mode = 'words';
	}

	// Nothing to count.
	if ( ! text ) {
		return 0;
	}

	/*
	 * Append a new-line so the text always ends in whitespace, then turn HTML
	 * elements into new-lines and drop HTML comments entirely.
	 */
	text = ( text + '\n' )
		.replace( options.HTMLRegExp, '\n' )
		.replace( options.HTMLcommentRegExp, '' );

	// Shortcodes are only stripped when an expression for them is available.
	if ( options.shortcodesRegExp ) {
		text = text.replace( options.shortcodesRegExp, '\n' );
	}

	// Normalize non-breaking spaces to regular spaces.
	text = text.replace( options.spaceRegExp, ' ' );

	if ( countsWords ) {

		// Drop HTML entities, split on connectors, then strip unwanted characters.
		text = text
			.replace( options.HTMLEntityRegExp, '' )
			.replace( options.connectorRegExp, ' ' )
			.replace( options.removeRegExp, '' );
	} else {

		// Make every HTML entity and every surrogate pair count as a single character ("a").
		text = text
			.replace( options.HTMLEntityRegExp, 'a' )
			.replace( options.astralRegExp, 'a' );
	}

	// The number of matches of the selected expression is the count.
	found = text.match( options[ mode + 'RegExp' ] );

	return found ? found.length : 0;
};
+
// Expose the constructor as wp.utils.WordCounter, creating the namespaces when they are missing.
if ( ! window.wp ) {
	window.wp = {};
}

if ( ! window.wp.utils ) {
	window.wp.utils = {};
}

window.wp.utils.WordCounter = WordCounter;
+} )();