diff --git a/src/node/utils/ExportHtml.js b/src/node/utils/ExportHtml.js index e6bfef37f..d885c6824 100644 --- a/src/node/utils/ExportHtml.js +++ b/src/node/utils/ExportHtml.js @@ -1,3 +1,5 @@ +'use strict'; + /** * Copyright 2009 Google Inc. * @@ -15,572 +17,514 @@ */ -var async = require("async"); -var Changeset = require("ep_etherpad-lite/static/js/Changeset"); -var padManager = require("../db/PadManager"); -var ERR = require("async-stacktrace"); +var async = require('async'); +var Changeset = require('ep_etherpad-lite/static/js/Changeset'); +var padManager = require('../db/PadManager'); +var eRR = require('async-stacktrace'); var _ = require('underscore'); var Security = require('ep_etherpad-lite/static/js/security'); var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); var eejs = require('ep_etherpad-lite/node/eejs'); var _analyzeLine = require('./ExportHelper')._analyzeLine; var _encodeWhitespace = require('./ExportHelper')._encodeWhitespace; +// copied from ACE +var _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/; +var _REGEX_URLCHAR = new RegExp('(' + (/[-:@a-zA-Z0-9_.,~%+/\\?=&#;()$]/).source + '|' + _REGEX_WORDCHAR.source + ')'); +var _REGEX_URL = new RegExp(/(?:(?:https?|s?ftp|ftps|file|smb|afp|nfs|(x-)?man|gopher|txmt):\/\/|mailto:)/.source + _REGEX_URLCHAR.source + '*(?![:.,;])' + _REGEX_URLCHAR.source, 'g'); -function getPadHTML(pad, revNum, callback) -{ - var atext = pad.atext; - var html; - async.waterfall([ - // fetch revision atext - function (callback) - { - if (revNum != undefined) - { - pad.getInternalRevisionAText(revNum, function (err, revisionAtext) - { - if(ERR(err, callback)) return; - atext = revisionAtext; - callback(); - }); +// returns null if no URLs, or [[startIndex1, url1], [startIndex2, url2], ...] +// copied from ACE +function _processSpaces (s) { + var doesWrap = true; + if (s.indexOf('<') < 0 && !doesWrap) { + // short-cut + return s.replace(/ /g, ' '); } - else - { - callback(null); + var parts = []; + s.replace(/<[^>]*>?| |[^ <]+/g, function (m) { + parts.push(m); + }); + if (doesWrap) { + var endOfLine = true; + var beforeSpace = false; + // last space in a run is normal, others are nbsp, + // end of line is nbsp + for (var i = parts.length - 1; i >= 0; i--) { + var p = parts[i]; + if (p === ' ') { + if (endOfLine || beforeSpace) parts[i] = ' '; + endOfLine = false; + beforeSpace = true; + } else if (p.charAt(0) !== '<') { + endOfLine = false; + beforeSpace = false; + } + } + // beginning of line is nbsp + for (i = 0; i < parts.length; i++) { + p = parts[i]; + if (p === ' ') { + parts[i] = ' '; + break; + } else if (p.charAt(0) !== '<') { + break; + } + } + } else { + for (i = 0; i < parts.length; i++) { + p = parts[i]; + if (p === ' ') { + parts[i] = ' '; + } + } } - }, - // convert atext to html - - - function (callback) - { - html = getHTMLFromAtext(pad, atext); - callback(null); - }], - // run final callback - - - function (err) - { - if(ERR(err, callback)) return; - callback(null, html); - }); + return parts.join(''); } -exports.getPadHTML = getPadHTML; -exports.getHTMLFromAtext = getHTMLFromAtext; +function _findURLs (text) { + _REGEX_URL.lastIndex = 0; + var urls = null; + var execResult; + while (execResult = _REGEX_URL.exec(text)) { + urls = urls || []; + var startIndex = execResult.index; + var url = execResult[0]; + urls.push([startIndex, url]); + } -function getHTMLFromAtext(pad, atext, authorColors) -{ - var apool = pad.apool(); - var textLines = atext.text.slice(0, -1).split('\n'); - var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text); + return urls; +} - var tags = ['h1', 'h2', 'strong', 'em', 'u', 's']; - var props = ['heading1', 'heading2', 'bold', 'italic', 'underline', 'strikethrough']; +/** + * Return docx from input html + * + * @param {text} [pad] text + * @param {text} [atext] text + * @param {text} [authorColors] text + * + * @returns {html} html + * + * @public + */ +function getHTMLFromAtext (pad, atext, authorColors) { + var apool = pad.apool(); + var textLines = atext.text.slice(0, -1).split('\n'); + var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text); - // prepare tags stored as ['tag', true] to be exported - hooks.aCallAll("exportHtmlAdditionalTags", pad, function(err, newProps){ - newProps.forEach(function (propName, i){ - tags.push(propName); - props.push(propName); + var tags = ['h1', 'h2', 'strong', 'em', 'u', 's']; + var props = ['heading1', 'heading2', 'bold', 'italic', 'underline', 'strikethrough']; + + // prepare tags stored as ['tag', true] to be exported + hooks.aCallAll('exportHtmlAdditionalTags', pad, function (err, newProps) { + if (err) { + return err; + } + newProps.forEach(function (propName) { + tags.push(propName); + props.push(propName); + }); }); - }); - // prepare tags stored as ['tag', 'value'] to be exported. This will generate HTML - // with tags like - hooks.aCallAll("exportHtmlAdditionalTagsWithData", pad, function(err, newProps){ - newProps.forEach(function (propName, i){ - tags.push('span data-' + propName[0] + '="' + propName[1] + '"'); - props.push(propName); + // prepare tags stored as ['tag', 'value'] to be exported. This will generate HTML + // with tags like + hooks.aCallAll('exportHtmlAdditionalTagsWithData', pad, function (err, newProps) { + if (err) { + return err; + } + newProps.forEach(function (propName) { + tags.push('span data-' + propName[0] + '="' + propName[1] + '"'); + props.push(propName); + }); }); - }); - // holds a map of used styling attributes (*1, *2, etc) in the apool - // and maps them to an index in props - // *3:2 -> the attribute *3 means strong - // *2:5 -> the attribute *2 means s(trikethrough) - var anumMap = {}; - var css = ""; + // holds a map of used styling attributes (*1, *2, etc) in the apool + // and maps them to an index in props + // *3:2 -> the attribute *3 means strong + // *2:5 -> the attribute *2 means s(trikethrough) + var anumMap = {}; + var css = ''; - var stripDotFromAuthorID = function(id){ - return id.replace(/\./g,'_'); - }; + var stripDotFromAuthorID = function (id) { + return id.replace(/\./g, '_'); + }; - if(authorColors){ - css+="'; } - css+=""; - } + // iterates over all props(h1,h2,strong,...), checks if it is used in + // this pad, and if yes puts its attrib id->props value into anumMap + props.forEach(function (propName, i) { + var attrib = [propName, true]; + if (_.isArray(propName)) { + // propName can be in the form of ['color', 'red'], + // see hook exportHtmlAdditionalTagsWithData + attrib = propName; + } + var propTrueNum = apool.putAttrib(attrib, true); + if (propTrueNum >= 0) { + anumMap[propTrueNum] = i; + } + }); - // iterates over all props(h1,h2,strong,...), checks if it is used in - // this pad, and if yes puts its attrib id->props value into anumMap - props.forEach(function (propName, i) - { - var attrib = [propName, true]; - if (_.isArray(propName)) { - // propName can be in the form of ['color', 'red'], - // see hook exportHtmlAdditionalTagsWithData - attrib = propName; - } - var propTrueNum = apool.putAttrib(attrib, true); - if (propTrueNum >= 0) - { - anumMap[propTrueNum] = i; - } - }); - - function getLineHTML(text, attribs) - { + function getLineHTML (text, attribs) { // Use order of tags (b/i/u) as order of nesting, for simplicity // and decent nesting. For example, // Just bold Bold and italics Just italics // becomes // Just bold Bold and italics Just italics - var taker = Changeset.stringIterator(text); - var assem = Changeset.stringAssembler(); - var openTags = []; + var taker = Changeset.stringIterator(text); + var assem = Changeset.stringAssembler(); + var openTags = []; - function getSpanClassFor(i){ - //return if author colors are disabled - if (!authorColors) return false; + function getSpanClassFor (i) { + //return if author colors are disabled + if (!authorColors) return false; - var property = props[i]; + var property = props[i]; - // we are not insterested on properties in the form of ['color', 'red'], - // see hook exportHtmlAdditionalTagsWithData - if (_.isArray(property)) { - return false; - } + // we are not insterested on properties in the form of ['color', 'red'], + // see hook exportHtmlAdditionalTagsWithData + if (_.isArray(property)) { + return false; + } - if(property.substr(0,6) === "author"){ - return stripDotFromAuthorID(property); - } + if (property.substr(0, 6) === 'author') { + return stripDotFromAuthorID(property); + } - if(property === "removed"){ - return "removed"; - } + if (property === 'removed') { + return 'removed'; + } - return false; + return false; + } + + // tags added by exportHtmlAdditionalTagsWithData will be exported as with + // data attributes + function isSpanWithData (i) { + var property = props[i]; + + return _.isArray(property); + } + + function emitOpenTag (i) { + openTags.unshift(i); + var spanClass = getSpanClassFor(i); + + if (spanClass) { + assem.append(''); + } else { + assem.append('<'); + assem.append(tags[i]); + assem.append('>'); + } + } + + // this closes an open tag and removes its reference from openTags + function emitCloseTag (i) { + openTags.shift(); + var spanClass = getSpanClassFor(i); + var spanWithData = isSpanWithData(i); + + if (spanClass || spanWithData) { + assem.append(''); + } else { + assem.append(''); + } + } + + var urls = _findURLs(text); + + var idx = 0; + + function processNextChars (numChars) { + if (numChars <= 0) { + return; + } + + var iter = Changeset.opIterator(Changeset.subattribution(attribs, idx, idx + numChars)); + idx += numChars; + + // this iterates over every op string and decides which tags to open or to close + // based on the attribs used + while (iter.hasNext()) { + var o = iter.next(); + var usedAttribs = []; + + // mark all attribs as used + Changeset.eachAttribNumber(o.attribs, function (a) { + if (a in anumMap) { + usedAttribs.push(anumMap[a]); // i = 0 => bold, etc. + } + }); + var outermostTag = -1; + // find the outer most open tag that is no longer used + for (var i = openTags.length - 1; i >= 0; i--) { + if (usedAttribs.indexOf(openTags[i]) === -1) { + outermostTag = i; + break; + } + } + + // close all tags upto the outer most + if (outermostTag !== -1) { + while (outermostTag >= 0) { + emitCloseTag(openTags[0]); + outermostTag--; + } + } + + // open all tags that are used but not open + for (i = 0; i < usedAttribs.length; i++) { + if (openTags.indexOf(usedAttribs[i]) === -1) { + emitOpenTag(usedAttribs[i]); + } + } + + var chars = o.chars; + if (o.lines) { + chars--; // exclude newline at end of line, if present + } + + var s = taker.take(chars); + + //removes the characters with the code 12. Don't know where they come + //from but they break the abiword parser and are completly useless + s = s.replace(String.fromCharCode(12), ''); + + assem.append(_encodeWhitespace(Security.escapeHTML(s))); + } // end iteration over spans in line + + // close all the tags that are open after the last op + while (openTags.length > 0) { + emitCloseTag(openTags[0]); + } + } // end processNextChars + + if (urls) { + urls.forEach(function (urlData) { + var startIndex = urlData[0]; + var url = urlData[1]; + var urlLength = url.length; + processNextChars(startIndex - idx); + assem.append(''); + processNextChars(urlLength); + assem.append(''); + }); + } + + processNextChars(text.length - idx); + + return _processSpaces(assem.toString()); + } // end getLineHTML + + var pieces = [css]; + + // Need to deal with constraints imposed on HTML lists; can + // only gain one level of nesting at once, can't change type + // mid-list, etc. + // People might use weird indenting, e.g. skip a level, + // so we want to do something reasonable there. We also + // want to deal gracefully with blank lines. + // => keeps track of the parents level of indentation + var lists = []; // e.g. [[1,'bullet'], [3,'bullet'], ...] + var listLevels = []; + for (var i = 0; i < textLines.length; i++) { + var context; + var line = _analyzeLine(textLines[i], attribLines[i], apool); + var lineContent = getLineHTML(line.text, line.aline); + listLevels.push(line.listLevel); + + if (line.listLevel) { //If we are inside a list + // do list stuff + var whichList = -1; // index into lists or -1 + if (line.listLevel) { + whichList = lists.length; + for (var j = lists.length - 1; j >= 0; j--) { + if (line.listLevel <= lists[j][0]) { + whichList = j; + } + } + } + context = { + line: line, + lineContent: lineContent, + apool: apool, + attribLine: attribLines[i], + text: textLines[i], + padId: pad.id + }; + hooks.callAll('getLineHTMLForExport', context); + if (whichList >= lists.length) { + if (lists.length > 0) { + pieces.push(''); + } + lists.push([line.listLevel, line.listTypeName]); + + // if there is a previous list we need to open x tags, where x is the difference of the levels + // if there is no previous list we need to open x tags, where x is the wanted level + var toOpen = lists.length > 1 ? line.listLevel - lists[lists.length - 2][0] - 1 : line.listLevel - 1 + + if (line.listTypeName === 'number') { + if (toOpen > 0) { + pieces.push(new Array(toOpen + 1).join('
  1. ')); + } + pieces.push('
    1. ', context.lineContent || '
      '); + } else { + if (toOpen > 0) { + pieces.push(new Array(toOpen + 1).join('
      • ')); + } + pieces.push('
        • ', context.lineContent || '
          '); + } + } else { //means we are getting closer to the lowest level of indentation or are at the same level + var toClose = lists.length > 0 ? listLevels[listLevels.length - 2] - line.listLevel : 0 + if (toClose > 0) { + pieces.push('
        • '); + if (lists[lists.length - 1][1] === 'number') { + pieces.push(new Array(toClose + 1).join('
  2. ')); + pieces.push('
  3. ', context.lineContent || '
    '); + } else { + pieces.push(new Array(toClose + 1).join('
  4. ')); + pieces.push('
  5. ', context.lineContent || '
    '); + } + lists = lists.slice(0, whichList + 1); + } else { + pieces.push('
  6. ', context.lineContent || '
    '); + } + } + } else { //outside any list, need to close line.listLevel of lists + + if (lists.length > 0) { + if (lists[lists.length - 1][1] === 'number') { + pieces.push('
'); + pieces.push(new Array(listLevels[listLevels.length - 2]).join('')); + } else { + pieces.push(''); + pieces.push(new Array(listLevels[listLevels.length - 2]).join('')); + } + } + lists = []; + + context = { + line: line, + lineContent: lineContent, + apool: apool, + attribLine: attribLines[i], + text: textLines[i], + padId: pad.id + }; + + hooks.callAll('getLineHTMLForExport', context); + pieces.push(context.lineContent, '
'); + } } - // tags added by exportHtmlAdditionalTagsWithData will be exported as with - // data attributes - function isSpanWithData(i){ - var property = props[i]; - return _.isArray(property); - } - - function emitOpenTag(i) - { - openTags.unshift(i); - var spanClass = getSpanClassFor(i); - - if(spanClass){ - assem.append(''); - } else { - assem.append('<'); - assem.append(tags[i]); - assem.append('>'); - } - } - - // this closes an open tag and removes its reference from openTags - function emitCloseTag(i) - { - openTags.shift(); - var spanClass = getSpanClassFor(i); - var spanWithData = isSpanWithData(i); - - if(spanClass || spanWithData){ - assem.append(''); - } else { - assem.append(''); - } - } - - var urls = _findURLs(text); - - var idx = 0; - - function processNextChars(numChars) - { - if (numChars <= 0) - { - return; - } - - var iter = Changeset.opIterator(Changeset.subattribution(attribs, idx, idx + numChars)); - idx += numChars; - - // this iterates over every op string and decides which tags to open or to close - // based on the attribs used - while (iter.hasNext()) - { - var o = iter.next(); - var usedAttribs = []; - - // mark all attribs as used - Changeset.eachAttribNumber(o.attribs, function (a) - { - if (a in anumMap) - { - usedAttribs.push(anumMap[a]); // i = 0 => bold, etc. - } - }); - var outermostTag = -1; - // find the outer most open tag that is no longer used - for (var i = openTags.length - 1; i >= 0; i--) - { - if (usedAttribs.indexOf(openTags[i]) === -1) - { - outermostTag = i; - break; - } - } - - // close all tags upto the outer most - if (outermostTag != -1) - { - while ( outermostTag >= 0 ) - { - emitCloseTag(openTags[0]); - outermostTag--; - } - } - - // open all tags that are used but not open - for (i=0; i < usedAttribs.length; i++) - { - if (openTags.indexOf(usedAttribs[i]) === -1) - { - emitOpenTag(usedAttribs[i]) - } - } - - var chars = o.chars; - if (o.lines) - { - chars--; // exclude newline at end of line, if present - } - - var s = taker.take(chars); - - //removes the characters with the code 12. Don't know where they come - //from but they break the abiword parser and are completly useless - s = s.replace(String.fromCharCode(12), ""); - - assem.append(_encodeWhitespace(Security.escapeHTML(s))); - } // end iteration over spans in line - - // close all the tags that are open after the last op - while (openTags.length > 0) - { - emitCloseTag(openTags[0]) - } - } // end processNextChars - if (urls) - { - urls.forEach(function (urlData) - { - var startIndex = urlData[0]; - var url = urlData[1]; - var urlLength = url.length; - processNextChars(startIndex - idx); - assem.append(''); - processNextChars(urlLength); - assem.append(''); - }); - } - processNextChars(text.length - idx); - - return _processSpaces(assem.toString()); - } // end getLineHTML - var pieces = [css]; - - // Need to deal with constraints imposed on HTML lists; can - // only gain one level of nesting at once, can't change type - // mid-list, etc. - // People might use weird indenting, e.g. skip a level, - // so we want to do something reasonable there. We also - // want to deal gracefully with blank lines. - // => keeps track of the parents level of indentation - var lists = []; // e.g. [[1,'bullet'], [3,'bullet'], ...] - var listLevels = [] - for (var i = 0; i < textLines.length; i++) - { - var line = _analyzeLine(textLines[i], attribLines[i], apool); - var lineContent = getLineHTML(line.text, line.aline); - listLevels.push(line.listLevel) - - if (line.listLevel)//If we are inside a list - { - // do list stuff - var whichList = -1; // index into lists or -1 - if (line.listLevel) - { - whichList = lists.length; - for (var j = lists.length - 1; j >= 0; j--) - { - if (line.listLevel <= lists[j][0]) - { - whichList = j; - } - } - } - var context = { - line: line, - lineContent: lineContent, - apool: apool, - attribLine: attribLines[i], - text: textLines[i], - padId: pad.id - } - hooks.callAll("getLineHTMLForExport", context); - if (whichList >= lists.length)//means we are on a deeper level of indentation than the previous line - { - if(lists.length > 0){ - pieces.push('') - } - lists.push([line.listLevel, line.listTypeName]); - - // if there is a previous list we need to open x tags, where x is the difference of the levels - // if there is no previous list we need to open x tags, where x is the wanted level - var toOpen = lists.length > 1 ? line.listLevel - lists[lists.length - 2][0] - 1 : line.listLevel - 1 - - if(line.listTypeName == "number") - { - if(toOpen > 0){ - pieces.push(new Array(toOpen + 1).join('
    ')) - } - pieces.push('
    1. ', context.lineContent || '
      '); - } - else - { - if(toOpen > 0){ - pieces.push(new Array(toOpen + 1).join('
        ')) - } - pieces.push('
        • ', context.lineContent || '
          '); - } - } - //the following code *seems* dead after my patch. - //I keep it just in case I'm wrong... - /*else if (whichList == -1)//means we are not inside a list - { - if (line.text) - { - console.log('trace 1'); - // non-blank line, end all lists - if(line.listTypeName == "number") - { - pieces.push(new Array(lists.length + 1).join('
    ')); - } - else - { - pieces.push(new Array(lists.length + 1).join('')); - } - lists.length = 0; - pieces.push(lineContent, '
    '); - } - else - { - console.log('trace 2'); - pieces.push('

    '); - } - }*/ - else//means we are getting closer to the lowest level of indentation or are at the same level - { - var toClose = lists.length > 0 ? listLevels[listLevels.length - 2] - line.listLevel : 0 - if( toClose > 0){ - pieces.push('') - if(lists[lists.length - 1][1] == "number") - { - pieces.push(new Array(toClose+1).join('
')) - pieces.push('
  • ', context.lineContent || '
    '); - } - else - { - pieces.push(new Array(toClose+1).join('')) - pieces.push('
  • ', context.lineContent || '
    '); - } - lists = lists.slice(0,whichList+1) + for (var k = lists.length - 1; k >= 0; k--) { + if (lists[k][1] === 'number') { + pieces.push('
  • '); } else { - pieces.push('
  • ', context.lineContent || '
    '); + pieces.push('
  • '); } - } } - else//outside any list, need to close line.listLevel of lists - { - if(lists.length > 0){ - if(lists[lists.length - 1][1] == "number"){ - pieces.push(''); - pieces.push(new Array(listLevels[listLevels.length - 2]).join('')) - } else { - pieces.push(''); - pieces.push(new Array(listLevels[listLevels.length - 2]).join('')) - } - } - lists = [] - var context = { - line: line, - lineContent: lineContent, - apool: apool, - attribLine: attribLines[i], - text: textLines[i], - padId: pad.id - } - - hooks.callAll("getLineHTMLForExport", context); - - pieces.push(context.lineContent, '
    '); - } - } - - for (var k = lists.length - 1; k >= 0; k--) - { - if(lists[k][1] == "number") - { - pieces.push(''); - } - else - { - pieces.push(''); - } - } - - return pieces.join(''); + return pieces.join(''); } -exports.getPadHTMLDocument = function (padId, revNum, callback) -{ - padManager.getPad(padId, function (err, pad) - { - if(ERR(err, callback)) return; +function getPadHTML (pad, revNum, callback) { + var atext = pad.atext; + var html; + async.waterfall( + [ + // fetch revision atext + function (callback) { + if (revNum !== undefined) { + pad.getInternalRevisionAText(revNum, function (err, revisionAtext) { + if (eRR(err, callback)) return; + atext = revisionAtext; - var stylesForExportCSS = ""; - // Include some Styles into the Head for Export - hooks.aCallAll("stylesForExport", padId, function(err, stylesForExport){ - stylesForExport.forEach(function(css){ - stylesForExportCSS += css; - }); + return callback(); + }); + } else { + return callback(null); + } + }, - getPadHTML(pad, revNum, function (err, html) - { - if(ERR(err, callback)) return; - var exportedDoc = eejs.require("ep_etherpad-lite/templates/export_html.html", { - body: html, - padId: Security.escapeHTML(padId), - extraCSS: stylesForExportCSS + // convert atext to html + + + function (callback) { + html = getHTMLFromAtext(pad, atext); + callback(null); + } + ], + // run final callback + + + function (err) { + if (eRR(err, callback)) return; + callback(null, html); + } + ); +} + +exports.getPadHTML = getPadHTML; +exports.getHTMLFromAtext = getHTMLFromAtext; +exports.getPadHTMLDocument = function (padId, revNum, callback) { + padManager.getPad(padId, function (err, pad) { + if (eRR(err, callback)) return; + + var stylesForExportCSS = ''; + // Include some Styles into the Head for Export + hooks.aCallAll('stylesForExport', padId, function (err, stylesForExport) { + if (eRR(err, callback)) return; + stylesForExport.forEach(function (css) { + stylesForExportCSS += css; + }); + + getPadHTML(pad, revNum, function (err, html) { + if (eRR(err, callback)) return; + var exportedDoc = eejs.require('ep_etherpad-lite/templates/export_html.html', { + body: html, + padId: Security.escapeHTML(padId), + extraCSS: stylesForExportCSS + }); + callback(null, exportedDoc); + }); }); - callback(null, exportedDoc); - }); }); - }); }; - -// copied from ACE -var _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/; -var _REGEX_SPACE = /\s/; -var _REGEX_URLCHAR = new RegExp('(' + /[-:@a-zA-Z0-9_.,~%+\/\\?=&#;()$]/.source + '|' + _REGEX_WORDCHAR.source + ')'); -var _REGEX_URL = new RegExp(/(?:(?:https?|s?ftp|ftps|file|smb|afp|nfs|(x-)?man|gopher|txmt):\/\/|mailto:)/.source + _REGEX_URLCHAR.source + '*(?![:.,;])' + _REGEX_URLCHAR.source, 'g'); - -// returns null if no URLs, or [[startIndex1, url1], [startIndex2, url2], ...] - - -function _findURLs(text) -{ - _REGEX_URL.lastIndex = 0; - var urls = null; - var execResult; - while ((execResult = _REGEX_URL.exec(text))) - { - urls = (urls || []); - var startIndex = execResult.index; - var url = execResult[0]; - urls.push([startIndex, url]); - } - - return urls; -} - - -// copied from ACE -function _processSpaces(s){ - var doesWrap = true; - if (s.indexOf("<") < 0 && !doesWrap){ - // short-cut - return s.replace(/ /g, ' '); - } - var parts = []; - s.replace(/<[^>]*>?| |[^ <]+/g, function (m){ - parts.push(m); - }); - if (doesWrap){ - var endOfLine = true; - var beforeSpace = false; - // last space in a run is normal, others are nbsp, - // end of line is nbsp - for (var i = parts.length - 1; i >= 0; i--){ - var p = parts[i]; - if (p == " "){ - if (endOfLine || beforeSpace) parts[i] = ' '; - endOfLine = false; - beforeSpace = true; - } - else if (p.charAt(0) != "<"){ - endOfLine = false; - beforeSpace = false; - } - } - // beginning of line is nbsp - for (i = 0; i < parts.length; i++){ - p = parts[i]; - if (p == " "){ - parts[i] = ' '; - break; - } - else if (p.charAt(0) != "<"){ - break; - } - } - } - else - { - for (i = 0; i < parts.length; i++){ - p = parts[i]; - if (p == " "){ - parts[i] = ' '; - } - } - } - return parts.join(''); -}