/** * Copyright 2009 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS-IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ var async = require("async"); var Changeset = require("./Changeset"); var padManager = require("../db/PadManager"); function getPadPlainText(pad, revNum) { var atext = ((revNum !== undefined) ? pad.getInternalRevisionAText(revNum) : pad.atext()); var textLines = atext.text.slice(0, -1).split('\n'); var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text); var apool = pad.pool(); var pieces = []; for (var i = 0; i < textLines.length; i++) { var line = _analyzeLine(textLines[i], attribLines[i], apool); if (line.listLevel) { var numSpaces = line.listLevel * 2 - 1; var bullet = '*'; pieces.push(new Array(numSpaces + 1).join(' '), bullet, ' ', line.text, '\n'); } else { pieces.push(line.text, '\n'); } } return pieces.join(''); } function getPadHTML(pad, revNum, callback) { var atext = pad.atext; var html; async.waterfall([ // fetch revision atext function (callback) { if (revNum != undefined) { pad.getInternalRevisionAText(revNum, function (err, revisionAtext) { atext = revisionAtext; callback(err); }); } else { callback(null); } }, // convert atext to html function (callback) { html = getHTMLFromAtext(pad, atext); callback(null); }], // run final callback function (err) { callback(err, html); }); } exports.getPadHTML = getPadHTML; function getHTMLFromAtext(pad, atext) { var apool = pad.apool(); var textLines = atext.text.slice(0, -1).split('\n'); var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text); var tags = ['h1', 'h2', 'strong', 'em', 'u', 's']; var props = ['heading1', 'heading2', 'bold', 'italic', 'underline', 'strikethrough']; var anumMap = {}; props.forEach(function (propName, i) { var propTrueNum = apool.putAttrib([propName, true], true); if (propTrueNum >= 0) { anumMap[propTrueNum] = i; } }); function getLineHTML(text, attribs) { var propVals = [false, false, false]; var ENTER = 1; var STAY = 2; var LEAVE = 0; // Use order of tags (b/i/u) as order of nesting, for simplicity // and decent nesting. For example, // Just bold Bold and italics Just italics // becomes // Just bold Bold and italics Just italics var taker = Changeset.stringIterator(text); var assem = Changeset.stringAssembler(); var openTags = []; function emitOpenTag(i) { openTags.unshift(i); assem.append('<'); assem.append(tags[i]); assem.append('>'); } function emitCloseTag(i) { openTags.shift(); assem.append(''); } function orderdCloseTags(tags2close) { for(var i=0;i bold, etc. if (!propVals[i]) { propVals[i] = ENTER; propChanged = true; } else { propVals[i] = STAY; } } }); for (var i = 0; i < propVals.length; i++) { if (propVals[i] === true) { propVals[i] = LEAVE; propChanged = true; } else if (propVals[i] === STAY) { propVals[i] = true; // set it back } } // now each member of propVal is in {false,LEAVE,ENTER,true} // according to what happens at start of span if (propChanged) { // leaving bold (e.g.) also leaves italics, etc. var left = false; for (var i = 0; i < propVals.length; i++) { var v = propVals[i]; if (!left) { if (v === LEAVE) { left = true; } } else { if (v === true) { propVals[i] = STAY; // tag will be closed and re-opened } } } var tags2close = []; for (var i = propVals.length - 1; i >= 0; i--) { if (propVals[i] === LEAVE) { //emitCloseTag(i); tags2close.push(i); propVals[i] = false; } else if (propVals[i] === STAY) { //emitCloseTag(i); tags2close.push(i); } } orderdCloseTags(tags2close); for (var i = 0; i < propVals.length; i++) { if (propVals[i] === ENTER || propVals[i] === STAY) { emitOpenTag(i); propVals[i] = true; } } // propVals is now all {true,false} again } // end if (propChanged) var chars = o.chars; if (o.lines) { chars--; // exclude newline at end of line, if present } var s = taker.take(chars); //removes the characters with the code 12. Don't know where they come //from but they break the abiword parser and are completly useless s = s.replace(String.fromCharCode(12), ""); assem.append(_escapeHTML(s)); } // end iteration over spans in line var tags2close = []; for (var i = propVals.length - 1; i >= 0; i--) { if (propVals[i]) { tags2close.push(i); propVals[i] = false; } } orderdCloseTags(tags2close); } // end processNextChars if (urls) { urls.forEach(function (urlData) { var startIndex = urlData[0]; var url = urlData[1]; var urlLength = url.length; processNextChars(startIndex - idx); assem.append(''); processNextChars(urlLength); assem.append(''); }); } processNextChars(text.length - idx); return _processSpaces(assem.toString()); } // end getLineHTML var pieces = []; // Need to deal with constraints imposed on HTML lists; can // only gain one level of nesting at once, can't change type // mid-list, etc. // People might use weird indenting, e.g. skip a level, // so we want to do something reasonable there. We also // want to deal gracefully with blank lines. var lists = []; // e.g. [[1,'bullet'], [3,'bullet'], ...] for (var i = 0; i < textLines.length; i++) { var line = _analyzeLine(textLines[i], attribLines[i], apool); var lineContent = getLineHTML(line.text, line.aline); if (line.listLevel || lists.length > 0) { // do list stuff var whichList = -1; // index into lists or -1 if (line.listLevel) { whichList = lists.length; for (var j = lists.length - 1; j >= 0; j--) { if (line.listLevel <= lists[j][0]) { whichList = j; } } } if (whichList >= lists.length) { lists.push([line.listLevel, line.listTypeName]); pieces.push('
', lineContent || '
'); } else if (whichList == -1) { if (line.text) { // non-blank line, end all lists pieces.push(new Array(lists.length + 1).join('
')); lists.length = 0; pieces.push(lineContent, '
'); } else { pieces.push('

'); } } else { while (whichList < lists.length - 1) { pieces.push('
'); lists.length--; } pieces.push('
', lineContent || '
'); } } else { pieces.push(lineContent, '
'); } } pieces.push(new Array(lists.length + 1).join('
')); return pieces.join(''); } function _analyzeLine(text, aline, apool) { var line = {}; // identify list var lineMarker = 0; line.listLevel = 0; if (aline) { var opIter = Changeset.opIterator(aline); if (opIter.hasNext()) { var listType = Changeset.opAttributeValue(opIter.next(), 'list', apool); if (listType) { lineMarker = 1; listType = /([a-z]+)([12345678])/.exec(listType); if (listType) { line.listTypeName = listType[1]; line.listLevel = Number(listType[2]); } } } } if (lineMarker) { line.text = text.substring(1); line.aline = Changeset.subattribution(aline, 1); } else { line.text = text; line.aline = aline; } return line; } exports.getPadHTMLDocument = function (padId, revNum, noDocType, callback) { padManager.getPad(padId, function (err, pad) { if (err) { callback(err); return; } var head = (noDocType ? '' : '\n') + '\n' + (noDocType ? '' : '\n' + '\n' + '\n' + '\n') + ''; var foot = '\n\n'; getPadHTML(pad, revNum, function (err, html) { callback(err, head + html + foot); }); }); } function _escapeHTML(s) { var re = /[&<>]/g; if (!re.MAP) { // persisted across function calls! re.MAP = { '&': '&', '<': '<', '>': '>', }; } s = s.replace(re, function (c) { return re.MAP[c]; }); return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c) { return "&#" +c.charCodeAt(0) + ";" }); } // copied from ACE function _processSpaces(s) { var doesWrap = true; if (s.indexOf("<") < 0 && !doesWrap) { // short-cut return s.replace(/ /g, ' '); } var parts = []; s.replace(/<[^>]*>?| |[^ <]+/g, function (m) { parts.push(m); }); if (doesWrap) { var endOfLine = true; var beforeSpace = false; // last space in a run is normal, others are nbsp, // end of line is nbsp for (var i = parts.length - 1; i >= 0; i--) { var p = parts[i]; if (p == " ") { if (endOfLine || beforeSpace) parts[i] = ' '; endOfLine = false; beforeSpace = true; } else if (p.charAt(0) != "<") { endOfLine = false; beforeSpace = false; } } // beginning of line is nbsp for (var i = 0; i < parts.length; i++) { var p = parts[i]; if (p == " ") { parts[i] = ' '; break; } else if (p.charAt(0) != "<") { break; } } } else { for (var i = 0; i < parts.length; i++) { var p = parts[i]; if (p == " ") { parts[i] = ' '; } } } return parts.join(''); } // copied from ACE var _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/; var _REGEX_SPACE = /\s/; var _REGEX_URLCHAR = new RegExp('(' + /[-:@a-zA-Z0-9_.,~%+\/\\?=&#;()$]/.source + '|' + _REGEX_WORDCHAR.source + ')'); var _REGEX_URL = new RegExp(/(?:(?:https?|s?ftp|ftps|file|smb|afp|nfs|(x-)?man|gopher|txmt):\/\/|mailto:)/.source + _REGEX_URLCHAR.source + '*(?![:.,;])' + _REGEX_URLCHAR.source, 'g'); // returns null if no URLs, or [[startIndex1, url1], [startIndex2, url2], ...] function _findURLs(text) { _REGEX_URL.lastIndex = 0; var urls = null; var execResult; while ((execResult = _REGEX_URL.exec(text))) { urls = (urls || []); var startIndex = execResult.index; var url = execResult[0]; urls.push([startIndex, url]); } return urls; }