From 3fb2f02875bfd0da7097a3fa021c7959bde1811b Mon Sep 17 00:00:00 2001 From: John McLear Date: Tue, 22 Jan 2013 23:16:49 +0000 Subject: [PATCH] semi working --- src/node/db/API.js | 6 +- src/node/utils/padDiff.js | 596 ++++++++++++++++++++++++++++++++++---- 2 files changed, 539 insertions(+), 63 deletions(-) diff --git a/src/node/db/API.js b/src/node/db/API.js index 50c4a6c5d..2c8b91abb 100644 --- a/src/node/db/API.js +++ b/src/node/db/API.js @@ -611,19 +611,16 @@ exports.createDiff = function(padID, startRev, endRev, callback){ //get the pad getPadSafe(padID, true, function(err, pad) { -console.warn(padID); if(err){ return callback(err); } try { -console.warn(pad); var padDiff = new PadDiff(pad, startRev, endRev); -console.warn("AFTER"); } catch(e) { + // console.warn(e.stack); return callback({stop:e.message}); } -/* var html, authors; async.series([ @@ -650,7 +647,6 @@ console.warn("AFTER"); ], function(err){ callback(err, {html: html, authors: authors}) }); - */ }); } diff --git a/src/node/utils/padDiff.js b/src/node/utils/padDiff.js index 645c2e2d4..f898cbe16 100644 --- a/src/node/utils/padDiff.js +++ b/src/node/utils/padDiff.js @@ -1,74 +1,554 @@ -exports.createDiff = function(padID, startRev, endRev, callback){ -console.warn("WTF"); - - //check if rev is a number - if(startRev !== undefined && typeof startRev != "number") +var Changeset = require("../../static/js/Changeset"); +var async = require("async"); +var exportHtml = require('./ExportHtml'); + +function PadDiff (pad, fromRev, toRev){ + //check parameters + if(!pad || !pad.id || !pad.atext || !pad.pool) { - //try to parse the number - if(!isNaN(parseInt(startRev))) - { - startRev = parseInt(startRev, 10); - } - else - { - callback({stop: "startRev is not a number"}); - return; - } + throw new Error('Invalid pad'); } - //check if rev is a number - if(endRev !== undefined && typeof endRev != "number") - { - //try to parse the number - if(!isNaN(parseInt(endRev))) - { - endRev = parseInt(endRev, 10); - } - else - { - callback({stop: "endRev is not a number"}); - return; - } + var range = pad.getValidRevisionRange(fromRev, toRev); + if(!range) { throw new Error('Invalid revision range.' + + ' startRev: ' + fromRev + + ' endRev: ' + toRev); } + + this._pad = pad; + this._fromRev = range.startRev; + this._toRev = range.endRev; + this._html = null; + this._authors = []; +} + +PadDiff.prototype._isClearAuthorship = function(changeset){ + //unpack + var unpacked = Changeset.unpack(changeset); + + //check if there is nothing in the charBank + if(unpacked.charBank !== "") + return false; + + //check if oldLength == newLength + if(unpacked.oldLen !== unpacked.newLen) + return false; + + //lets iterator over the operators + var iterator = Changeset.opIterator(unpacked.ops); + + //get the first operator, this should be a clear operator + var clearOperator = iterator.next(); + + //check if there is only one operator + if(iterator.hasNext() === true) + return false; + + //check if this operator doesn't change text + if(clearOperator.opcode !== "=") + return false; + + //check that this operator applys to the complete text + //if the text ends with a new line, its exactly one character less, else it has the same length + if(clearOperator.chars !== unpacked.oldLen-1 && clearOperator.chars !== unpacked.oldLen) + return false; + + var attributes = []; + Changeset.eachAttribNumber(changeset, function(attrNum){ + attributes.push(attrNum); + }); + + //check that this changeset uses only one attribute + if(attributes.length !== 1) + return false; + + var appliedAttribute = this._pad.pool.getAttrib(attributes[0]); + + //check if the applied attribute is an anonymous author attribute + if(appliedAttribute[0] !== "author" || appliedAttribute[1] !== "") + return false; + + return true; +} + +PadDiff.prototype._createClearAuthorship = function(rev, callback){ + var self = this; + this._pad.getInternalRevisionAText(rev, function(err, atext){ + if(err){ + return callback(err); + } + + //build clearAuthorship changeset + var builder = Changeset.builder(atext.text.length); + builder.keepText(atext.text, [['author','']], self._pad.pool); + var changeset = builder.toString(); + + callback(null, changeset); + }); +} + +PadDiff.prototype._createClearStartAtext = function(rev, callback){ + var self = this; + + //get the atext of this revision + this._pad.getInternalRevisionAText(rev, function(err, atext){ + if(err){ + return callback(err); + } + + //create the clearAuthorship changeset + self._createClearAuthorship(rev, function(err, changeset){ + if(err){ + return callback(err); + } + + //apply the clearAuthorship changeset + var newAText = Changeset.applyToAText(changeset, atext, self._pad.pool); + + callback(null, newAText); + }); + }); +} + +PadDiff.prototype._getChangesetsInBulk = function(startRev, count, callback) { + var self = this; + + //find out which revisions we need + var revisions = []; + for(var i=startRev;i<(startRev+count) && i<=this._pad.head;i++){ + revisions.push(i); } + + var changesets = [], authors = []; + + //get all needed revisions + async.forEach(revisions, function(rev, callback){ + self._pad.getRevision(rev, function(err, revision){ + if(err){ + return callback(err) + } + + var arrayNum = rev-startRev; + + changesets[arrayNum] = revision.changeset; + authors[arrayNum] = revision.meta.author; + + callback(); + }); + }, function(err){ + callback(err, changesets, authors); + }); +} - //get the pad - getPadSafe(padID, true, function(err, pad) - { - if(err){ - return callback(err); +PadDiff.prototype._addAuthors = function(authors) { + var self = this; + //add to array if not in the array + authors.forEach(function(author){ + if(self._authors.indexOf(author) == -1){ + self._authors.push(author); } + }); +} - try { - var padDiff = new PadDiff(pad, startRev, endRev); - } catch(e) { - return callback({stop:e.message}); - } +PadDiff.prototype._createDiffAtext = function(callback) { + var self = this; + var bulkSize = 100; + + //get the cleaned startAText + self._createClearStartAtext(self._fromRev, function(err, atext){ + if(err) { return callback(err); } + + var superChangeset = null; + + var rev = self._fromRev + 1; - var html, authors; - - async.series([ - function(callback){ - padDiff.getHtml(function(err, _html){ - if(err){ - return callback(err); + //async while loop + async.whilst( + //loop condition + function () { return rev <= self._toRev; }, + + //loop body + function (callback) { + //get the bulk + self._getChangesetsInBulk(rev,bulkSize,function(err, changesets, authors){ + var addedAuthors = []; + + //run trough all changesets + for(var i=0;i= curChar) { + curLineNextOp.chars -= (curChar - indexIntoLine); + done = true; + } else { + indexIntoLine += curLineNextOp.chars; + } + } + } + + while (numChars > 0) { + if ((!curLineNextOp.chars) && (!curLineOpIter.hasNext())) { + curLine++; + curChar = 0; + curLineOpIterLine = curLine; + curLineNextOp.chars = 0; + curLineOpIter = Changeset.opIterator(alines_get(curLine)); + } + if (!curLineNextOp.chars) { + curLineOpIter.next(curLineNextOp); + } + var charsToUse = Math.min(numChars, curLineNextOp.chars); + func(charsToUse, curLineNextOp.attribs, charsToUse == curLineNextOp.chars && curLineNextOp.lines > 0); + numChars -= charsToUse; + curLineNextOp.chars -= charsToUse; + curChar += charsToUse; + } + + if ((!curLineNextOp.chars) && (!curLineOpIter.hasNext())) { + curLine++; + curChar = 0; + } + } + + function skip(N, L) { + if (L) { + curLine += L; + curChar = 0; + } else { + if (curLineOpIter && curLineOpIterLine == curLine) { + consumeAttribRuns(N, function () {}); + } else { + curChar += N; + } + } + } + + function nextText(numChars) { + var len = 0; + var assem = Changeset.stringAssembler(); + var firstString = lines_get(curLine).substring(curChar); + len += firstString.length; + assem.append(firstString); + + var lineNum = curLine + 1; + while (len < numChars) { + var nextString = lines_get(lineNum); + len += nextString.length; + assem.append(nextString); + lineNum++; + } + + return assem.toString().substring(0, numChars); + } + + function cachedStrFunc(func) { + var cache = {}; + return function (s) { + if (!cache[s]) { + cache[s] = func(s); + } + return cache[s]; + }; + } + + var attribKeys = []; + var attribValues = []; + + //iterate over all operators of this changeset + while (csIter.hasNext()) { + var csOp = csIter.next(); + + if (csOp.opcode == '=') { + var textBank = nextText(csOp.chars); + + // decide if this equal operator is an attribution change or not. We can see this by checkinf if attribs is set. + // If the text this operator applies to is only a star, than this is a false positive and should be ignored + if (csOp.attribs && textBank != "*") { + var deletedAttrib = apool.putAttrib(["removed", true]); + var authorAttrib = apool.putAttrib(["author", ""]);; + + attribKeys.length = 0; + attribValues.length = 0; + Changeset.eachAttribNumber(csOp.attribs, function (n) { + attribKeys.push(apool.getAttribKey(n)); + attribValues.push(apool.getAttribValue(n)); + + if(apool.getAttribKey(n) === "author"){ + authorAttrib = n; + }; + }); + + var undoBackToAttribs = cachedStrFunc(function (attribs) { + var backAttribs = []; + for (var i = 0; i < attribKeys.length; i++) { + var appliedKey = attribKeys[i]; + var appliedValue = attribValues[i]; + var oldValue = Changeset.attribsAttributeValue(attribs, appliedKey, apool); + if (appliedValue != oldValue) { + backAttribs.push([appliedKey, oldValue]); + } + } + return Changeset.makeAttribsString('=', backAttribs, apool); + }); + + var oldAttribsAddition = "*" + Changeset.numToString(deletedAttrib) + "*" + Changeset.numToString(authorAttrib); + + var textLeftToProcess = textBank; + + while(textLeftToProcess.length > 0){ + //process till the next line break or process only one line break + var lengthToProcess = textLeftToProcess.indexOf("\n"); + var lineBreak = false; + switch(lengthToProcess){ + case -1: + lengthToProcess=textLeftToProcess.length; + break; + case 0: + lineBreak = true; + lengthToProcess=1; + break; + } + + //get the text we want to procceed in this step + var processText = textLeftToProcess.substr(0, lengthToProcess); + textLeftToProcess = textLeftToProcess.substr(lengthToProcess); + + if(lineBreak){ + builder.keep(1, 1); //just skip linebreaks, don't do a insert + keep for a linebreak + + //consume the attributes of this linebreak + consumeAttribRuns(1, function(){}); + } else { + //add the old text via an insert, but add a deletion attribute + the author attribute of the author who deleted it + var textBankIndex = 0; + consumeAttribRuns(lengthToProcess, function (len, attribs, endsLine) { + //get the old attributes back + var attribs = (undoBackToAttribs(attribs) || "") + oldAttribsAddition; + + builder.insert(processText.substr(textBankIndex, len), attribs); + textBankIndex += len; + }); + + builder.keep(lengthToProcess, 0); + } + } + } else { + skip(csOp.chars, csOp.lines); + builder.keep(csOp.chars, csOp.lines); + } + } else if (csOp.opcode == '+') { + builder.keep(csOp.chars, csOp.lines); + } else if (csOp.opcode == '-') { + var textBank = nextText(csOp.chars); + var textBankIndex = 0; + + consumeAttribRuns(csOp.chars, function (len, attribs, endsLine) { + builder.insert(textBank.substr(textBankIndex, len), attribs + csOp.attribs); + textBankIndex += len; + }); + } + } + + return Changeset.checkRep(builder.toString()); +}; + +//export the constructor +module.exports = PadDiff;