From 01f6d85371b09aad249a02c53461b1f066a76955 Mon Sep 17 00:00:00 2001 From: Timothy Chavez Date: Wed, 19 Nov 2014 12:42:42 -0600 Subject: [PATCH 1/3] Restore pad to new location at a given revision This script gives an admin with shell access the ability to restore a pad at a given revision by essentially rebuilding it at a new location with data associated with the original pad. The upsides to creating a new pad vs. changing the original are: 1) avoiding service disruptions (no deletes, no moving targets - builds from previous revision); and 2) preservation of data (no deletes, no overwriting of the source pad). The most obvious downside is the pad has a new ID which could require folks to update their links, bookmarks, etc. to point at the new location. --- bin/rebuildPad.js | 131 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 bin/rebuildPad.js diff --git a/bin/rebuildPad.js b/bin/rebuildPad.js new file mode 100644 index 000000000..b5d9b7e31 --- /dev/null +++ b/bin/rebuildPad.js @@ -0,0 +1,131 @@ +/* + This is a repair tool. It rebuilds an old pad at a new pad location up to a + known "good" revision. 
+*/ + +if(process.argv.length != 4) { + console.error("Use: node bin/repairPad.js $PADID $REV"); + process.exit(1); +} + +var npm = require("../src/node_modules/npm"); +var async = require("../src/node_modules/async"); +var ueberDB = require("../src/node_modules/ueberDB"); + +var padId = process.argv[2]; +var newRevHead = process.argv[3]; +var newPadId = padId + "-rebuilt"; + +var db, pad, newPad, settings; +var AuthorManager, ChangeSet, PadManager; + +async.series([ + function(callback) { + npm.load({}, function(err) { + if(err) { + console.error("Could not load NPM: " + err) + process.exit(1); + } else { + callback(); + } + }) + }, + function(callback) { + // Get a handle into the database + db = require('../src/node/db/DB'); + db.init(callback); + }, function(callback) { + // Get references to the original pad and to a newly created pad + // HACK: This is a standalone script, so we want to write everything + // out to the database immediately. The only problem with this is + // that a driver (like the mysql driver) hardcodes these values. 
+ db.db.db.settings = {cache: 0, writeInterval: 0, json: true}; + PadManager = require('../src/node/db/PadManager'); + PadManager.getPad(padId, function(err, _pad) { + pad = _pad; + PadManager.getPad(newPadId, function(err, _newPad) { + newPad = _newPad; + callback(); + }); + }); + }, function(callback) { + // Clone all Chat revisions + var chatHead = pad.chatHead; + for(var i = 0; i <= chatHead; i++) { + db.db.get("pad:" + padId + ":chat:" + i, function (err, chat) { + db.db.set("pad:" + newPadId + ":chat:" + i, chat); + console.log("Created: Chat Revision: pad:" + newPadId + ":chat:" + i) + }); + } + callback(); + }, function(callback) { + // Rebuild Pad from revisions up to and including the new revision head + AuthorManager = require("../src/node/db/AuthorManager"); + Changeset = require("ep_etherpad-lite/static/js/Changeset"); + // Author attributes are derived from changesets, but there can also be + // non-author attributes with specific mappings that changesets depend on + // and, AFAICT, cannot be recreated any other way + newPad.pool.numToAttrib = pad.pool.numToAttrib; + for(var i = 1; i <= newRevHead; i++) { + db.db.get("pad:" + padId + ":revs:" + i, function(err, rev) { + var author = rev.meta.author; + var timestamp = rev.meta.timestamp; + var changeset = rev.changeset; + + var newAText = Changeset.applyToAText(changeset, newPad.atext, newPad.pool); + Changeset.copyAText(newAText, newPad.atext); + + var newRev = ++newPad.head; + + var newRevData = {}; + newRevData.changeset = changeset; + newRevData.meta = {}; + newRevData.meta.author = author; + newRevData.meta.timestamp = timestamp; + + newPad.pool.putAttrib(['author', author || '']); + + if(newRev % 100 == 0) + { + newRevData.meta.atext = newPad.atext; + } + + db.db.set("pad:"+newPad.id+":revs:"+newRev, newRevData); + console.log("Created: Revision: pad:" + newPad.id + ":revs:" + newRev); + + if(author) + AuthorManager.addPad(author, newPad.id); + + if (newRev == newRevHead) { + callback(); + } + 
}); + } + }, function(callback) { + // Add saved revisions up to the new revision head + console.log(newPad.head); + var newSavedRevisions = []; + for(var i in pad.savedRevisions) { + savedRev = pad.savedRevisions[i] + if (savedRev.revNum <= newRevHead) { + newSavedRevisions.push(savedRev); + console.log("Added: Saved Revision: " + savedRev.revNum); + } + } + newPad.savedRevisions = newSavedRevisions; + callback(); + }, function(callback) { + // Save the source pad + db.db.set("pad:"+newPadId, newPad, function(err) { + console.log("Created: Source Pad: pad:" + newPadId); + newPad.saveToDatabase(); + callback(); + }); + } +], function (err) { + if(err) throw err; + else { + console.info("finished"); + process.exit(0); + } +}); From 25ccb6cfc367c6027bdc3f75f67ea2e941d344b3 Mon Sep 17 00:00:00 2001 From: Timothy Chavez Date: Thu, 20 Nov 2014 21:31:03 -0600 Subject: [PATCH 2/3] Simplify the rebuild process The majority of the information needed to build the new pad can be communicated by simply cloning the rev using a db.set(). --- bin/rebuildPad.js | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/bin/rebuildPad.js b/bin/rebuildPad.js index b5d9b7e31..d349467b9 100644 --- a/bin/rebuildPad.js +++ b/bin/rebuildPad.js @@ -38,7 +38,7 @@ async.series([ // Get references to the original pad and to a newly created pad // HACK: This is a standalone script, so we want to write everything // out to the database immediately. The only problem with this is - // that a driver (like the mysql driver) hardcodes these values. + // that a driver (like the mysql driver) can hardcode these values. 
db.db.db.settings = {cache: 0, writeInterval: 0, json: true}; PadManager = require('../src/node/db/PadManager'); PadManager.getPad(padId, function(err, _pad) { @@ -69,33 +69,23 @@ async.series([ for(var i = 1; i <= newRevHead; i++) { db.db.get("pad:" + padId + ":revs:" + i, function(err, rev) { var author = rev.meta.author; - var timestamp = rev.meta.timestamp; var changeset = rev.changeset; - - var newAText = Changeset.applyToAText(changeset, newPad.atext, newPad.pool); - Changeset.copyAText(newAText, newPad.atext); - var newRev = ++newPad.head; + var newRevId = "pad:" + newPad.id + ":revs:" + newRev; + var newAtext = Changeset.applyToAText(changeset, newPad.atext, newPad.pool); - var newRevData = {}; - newRevData.changeset = changeset; - newRevData.meta = {}; - newRevData.meta.author = author; - newRevData.meta.timestamp = timestamp; - + AuthorManager.addPad(author, newPad.id); newPad.pool.putAttrib(['author', author || '']); - if(newRev % 100 == 0) - { - newRevData.meta.atext = newPad.atext; + Changeset.copyAText(newAtext, newPad.atext); + + db.db.set(newRevId, rev); + if(newRev % 100 == 0) { + db.db.setSub(newRevId, ["meta", "atext"], newPad.atext) } - db.db.set("pad:"+newPad.id+":revs:"+newRev, newRevData); console.log("Created: Revision: pad:" + newPad.id + ":revs:" + newRev); - if(author) - AuthorManager.addPad(author, newPad.id); - if (newRev == newRevHead) { callback(); } From c7b1aebfe82d4c9eda968f2a0b5a1337d12590be Mon Sep 17 00:00:00 2001 From: Timothy Chavez Date: Wed, 3 Dec 2014 20:11:39 -0600 Subject: [PATCH 3/3] Make changes based on code review Simplified the cloning process, added validation checks to ensure the new pad ID is valid and that a pad doesn't already exist with that ID. 
Also fixed a bug in the chatHead cloning loop and added the ability to specify a pad ID on the command line (defaulting to the original "-rebuilt" pad ID formula) --- bin/rebuildPad.js | 77 +++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/bin/rebuildPad.js b/bin/rebuildPad.js index d349467b9..c83833420 100644 --- a/bin/rebuildPad.js +++ b/bin/rebuildPad.js @@ -3,8 +3,8 @@ known "good" revision. */ -if(process.argv.length != 4) { - console.error("Use: node bin/repairPad.js $PADID $REV"); +if(process.argv.length != 4 && process.argv.length != 5) { + console.error("Use: node bin/repairPad.js $PADID $REV [$NEWPADID]"); process.exit(1); } @@ -14,10 +14,10 @@ var ueberDB = require("../src/node_modules/ueberDB"); var padId = process.argv[2]; var newRevHead = process.argv[3]; -var newPadId = padId + "-rebuilt"; +var newPadId = process.argv[4] || padId + "-rebuilt"; -var db, pad, newPad, settings; -var AuthorManager, ChangeSet, PadManager; +var db, oldPad, newPad, settings; +var AuthorManager, ChangeSet, Pad, PadManager; async.series([ function(callback) { @@ -35,26 +35,37 @@ async.series([ db = require('../src/node/db/DB'); db.init(callback); }, function(callback) { + PadManager = require('../src/node/db/PadManager'); + Pad = require('../src/node/db/Pad').Pad; // Get references to the original pad and to a newly created pad // HACK: This is a standalone script, so we want to write everything // out to the database immediately. The only problem with this is // that a driver (like the mysql driver) can hardcode these values. 
db.db.db.settings = {cache: 0, writeInterval: 0, json: true}; - PadManager = require('../src/node/db/PadManager'); - PadManager.getPad(padId, function(err, _pad) { - pad = _pad; - PadManager.getPad(newPadId, function(err, _newPad) { - newPad = _newPad; - callback(); - }); + // Validate the newPadId if specified and that a pad with that ID does + // not already exist to avoid overwriting it. + if (!PadManager.isValidPadId(newPadId)) { + console.error("Cannot create a pad with that id as it is invalid"); + process.exit(1); + } + PadManager.doesPadExists(newPadId, function(err, exists) { + if (exists) { + console.error("Cannot create a pad with that id as it already exists"); + process.exit(1); + } + }); + PadManager.getPad(padId, function(err, pad) { + oldPad = pad; + newPad = new Pad(newPadId); + callback(); }); }, function(callback) { // Clone all Chat revisions - var chatHead = pad.chatHead; - for(var i = 0; i <= chatHead; i++) { + var chatHead = oldPad.chatHead; + for(var i = 0, curHeadNum = 0; i <= chatHead; i++) { db.db.get("pad:" + padId + ":chat:" + i, function (err, chat) { - db.db.set("pad:" + newPadId + ":chat:" + i, chat); - console.log("Created: Chat Revision: pad:" + newPadId + ":chat:" + i) + db.db.set("pad:" + newPadId + ":chat:" + curHeadNum++, chat); + console.log("Created: Chat Revision: pad:" + newPadId + ":chat:" + curHeadNum); }); } callback(); @@ -65,28 +76,16 @@ async.series([ // Author attributes are derived from changesets, but there can also be // non-author attributes with specific mappings that changesets depend on // and, AFAICT, cannot be recreated any other way - newPad.pool.numToAttrib = pad.pool.numToAttrib; - for(var i = 1; i <= newRevHead; i++) { - db.db.get("pad:" + padId + ":revs:" + i, function(err, rev) { - var author = rev.meta.author; - var changeset = rev.changeset; - var newRev = ++newPad.head; - var newRevId = "pad:" + newPad.id + ":revs:" + newRev; - var newAtext = Changeset.applyToAText(changeset, newPad.atext, 
newPad.pool); - - AuthorManager.addPad(author, newPad.id); - newPad.pool.putAttrib(['author', author || '']); - - Changeset.copyAText(newAtext, newPad.atext); - + newPad.pool.numToAttrib = oldPad.pool.numToAttrib; + for(var curRevNum = 0; curRevNum <= newRevHead; curRevNum++) { + db.db.get("pad:" + padId + ":revs:" + curRevNum, function(err, rev) { + var newRevNum = ++newPad.head; + var newRevId = "pad:" + newPad.id + ":revs:" + newRevNum; db.db.set(newRevId, rev); - if(newRev % 100 == 0) { - db.db.setSub(newRevId, ["meta", "atext"], newPad.atext) - } - - console.log("Created: Revision: pad:" + newPad.id + ":revs:" + newRev); - - if (newRev == newRevHead) { + AuthorManager.addPad(rev.meta.author, newPad.id); + newPad.atext = Changeset.applyToAText(rev.changeset, newPad.atext, newPad.pool); + console.log("Created: Revision: pad:" + newPad.id + ":revs:" + newRevNum); + if (newRevNum == newRevHead) { callback(); } }); @@ -95,8 +94,8 @@ async.series([ // Add saved revisions up to the new revision head console.log(newPad.head); var newSavedRevisions = []; - for(var i in pad.savedRevisions) { - savedRev = pad.savedRevisions[i] + for(var i in oldPad.savedRevisions) { + savedRev = oldPad.savedRevisions[i] if (savedRev.revNum <= newRevHead) { newSavedRevisions.push(savedRev); console.log("Added: Saved Revision: " + savedRev.revNum);