diff --git a/CHANGELOG.md b/CHANGELOG.md index 2643885d8..5ed2d637c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,28 @@ generator function. * `newOp()`: Deprecated in favor of the new `Op` class. +# 1.8.16 + +### Security fixes + +If you cannot upgrade to v1.8.16 for some reason, you are encouraged to try +cherry-picking the fixes to the version you are running: + +```shell +git cherry-pick b7065eb9a0ec..77bcb507b30e +``` + +* Maliciously crafted `.etherpad` files can no longer overwrite arbitrary + non-pad database records when imported. +* Imported `.etherpad` files are now subject to numerous consistency checks + before any records are written to the database. This should help avoid + denial-of-service attacks via imports of malformed `.etherpad` files. + +### Notable enhancements and fixes + +* Fixed several `.etherpad` import bugs. +* Improved support for large `.etherpad` imports. + # 1.8.15 ### Security fixes diff --git a/doc/api/hooks_server-side.md b/doc/api/hooks_server-side.md index aa19adec7..34a906ca6 100644 --- a/doc/api/hooks_server-side.md +++ b/doc/api/hooks_server-side.md @@ -175,14 +175,15 @@ Things in context: This hook gets called when a new pad was created. -## padLoad -Called from: src/node/db/Pad.js +## `padLoad` -Things in context: +Called from: `src/node/db/PadManager.js` -1. pad - the pad instance +Called when a pad is loaded, including after new pad creation. -This hook gets called when a pad was loaded. If a new pad was created and loaded this event will be emitted too. +Context properties: + +* `pad`: The Pad object. ## padUpdate Called from: src/node/db/Pad.js diff --git a/src/bin/checkAllPads.js b/src/bin/checkAllPads.js index d15e5ec5b..d2a5f837c 100644 --- a/src/bin/checkAllPads.js +++ b/src/bin/checkAllPads.js @@ -10,79 +10,18 @@ process.on('unhandledRejection', (err) => { throw err; }); if (process.argv.length !== 2) throw new Error('Use: node src/bin/checkAllPads.js'); (async () => { - // initialize the database - require('../node/utils/Settings'); const db = require('../node/db/DB'); await db.init(); - - // load modules - const Changeset = require('../static/js/Changeset'); const padManager = require('../node/db/PadManager'); - - let revTestedCount = 0; - - // get all pads - const res = await padManager.listAllPads(); - for (const padId of res.padIDs) { + await Promise.all((await padManager.listAllPads()).padIDs.map(async (padId) => { const pad = await padManager.getPad(padId); - - // check if the pad has a pool - if (pad.pool == null) { - console.error(`[${pad.id}] Missing attribute pool`); - continue; + try { + await pad.check(); + } catch (err) { + console.error(`Error in pad ${padId}: ${err.stack || err}`); + return; } - // create an array with key kevisions - // key revisions always save the full pad atext - const head = pad.getHeadRevisionNumber(); - const keyRevisions = []; - for (let rev = 0; rev < head; rev += 100) { - keyRevisions.push(rev); - } - - // run through all key revisions - for (const keyRev of keyRevisions) { - // create an array of revisions we need till the next keyRevision or the End - const revisionsNeeded = []; - for (let rev = keyRev; rev <= keyRev + 100 && rev <= head; rev++) { - revisionsNeeded.push(rev); - } - - // this array will hold all revision changesets - const revisions = []; - - // run through all needed revisions and get them from the database - for (const revNum of revisionsNeeded) { - const revision = await db.get(`pad:${pad.id}:revs:${revNum}`); - revisions[revNum] = revision; - } - - // check if the revision exists - if (revisions[keyRev] == null) { - console.error(`[${pad.id}] Missing revision ${keyRev}`); - continue; - } - - // check if there is a atext in the keyRevisions - let {meta: {atext} = {}} = revisions[keyRev]; - if (atext == null) { - console.error(`[${pad.id}] Missing atext in revision ${keyRev}`); - continue; - } - - const apool = pad.pool; - for (let rev = keyRev + 1; rev <= keyRev + 100 && rev <= head; rev++) { - try { - const cs = revisions[rev].changeset; - atext = Changeset.applyToAText(cs, atext, apool); - revTestedCount++; - } catch (e) { - console.error(`[${pad.id}] Bad changeset at revision ${rev} - ${e.message}`); - } - } - } - } - if (revTestedCount === 0) { - throw new Error('No revisions tested'); - } - console.log(`Finished: Tested ${revTestedCount} revisions`); + console.log(`Pad ${padId}: OK`); + })); + console.log('Finished.'); })(); diff --git a/src/bin/checkPad.js b/src/bin/checkPad.js index 5b17fa31a..6aa4b3034 100644 --- a/src/bin/checkPad.js +++ b/src/bin/checkPad.js @@ -8,75 +8,13 @@ process.on('unhandledRejection', (err) => { throw err; }); if (process.argv.length !== 3) throw new Error('Use: node src/bin/checkPad.js $PADID'); - -// get the padID const padId = process.argv[2]; -let checkRevisionCount = 0; - (async () => { - // initialize database - require('../node/utils/Settings'); const db = require('../node/db/DB'); await db.init(); - - // load modules - const Changeset = require('../static/js/Changeset'); const padManager = require('../node/db/PadManager'); - - const exists = await padManager.doesPadExists(padId); - if (!exists) throw new Error('Pad does not exist'); - - // get the pad + if (!await padManager.doesPadExists(padId)) throw new Error('Pad does not exist'); const pad = await padManager.getPad(padId); - - // create an array with key revisions - // key revisions always save the full pad atext - const head = pad.getHeadRevisionNumber(); - const keyRevisions = []; - for (let rev = 0; rev < head; rev += 100) { - keyRevisions.push(rev); - } - - // run through all key revisions - for (let keyRev of keyRevisions) { - keyRev = parseInt(keyRev); - // create an array of revisions we need till the next keyRevision or the End - const revisionsNeeded = []; - for (let rev = keyRev; rev <= keyRev + 100 && rev <= head; rev++) { - revisionsNeeded.push(rev); - } - - // this array will hold all revision changesets - const revisions = []; - - // run through all needed revisions and get them from the database - for (const revNum of revisionsNeeded) { - const revision = await db.get(`pad:${padId}:revs:${revNum}`); - revisions[revNum] = revision; - } - - // check if the pad has a pool - if (pad.pool == null) throw new Error('Attribute pool is missing'); - - // check if there is an atext in the keyRevisions - let {meta: {atext} = {}} = revisions[keyRev] || {}; - if (atext == null) { - console.error(`No atext in key revision ${keyRev}`); - continue; - } - - const apool = pad.pool; - - for (let rev = keyRev + 1; rev <= keyRev + 100 && rev <= head; rev++) { - checkRevisionCount++; - try { - const cs = revisions[rev].changeset; - atext = Changeset.applyToAText(cs, atext, apool); - } catch (e) { - console.error(`Bad changeset at revision ${rev} - ${e.message}`); - continue; - } - } - console.log(`Finished: Checked ${checkRevisionCount} revisions`); - } + await pad.check(); + console.log('Finished.'); })(); diff --git a/src/bin/checkPadDeltas.js b/src/bin/checkPadDeltas.js deleted file mode 100644 index 852c68332..000000000 --- a/src/bin/checkPadDeltas.js +++ /dev/null @@ -1,103 +0,0 @@ -'use strict'; -/* - * This is a debug tool. It checks all revisions for data corruption - */ - -// As of v14, Node.js does not exit when there is an unhandled Promise rejection. Convert an -// unhandled rejection into an uncaught exception, which does cause Node.js to exit. -process.on('unhandledRejection', (err) => { throw err; }); - -if (process.argv.length !== 3) throw new Error('Use: node src/bin/checkPadDeltas.js $PADID'); - -// get the padID -const padId = process.argv[2]; - -const expect = require('../tests/frontend/lib/expect'); -const diff = require('diff'); - -(async () => { - // initialize database - require('../node/utils/Settings'); - const db = require('../node/db/DB'); - await db.init(); - - // load modules - const Changeset = require('../static/js/Changeset'); - const padManager = require('../node/db/PadManager'); - - const exists = await padManager.doesPadExists(padId); - if (!exists) throw new Error('Pad does not exist'); - - // get the pad - const pad = await padManager.getPad(padId); - - // create an array with key revisions - // key revisions always save the full pad atext - const head = pad.getHeadRevisionNumber(); - const keyRevisions = []; - for (let i = 0; i < head; i += 100) { - keyRevisions.push(i); - } - - // create an array with all revisions - const revisions = []; - for (let i = 0; i <= head; i++) { - revisions.push(i); - } - - let atext = Changeset.makeAText('\n'); - - // run through all revisions - for (const revNum of revisions) { - // console.log('Fetching', revNum) - const revision = await db.get(`pad:${padId}:revs:${revNum}`); - // check if there is a atext in the keyRevisions - const {meta: {atext: revAtext} = {}} = revision || {}; - if (~keyRevisions.indexOf(revNum) && revAtext == null) { - console.error(`No atext in key revision ${revNum}`); - continue; - } - - // try glue everything together - try { - // console.log("check revision ", revNum); - const cs = revision.changeset; - atext = Changeset.applyToAText(cs, atext, pad.pool); - } catch (e) { - console.error(`Bad changeset at revision ${revNum} - ${e.message}`); - continue; - } - - // check things are working properly - if (~keyRevisions.indexOf(revNum)) { - try { - expect(revision.meta.atext.text).to.eql(atext.text); - expect(revision.meta.atext.attribs).to.eql(atext.attribs); - } catch (e) { - console.error(`Atext in key revision ${revNum} doesn't match computed one.`); - console.log(diff.diffChars(atext.text, revision.meta.atext.text).map((op) => { - if (!op.added && !op.removed) op.value = op.value.length; - return op; - })); - // console.error(e) - // console.log('KeyRev. :', revision.meta.atext) - // console.log('Computed:', atext) - continue; - } - } - } - - // check final text is right... - if (pad.atext.text === atext.text) { - console.log('ok'); - } else { - console.error('Pad AText doesn\'t match computed one! (Computed ', - atext.text.length, ', db', pad.atext.text.length, ')'); - console.log(diff.diffChars(atext.text, pad.atext.text).map((op) => { - if (!op.added && !op.removed) { - op.value = op.value.length; - return op; - } - })); - } -})(); diff --git a/src/node/db/Pad.js b/src/node/db/Pad.js index 2aed5fd23..80ac5369d 100644 --- a/src/node/db/Pad.js +++ b/src/node/db/Pad.js @@ -7,6 +7,7 @@ const Changeset = require('../../static/js/Changeset'); const ChatMessage = require('../../static/js/ChatMessage'); const AttributePool = require('../../static/js/AttributePool'); +const assert = require('assert').strict; const db = require('./DB'); const settings = require('../utils/Settings'); const authorManager = require('./AuthorManager'); @@ -20,7 +21,7 @@ const hooks = require('../../static/js/pluginfw/hooks'); const promises = require('../utils/promises'); // serialization/deserialization attributes -const attributeBlackList = ['id']; +const attributeBlackList = ['_db', 'id']; const jsonableList = ['pool']; /** @@ -33,7 +34,15 @@ exports.cleanText = (txt) => txt.replace(/\r\n/g, '\n') .replace(/\t/g, ' ') .replace(/\xa0/g, ' '); -const Pad = function (id) { +/** + * @param [database] - Database object to access this pad's records (and only this pad's records -- + * the shared global Etherpad database object is still used for all other pad accesses, such as + * copying the pad). Defaults to the shared global Etherpad database object. This parameter can + * be used to shard pad storage across multiple database backends, to put each pad in its own + * database table, or to validate imported pad data before it is written to the database. + */ +const Pad = function (id, database = db) { + this._db = database; this.atext = Changeset.makeAText('\n'); this.pool = new AttributePool(); this.head = -1; @@ -94,7 +103,7 @@ Pad.prototype.appendRevision = async function (aChangeset, author) { } const p = [ - db.set(`pad:${this.id}:revs:${newRev}`, newRevData), + this._db.set(`pad:${this.id}:revs:${newRev}`, newRevData), this.saveToDatabase(), ]; @@ -127,25 +136,25 @@ Pad.prototype.saveToDatabase = async function () { } } - await db.set(`pad:${this.id}`, dbObject); + await this._db.set(`pad:${this.id}`, dbObject); }; // get time of last edit (changeset application) -Pad.prototype.getLastEdit = function () { +Pad.prototype.getLastEdit = async function () { const revNum = this.getHeadRevisionNumber(); - return db.getSub(`pad:${this.id}:revs:${revNum}`, ['meta', 'timestamp']); + return await this._db.getSub(`pad:${this.id}:revs:${revNum}`, ['meta', 'timestamp']); }; -Pad.prototype.getRevisionChangeset = function (revNum) { - return db.getSub(`pad:${this.id}:revs:${revNum}`, ['changeset']); +Pad.prototype.getRevisionChangeset = async function (revNum) { + return await this._db.getSub(`pad:${this.id}:revs:${revNum}`, ['changeset']); }; -Pad.prototype.getRevisionAuthor = function (revNum) { - return db.getSub(`pad:${this.id}:revs:${revNum}`, ['meta', 'author']); +Pad.prototype.getRevisionAuthor = async function (revNum) { + return await this._db.getSub(`pad:${this.id}:revs:${revNum}`, ['meta', 'author']); }; -Pad.prototype.getRevisionDate = function (revNum) { - return db.getSub(`pad:${this.id}:revs:${revNum}`, ['meta', 'timestamp']); +Pad.prototype.getRevisionDate = async function (revNum) { + return await this._db.getSub(`pad:${this.id}:revs:${revNum}`, ['meta', 'timestamp']); }; Pad.prototype.getAllAuthors = function () { @@ -172,7 +181,7 @@ Pad.prototype.getInternalRevisionAText = async function (targetRev) { // get all needed data out of the database // start to get the atext of the key revision - const p_atext = db.getSub(`pad:${this.id}:revs:${keyRev}`, ['meta', 'atext']); + const p_atext = this._db.getSub(`pad:${this.id}:revs:${keyRev}`, ['meta', 'atext']); // get all needed changesets const changesets = []; @@ -195,8 +204,8 @@ Pad.prototype.getInternalRevisionAText = async function (targetRev) { return atext; }; -Pad.prototype.getRevision = function (revNum) { - return db.get(`pad:${this.id}:revs:${revNum}`); +Pad.prototype.getRevision = async function (revNum) { + return await this._db.get(`pad:${this.id}:revs:${revNum}`); }; Pad.prototype.getAllAuthorColors = async function () { @@ -292,7 +301,7 @@ Pad.prototype.appendChatMessage = async function (msgOrText, authorId = null, ti // Don't save the display name in the database because the user can change it at any time. The // `displayName` property will be populated with the current value when the message is read from // the database. - db.set(`pad:${this.id}:chat:${this.chatHead}`, {...msg, displayName: undefined}), + this._db.set(`pad:${this.id}:chat:${this.chatHead}`, {...msg, displayName: undefined}), this.saveToDatabase(), ]); }; @@ -302,7 +311,7 @@ Pad.prototype.appendChatMessage = async function (msgOrText, authorId = null, ti * @returns {?ChatMessage} */ Pad.prototype.getChatMessage = async function (entryNum) { - const entry = await db.get(`pad:${this.id}:chat:${entryNum}`); + const entry = await this._db.get(`pad:${this.id}:chat:${entryNum}`); if (entry == null) return null; const message = ChatMessage.fromObject(entry); message.displayName = await authorManager.getAuthorName(message.authorId); @@ -339,7 +348,7 @@ Pad.prototype.init = async function (text) { } // try to load the pad - const value = await db.get(`pad:${this.id}`); + const value = await this._db.get(`pad:${this.id}`); // if this pad exists, load it if (value != null) { @@ -357,13 +366,9 @@ Pad.prototype.init = async function (text) { await this.appendRevision(firstChangeset, ''); } - - hooks.callAll('padLoad', {pad: this}); }; Pad.prototype.copy = async function (destinationID, force) { - const sourceID = this.id; - // Kick everyone from this pad. // This was commented due to https://github.com/ether/etherpad-lite/issues/3183. // Do we really need to kick everyone out? @@ -379,7 +384,7 @@ Pad.prototype.copy = async function (destinationID, force) { await this.removePadIfForceIsTrueAndAlreadyExist(destinationID, force); // copy the 'pad' entry - const pad = await db.get(`pad:${sourceID}`); + const pad = await this._db.get(`pad:${this.id}`); db.set(`pad:${destinationID}`, pad); // copy all relations in parallel @@ -388,7 +393,7 @@ Pad.prototype.copy = async function (destinationID, force) { // copy all chat messages const chatHead = this.chatHead; for (let i = 0; i <= chatHead; ++i) { - const p = db.get(`pad:${sourceID}:chat:${i}`) + const p = this._db.get(`pad:${this.id}:chat:${i}`) .then((chat) => db.set(`pad:${destinationID}:chat:${i}`, chat)); promises.push(p); } @@ -396,7 +401,7 @@ Pad.prototype.copy = async function (destinationID, force) { // copy all revisions const revHead = this.head; for (let i = 0; i <= revHead; ++i) { - const p = db.get(`pad:${sourceID}:revs:${i}`) + const p = this._db.get(`pad:${this.id}:revs:${i}`) .then((rev) => db.set(`pad:${destinationID}:revs:${i}`, rev)); promises.push(p); } @@ -552,12 +557,12 @@ Pad.prototype.remove = async function () { // delete all chat messages p.push(promises.timesLimit(this.chatHead + 1, 500, async (i) => { - await db.remove(`pad:${padID}:chat:${i}`, null); + await this._db.remove(`pad:${this.id}:chat:${i}`, null); })); // delete all revisions p.push(promises.timesLimit(this.head + 1, 500, async (i) => { - await db.remove(`pad:${padID}:revs:${i}`, null); + await this._db.remove(`pad:${this.id}:revs:${i}`, null); })); // remove pad from all authors who contributed @@ -601,3 +606,127 @@ Pad.prototype.addSavedRevision = async function (revNum, savedById, label) { Pad.prototype.getSavedRevisions = function () { return this.savedRevisions; }; + +/** + * Asserts that all pad data is consistent. Throws if inconsistent. + */ +Pad.prototype.check = async function () { + assert(this.id != null); + assert.equal(typeof this.id, 'string'); + + const head = this.getHeadRevisionNumber(); + assert(Number.isInteger(head)); + assert(head >= -1); + + const savedRevisionsList = this.getSavedRevisionsList(); + assert(Array.isArray(savedRevisionsList)); + assert.equal(this.getSavedRevisionsNumber(), savedRevisionsList.length); + let prevSavedRev = null; + for (const rev of savedRevisionsList) { + assert(Number.isInteger(rev)); + assert(rev >= 0); + assert(rev <= head); + assert(prevSavedRev == null || rev > prevSavedRev); + prevSavedRev = rev; + } + const savedRevisions = this.getSavedRevisions(); + assert(Array.isArray(savedRevisions)); + assert.equal(savedRevisions.length, savedRevisionsList.length); + const savedRevisionsIds = new Set(); + for (const savedRev of savedRevisions) { + assert(savedRev != null); + assert.equal(typeof savedRev, 'object'); + assert(savedRevisionsList.includes(savedRev.revNum)); + assert(savedRev.id != null); + assert.equal(typeof savedRev.id, 'string'); + assert(!savedRevisionsIds.has(savedRev.id)); + savedRevisionsIds.add(savedRev.id); + } + + const pool = this.apool(); + assert(pool instanceof AttributePool); + await pool.check(); + + const decodeAttribString = function* (str) { + const re = /\*([0-9a-z]+)|./gy; + let match; + while ((match = re.exec(str)) != null) { + const [m, n] = match; + if (n == null) throw new Error(`invalid character in attribute string: ${m}`); + yield Number.parseInt(n, 36); + } + }; + + const authors = new Set(); + pool.eachAttrib((k, v) => { + if (k === 'author' && v) authors.add(v); + }); + let atext = Changeset.makeAText('\n'); + let r; + try { + for (r = 0; r <= head; ++r) { + const [changeset, author, timestamp] = await Promise.all([ + this.getRevisionChangeset(r), + this.getRevisionAuthor(r), + this.getRevisionDate(r), + ]); + assert(author != null); + assert.equal(typeof author, 'string'); + if (author) authors.add(author); + assert(timestamp != null); + assert.equal(typeof timestamp, 'number'); + assert(timestamp > 0); + assert(changeset != null); + assert.equal(typeof changeset, 'string'); + Changeset.checkRep(changeset); + const unpacked = Changeset.unpack(changeset); + let text = atext.text; + const iter = Changeset.opIterator(unpacked.ops); + while (iter.hasNext()) { + const op = iter.next(); + if (['=', '-'].includes(op.opcode)) { + assert(text.length >= op.chars); + const consumed = text.slice(0, op.chars); + const nlines = (consumed.match(/\n/g) || []).length; + assert.equal(op.lines, nlines); + if (op.lines > 0) assert(consumed.endsWith('\n')); + text = text.slice(op.chars); + } + let prevK = null; + for (const n of decodeAttribString(op.attribs)) { + const attrib = pool.getAttrib(n); + assert(attrib != null); + const [k] = attrib; + assert(prevK == null || prevK < k); + prevK = k; + } + } + atext = Changeset.applyToAText(changeset, atext, pool); + assert.deepEqual(await this.getInternalRevisionAText(r), atext); + } + } catch (err) { + const pfx = `(pad ${this.id} revision ${r}) `; + if (err.stack) err.stack = pfx + err.stack; + err.message = pfx + err.message; + throw err; + } + assert.equal(this.text(), atext.text); + assert.deepEqual(this.atext, atext); + assert.deepEqual(this.getAllAuthors().sort(), [...authors].sort()); + + assert(Number.isInteger(this.chatHead)); + assert(this.chatHead >= -1); + let c; + try { + for (c = 0; c <= this.chatHead; ++c) { + const msg = await this.getChatMessage(c); + assert(msg != null); + assert(msg instanceof ChatMessage); + } + } catch (err) { + const pfx = `(pad ${this.id} chat message ${c}) `; + if (err.stack) err.stack = pfx + err.stack; + err.message = pfx + err.message; + throw err; + } +}; diff --git a/src/node/db/PadManager.js b/src/node/db/PadManager.js index 58434c8cd..4aebc1a86 100644 --- a/src/node/db/PadManager.js +++ b/src/node/db/PadManager.js @@ -20,8 +20,9 @@ */ const CustomError = require('../utils/customError'); -const Pad = require('../db/Pad').Pad; +const Pad = require('../db/Pad'); const db = require('./DB'); +const hooks = require('../../static/js/pluginfw/hooks'); /** * A cache of all loaded Pads. @@ -137,10 +138,11 @@ exports.getPad = async (id, text) => { } // try to load pad - pad = new Pad(id); + pad = new Pad.Pad(id); // initialize the pad await pad.init(text); + hooks.callAll('padLoad', {pad}); globalPads.set(id, pad); padList.addPad(id); diff --git a/src/node/utils/ImportEtherpad.js b/src/node/utils/ImportEtherpad.js index b90718a57..60a3b8492 100644 --- a/src/node/utils/ImportEtherpad.js +++ b/src/node/utils/ImportEtherpad.js @@ -1,5 +1,5 @@ -// 'use strict'; -// Uncommenting above breaks tests. +'use strict'; + /** * 2014 John McLear (Etherpad Foundation / McLear Ltd) * @@ -16,6 +16,10 @@ * limitations under the License. */ +const AttributePool = require('../../static/js/AttributePool'); +const {Pad} = require('../db/Pad'); +const async = require('async'); +const authorManager = require('../db/AuthorManager'); const db = require('../db/DB'); const hooks = require('../../static/js/pluginfw/hooks'); const log4js = require('log4js'); @@ -23,7 +27,7 @@ const supportedElems = require('../../static/js/contentcollector').supportedElem const logger = log4js.getLogger('ImportEtherpad'); -exports.setPadRaw = (padId, r) => { +exports.setPadRaw = async (padId, r) => { const records = JSON.parse(r); // get supported block Elements from plugins, we will use this later. @@ -31,72 +35,86 @@ exports.setPadRaw = (padId, r) => { supportedElems.add(element); }); - const unsupportedElements = new Set(); + // DB key prefixes for pad records. Each key is expected to have the form `${prefix}:${padId}` or + // `${prefix}:${padId}:${otherstuff}`. + const padKeyPrefixes = [ + ...await hooks.aCallAll('exportEtherpadAdditionalContent'), + 'pad', + ]; - Object.keys(records).forEach(async (key) => { - let value = records[key]; + let originalPadId = null; + const checkOriginalPadId = (padId) => { + if (originalPadId == null) originalPadId = padId; + if (originalPadId !== padId) throw new Error('unexpected pad ID in record'); + }; + // Limit the number of in-flight database queries so that the queries do not time out when + // importing really large files. + const q = async.queue(async (task) => await task(), 100); + + // First validate and transform values. Do not commit any records to the database yet in case + // there is a problem with the data. + + const dbRecords = new Map(); + const existingAuthors = new Set(); + await Promise.all(Object.entries(records).map(([key, value]) => q.pushAsync(async () => { if (!value) { return; } - - let newKey; - - if (value.padIDs) { - // Author data - rewrite author pad ids - value.padIDs[padId] = 1; - newKey = key; - - // Does this author already exist? - const author = await db.get(key); - - if (author) { - // Yes, add the padID to the author - if (Object.prototype.toString.call(author) === '[object Array]') { - author.padIDs.push(padId); - } - - value = author; - } else { - // No, create a new array with the author info in - value.padIDs = [padId]; + const keyParts = key.split(':'); + const [prefix, id] = keyParts; + if (prefix === 'globalAuthor' && keyParts.length === 2) { + // In the database, the padIDs subkey is an object (which is used as a set) that records every + // pad the author has worked on. When exported, that object becomes a single string containing + // the exported pad's ID. + if (typeof value.padIDs !== 'string') { + throw new TypeError('globalAuthor padIDs subkey is not a string'); } + checkOriginalPadId(value.padIDs); + if (await authorManager.doesAuthorExist(id)) { + existingAuthors.add(id); + return; + } + value.padIDs = {[padId]: 1}; + } else if (padKeyPrefixes.includes(prefix)) { + checkOriginalPadId(id); + if (prefix === 'pad' && keyParts.length === 2) { + const pool = new AttributePool().fromJsonable(value.pool); + const unsupportedElements = new Set(); + pool.eachAttrib((k, v) => { + if (!supportedElems.has(k)) unsupportedElements.add(k); + }); + if (unsupportedElements.size) { + logger.warn(`(pad ${padId}) unsupported attributes (try installing a plugin): ` + + `${[...unsupportedElements].join(', ')}`); + } + } + keyParts[1] = padId; + key = keyParts.join(':'); } else { - // Not author data, probably pad data - // we can split it to look to see if it's pad data - - // is this an attribute we support or not? If not, tell the admin - if (value.pool) { - for (const attrib of Object.keys(value.pool.numToAttrib)) { - const attribName = value.pool.numToAttrib[attrib][0]; - if (!supportedElems.has(attribName)) unsupportedElements.add(attribName); - } - } - const oldPadId = key.split(':'); - - // we know it's pad data - if (oldPadId[0] === 'pad') { - // so set the new pad id for the author - oldPadId[1] = padId; - - // and create the value - newKey = oldPadId.join(':'); // create the new key - } - - // is this a key that is supported through a plugin? - // get content that has a different prefix IE comments:padId:foo - // a plugin would return something likle ['comments', 'cakes'] - for (const prefix of await hooks.aCallAll('exportEtherpadAdditionalContent')) { - if (prefix === oldPadId[0]) newKey = `${prefix}:${padId}`; - } + logger.warn(`(pad ${padId}) Ignoring record with unsupported key: ${key}`); + return; } + dbRecords.set(key, value); + }))); - // Write the value to the server - await db.set(newKey, value); + const pad = new Pad(padId, { + // Only fetchers are needed to check the pad's integrity. + get: async (k) => dbRecords.get(k), + getSub: async (k, sub) => { + let v = dbRecords.get(k); + for (const sk of sub) { + if (v == null) return null; + v = v[sk]; + } + return v; + }, }); + await pad.init(); + await pad.check(); - if (unsupportedElements.size) { - logger.warn('Ignoring unsupported elements (you might want to install a plugin): ' + - `${[...unsupportedElements].join(', ')}`); - } + await Promise.all([ + ...[...dbRecords].map(([k, v]) => q.pushAsync(() => db.set(k, v))), + ...[...existingAuthors].map((a) => q.pushAsync(() => authorManager.addPad(a, padId))), + ]); }; diff --git a/src/package-lock.json b/src/package-lock.json index 594907f71..8c983062f 100644 --- a/src/package-lock.json +++ b/src/package-lock.json @@ -1,6 +1,6 @@ { "name": "ep_etherpad-lite", - "version": "1.8.15", + "version": "1.8.16", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/src/package.json b/src/package.json index d369cdfb9..4a70eefce 100644 --- a/src/package.json +++ b/src/package.json @@ -246,6 +246,6 @@ "test": "mocha --timeout 120000 --recursive tests/backend/specs ../node_modules/ep_*/static/tests/backend/specs", "test-container": "mocha --timeout 5000 tests/container/specs/api" }, - "version": "1.8.15", + "version": "1.8.16", "license": "Apache-2.0" } diff --git a/src/static/js/AttributePool.js b/src/static/js/AttributePool.js index d46b207bb..ccdd2eb35 100644 --- a/src/static/js/AttributePool.js +++ b/src/static/js/AttributePool.js @@ -205,6 +205,35 @@ class AttributePool { } return this; } + + /** + * Asserts that the data in the pool is consistent. Throws if inconsistent. + */ + check() { + if (!Number.isInteger(this.nextNum)) throw new Error('nextNum property is not an integer'); + if (this.nextNum < 0) throw new Error('nextNum property is negative'); + for (const prop of ['numToAttrib', 'attribToNum']) { + const obj = this[prop]; + if (obj == null) throw new Error(`${prop} property is null`); + if (typeof obj !== 'object') throw new TypeError(`${prop} property is not an object`); + const keys = Object.keys(obj); + if (keys.length !== this.nextNum) { + throw new Error(`${prop} size mismatch (want ${this.nextNum}, got ${keys.length})`); + } + } + for (let i = 0; i < this.nextNum; ++i) { + const attr = this.numToAttrib[`${i}`]; + if (!Array.isArray(attr)) throw new TypeError(`attrib ${i} is not an array`); + if (attr.length !== 2) throw new Error(`attrib ${i} is not an array of length 2`); + const [k, v] = attr; + if (k == null) throw new TypeError(`attrib ${i} key is null`); + if (typeof k !== 'string') throw new TypeError(`attrib ${i} key is not a string`); + if (v == null) throw new TypeError(`attrib ${i} value is null`); + if (typeof v !== 'string') throw new TypeError(`attrib ${i} value is not a string`); + const attrStr = String(attr); + if (this.attribToNum[attrStr] !== i) throw new Error(`attribToNum for ${attrStr} !== ${i}`); + } + } } module.exports = AttributePool; diff --git a/src/static/js/Changeset.js b/src/static/js/Changeset.js index 669041ffb..0779884c0 100644 --- a/src/static/js/Changeset.js +++ b/src/static/js/Changeset.js @@ -380,7 +380,6 @@ exports.checkRep = (cs) => { const assem = exports.smartOpAssembler(); let oldPos = 0; let calcNewLen = 0; - let numInserted = 0; for (const o of exports.deserializeOps(ops)) { switch (o.opcode) { case '=': @@ -393,25 +392,29 @@ exports.checkRep = (cs) => { break; case '+': { + assert(charBank.length >= o.chars, 'Invalid changeset: not enough chars in charBank'); + const chars = charBank.slice(0, o.chars); + const nlines = (chars.match(/\n/g) || []).length; + assert(nlines === o.lines, + 'Invalid changeset: number of newlines in insert op does not match the charBank'); + assert(o.lines === 0 || chars.endsWith('\n'), + 'Invalid changeset: multiline insert op does not end with a newline'); + charBank = charBank.slice(o.chars); calcNewLen += o.chars; - numInserted += o.chars; assert(calcNewLen <= newLen, `${calcNewLen} > ${newLen} in ${cs}`); break; } + default: + assert(false, `Invalid changeset: Unknown opcode: ${JSON.stringify(o.opcode)}`); } assem.append(o); } - calcNewLen += oldLen - oldPos; - charBank = charBank.substring(0, numInserted); - while (charBank.length < numInserted) { - charBank += '?'; - } - + assert(calcNewLen === newLen, 'Invalid changeset: claimed length does not match actual length'); + assert(charBank === '', 'Invalid changeset: excess characters in the charBank'); assem.endDocument(); - const normalized = exports.pack(oldLen, calcNewLen, assem.toString(), charBank); - assert(normalized === cs, 'Invalid changeset (checkRep failed)'); - + const normalized = exports.pack(oldLen, calcNewLen, assem.toString(), unpacked.charBank); + assert(normalized === cs, 'Invalid changeset: not in canonical form'); return cs; }; @@ -1059,9 +1062,7 @@ const applyZip = (in1, in2, func) => { exports.unpack = (cs) => { const headerRegex = /Z:([0-9a-z]+)([><])([0-9a-z]+)|/; const headerMatch = headerRegex.exec(cs); - if ((!headerMatch) || (!headerMatch[0])) { - error(`Not a exports: ${cs}`); - } + if ((!headerMatch) || (!headerMatch[0])) error(`Not a changeset: ${cs}`); const oldLen = exports.parseNum(headerMatch[1]); const changeSign = (headerMatch[2] === '>') ? 1 : -1; const changeMag = exports.parseNum(headerMatch[3]); diff --git a/src/tests/backend/specs/ImportEtherpad.js b/src/tests/backend/specs/ImportEtherpad.js new file mode 100644 index 000000000..6dfd11112 --- /dev/null +++ b/src/tests/backend/specs/ImportEtherpad.js @@ -0,0 +1,170 @@ +'use strict'; + +const assert = require('assert').strict; +const authorManager = require('../../../node/db/AuthorManager'); +const db = require('../../../node/db/DB'); +const importEtherpad = require('../../../node/utils/ImportEtherpad'); +const padManager = require('../../../node/db/PadManager'); +const {randomString} = require('../../../static/js/pad_utils'); + +describe(__filename, function () { + let padId; + + const makeAuthorId = () => `a.${randomString(16)}`; + + const makeExport = (authorId) => ({ + 'pad:testing': { + atext: { + text: 'foo\n', + attribs: '|1+4', + }, + pool: { + numToAttrib: {}, + nextNum: 0, + }, + head: 0, + savedRevisions: [], + }, + [`globalAuthor:${authorId}`]: { + colorId: '#000000', + name: 'new', + timestamp: 1598747784631, + padIDs: 'testing', + }, + 'pad:testing:revs:0': { + changeset: 'Z:1>3+3$foo', + meta: { + author: '', + timestamp: 1597632398288, + pool: { + numToAttrib: {}, + nextNum: 0, + }, + atext: { + text: 'foo\n', + attribs: '|1+4', + }, + }, + }, + }); + + beforeEach(async function () { + padId = randomString(10); + assert(!await padManager.doesPadExist(padId)); + }); + + it('unknown db records are ignored', async function () { + const badKey = `maliciousDbKey${randomString(10)}`; + await importEtherpad.setPadRaw(padId, JSON.stringify({ + [badKey]: 'value', + ...makeExport(makeAuthorId()), + })); + assert(await db.get(badKey) == null); + }); + + it('changes are all or nothing', async function () { + const authorId = makeAuthorId(); + const data = makeExport(authorId); + data['pad:differentPadId:revs:0'] = data['pad:testing:revs:0']; + delete data['pad:testing:revs:0']; + assert.rejects(importEtherpad.setPadRaw(padId, JSON.stringify(data)), /unexpected pad ID/); + assert(!await authorManager.doesAuthorExist(authorId)); + assert(!await padManager.doesPadExist(padId)); + }); + + describe('author pad IDs', function () { + let existingAuthorId; + let newAuthorId; + + beforeEach(async function () { + existingAuthorId = (await authorManager.createAuthor('existing')).authorID; + assert(await authorManager.doesAuthorExist(existingAuthorId)); + assert.deepEqual((await authorManager.listPadsOfAuthor(existingAuthorId)).padIDs, []); + newAuthorId = makeAuthorId(); + assert.notEqual(newAuthorId, existingAuthorId); + assert(!await authorManager.doesAuthorExist(newAuthorId)); + }); + + it('author does not yet exist', async function () { + await importEtherpad.setPadRaw(padId, JSON.stringify(makeExport(newAuthorId))); + assert(await authorManager.doesAuthorExist(newAuthorId)); + const author = await authorManager.getAuthor(newAuthorId); + assert.equal(author.name, 'new'); + assert.equal(author.colorId, '#000000'); + assert.deepEqual((await authorManager.listPadsOfAuthor(newAuthorId)).padIDs, [padId]); + }); + + it('author already exists, no pads', async function () { + newAuthorId = existingAuthorId; + await importEtherpad.setPadRaw(padId, JSON.stringify(makeExport(newAuthorId))); + assert(await authorManager.doesAuthorExist(newAuthorId)); + const author = await authorManager.getAuthor(newAuthorId); + assert.equal(author.name, 'existing'); + assert.notEqual(author.colorId, '#000000'); + assert.deepEqual((await authorManager.listPadsOfAuthor(newAuthorId)).padIDs, [padId]); + }); + + it('author already exists, on different pad', async function () { + const otherPadId = randomString(10); + await authorManager.addPad(existingAuthorId, otherPadId); + newAuthorId = existingAuthorId; + await importEtherpad.setPadRaw(padId, JSON.stringify(makeExport(newAuthorId))); + assert(await authorManager.doesAuthorExist(newAuthorId)); + const author = await authorManager.getAuthor(newAuthorId); + assert.equal(author.name, 'existing'); + assert.notEqual(author.colorId, '#000000'); + assert.deepEqual( + (await authorManager.listPadsOfAuthor(newAuthorId)).padIDs.sort(), + [otherPadId, padId].sort()); + }); + + it('author already exists, on same pad', async function () { + await authorManager.addPad(existingAuthorId, padId); + newAuthorId = existingAuthorId; + await importEtherpad.setPadRaw(padId, JSON.stringify(makeExport(newAuthorId))); + assert(await authorManager.doesAuthorExist(newAuthorId)); + const author = await authorManager.getAuthor(newAuthorId); + assert.equal(author.name, 'existing'); + assert.notEqual(author.colorId, '#000000'); + assert.deepEqual((await authorManager.listPadsOfAuthor(newAuthorId)).padIDs, [padId]); + }); + }); + + describe('enforces consistent pad ID', function () { + it('pad record has different pad ID', async function () { + const data = makeExport(makeAuthorId()); + data['pad:differentPadId'] = data['pad:testing']; + delete data['pad:testing']; + assert.rejects(importEtherpad.setPadRaw(padId, JSON.stringify(data)), /unexpected pad ID/); + }); + + it('globalAuthor record has different pad ID', async function () { + const authorId = makeAuthorId(); + const data = makeExport(authorId); + data[`globalAuthor:${authorId}`].padIDs = 'differentPadId'; + assert.rejects(importEtherpad.setPadRaw(padId, JSON.stringify(data)), /unexpected pad ID/); + }); + + it('pad rev record has different pad ID', async function () { + const data = makeExport(makeAuthorId()); + data['pad:differentPadId:revs:0'] = data['pad:testing:revs:0']; + delete data['pad:testing:revs:0']; + assert.rejects(importEtherpad.setPadRaw(padId, JSON.stringify(data)), /unexpected pad ID/); + }); + }); + + describe('order of records does not matter', function () { + for (const perm of [[0, 1, 2], [0, 2, 1], [1, 0, 2], [1, 2, 0], [2, 0, 1], [2, 1, 0]]) { + it(JSON.stringify(perm), async function () { + const authorId = makeAuthorId(); + const records = Object.entries(makeExport(authorId)); + assert.equal(records.length, 3); + await importEtherpad.setPadRaw( + padId, JSON.stringify(Object.fromEntries(perm.map((i) => records[i])))); + assert.deepEqual((await authorManager.listPadsOfAuthor(authorId)).padIDs, [padId]); + const pad = await padManager.getPad(padId); + assert.equal(pad.text(), 'foo\n'); + }); + } + }); +}); diff --git a/src/tests/backend/specs/api/importexportGetPost.js b/src/tests/backend/specs/api/importexportGetPost.js index 584341cc0..98244b22b 100644 --- a/src/tests/backend/specs/api/importexportGetPost.js +++ b/src/tests/backend/specs/api/importexportGetPost.js @@ -315,6 +315,118 @@ describe(__filename, function () { }); }); + describe('malformed .etherpad files are rejected', function () { + const makeGoodExport = () => ({ + 'pad:testing': { + atext: { + text: 'foo\n', + attribs: '|1+4', + }, + pool: { + numToAttrib: { + 0: ['author', 'a.foo'], + }, + nextNum: 1, + }, + head: 0, + savedRevisions: [], + }, + 'globalAuthor:a.foo': { + colorId: '#000000', + name: 'author foo', + timestamp: 1598747784631, + padIDs: 'testing', + }, + 'pad:testing:revs:0': { + changeset: 'Z:1>3+3$foo', + meta: { + author: 'a.foo', + timestamp: 1597632398288, + pool: { + numToAttrib: {}, + nextNum: 0, + }, + atext: { + text: 'foo\n', + attribs: '|1+4', + }, + }, + }, + }); + + const importEtherpad = (records) => agent.post(`/p/${testPadId}/import`) + .attach('file', Buffer.from(JSON.stringify(records), 'utf8'), { + filename: '/test.etherpad', + contentType: 'application/etherpad', + }); + + before(async function () { + // makeGoodExport() is assumed to produce good .etherpad records. Verify that assumption so + // that a buggy makeGoodExport() doesn't cause checks to accidentally pass. + const records = makeGoodExport(); + await importEtherpad(records) + .expect(200) + .expect('Content-Type', /json/) + .expect((res) => assert.deepEqual(res.body, { + code: 0, + message: 'ok', + data: {directDatabaseAccess: true}, + })); + await agent.get(`/p/${testPadId}/export/txt`) + .expect(200) + .buffer(true).parse(superagent.parse.text) + .expect((res) => assert.match(res.text, /foo/)); + }); + + it('missing rev', async function () { + const records = makeGoodExport(); + delete records['pad:testing:revs:0']; + await importEtherpad(records).expect(500); + }); + + it('bad changeset', async function () { + const records = makeGoodExport(); + records['pad:testing:revs:0'].changeset = 'garbage'; + await importEtherpad(records).expect(500); + }); + + it('missing attrib in pool', async function () { + const records = makeGoodExport(); + records['pad:testing'].pool.nextNum++; + await importEtherpad(records).expect(500); + }); + + it('extra attrib in pool', async function () { + const records = makeGoodExport(); + const pool = records['pad:testing'].pool; + pool.numToAttrib[pool.nextNum] = ['key', 'value']; + await importEtherpad(records).expect(500); + }); + + it('changeset refers to non-existent attrib', async function () { + const records = makeGoodExport(); + records['pad:testing:revs:1'] = { + changeset: 'Z:4>4*1+4$asdf', + meta: { + author: 'a.foo', + timestamp: 1597632398288, + }, + }; + records['pad:testing'].head = 1; + records['pad:testing'].atext = { + text: 'asdffoo\n', + attribs: '*1+4|1+4', + }; + await importEtherpad(records).expect(500); + }); + + it('pad atext does not match', async function () { + const records = makeGoodExport(); + records['pad:testing'].atext.attribs = `*0${records['pad:testing'].atext.attribs}`; + await importEtherpad(records).expect(500); + }); + }); + describe('Import authorization checks', function () { let authorize;