From 9bc90128cb4d527e5bc7585df6e84aca1c991a20 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Tue, 23 Nov 2021 00:34:59 -0500 Subject: [PATCH 01/18] ImportEtherpad: Fix async logic --- src/node/utils/ImportEtherpad.js | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/node/utils/ImportEtherpad.js b/src/node/utils/ImportEtherpad.js index b90718a57..63d049347 100644 --- a/src/node/utils/ImportEtherpad.js +++ b/src/node/utils/ImportEtherpad.js @@ -23,7 +23,7 @@ const supportedElems = require('../../static/js/contentcollector').supportedElem const logger = log4js.getLogger('ImportEtherpad'); -exports.setPadRaw = (padId, r) => { +exports.setPadRaw = async (padId, r) => { const records = JSON.parse(r); // get supported block Elements from plugins, we will use this later. @@ -33,9 +33,7 @@ exports.setPadRaw = (padId, r) => { const unsupportedElements = new Set(); - Object.keys(records).forEach(async (key) => { - let value = records[key]; - + await Promise.all(Object.entries(records).map(async ([key, value]) => { if (!value) { return; } @@ -93,7 +91,7 @@ exports.setPadRaw = (padId, r) => { // Write the value to the server await db.set(newKey, value); - }); + })); if (unsupportedElements.size) { logger.warn('Ignoring unsupported elements (you might want to install a plugin): ' + From 2f0561abc04fdb5a30073f3fd2e4d5d7ae8cd7ed Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Wed, 24 Nov 2021 23:39:27 -0500 Subject: [PATCH 02/18] ImportEtherpad: Remove unnecessary variable --- src/node/utils/ImportEtherpad.js | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/node/utils/ImportEtherpad.js b/src/node/utils/ImportEtherpad.js index 63d049347..1c8498fbf 100644 --- a/src/node/utils/ImportEtherpad.js +++ b/src/node/utils/ImportEtherpad.js @@ -38,12 +38,9 @@ exports.setPadRaw = async (padId, r) => { return; } - let newKey; - if (value.padIDs) { // Author data - rewrite author pad ids value.padIDs[padId] = 1; - newKey = key; // Does this author already exist? const author = await db.get(key); @@ -76,21 +73,17 @@ exports.setPadRaw = async (padId, r) => { if (oldPadId[0] === 'pad') { // so set the new pad id for the author oldPadId[1] = padId; - - // and create the value - newKey = oldPadId.join(':'); // create the new key + key = oldPadId.join(':'); } // is this a key that is supported through a plugin? // get content that has a different prefix IE comments:padId:foo // a plugin would return something likle ['comments', 'cakes'] for (const prefix of await hooks.aCallAll('exportEtherpadAdditionalContent')) { - if (prefix === oldPadId[0]) newKey = `${prefix}:${padId}`; + if (prefix === oldPadId[0]) key = `${prefix}:${padId}`; } } - - // Write the value to the server - await db.set(newKey, value); + await db.set(key, value); })); if (unsupportedElements.size) { From fea7948b05a352b0a14bb6691efa0f4c7ea74034 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Thu, 25 Nov 2021 01:43:46 -0500 Subject: [PATCH 03/18] ImportEtherpad: Fix author info processing --- src/node/utils/ImportEtherpad.js | 30 ++---- src/tests/backend/specs/ImportEtherpad.js | 112 ++++++++++++++++++++++ 2 files changed, 122 insertions(+), 20 deletions(-) create mode 100644 src/tests/backend/specs/ImportEtherpad.js diff --git a/src/node/utils/ImportEtherpad.js b/src/node/utils/ImportEtherpad.js index 1c8498fbf..63c9afc9b 100644 --- a/src/node/utils/ImportEtherpad.js +++ b/src/node/utils/ImportEtherpad.js @@ -1,5 +1,5 @@ -// 'use strict'; -// Uncommenting above breaks tests. +'use strict'; + /** * 2014 John McLear (Etherpad Foundation / McLear Ltd) * @@ -16,6 +16,7 @@ * limitations under the License. */ +const authorManager = require('../db/AuthorManager'); const db = require('../db/DB'); const hooks = require('../../static/js/pluginfw/hooks'); const log4js = require('log4js'); @@ -37,25 +38,14 @@ exports.setPadRaw = async (padId, r) => { if (!value) { return; } - - if (value.padIDs) { - // Author data - rewrite author pad ids - value.padIDs[padId] = 1; - - // Does this author already exist? - const author = await db.get(key); - - if (author) { - // Yes, add the padID to the author - if (Object.prototype.toString.call(author) === '[object Array]') { - author.padIDs.push(padId); - } - - value = author; - } else { - // No, create a new array with the author info in - value.padIDs = [padId]; + const keyParts = key.split(':'); + const [prefix, id] = keyParts; + if (prefix === 'globalAuthor' && keyParts.length === 2) { + if (await authorManager.doesAuthorExist(id)) { + await authorManager.addPad(id, padId); + return; } + value.padIDs = {[padId]: 1}; } else { // Not author data, probably pad data // we can split it to look to see if it's pad data diff --git a/src/tests/backend/specs/ImportEtherpad.js b/src/tests/backend/specs/ImportEtherpad.js new file mode 100644 index 000000000..c32cdb858 --- /dev/null +++ b/src/tests/backend/specs/ImportEtherpad.js @@ -0,0 +1,112 @@ +'use strict'; + +const assert = require('assert').strict; +const authorManager = require('../../../node/db/AuthorManager'); +const importEtherpad = require('../../../node/utils/ImportEtherpad'); +const padManager = require('../../../node/db/PadManager'); +const {randomString} = require('../../../static/js/pad_utils'); + +describe(__filename, function () { + let padId; + + const makeAuthorId = () => `a.${randomString(16)}`; + + const makeExport = (authorId) => ({ + 'pad:testing': { + atext: { + text: 'foo\n', + attribs: '|1+4', + }, + pool: { + numToAttrib: {}, + nextNum: 0, + }, + head: 0, + savedRevisions: [], + }, + [`globalAuthor:${authorId}`]: { + colorId: '#000000', + name: 'new', + timestamp: 1598747784631, + padIDs: 'testing', + }, + 'pad:testing:revs:0': { + changeset: 'Z:1>3+3$foo', + meta: { + author: '', + timestamp: 1597632398288, + pool: { + numToAttrib: {}, + nextNum: 0, + }, + atext: { + text: 'foo\n', + attribs: '|1+4', + }, + }, + }, + }); + + beforeEach(async function () { + padId = randomString(10); + assert(!await padManager.doesPadExist(padId)); + }); + + describe('author pad IDs', function () { + let existingAuthorId; + let newAuthorId; + + beforeEach(async function () { + existingAuthorId = (await authorManager.createAuthor('existing')).authorID; + assert(await authorManager.doesAuthorExist(existingAuthorId)); + assert.deepEqual((await authorManager.listPadsOfAuthor(existingAuthorId)).padIDs, []); + newAuthorId = makeAuthorId(); + assert.notEqual(newAuthorId, existingAuthorId); + assert(!await authorManager.doesAuthorExist(newAuthorId)); + }); + + it('author does not yet exist', async function () { + await importEtherpad.setPadRaw(padId, JSON.stringify(makeExport(newAuthorId))); + assert(await authorManager.doesAuthorExist(newAuthorId)); + const author = await authorManager.getAuthor(newAuthorId); + assert.equal(author.name, 'new'); + assert.equal(author.colorId, '#000000'); + assert.deepEqual((await authorManager.listPadsOfAuthor(newAuthorId)).padIDs, [padId]); + }); + + it('author already exists, no pads', async function () { + newAuthorId = existingAuthorId; + await importEtherpad.setPadRaw(padId, JSON.stringify(makeExport(newAuthorId))); + assert(await authorManager.doesAuthorExist(newAuthorId)); + const author = await authorManager.getAuthor(newAuthorId); + assert.equal(author.name, 'existing'); + assert.notEqual(author.colorId, '#000000'); + assert.deepEqual((await authorManager.listPadsOfAuthor(newAuthorId)).padIDs, [padId]); + }); + + it('author already exists, on different pad', async function () { + const otherPadId = randomString(10); + await authorManager.addPad(existingAuthorId, otherPadId); + newAuthorId = existingAuthorId; + await importEtherpad.setPadRaw(padId, JSON.stringify(makeExport(newAuthorId))); + assert(await authorManager.doesAuthorExist(newAuthorId)); + const author = await authorManager.getAuthor(newAuthorId); + assert.equal(author.name, 'existing'); + assert.notEqual(author.colorId, '#000000'); + assert.deepEqual( + (await authorManager.listPadsOfAuthor(newAuthorId)).padIDs.sort(), + [otherPadId, padId].sort()); + }); + + it('author already exists, on same pad', async function () { + await authorManager.addPad(existingAuthorId, padId); + newAuthorId = existingAuthorId; + await importEtherpad.setPadRaw(padId, JSON.stringify(makeExport(newAuthorId))); + assert(await authorManager.doesAuthorExist(newAuthorId)); + const author = await authorManager.getAuthor(newAuthorId); + assert.equal(author.name, 'existing'); + assert.notEqual(author.colorId, '#000000'); + assert.deepEqual((await authorManager.listPadsOfAuthor(newAuthorId)).padIDs, [padId]); + }); + }); +}); From 003e5cbd4bebeac620aa06bd7c97c05b47ef2d16 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Thu, 25 Nov 2021 01:19:00 -0500 Subject: [PATCH 04/18] ImportEtherpad: Fix DB key pad ID transformation --- src/node/utils/ImportEtherpad.js | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/src/node/utils/ImportEtherpad.js b/src/node/utils/ImportEtherpad.js index 63c9afc9b..224424a43 100644 --- a/src/node/utils/ImportEtherpad.js +++ b/src/node/utils/ImportEtherpad.js @@ -34,6 +34,13 @@ exports.setPadRaw = async (padId, r) => { const unsupportedElements = new Set(); + // DB key prefixes for pad records. Each key is expected to have the form `${prefix}:${padId}` or + // `${prefix}:${padId}:${otherstuff}`. + const padKeyPrefixes = [ + ...await hooks.aCallAll('exportEtherpadAdditionalContent'), + 'pad', + ]; + await Promise.all(Object.entries(records).map(async ([key, value]) => { if (!value) { return; @@ -47,9 +54,6 @@ exports.setPadRaw = async (padId, r) => { } value.padIDs = {[padId]: 1}; } else { - // Not author data, probably pad data - // we can split it to look to see if it's pad data - // is this an attribute we support or not? If not, tell the admin if (value.pool) { for (const attrib of Object.keys(value.pool.numToAttrib)) { @@ -57,20 +61,9 @@ exports.setPadRaw = async (padId, r) => { if (!supportedElems.has(attribName)) unsupportedElements.add(attribName); } } - const oldPadId = key.split(':'); - - // we know it's pad data - if (oldPadId[0] === 'pad') { - // so set the new pad id for the author - oldPadId[1] = padId; - key = oldPadId.join(':'); - } - - // is this a key that is supported through a plugin? - // get content that has a different prefix IE comments:padId:foo - // a plugin would return something likle ['comments', 'cakes'] - for (const prefix of await hooks.aCallAll('exportEtherpadAdditionalContent')) { - if (prefix === oldPadId[0]) key = `${prefix}:${padId}`; + if (padKeyPrefixes.includes(prefix)) { + keyParts[1] = padId; + key = keyParts.join(':'); } } await db.set(key, value); From 8e9bc8d325c96be8abdc1103ea84562f93ba2ebf Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Thu, 25 Nov 2021 01:10:04 -0500 Subject: [PATCH 05/18] ImportEtherpad: Avoid false positives when checking apool --- src/node/utils/ImportEtherpad.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/node/utils/ImportEtherpad.js b/src/node/utils/ImportEtherpad.js index 224424a43..714e1deb8 100644 --- a/src/node/utils/ImportEtherpad.js +++ b/src/node/utils/ImportEtherpad.js @@ -54,8 +54,7 @@ exports.setPadRaw = async (padId, r) => { } value.padIDs = {[padId]: 1}; } else { - // is this an attribute we support or not? If not, tell the admin - if (value.pool) { + if (prefix === 'pad' && keyParts.length === 2 && value.pool) { for (const attrib of Object.keys(value.pool.numToAttrib)) { const attribName = value.pool.numToAttrib[attrib][0]; if (!supportedElems.has(attribName)) unsupportedElements.add(attribName); From 00fc7c8e86802f6e624c98fc826f600e14e12155 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Thu, 25 Nov 2021 02:05:09 -0500 Subject: [PATCH 06/18] ImportEtherpad: Reject unknown DB records --- src/node/utils/ImportEtherpad.js | 11 ++++++----- src/tests/backend/specs/ImportEtherpad.js | 10 ++++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/node/utils/ImportEtherpad.js b/src/node/utils/ImportEtherpad.js index 714e1deb8..59a407e9d 100644 --- a/src/node/utils/ImportEtherpad.js +++ b/src/node/utils/ImportEtherpad.js @@ -53,17 +53,18 @@ exports.setPadRaw = async (padId, r) => { return; } value.padIDs = {[padId]: 1}; - } else { + } else if (padKeyPrefixes.includes(prefix)) { if (prefix === 'pad' && keyParts.length === 2 && value.pool) { for (const attrib of Object.keys(value.pool.numToAttrib)) { const attribName = value.pool.numToAttrib[attrib][0]; if (!supportedElems.has(attribName)) unsupportedElements.add(attribName); } } - if (padKeyPrefixes.includes(prefix)) { - keyParts[1] = padId; - key = keyParts.join(':'); - } + keyParts[1] = padId; + key = keyParts.join(':'); + } else { + logger.warn(`(pad ${padId}) Ignoring record with unsupported key: ${key}`); + return; } await db.set(key, value); })); diff --git a/src/tests/backend/specs/ImportEtherpad.js b/src/tests/backend/specs/ImportEtherpad.js index c32cdb858..a339e9b4d 100644 --- a/src/tests/backend/specs/ImportEtherpad.js +++ b/src/tests/backend/specs/ImportEtherpad.js @@ -2,6 +2,7 @@ const assert = require('assert').strict; const authorManager = require('../../../node/db/AuthorManager'); +const db = require('../../../node/db/DB'); const importEtherpad = require('../../../node/utils/ImportEtherpad'); const padManager = require('../../../node/db/PadManager'); const {randomString} = require('../../../static/js/pad_utils'); @@ -52,6 +53,15 @@ describe(__filename, function () { assert(!await padManager.doesPadExist(padId)); }); + it('unknown db records are ignored', async function () { + const badKey = `maliciousDbKey${randomString(10)}`; + await importEtherpad.setPadRaw(padId, JSON.stringify({ + [badKey]: 'value', + ...makeExport(makeAuthorId()), + })); + assert(await db.get(badKey) == null); + }); + describe('author pad IDs', function () { let existingAuthorId; let newAuthorId; From 33778281b90ce9af5cb73b9acabed6fd393e6d1b Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Thu, 25 Nov 2021 18:45:46 -0500 Subject: [PATCH 07/18] ImportEtherpad: Simplify attribute key iteration --- src/node/utils/ImportEtherpad.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/node/utils/ImportEtherpad.js b/src/node/utils/ImportEtherpad.js index 59a407e9d..cce4f5145 100644 --- a/src/node/utils/ImportEtherpad.js +++ b/src/node/utils/ImportEtherpad.js @@ -55,9 +55,8 @@ exports.setPadRaw = async (padId, r) => { value.padIDs = {[padId]: 1}; } else if (padKeyPrefixes.includes(prefix)) { if (prefix === 'pad' && keyParts.length === 2 && value.pool) { - for (const attrib of Object.keys(value.pool.numToAttrib)) { - const attribName = value.pool.numToAttrib[attrib][0]; - if (!supportedElems.has(attribName)) unsupportedElements.add(attribName); + for (const [k] of Object.values(value.pool.numToAttrib)) { + if (!supportedElems.has(k)) unsupportedElements.add(k); } } keyParts[1] = padId; From a2e77a71288ea19f851d82eff9f079de0ba6d538 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Thu, 25 Nov 2021 18:14:38 -0500 Subject: [PATCH 08/18] ImportEtherpad: Enforce single-pad records --- src/node/utils/ImportEtherpad.js | 14 +++++++++ src/tests/backend/specs/ImportEtherpad.js | 38 +++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/src/node/utils/ImportEtherpad.js b/src/node/utils/ImportEtherpad.js index cce4f5145..cfa6a2643 100644 --- a/src/node/utils/ImportEtherpad.js +++ b/src/node/utils/ImportEtherpad.js @@ -41,6 +41,12 @@ exports.setPadRaw = async (padId, r) => { 'pad', ]; + let originalPadId = null; + const checkOriginalPadId = (padId) => { + if (originalPadId == null) originalPadId = padId; + if (originalPadId !== padId) throw new Error('unexpected pad ID in record'); + }; + await Promise.all(Object.entries(records).map(async ([key, value]) => { if (!value) { return; @@ -48,12 +54,20 @@ exports.setPadRaw = async (padId, r) => { const keyParts = key.split(':'); const [prefix, id] = keyParts; if (prefix === 'globalAuthor' && keyParts.length === 2) { + // In the database, the padIDs subkey is an object (which is used as a set) that records every + // pad the author has worked on. When exported, that object becomes a single string containing + // the exported pad's ID. + if (typeof value.padIDs !== 'string') { + throw new TypeError('globalAuthor padIDs subkey is not a string'); + } + checkOriginalPadId(value.padIDs); if (await authorManager.doesAuthorExist(id)) { await authorManager.addPad(id, padId); return; } value.padIDs = {[padId]: 1}; } else if (padKeyPrefixes.includes(prefix)) { + checkOriginalPadId(id); if (prefix === 'pad' && keyParts.length === 2 && value.pool) { for (const [k] of Object.values(value.pool.numToAttrib)) { if (!supportedElems.has(k)) unsupportedElements.add(k); diff --git a/src/tests/backend/specs/ImportEtherpad.js b/src/tests/backend/specs/ImportEtherpad.js index a339e9b4d..7906c3afb 100644 --- a/src/tests/backend/specs/ImportEtherpad.js +++ b/src/tests/backend/specs/ImportEtherpad.js @@ -119,4 +119,42 @@ describe(__filename, function () { assert.deepEqual((await authorManager.listPadsOfAuthor(newAuthorId)).padIDs, [padId]); }); }); + + describe('enforces consistent pad ID', function () { + it('pad record has different pad ID', async function () { + const data = makeExport(makeAuthorId()); + data['pad:differentPadId'] = data['pad:testing']; + delete data['pad:testing']; + assert.rejects(importEtherpad.setPadRaw(padId, JSON.stringify(data)), /unexpected pad ID/); + }); + + it('globalAuthor record has different pad ID', async function () { + const authorId = makeAuthorId(); + const data = makeExport(authorId); + data[`globalAuthor:${authorId}`].padIDs = 'differentPadId'; + assert.rejects(importEtherpad.setPadRaw(padId, JSON.stringify(data)), /unexpected pad ID/); + }); + + it('pad rev record has different pad ID', async function () { + const data = makeExport(makeAuthorId()); + data['pad:differentPadId:revs:0'] = data['pad:testing:revs:0']; + delete data['pad:testing:revs:0']; + assert.rejects(importEtherpad.setPadRaw(padId, JSON.stringify(data)), /unexpected pad ID/); + }); + }); + + describe('order of records does not matter', function () { + for (const perm of [[0, 1, 2], [0, 2, 1], [1, 0, 2], [1, 2, 0], [2, 0, 1], [2, 1, 0]]) { + it(JSON.stringify(perm), async function () { + const authorId = makeAuthorId(); + const records = Object.entries(makeExport(authorId)); + assert.equal(records.length, 3); + await importEtherpad.setPadRaw( + padId, JSON.stringify(Object.fromEntries(perm.map((i) => records[i])))); + assert.deepEqual((await authorManager.listPadsOfAuthor(authorId)).padIDs, [padId]); + const pad = await padManager.getPad(padId); + assert.equal(pad.text(), 'foo\n'); + }); + } + }); }); From 23f8a12922a019e74b464016c167bed6efdc8f8f Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Fri, 26 Nov 2021 01:35:02 -0500 Subject: [PATCH 09/18] ImportEtherpad: Don't make any changes if data is bad --- src/node/utils/ImportEtherpad.js | 14 ++++++++++++-- src/tests/backend/specs/ImportEtherpad.js | 10 ++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/node/utils/ImportEtherpad.js b/src/node/utils/ImportEtherpad.js index cfa6a2643..bc54e9ca1 100644 --- a/src/node/utils/ImportEtherpad.js +++ b/src/node/utils/ImportEtherpad.js @@ -47,6 +47,11 @@ exports.setPadRaw = async (padId, r) => { if (originalPadId !== padId) throw new Error('unexpected pad ID in record'); }; + // First validate and transform values. Do not commit any records to the database yet in case + // there is a problem with the data. + + const dbRecords = new Map(); + const existingAuthors = new Set(); await Promise.all(Object.entries(records).map(async ([key, value]) => { if (!value) { return; @@ -62,7 +67,7 @@ exports.setPadRaw = async (padId, r) => { } checkOriginalPadId(value.padIDs); if (await authorManager.doesAuthorExist(id)) { - await authorManager.addPad(id, padId); + existingAuthors.add(id); return; } value.padIDs = {[padId]: 1}; @@ -79,11 +84,16 @@ exports.setPadRaw = async (padId, r) => { logger.warn(`(pad ${padId}) Ignoring record with unsupported key: ${key}`); return; } - await db.set(key, value); + dbRecords.set(key, value); })); if (unsupportedElements.size) { logger.warn('Ignoring unsupported elements (you might want to install a plugin): ' + `${[...unsupportedElements].join(', ')}`); } + + await Promise.all([ + ...[...dbRecords].map(async ([k, v]) => await db.set(k, v)), + ...[...existingAuthors].map(async (authorId) => await authorManager.addPad(authorId, padId)), + ]); }; diff --git a/src/tests/backend/specs/ImportEtherpad.js b/src/tests/backend/specs/ImportEtherpad.js index 7906c3afb..6dfd11112 100644 --- a/src/tests/backend/specs/ImportEtherpad.js +++ b/src/tests/backend/specs/ImportEtherpad.js @@ -62,6 +62,16 @@ describe(__filename, function () { assert(await db.get(badKey) == null); }); + it('changes are all or nothing', async function () { + const authorId = makeAuthorId(); + const data = makeExport(authorId); + data['pad:differentPadId:revs:0'] = data['pad:testing:revs:0']; + delete data['pad:testing:revs:0']; + assert.rejects(importEtherpad.setPadRaw(padId, JSON.stringify(data)), /unexpected pad ID/); + assert(!await authorManager.doesAuthorExist(authorId)); + assert(!await padManager.doesPadExist(padId)); + }); + describe('author pad IDs', function () { let existingAuthorId; let newAuthorId; From ad78b2411345831f8043fe094fe7da6737940899 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Thu, 25 Nov 2021 01:13:31 -0500 Subject: [PATCH 10/18] ImportEtherpad: Warn about unsupported attrib at encounter --- src/node/utils/ImportEtherpad.js | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/node/utils/ImportEtherpad.js b/src/node/utils/ImportEtherpad.js index bc54e9ca1..de74fc41c 100644 --- a/src/node/utils/ImportEtherpad.js +++ b/src/node/utils/ImportEtherpad.js @@ -32,8 +32,6 @@ exports.setPadRaw = async (padId, r) => { supportedElems.add(element); }); - const unsupportedElements = new Set(); - // DB key prefixes for pad records. Each key is expected to have the form `${prefix}:${padId}` or // `${prefix}:${padId}:${otherstuff}`. const padKeyPrefixes = [ @@ -74,9 +72,14 @@ exports.setPadRaw = async (padId, r) => { } else if (padKeyPrefixes.includes(prefix)) { checkOriginalPadId(id); if (prefix === 'pad' && keyParts.length === 2 && value.pool) { + const unsupportedElements = new Set(); for (const [k] of Object.values(value.pool.numToAttrib)) { if (!supportedElems.has(k)) unsupportedElements.add(k); } + if (unsupportedElements.size) { + logger.warn(`(pad ${padId}) unsupported attributes (try installing a plugin): ` + + `${[...unsupportedElements].join(', ')}`); + } } keyParts[1] = padId; key = keyParts.join(':'); @@ -87,11 +90,6 @@ exports.setPadRaw = async (padId, r) => { dbRecords.set(key, value); })); - if (unsupportedElements.size) { - logger.warn('Ignoring unsupported elements (you might want to install a plugin): ' + - `${[...unsupportedElements].join(', ')}`); - } - await Promise.all([ ...[...dbRecords].map(async ([k, v]) => await db.set(k, v)), ...[...existingAuthors].map(async (authorId) => await authorManager.addPad(authorId, padId)), From 2608a81654a83a278cc2f3197de02506f5187700 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sat, 27 Nov 2021 02:35:58 -0500 Subject: [PATCH 11/18] Changeset: Stricter validation checks --- src/static/js/Changeset.js | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/static/js/Changeset.js b/src/static/js/Changeset.js index a335f7ccb..c16a3a75c 100644 --- a/src/static/js/Changeset.js +++ b/src/static/js/Changeset.js @@ -296,7 +296,6 @@ exports.checkRep = (cs) => { const assem = exports.smartOpAssembler(); let oldPos = 0; let calcNewLen = 0; - let numInserted = 0; const iter = exports.opIterator(ops); while (iter.hasNext()) { const o = iter.next(); @@ -311,25 +310,29 @@ exports.checkRep = (cs) => { break; case '+': { + assert(charBank.length >= o.chars, 'Invalid changeset: not enough chars in charBank'); + const chars = charBank.slice(0, o.chars); + const nlines = (chars.match(/\n/g) || []).length; + assert(nlines === o.lines, + 'Invalid changeset: number of newlines in insert op does not match the charBank'); + assert(o.lines === 0 || chars.endsWith('\n'), + 'Invalid changeset: multiline insert op does not end with a newline'); + charBank = charBank.slice(o.chars); calcNewLen += o.chars; - numInserted += o.chars; assert(calcNewLen <= newLen, `${calcNewLen} > ${newLen} in ${cs}`); break; } + default: + assert(false, `Invalid changeset: Unknown opcode: ${JSON.stringify(o.opcode)}`); } assem.append(o); } - calcNewLen += oldLen - oldPos; - charBank = charBank.substring(0, numInserted); - while (charBank.length < numInserted) { - charBank += '?'; - } - + assert(calcNewLen === newLen, 'Invalid changeset: claimed length does not match actual length'); + assert(charBank === '', 'Invalid changeset: excess characters in the charBank'); assem.endDocument(); - const normalized = exports.pack(oldLen, calcNewLen, assem.toString(), charBank); - assert(normalized === cs, 'Invalid changeset (checkRep failed)'); - + const normalized = exports.pack(oldLen, calcNewLen, assem.toString(), unpacked.charBank); + assert(normalized === cs, 'Invalid changeset: not in canonical form'); return cs; }; @@ -998,9 +1001,7 @@ const applyZip = (in1, in2, func) => { exports.unpack = (cs) => { const headerRegex = /Z:([0-9a-z]+)([><])([0-9a-z]+)|/; const headerMatch = headerRegex.exec(cs); - if ((!headerMatch) || (!headerMatch[0])) { - error(`Not a exports: ${cs}`); - } + if ((!headerMatch) || (!headerMatch[0])) error(`Not a changeset: ${cs}`); const oldLen = exports.parseNum(headerMatch[1]); const changeSign = (headerMatch[2] === '>') ? 1 : -1; const changeMag = exports.parseNum(headerMatch[3]); From 7c870f8a58565f95d58511571d1e16a01d03683f Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sat, 27 Nov 2021 03:37:34 -0500 Subject: [PATCH 12/18] Pad: Add strict validation checks --- src/bin/checkAllPads.js | 79 +++------------------ src/bin/checkPad.js | 68 +----------------- src/bin/checkPadDeltas.js | 103 --------------------------- src/node/db/Pad.js | 125 +++++++++++++++++++++++++++++++++ src/static/js/AttributePool.js | 29 ++++++++ 5 files changed, 166 insertions(+), 238 deletions(-) delete mode 100644 src/bin/checkPadDeltas.js diff --git a/src/bin/checkAllPads.js b/src/bin/checkAllPads.js index d15e5ec5b..d2a5f837c 100644 --- a/src/bin/checkAllPads.js +++ b/src/bin/checkAllPads.js @@ -10,79 +10,18 @@ process.on('unhandledRejection', (err) => { throw err; }); if (process.argv.length !== 2) throw new Error('Use: node src/bin/checkAllPads.js'); (async () => { - // initialize the database - require('../node/utils/Settings'); const db = require('../node/db/DB'); await db.init(); - - // load modules - const Changeset = require('../static/js/Changeset'); const padManager = require('../node/db/PadManager'); - - let revTestedCount = 0; - - // get all pads - const res = await padManager.listAllPads(); - for (const padId of res.padIDs) { + await Promise.all((await padManager.listAllPads()).padIDs.map(async (padId) => { const pad = await padManager.getPad(padId); - - // check if the pad has a pool - if (pad.pool == null) { - console.error(`[${pad.id}] Missing attribute pool`); - continue; + try { + await pad.check(); + } catch (err) { + console.error(`Error in pad ${padId}: ${err.stack || err}`); + return; } - // create an array with key kevisions - // key revisions always save the full pad atext - const head = pad.getHeadRevisionNumber(); - const keyRevisions = []; - for (let rev = 0; rev < head; rev += 100) { - keyRevisions.push(rev); - } - - // run through all key revisions - for (const keyRev of keyRevisions) { - // create an array of revisions we need till the next keyRevision or the End - const revisionsNeeded = []; - for (let rev = keyRev; rev <= keyRev + 100 && rev <= head; rev++) { - revisionsNeeded.push(rev); - } - - // this array will hold all revision changesets - const revisions = []; - - // run through all needed revisions and get them from the database - for (const revNum of revisionsNeeded) { - const revision = await db.get(`pad:${pad.id}:revs:${revNum}`); - revisions[revNum] = revision; - } - - // check if the revision exists - if (revisions[keyRev] == null) { - console.error(`[${pad.id}] Missing revision ${keyRev}`); - continue; - } - - // check if there is a atext in the keyRevisions - let {meta: {atext} = {}} = revisions[keyRev]; - if (atext == null) { - console.error(`[${pad.id}] Missing atext in revision ${keyRev}`); - continue; - } - - const apool = pad.pool; - for (let rev = keyRev + 1; rev <= keyRev + 100 && rev <= head; rev++) { - try { - const cs = revisions[rev].changeset; - atext = Changeset.applyToAText(cs, atext, apool); - revTestedCount++; - } catch (e) { - console.error(`[${pad.id}] Bad changeset at revision ${rev} - ${e.message}`); - } - } - } - } - if (revTestedCount === 0) { - throw new Error('No revisions tested'); - } - console.log(`Finished: Tested ${revTestedCount} revisions`); + console.log(`Pad ${padId}: OK`); + })); + console.log('Finished.'); })(); diff --git a/src/bin/checkPad.js b/src/bin/checkPad.js index 5b17fa31a..6aa4b3034 100644 --- a/src/bin/checkPad.js +++ b/src/bin/checkPad.js @@ -8,75 +8,13 @@ process.on('unhandledRejection', (err) => { throw err; }); if (process.argv.length !== 3) throw new Error('Use: node src/bin/checkPad.js $PADID'); - -// get the padID const padId = process.argv[2]; -let checkRevisionCount = 0; - (async () => { - // initialize database - require('../node/utils/Settings'); const db = require('../node/db/DB'); await db.init(); - - // load modules - const Changeset = require('../static/js/Changeset'); const padManager = require('../node/db/PadManager'); - - const exists = await padManager.doesPadExists(padId); - if (!exists) throw new Error('Pad does not exist'); - - // get the pad + if (!await padManager.doesPadExists(padId)) throw new Error('Pad does not exist'); const pad = await padManager.getPad(padId); - - // create an array with key revisions - // key revisions always save the full pad atext - const head = pad.getHeadRevisionNumber(); - const keyRevisions = []; - for (let rev = 0; rev < head; rev += 100) { - keyRevisions.push(rev); - } - - // run through all key revisions - for (let keyRev of keyRevisions) { - keyRev = parseInt(keyRev); - // create an array of revisions we need till the next keyRevision or the End - const revisionsNeeded = []; - for (let rev = keyRev; rev <= keyRev + 100 && rev <= head; rev++) { - revisionsNeeded.push(rev); - } - - // this array will hold all revision changesets - const revisions = []; - - // run through all needed revisions and get them from the database - for (const revNum of revisionsNeeded) { - const revision = await db.get(`pad:${padId}:revs:${revNum}`); - revisions[revNum] = revision; - } - - // check if the pad has a pool - if (pad.pool == null) throw new Error('Attribute pool is missing'); - - // check if there is an atext in the keyRevisions - let {meta: {atext} = {}} = revisions[keyRev] || {}; - if (atext == null) { - console.error(`No atext in key revision ${keyRev}`); - continue; - } - - const apool = pad.pool; - - for (let rev = keyRev + 1; rev <= keyRev + 100 && rev <= head; rev++) { - checkRevisionCount++; - try { - const cs = revisions[rev].changeset; - atext = Changeset.applyToAText(cs, atext, apool); - } catch (e) { - console.error(`Bad changeset at revision ${rev} - ${e.message}`); - continue; - } - } - console.log(`Finished: Checked ${checkRevisionCount} revisions`); - } + await pad.check(); + console.log('Finished.'); })(); diff --git a/src/bin/checkPadDeltas.js b/src/bin/checkPadDeltas.js deleted file mode 100644 index 852c68332..000000000 --- a/src/bin/checkPadDeltas.js +++ /dev/null @@ -1,103 +0,0 @@ -'use strict'; -/* - * This is a debug tool. It checks all revisions for data corruption - */ - -// As of v14, Node.js does not exit when there is an unhandled Promise rejection. Convert an -// unhandled rejection into an uncaught exception, which does cause Node.js to exit. -process.on('unhandledRejection', (err) => { throw err; }); - -if (process.argv.length !== 3) throw new Error('Use: node src/bin/checkPadDeltas.js $PADID'); - -// get the padID -const padId = process.argv[2]; - -const expect = require('../tests/frontend/lib/expect'); -const diff = require('diff'); - -(async () => { - // initialize database - require('../node/utils/Settings'); - const db = require('../node/db/DB'); - await db.init(); - - // load modules - const Changeset = require('../static/js/Changeset'); - const padManager = require('../node/db/PadManager'); - - const exists = await padManager.doesPadExists(padId); - if (!exists) throw new Error('Pad does not exist'); - - // get the pad - const pad = await padManager.getPad(padId); - - // create an array with key revisions - // key revisions always save the full pad atext - const head = pad.getHeadRevisionNumber(); - const keyRevisions = []; - for (let i = 0; i < head; i += 100) { - keyRevisions.push(i); - } - - // create an array with all revisions - const revisions = []; - for (let i = 0; i <= head; i++) { - revisions.push(i); - } - - let atext = Changeset.makeAText('\n'); - - // run through all revisions - for (const revNum of revisions) { - // console.log('Fetching', revNum) - const revision = await db.get(`pad:${padId}:revs:${revNum}`); - // check if there is a atext in the keyRevisions - const {meta: {atext: revAtext} = {}} = revision || {}; - if (~keyRevisions.indexOf(revNum) && revAtext == null) { - console.error(`No atext in key revision ${revNum}`); - continue; - } - - // try glue everything together - try { - // console.log("check revision ", revNum); - const cs = revision.changeset; - atext = Changeset.applyToAText(cs, atext, pad.pool); - } catch (e) { - console.error(`Bad changeset at revision ${revNum} - ${e.message}`); - continue; - } - - // check things are working properly - if (~keyRevisions.indexOf(revNum)) { - try { - expect(revision.meta.atext.text).to.eql(atext.text); - expect(revision.meta.atext.attribs).to.eql(atext.attribs); - } catch (e) { - console.error(`Atext in key revision ${revNum} doesn't match computed one.`); - console.log(diff.diffChars(atext.text, revision.meta.atext.text).map((op) => { - if (!op.added && !op.removed) op.value = op.value.length; - return op; - })); - // console.error(e) - // console.log('KeyRev. :', revision.meta.atext) - // console.log('Computed:', atext) - continue; - } - } - } - - // check final text is right... - if (pad.atext.text === atext.text) { - console.log('ok'); - } else { - console.error('Pad AText doesn\'t match computed one! (Computed ', - atext.text.length, ', db', pad.atext.text.length, ')'); - console.log(diff.diffChars(atext.text, pad.atext.text).map((op) => { - if (!op.added && !op.removed) { - op.value = op.value.length; - return op; - } - })); - } -})(); diff --git a/src/node/db/Pad.js b/src/node/db/Pad.js index 677e9c014..08e980958 100644 --- a/src/node/db/Pad.js +++ b/src/node/db/Pad.js @@ -7,6 +7,7 @@ const Changeset = require('../../static/js/Changeset'); const ChatMessage = require('../../static/js/ChatMessage'); const AttributePool = require('../../static/js/AttributePool'); +const assert = require('assert').strict; const db = require('./DB'); const settings = require('../utils/Settings'); const authorManager = require('./AuthorManager'); @@ -602,3 +603,127 @@ Pad.prototype.addSavedRevision = async function (revNum, savedById, label) { Pad.prototype.getSavedRevisions = function () { return this.savedRevisions; }; + +/** + * Asserts that all pad data is consistent. Throws if inconsistent. + */ +Pad.prototype.check = async function () { + assert(this.id != null); + assert.equal(typeof this.id, 'string'); + + const head = this.getHeadRevisionNumber(); + assert(Number.isInteger(head)); + assert(head >= -1); + + const savedRevisionsList = this.getSavedRevisionsList(); + assert(Array.isArray(savedRevisionsList)); + assert.equal(this.getSavedRevisionsNumber(), savedRevisionsList.length); + let prevSavedRev = null; + for (const rev of savedRevisionsList) { + assert(Number.isInteger(rev)); + assert(rev >= 0); + assert(rev <= head); + assert(prevSavedRev == null || rev > prevSavedRev); + prevSavedRev = rev; + } + const savedRevisions = this.getSavedRevisions(); + assert(Array.isArray(savedRevisions)); + assert.equal(savedRevisions.length, savedRevisionsList.length); + const savedRevisionsIds = new Set(); + for (const savedRev of savedRevisions) { + assert(savedRev != null); + assert.equal(typeof savedRev, 'object'); + assert(savedRevisionsList.includes(savedRev.revNum)); + assert(savedRev.id != null); + assert.equal(typeof savedRev.id, 'string'); + assert(!savedRevisionsIds.has(savedRev.id)); + savedRevisionsIds.add(savedRev.id); + } + + const pool = this.apool(); + assert(pool instanceof AttributePool); + await pool.check(); + + const decodeAttribString = function* (str) { + const re = /\*([0-9a-z]+)|./gy; + let match; + while ((match = re.exec(str)) != null) { + const [m, n] = match; + if (n == null) throw new Error(`invalid character in attribute string: ${m}`); + yield Number.parseInt(n, 36); + } + }; + + const authors = new Set(); + pool.eachAttrib((k, v) => { + if (k === 'author' && v) authors.add(v); + }); + let atext = Changeset.makeAText('\n'); + let r; + try { + for (r = 0; r <= head; ++r) { + const [changeset, author, timestamp] = await Promise.all([ + this.getRevisionChangeset(r), + this.getRevisionAuthor(r), + this.getRevisionDate(r), + ]); + assert(author != null); + assert.equal(typeof author, 'string'); + if (author) authors.add(author); + assert(timestamp != null); + assert.equal(typeof timestamp, 'number'); + assert(timestamp > 0); + assert(changeset != null); + assert.equal(typeof changeset, 'string'); + Changeset.checkRep(changeset); + const unpacked = Changeset.unpack(changeset); + let text = atext.text; + const iter = Changeset.opIterator(unpacked.ops); + while (iter.hasNext()) { + const op = iter.next(); + if (['=', '-'].includes(op.opcode)) { + assert(text.length >= op.chars); + const consumed = text.slice(0, op.chars); + const nlines = (consumed.match(/\n/g) || []).length; + assert.equal(op.lines, nlines); + if (op.lines > 0) assert(consumed.endsWith('\n')); + text = text.slice(op.chars); + } + let prevK = null; + for (const n of decodeAttribString(op.attribs)) { + const attrib = pool.getAttrib(n); + assert(attrib != null); + const [k] = attrib; + assert(prevK == null || prevK < k); + prevK = k; + } + } + atext = Changeset.applyToAText(changeset, atext, pool); + assert.deepEqual(await this.getInternalRevisionAText(r), atext); + } + } catch (err) { + const pfx = `(pad ${this.id} revision ${r}) `; + if (err.stack) err.stack = pfx + err.stack; + err.message = pfx + err.message; + throw err; + } + assert.equal(this.text(), atext.text); + assert.deepEqual(this.atext, atext); + assert.deepEqual(this.getAllAuthors().sort(), [...authors].sort()); + + assert(Number.isInteger(this.chatHead)); + assert(this.chatHead >= -1); + let c; + try { + for (c = 0; c <= this.chatHead; ++c) { + const msg = await this.getChatMessage(c); + assert(msg != null); + assert(msg instanceof ChatMessage); + } + } catch (err) { + const pfx = `(pad ${this.id} chat message ${c}) `; + if (err.stack) err.stack = pfx + err.stack; + err.message = pfx + err.message; + throw err; + } +}; diff --git a/src/static/js/AttributePool.js b/src/static/js/AttributePool.js index d8e587654..3479e03d1 100644 --- a/src/static/js/AttributePool.js +++ b/src/static/js/AttributePool.js @@ -188,6 +188,35 @@ class AttributePool { } return this; } + + /** + * Asserts that the data in the pool is consistent. Throws if inconsistent. + */ + check() { + if (!Number.isInteger(this.nextNum)) throw new Error('nextNum property is not an integer'); + if (this.nextNum < 0) throw new Error('nextNum property is negative'); + for (const prop of ['numToAttrib', 'attribToNum']) { + const obj = this[prop]; + if (obj == null) throw new Error(`${prop} property is null`); + if (typeof obj !== 'object') throw new TypeError(`${prop} property is not an object`); + const keys = Object.keys(obj); + if (keys.length !== this.nextNum) { + throw new Error(`${prop} size mismatch (want ${this.nextNum}, got ${keys.length})`); + } + } + for (let i = 0; i < this.nextNum; ++i) { + const attr = this.numToAttrib[`${i}`]; + if (!Array.isArray(attr)) throw new TypeError(`attrib ${i} is not an array`); + if (attr.length !== 2) throw new Error(`attrib ${i} is not an array of length 2`); + const [k, v] = attr; + if (k == null) throw new TypeError(`attrib ${i} key is null`); + if (typeof k !== 'string') throw new TypeError(`attrib ${i} key is not a string`); + if (v == null) throw new TypeError(`attrib ${i} value is null`); + if (typeof v !== 'string') throw new TypeError(`attrib ${i} value is not a string`); + const attrStr = String(attr); + if (this.attribToNum[attrStr] !== i) throw new Error(`attribToNum for ${attrStr} !== ${i}`); + } + } } module.exports = AttributePool; From f7d4abdabef5ede1a620cd15355bdd640bf5d163 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sat, 27 Nov 2021 17:42:58 -0500 Subject: [PATCH 13/18] Pad: Inject the database dependency --- src/node/db/Pad.js | 56 +++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/src/node/db/Pad.js b/src/node/db/Pad.js index 08e980958..1307bcb89 100644 --- a/src/node/db/Pad.js +++ b/src/node/db/Pad.js @@ -21,7 +21,7 @@ const hooks = require('../../static/js/pluginfw/hooks'); const promises = require('../utils/promises'); // serialization/deserialization attributes -const attributeBlackList = ['id']; +const attributeBlackList = ['_db', 'id']; const jsonableList = ['pool']; /** @@ -34,7 +34,15 @@ exports.cleanText = (txt) => txt.replace(/\r\n/g, '\n') .replace(/\t/g, ' ') .replace(/\xa0/g, ' '); -const Pad = function (id) { +/** + * @param [database] - Database object to access this pad's records (and only this pad's records -- + * the shared global Etherpad database object is still used for all other pad accesses, such as + * copying the pad). Defaults to the shared global Etherpad database object. This parameter can + * be used to shard pad storage across multiple database backends, to put each pad in its own + * database table, or to validate imported pad data before it is written to the database. + */ +const Pad = function (id, database = db) { + this._db = database; this.atext = Changeset.makeAText('\n'); this.pool = new AttributePool(); this.head = -1; @@ -95,7 +103,7 @@ Pad.prototype.appendRevision = async function (aChangeset, author) { } const p = [ - db.set(`pad:${this.id}:revs:${newRev}`, newRevData), + this._db.set(`pad:${this.id}:revs:${newRev}`, newRevData), this.saveToDatabase(), ]; @@ -128,25 +136,25 @@ Pad.prototype.saveToDatabase = async function () { } } - await db.set(`pad:${this.id}`, dbObject); + await this._db.set(`pad:${this.id}`, dbObject); }; // get time of last edit (changeset application) -Pad.prototype.getLastEdit = function () { +Pad.prototype.getLastEdit = async function () { const revNum = this.getHeadRevisionNumber(); - return db.getSub(`pad:${this.id}:revs:${revNum}`, ['meta', 'timestamp']); + return await this._db.getSub(`pad:${this.id}:revs:${revNum}`, ['meta', 'timestamp']); }; -Pad.prototype.getRevisionChangeset = function (revNum) { - return db.getSub(`pad:${this.id}:revs:${revNum}`, ['changeset']); +Pad.prototype.getRevisionChangeset = async function (revNum) { + return await this._db.getSub(`pad:${this.id}:revs:${revNum}`, ['changeset']); }; -Pad.prototype.getRevisionAuthor = function (revNum) { - return db.getSub(`pad:${this.id}:revs:${revNum}`, ['meta', 'author']); +Pad.prototype.getRevisionAuthor = async function (revNum) { + return await this._db.getSub(`pad:${this.id}:revs:${revNum}`, ['meta', 'author']); }; -Pad.prototype.getRevisionDate = function (revNum) { - return db.getSub(`pad:${this.id}:revs:${revNum}`, ['meta', 'timestamp']); +Pad.prototype.getRevisionDate = async function (revNum) { + return await this._db.getSub(`pad:${this.id}:revs:${revNum}`, ['meta', 'timestamp']); }; Pad.prototype.getAllAuthors = function () { @@ -173,7 +181,7 @@ Pad.prototype.getInternalRevisionAText = async function (targetRev) { // get all needed data out of the database // start to get the atext of the key revision - const p_atext = db.getSub(`pad:${this.id}:revs:${keyRev}`, ['meta', 'atext']); + const p_atext = this._db.getSub(`pad:${this.id}:revs:${keyRev}`, ['meta', 'atext']); // get all needed changesets const changesets = []; @@ -196,8 +204,8 @@ Pad.prototype.getInternalRevisionAText = async function (targetRev) { return atext; }; -Pad.prototype.getRevision = function (revNum) { - return db.get(`pad:${this.id}:revs:${revNum}`); +Pad.prototype.getRevision = async function (revNum) { + return await this._db.get(`pad:${this.id}:revs:${revNum}`); }; Pad.prototype.getAllAuthorColors = async function () { @@ -293,7 +301,7 @@ Pad.prototype.appendChatMessage = async function (msgOrText, authorId = null, ti // Don't save the display name in the database because the user can change it at any time. The // `displayName` property will be populated with the current value when the message is read from // the database. - db.set(`pad:${this.id}:chat:${this.chatHead}`, {...msg, displayName: undefined}), + this._db.set(`pad:${this.id}:chat:${this.chatHead}`, {...msg, displayName: undefined}), this.saveToDatabase(), ]); }; @@ -303,7 +311,7 @@ Pad.prototype.appendChatMessage = async function (msgOrText, authorId = null, ti * @returns {?ChatMessage} */ Pad.prototype.getChatMessage = async function (entryNum) { - const entry = await db.get(`pad:${this.id}:chat:${entryNum}`); + const entry = await this._db.get(`pad:${this.id}:chat:${entryNum}`); if (entry == null) return null; const message = ChatMessage.fromObject(entry); message.displayName = await authorManager.getAuthorName(message.authorId); @@ -340,7 +348,7 @@ Pad.prototype.init = async function (text) { } // try to load the pad - const value = await db.get(`pad:${this.id}`); + const value = await this._db.get(`pad:${this.id}`); // if this pad exists, load it if (value != null) { @@ -363,8 +371,6 @@ Pad.prototype.init = async function (text) { }; Pad.prototype.copy = async function (destinationID, force) { - const sourceID = this.id; - // Kick everyone from this pad. // This was commented due to https://github.com/ether/etherpad-lite/issues/3183. // Do we really need to kick everyone out? @@ -380,7 +386,7 @@ Pad.prototype.copy = async function (destinationID, force) { await this.removePadIfForceIsTrueAndAlreadyExist(destinationID, force); // copy the 'pad' entry - const pad = await db.get(`pad:${sourceID}`); + const pad = await this._db.get(`pad:${this.id}`); db.set(`pad:${destinationID}`, pad); // copy all relations in parallel @@ -389,7 +395,7 @@ Pad.prototype.copy = async function (destinationID, force) { // copy all chat messages const chatHead = this.chatHead; for (let i = 0; i <= chatHead; ++i) { - const p = db.get(`pad:${sourceID}:chat:${i}`) + const p = this._db.get(`pad:${this.id}:chat:${i}`) .then((chat) => db.set(`pad:${destinationID}:chat:${i}`, chat)); promises.push(p); } @@ -397,7 +403,7 @@ Pad.prototype.copy = async function (destinationID, force) { // copy all revisions const revHead = this.head; for (let i = 0; i <= revHead; ++i) { - const p = db.get(`pad:${sourceID}:revs:${i}`) + const p = this._db.get(`pad:${this.id}:revs:${i}`) .then((rev) => db.set(`pad:${destinationID}:revs:${i}`, rev)); promises.push(p); } @@ -554,12 +560,12 @@ Pad.prototype.remove = async function () { // delete all chat messages p.push(promises.timesLimit(this.chatHead + 1, 500, async (i) => { - await db.remove(`pad:${padID}:chat:${i}`, null); + await this._db.remove(`pad:${this.id}:chat:${i}`, null); })); // delete all revisions p.push(promises.timesLimit(this.head + 1, 500, async (i) => { - await db.remove(`pad:${padID}:revs:${i}`, null); + await this._db.remove(`pad:${this.id}:revs:${i}`, null); })); // remove pad from all authors who contributed From 885ff3bcde9e00ca6f278ca0584a1a2d58a79d86 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sun, 28 Nov 2021 19:27:46 -0500 Subject: [PATCH 14/18] Pad: Move `padLoad` hook invocation to `PadManager.js` This puts global state change logic with the rest of the global state management logic. This also makes it possible to create temporary Pad objects without triggering plugin actions. --- doc/api/hooks_server-side.md | 11 ++++++----- src/node/db/Pad.js | 2 -- src/node/db/PadManager.js | 2 ++ 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/doc/api/hooks_server-side.md b/doc/api/hooks_server-side.md index 5e8832fd4..50dfff0db 100644 --- a/doc/api/hooks_server-side.md +++ b/doc/api/hooks_server-side.md @@ -175,14 +175,15 @@ Things in context: This hook gets called when a new pad was created. -## padLoad -Called from: src/node/db/Pad.js +## `padLoad` -Things in context: +Called from: `src/node/db/PadManager.js` -1. pad - the pad instance +Called when a pad is loaded, including after new pad creation. -This hook gets called when a pad was loaded. If a new pad was created and loaded this event will be emitted too. +Context properties: + +* `pad`: The Pad object. ## padUpdate Called from: src/node/db/Pad.js diff --git a/src/node/db/Pad.js b/src/node/db/Pad.js index 1307bcb89..9e1ef0399 100644 --- a/src/node/db/Pad.js +++ b/src/node/db/Pad.js @@ -366,8 +366,6 @@ Pad.prototype.init = async function (text) { await this.appendRevision(firstChangeset, ''); } - - hooks.callAll('padLoad', {pad: this}); }; Pad.prototype.copy = async function (destinationID, force) { diff --git a/src/node/db/PadManager.js b/src/node/db/PadManager.js index 58434c8cd..40b27383e 100644 --- a/src/node/db/PadManager.js +++ b/src/node/db/PadManager.js @@ -22,6 +22,7 @@ const CustomError = require('../utils/customError'); const Pad = require('../db/Pad').Pad; const db = require('./DB'); +const hooks = require('../../static/js/pluginfw/hooks'); /** * A cache of all loaded Pads. @@ -141,6 +142,7 @@ exports.getPad = async (id, text) => { // initialize the pad await pad.init(text); + hooks.callAll('padLoad', {pad}); globalPads.set(id, pad); padList.addPad(id); From 19909eae53369581d5898faca045cb11648e3213 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sat, 27 Nov 2021 17:49:25 -0500 Subject: [PATCH 15/18] ImportEtherpad: Rigorously check imported data --- src/node/db/PadManager.js | 4 +- src/node/utils/ImportEtherpad.js | 16 +++ .../backend/specs/api/importexportGetPost.js | 112 ++++++++++++++++++ 3 files changed, 130 insertions(+), 2 deletions(-) diff --git a/src/node/db/PadManager.js b/src/node/db/PadManager.js index 40b27383e..4aebc1a86 100644 --- a/src/node/db/PadManager.js +++ b/src/node/db/PadManager.js @@ -20,7 +20,7 @@ */ const CustomError = require('../utils/customError'); -const Pad = require('../db/Pad').Pad; +const Pad = require('../db/Pad'); const db = require('./DB'); const hooks = require('../../static/js/pluginfw/hooks'); @@ -138,7 +138,7 @@ exports.getPad = async (id, text) => { } // try to load pad - pad = new Pad(id); + pad = new Pad.Pad(id); // initialize the pad await pad.init(text); diff --git a/src/node/utils/ImportEtherpad.js b/src/node/utils/ImportEtherpad.js index de74fc41c..761b35aa4 100644 --- a/src/node/utils/ImportEtherpad.js +++ b/src/node/utils/ImportEtherpad.js @@ -16,6 +16,7 @@ * limitations under the License. */ +const {Pad} = require('../db/Pad'); const authorManager = require('../db/AuthorManager'); const db = require('../db/DB'); const hooks = require('../../static/js/pluginfw/hooks'); @@ -90,6 +91,21 @@ exports.setPadRaw = async (padId, r) => { dbRecords.set(key, value); })); + const pad = new Pad(padId, { + // Only fetchers are needed to check the pad's integrity. + get: async (k) => dbRecords.get(k), + getSub: async (k, sub) => { + let v = dbRecords.get(k); + for (const sk of sub) { + if (v == null) return null; + v = v[sk]; + } + return v; + }, + }); + await pad.init(); + await pad.check(); + await Promise.all([ ...[...dbRecords].map(async ([k, v]) => await db.set(k, v)), ...[...existingAuthors].map(async (authorId) => await authorManager.addPad(authorId, padId)), diff --git a/src/tests/backend/specs/api/importexportGetPost.js b/src/tests/backend/specs/api/importexportGetPost.js index 584341cc0..98244b22b 100644 --- a/src/tests/backend/specs/api/importexportGetPost.js +++ b/src/tests/backend/specs/api/importexportGetPost.js @@ -315,6 +315,118 @@ describe(__filename, function () { }); }); + describe('malformed .etherpad files are rejected', function () { + const makeGoodExport = () => ({ + 'pad:testing': { + atext: { + text: 'foo\n', + attribs: '|1+4', + }, + pool: { + numToAttrib: { + 0: ['author', 'a.foo'], + }, + nextNum: 1, + }, + head: 0, + savedRevisions: [], + }, + 'globalAuthor:a.foo': { + colorId: '#000000', + name: 'author foo', + timestamp: 1598747784631, + padIDs: 'testing', + }, + 'pad:testing:revs:0': { + changeset: 'Z:1>3+3$foo', + meta: { + author: 'a.foo', + timestamp: 1597632398288, + pool: { + numToAttrib: {}, + nextNum: 0, + }, + atext: { + text: 'foo\n', + attribs: '|1+4', + }, + }, + }, + }); + + const importEtherpad = (records) => agent.post(`/p/${testPadId}/import`) + .attach('file', Buffer.from(JSON.stringify(records), 'utf8'), { + filename: '/test.etherpad', + contentType: 'application/etherpad', + }); + + before(async function () { + // makeGoodExport() is assumed to produce good .etherpad records. Verify that assumption so + // that a buggy makeGoodExport() doesn't cause checks to accidentally pass. + const records = makeGoodExport(); + await importEtherpad(records) + .expect(200) + .expect('Content-Type', /json/) + .expect((res) => assert.deepEqual(res.body, { + code: 0, + message: 'ok', + data: {directDatabaseAccess: true}, + })); + await agent.get(`/p/${testPadId}/export/txt`) + .expect(200) + .buffer(true).parse(superagent.parse.text) + .expect((res) => assert.match(res.text, /foo/)); + }); + + it('missing rev', async function () { + const records = makeGoodExport(); + delete records['pad:testing:revs:0']; + await importEtherpad(records).expect(500); + }); + + it('bad changeset', async function () { + const records = makeGoodExport(); + records['pad:testing:revs:0'].changeset = 'garbage'; + await importEtherpad(records).expect(500); + }); + + it('missing attrib in pool', async function () { + const records = makeGoodExport(); + records['pad:testing'].pool.nextNum++; + await importEtherpad(records).expect(500); + }); + + it('extra attrib in pool', async function () { + const records = makeGoodExport(); + const pool = records['pad:testing'].pool; + pool.numToAttrib[pool.nextNum] = ['key', 'value']; + await importEtherpad(records).expect(500); + }); + + it('changeset refers to non-existent attrib', async function () { + const records = makeGoodExport(); + records['pad:testing:revs:1'] = { + changeset: 'Z:4>4*1+4$asdf', + meta: { + author: 'a.foo', + timestamp: 1597632398288, + }, + }; + records['pad:testing'].head = 1; + records['pad:testing'].atext = { + text: 'asdffoo\n', + attribs: '*1+4|1+4', + }; + await importEtherpad(records).expect(500); + }); + + it('pad atext does not match', async function () { + const records = makeGoodExport(); + records['pad:testing'].atext.attribs = `*0${records['pad:testing'].atext.attribs}`; + await importEtherpad(records).expect(500); + }); + }); + describe('Import authorization checks', function () { let authorize; From 5b3575acf08c17bd332147383e9d34a5d5b508ba Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sat, 27 Nov 2021 18:04:26 -0500 Subject: [PATCH 16/18] ImportEtherpad: Use AttributePool to check attributes --- src/node/utils/ImportEtherpad.js | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/node/utils/ImportEtherpad.js b/src/node/utils/ImportEtherpad.js index 761b35aa4..e129a6995 100644 --- a/src/node/utils/ImportEtherpad.js +++ b/src/node/utils/ImportEtherpad.js @@ -16,6 +16,7 @@ * limitations under the License. */ +const AttributePool = require('../../static/js/AttributePool'); const {Pad} = require('../db/Pad'); const authorManager = require('../db/AuthorManager'); const db = require('../db/DB'); @@ -72,11 +73,12 @@ exports.setPadRaw = async (padId, r) => { value.padIDs = {[padId]: 1}; } else if (padKeyPrefixes.includes(prefix)) { checkOriginalPadId(id); - if (prefix === 'pad' && keyParts.length === 2 && value.pool) { + if (prefix === 'pad' && keyParts.length === 2) { + const pool = new AttributePool().fromJsonable(value.pool); const unsupportedElements = new Set(); - for (const [k] of Object.values(value.pool.numToAttrib)) { + pool.eachAttrib((k, v) => { if (!supportedElems.has(k)) unsupportedElements.add(k); - } + }); if (unsupportedElements.size) { logger.warn(`(pad ${padId}) unsupported attributes (try installing a plugin): ` + `${[...unsupportedElements].join(', ')}`); From 77bcb507b30e762e9375b0511b3763e0162aae53 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Fri, 26 Nov 2021 01:48:42 -0500 Subject: [PATCH 17/18] ImportEtherpad: Limit in-flight DB queries --- src/node/utils/ImportEtherpad.js | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/node/utils/ImportEtherpad.js b/src/node/utils/ImportEtherpad.js index e129a6995..60a3b8492 100644 --- a/src/node/utils/ImportEtherpad.js +++ b/src/node/utils/ImportEtherpad.js @@ -18,6 +18,7 @@ const AttributePool = require('../../static/js/AttributePool'); const {Pad} = require('../db/Pad'); +const async = require('async'); const authorManager = require('../db/AuthorManager'); const db = require('../db/DB'); const hooks = require('../../static/js/pluginfw/hooks'); @@ -47,12 +48,16 @@ exports.setPadRaw = async (padId, r) => { if (originalPadId !== padId) throw new Error('unexpected pad ID in record'); }; + // Limit the number of in-flight database queries so that the queries do not time out when + // importing really large files. + const q = async.queue(async (task) => await task(), 100); + // First validate and transform values. Do not commit any records to the database yet in case // there is a problem with the data. const dbRecords = new Map(); const existingAuthors = new Set(); - await Promise.all(Object.entries(records).map(async ([key, value]) => { + await Promise.all(Object.entries(records).map(([key, value]) => q.pushAsync(async () => { if (!value) { return; } @@ -91,7 +96,7 @@ exports.setPadRaw = async (padId, r) => { return; } dbRecords.set(key, value); - })); + }))); const pad = new Pad(padId, { // Only fetchers are needed to check the pad's integrity. @@ -109,7 +114,7 @@ exports.setPadRaw = async (padId, r) => { await pad.check(); await Promise.all([ - ...[...dbRecords].map(async ([k, v]) => await db.set(k, v)), - ...[...existingAuthors].map(async (authorId) => await authorManager.addPad(authorId, padId)), + ...[...dbRecords].map(([k, v]) => q.pushAsync(() => db.set(k, v))), + ...[...existingAuthors].map((a) => q.pushAsync(() => authorManager.addPad(a, padId))), ]); }; From 142a47cbbc40d93513dd9ef1efe35c3e0f9efda6 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sun, 28 Nov 2021 16:57:38 -0500 Subject: [PATCH 18/18] Release v1.8.16 --- CHANGELOG.md | 22 ++++++++++++++++++++++ src/package-lock.json | 2 +- src/package.json | 2 +- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 777571bdd..450f8e616 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,25 @@ +# 1.8.16 + +### Security fixes + +If you cannot upgrade to v1.8.16 for some reason, you are encouraged to try +cherry-picking the fixes to the version you are running: + +```shell +git cherry-pick b7065eb9a0ec..77bcb507b30e +``` + +* Maliciously crafted `.etherpad` files can no longer overwrite arbitrary + non-pad database records when imported. +* Imported `.etherpad` files are now subject to numerous consistency checks + before any records are written to the database. This should help avoid + denial-of-service attacks via imports of malformed `.etherpad` files. + +### Notable enhancements and fixes + +* Fixed several `.etherpad` import bugs. +* Improved support for large `.etherpad` imports. + # 1.8.15 ### Security fixes diff --git a/src/package-lock.json b/src/package-lock.json index bccb0f8c0..79539bf9c 100644 --- a/src/package-lock.json +++ b/src/package-lock.json @@ -1,6 +1,6 @@ { "name": "ep_etherpad-lite", - "version": "1.8.15", + "version": "1.8.16", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/src/package.json b/src/package.json index 90ac13b36..349810915 100644 --- a/src/package.json +++ b/src/package.json @@ -246,6 +246,6 @@ "test": "mocha --timeout 120000 --recursive tests/backend/specs ../node_modules/ep_*/static/tests/backend/specs", "test-container": "mocha --timeout 5000 tests/container/specs/api" }, - "version": "1.8.15", + "version": "1.8.16", "license": "Apache-2.0" }