From c816c20bc7fd1a9d165476c1a563b13db7371767 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sun, 24 Jan 2021 01:08:08 -0500 Subject: [PATCH] HTML import: Replace cheerio with jsdom to simplify contentcollector Cheerio provides jQuery-like objects but they wrap DOM Node-like objects that are not 100% API compatible with the DOM spec. Because of this, contentcollector, which is used in browsers and in Node.js during HTML import, has until now needed to support two different APIs. This commit modifies HTML import to use jsdom instead of cheerio and simplifies contentcollector. --- CHANGELOG.md | 19 + src/node/utils/ImportHtml.js | 9 +- src/package-lock.json | 451 +++++++++++++------- src/package.json | 2 +- src/static/js/contentcollector.js | 57 +-- src/tests/backend/specs/contentcollector.js | 7 +- 6 files changed, 330 insertions(+), 215 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 05d61e1ff..2604b73ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,22 @@ +# Next Release + +### Compatibility changes + +* For plugin authors: + * Etherpad now uses [jsdom](https://github.com/jsdom/jsdom) instead of + [cheerio](https://cheerio.js.org/) for processing HTML imports. There are + two consequences of this change: + * `require('ep_etherpad-lite/node_modules/cheerio')` no longer works. To + fix, your plugin should directly depend on `cheerio` and do + `require('cheerio')`. + * The `node` context argument passed to the `collectContentImage` hook is + now an + [`HTMLImageElement`](https://developer.mozilla.org/en-US/docs/Web/API/HTMLImageElement) + object rather than a Cheerio Node-like object, so the API is slightly + different. See + [citizenos/ep_image_upload#49](https://github.com/citizenos/ep_image_upload/pull/49) + for an example fix. 
+ # 1.8.14 ### Security fixes diff --git a/src/node/utils/ImportHtml.js b/src/node/utils/ImportHtml.js index 83160b54e..26e541fe1 100644 --- a/src/node/utils/ImportHtml.js +++ b/src/node/utils/ImportHtml.js @@ -18,7 +18,7 @@ const log4js = require('log4js'); const Changeset = require('../../static/js/Changeset'); const contentcollector = require('../../static/js/contentcollector'); -const cheerio = require('cheerio'); +const jsdom = require('jsdom'); const rehype = require('rehype'); const minifyWhitespace = require('rehype-minify-whitespace'); @@ -31,13 +31,12 @@ exports.setPadHTML = async (pad, html) => { html = String(output); }); - const $ = cheerio.load(html); + const {window: {document}} = new jsdom.JSDOM(html); // Appends a line break, used by Etherpad to ensure a caret is available // below the last line of an import - $('body').append('
<p></p>
'); + document.body.appendChild(document.createElement('p')); - const doc = $('body')[0]; apiLogger.debug('html:'); apiLogger.debug(html); @@ -46,7 +45,7 @@ exports.setPadHTML = async (pad, html) => { const cc = contentcollector.makeContentCollector(true, null, pad.pool); try { // we use a try here because if the HTML is bad it will blow up - cc.collectContent(doc); + cc.collectContent(document.body); } catch (e) { apiLogger.warn('HTML was not properly formed', e); diff --git a/src/package-lock.json b/src/package-lock.json index fbf782a5f..b3457e898 100644 --- a/src/package-lock.json +++ b/src/package-lock.json @@ -641,6 +641,11 @@ "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.3.tgz", "integrity": "sha512-FvUupuM3rlRsRtCN+fDudtmytGO6iHJuuRKS1Ss0pG5z8oX0diNEw94UEL7hgDbpN94rgaK5R7sWm6RrSkZuAQ==" }, + "abab": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/abab/-/abab-2.0.5.tgz", + "integrity": "sha512-9IK9EadsbHo6jLWIpxpR6pL0sazTXV6+SQv25ZB+F7Bj9mJNaOc4nCRabwd5M/JwmUa8idz6Eci6eKfJryPs6Q==" + }, "abbrev": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/abbrev/-/abbrev-1.1.1.tgz", @@ -667,8 +672,16 @@ "acorn": { "version": "7.4.1", "resolved": "https://registry.npmjs.org/acorn/-/acorn-7.4.1.tgz", - "integrity": "sha512-nQyp0o1/mNdbTO1PO6kHkwSrmgZ0MT/jCCpNiwbUjGoRN4dlBhqJtoQuCnEOKzgTVwg0ZWiCoQy6SxMebQVh8A==", - "dev": true + "integrity": "sha512-nQyp0o1/mNdbTO1PO6kHkwSrmgZ0MT/jCCpNiwbUjGoRN4dlBhqJtoQuCnEOKzgTVwg0ZWiCoQy6SxMebQVh8A==" + }, + "acorn-globals": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/acorn-globals/-/acorn-globals-6.0.0.tgz", + "integrity": "sha512-ZQl7LOWaF5ePqqcX4hLuv/bLXYQNfNWw2c0/yX/TsPRKamzHcTGQnlCjHT3TsmkOUVEPS3crCxiPfdzE/Trlhg==", + "requires": { + "acorn": "^7.1.1", + "acorn-walk": "^7.1.1" + } }, "acorn-jsx": { "version": "5.3.1", @@ -676,6 +689,11 @@ "integrity": "sha512-K0Ptm/47OKfQRpNQ2J/oIN/3QYiK6FwW+eJbILhsdxh2WTLdl+30o8aGdTbm5JbffpFFAg/g+zi1E+jvJha5ng==", "dev": true 
}, + "acorn-walk": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-7.2.0.tgz", + "integrity": "sha512-OPdCF6GsMIP+Az+aWfAAOEt2/+iVDKE7oy6lJ098aoe59oAmK76qV6Gw60SbZ8jHuG2wH058GF4pLFbYamYrVA==" + }, "adal-node": { "version": "0.2.2", "resolved": "https://registry.npmjs.org/adal-node/-/adal-node-0.2.2.tgz", @@ -1084,11 +1102,6 @@ } } }, - "boolbase": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", - "integrity": "sha1-aN/1++YMUes3cl6p4+0xDcwed24=" - }, "brace-expansion": { "version": "1.1.11", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", @@ -1107,6 +1120,11 @@ "fill-range": "^7.0.1" } }, + "browser-process-hrtime": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/browser-process-hrtime/-/browser-process-hrtime-1.0.0.tgz", + "integrity": "sha512-9o5UecI3GhkpM6DrXr69PblIuWxPKk9Y0jHBRhdocZ2y7YECBFCsHm79Pr3OyR2AvjhDkabFJaDJMYRazHgsow==" + }, "browser-request": { "version": "0.3.3", "resolved": "https://registry.npmjs.org/browser-request/-/browser-request-0.3.3.tgz", @@ -1224,29 +1242,6 @@ "resolved": "https://registry.npmjs.org/character-entities-legacy/-/character-entities-legacy-1.1.4.tgz", "integrity": "sha512-3Xnr+7ZFS1uxeiUDvV02wQ+QDbc55o97tIV5zHScSPJpcLm/r0DFPcoY3tYRp+VZukxuMeKgXYmsXQHO05zQeA==" }, - "cheerio": { - "version": "0.22.0", - "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-0.22.0.tgz", - "integrity": "sha1-qbqoYKP5tZWmuBsahocxIe06Jp4=", - "requires": { - "css-select": "~1.2.0", - "dom-serializer": "~0.1.0", - "entities": "~1.1.1", - "htmlparser2": "^3.9.1", - "lodash.assignin": "^4.0.9", - "lodash.bind": "^4.1.4", - "lodash.defaults": "^4.0.1", - "lodash.filter": "^4.4.0", - "lodash.flatten": "^4.2.0", - "lodash.foreach": "^4.3.0", - "lodash.map": "^4.4.0", - "lodash.merge": "^4.4.0", - "lodash.pick": "^4.2.1", - "lodash.reduce": "^4.4.0", - "lodash.reject": "^4.4.0", - "lodash.some": "^4.4.0" - 
} - }, "chokidar": { "version": "3.3.0", "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.3.0.tgz", @@ -1480,21 +1475,25 @@ "which": "^2.0.1" } }, - "css-select": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/css-select/-/css-select-1.2.0.tgz", - "integrity": "sha1-KzoRBTnFNV8c2NMUYj6HCxIeyFg=", - "requires": { - "boolbase": "~1.0.0", - "css-what": "2.1", - "domutils": "1.5.1", - "nth-check": "~1.0.1" - } + "cssom": { + "version": "0.4.4", + "resolved": "https://registry.npmjs.org/cssom/-/cssom-0.4.4.tgz", + "integrity": "sha512-p3pvU7r1MyyqbTk+WbNJIgJjG2VmTIaB10rI93LzVPrmDJKkzKYMtxxyAvQXR/NS6otuzveI7+7BBq3SjBS2mw==" }, - "css-what": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/css-what/-/css-what-2.1.3.tgz", - "integrity": "sha512-a+EPoD+uZiNfh+5fxw2nO9QwFa6nJe2Or35fGY6Ipw1R3R4AGz1d1TEZrCegvw2YTmZ0jXirGYlzxxpYSHwpEg==" + "cssstyle": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-2.3.0.tgz", + "integrity": "sha512-AZL67abkUzIuvcHqk7c09cezpGNcxUxU4Ioi/05xHk4DQeTkWmGYftIE6ctU6AEt+Gn4n1lDStOtj7FKycP71A==", + "requires": { + "cssom": "~0.3.6" + }, + "dependencies": { + "cssom": { + "version": "0.3.8", + "resolved": "https://registry.npmjs.org/cssom/-/cssom-0.3.8.tgz", + "integrity": "sha512-b0tGHbfegbhPJpxpiBPU2sCkigAqtM9O121le6bbOlgyV+NyGyCmVfJ6QW9eRjz8CpNfWEOYBIMIGRYkLwsIYg==" + } + } }, "dashdash": { "version": "1.14.1", @@ -1504,6 +1503,16 @@ "assert-plus": "^1.0.0" } }, + "data-urls": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/data-urls/-/data-urls-2.0.0.tgz", + "integrity": "sha512-X5eWTSXO/BJmpdIKCRuKUgSCgAN0OwliVK3yPKbwIWU1Tdw5BRajxlzMidvh+gwko9AfQ9zIj52pzF91Q3YAvQ==", + "requires": { + "abab": "^2.0.3", + "whatwg-mimetype": "^2.3.0", + "whatwg-url": "^8.0.0" + } + }, "date-utils": { "version": "1.2.21", "resolved": "https://registry.npmjs.org/date-utils/-/date-utils-1.2.21.tgz", @@ -1523,6 +1532,11 @@ "integrity": 
"sha1-9lNNFRSCabIDUue+4m9QH5oZEpA=", "dev": true }, + "decimal.js": { + "version": "10.3.1", + "resolved": "https://registry.npmjs.org/decimal.js/-/decimal.js-10.3.1.tgz", + "integrity": "sha512-V0pfhfr8suzyPGOx3nmq4aHqabehUZn6Ch9kyFpV79TGDTWFmHqUqXdabR7QHqxzrYolF4+tVmJhUG4OURg5dQ==" + }, "decompress-response": { "version": "4.2.1", "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-4.2.1.tgz", @@ -1541,8 +1555,7 @@ "deep-is": { "version": "0.1.3", "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.3.tgz", - "integrity": "sha1-s2nW+128E+7PUk+RsHD+7cNXzzQ=", - "dev": true + "integrity": "sha1-s2nW+128E+7PUk+RsHD+7cNXzzQ=" }, "define-properties": { "version": "1.1.3", @@ -1605,35 +1618,19 @@ "esutils": "^2.0.2" } }, - "dom-serializer": { - "version": "0.1.1", - "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-0.1.1.tgz", - "integrity": "sha512-l0IU0pPzLWSHBcieZbpOKgkIn3ts3vAh7ZuFyXNwJxJXk/c4Gwj9xaTJwIDVQCXawWD0qb3IzMGH5rglQaO0XA==", + "domexception": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/domexception/-/domexception-2.0.1.tgz", + "integrity": "sha512-yxJ2mFy/sibVQlu5qHjOkf9J3K6zgmCxgJ94u2EdvDOV09H+32LtRswEcUsmUWN72pVLOEnTSRaIVVzVQgS0dg==", "requires": { - "domelementtype": "^1.3.0", - "entities": "^1.1.1" - } - }, - "domelementtype": { - "version": "1.3.1", - "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-1.3.1.tgz", - "integrity": "sha512-BSKB+TSpMpFI/HOxCNr1O8aMOTZ8hT3pM3GQ0w/mWRmkhEDSFJkkyzz4XQsBV44BChwGkrDfMyjVD0eA2aFV3w==" - }, - "domhandler": { - "version": "2.4.2", - "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-2.4.2.tgz", - "integrity": "sha512-JiK04h0Ht5u/80fdLMCEmV4zkNh2BcoMFBmZ/91WtYZ8qVXSKjiw7fXMgFPnHcSZgOo3XdinHvmnDUeMf5R4wA==", - "requires": { - "domelementtype": "1" - } - }, - "domutils": { - "version": "1.5.1", - "resolved": "https://registry.npmjs.org/domutils/-/domutils-1.5.1.tgz", - "integrity": 
"sha1-3NhIiib1Y9YQeeSMn3t+Mjc2gs8=", - "requires": { - "dom-serializer": "0", - "domelementtype": "1" + "webidl-conversions": "^5.0.0" + }, + "dependencies": { + "webidl-conversions": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-5.0.0.tgz", + "integrity": "sha512-VlZwKPCkYKxQgeSbH5EyngOmRp7Ww7I9rQLERETtf5ofd9pGeswWiOtogpEO850jziPRarreGxn5QIiTqpb2wA==" + } } }, "ecc-jsbn": { @@ -1802,11 +1799,6 @@ "ansi-colors": "^4.1.1" } }, - "entities": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/entities/-/entities-1.1.2.tgz", - "integrity": "sha512-f2LZMYl1Fzu7YSBKg+RoROelpOaNrcGmE9AZubeDfrCEia483oW4MI4VyFd5VNHIgQ/7qm1I0wUHK1eJnn2y2w==" - }, "errs": { "version": "0.3.2", "resolved": "https://registry.npmjs.org/errs/-/errs-0.3.2.tgz", @@ -1871,6 +1863,60 @@ "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", "integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=" }, + "escodegen": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/escodegen/-/escodegen-2.0.0.tgz", + "integrity": "sha512-mmHKys/C8BFUGI+MAWNcSYoORYLMdPzjrknd2Vc+bUsjN5bXcr8EhrNB+UTqfL1y3I9c4fw2ihgtMPQLBRiQxw==", + "requires": { + "esprima": "^4.0.1", + "estraverse": "^5.2.0", + "esutils": "^2.0.2", + "optionator": "^0.8.1", + "source-map": "~0.6.1" + }, + "dependencies": { + "estraverse": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.2.0.tgz", + "integrity": "sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==" + }, + "levn": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/levn/-/levn-0.3.0.tgz", + "integrity": "sha1-OwmSTt+fCDwEkP3UwLxEIeBHZO4=", + "requires": { + "prelude-ls": "~1.1.2", + "type-check": "~0.3.2" + } + }, + "optionator": { + "version": "0.8.3", + "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.8.3.tgz", + "integrity": 
"sha512-+IW9pACdk3XWmmTXG8m3upGUJst5XRGzxMRjXzAuJ1XnIFNvfhjjIuYkDvysnPQ7qzqVzLt78BCruntqRhWQbA==", + "requires": { + "deep-is": "~0.1.3", + "fast-levenshtein": "~2.0.6", + "levn": "~0.3.0", + "prelude-ls": "~1.1.2", + "type-check": "~0.3.2", + "word-wrap": "~1.2.3" + } + }, + "prelude-ls": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.1.2.tgz", + "integrity": "sha1-IZMqVJ9eUv/ZqCf1cOBL5iqX2lQ=" + }, + "type-check": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.3.2.tgz", + "integrity": "sha1-WITKtRLPHTVeP7eE8wgEsrUg23I=", + "requires": { + "prelude-ls": "~1.1.2" + } + } + } + }, "eslint": { "version": "7.28.0", "resolved": "https://registry.npmjs.org/eslint/-/eslint-7.28.0.tgz", @@ -2226,8 +2272,7 @@ "esprima": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", - "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", - "dev": true + "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==" }, "esquery": { "version": "1.4.0", @@ -2272,8 +2317,7 @@ "esutils": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", - "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", - "dev": true + "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==" }, "etag": { "version": "1.8.1", @@ -2418,8 +2462,7 @@ "fast-levenshtein": { "version": "2.0.6", "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", - "integrity": "sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=", - "dev": true + "integrity": "sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=" }, "file-entry-cache": { "version": "6.0.1", @@ -2826,24 +2869,19 @@ "integrity": 
"sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", "dev": true }, + "html-encoding-sniffer": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-2.0.1.tgz", + "integrity": "sha512-D5JbOMBIR/TVZkubHT+OyT2705QvogUW4IBn6nHd756OwieSF9aDYFj4dv6HHEVGYbHaLETa3WggZYWWMyy3ZQ==", + "requires": { + "whatwg-encoding": "^1.0.5" + } + }, "html-void-elements": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/html-void-elements/-/html-void-elements-1.0.5.tgz", "integrity": "sha512-uE/TxKuyNIcx44cIWnjr/rfIATDH7ZaOMmstu0CwhFG1Dunhlp4OC6/NMbhiwoq5BpW0ubi303qnEk/PZj614w==" }, - "htmlparser2": { - "version": "3.10.1", - "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.10.1.tgz", - "integrity": "sha512-IgieNijUMbkDovyoKObU1DUhm1iwNYE/fuifEoEHfd1oZKZDaONBSkal7Y01shxsM49R4XaMdGez3WnF9UfiCQ==", - "requires": { - "domelementtype": "^1.3.1", - "domhandler": "^2.3.0", - "domutils": "^1.5.1", - "entities": "^1.1.1", - "inherits": "^2.0.1", - "readable-stream": "^3.1.1" - } - }, "http-errors": { "version": "1.8.0", "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-1.8.0.tgz", @@ -3144,6 +3182,11 @@ "resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-2.1.0.tgz", "integrity": "sha512-YWnfyRwxL/+SsrWYfOpUtz5b3YD+nyfkHvjbcanzk8zgyO4ASD67uVMRt8k5bM4lLMDnXfriRhOpemw+NfT1eA==" }, + "is-potential-custom-element-name": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.1.tgz", + "integrity": "sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==" + }, "is-promise": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/is-promise/-/is-promise-1.0.1.tgz", @@ -3249,6 +3292,72 @@ "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz", "integrity": "sha1-peZUwuWi3rXyAdls77yoDA7y9RM=" }, + "jsdom": 
{ + "version": "16.7.0", + "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-16.7.0.tgz", + "integrity": "sha512-u9Smc2G1USStM+s/x1ru5Sxrl6mPYCbByG1U/hUmqaVsm4tbNyS7CicOSRyuGQYZhTu0h84qkZZQ/I+dzizSVw==", + "requires": { + "abab": "^2.0.5", + "acorn": "^8.2.4", + "acorn-globals": "^6.0.0", + "cssom": "^0.4.4", + "cssstyle": "^2.3.0", + "data-urls": "^2.0.0", + "decimal.js": "^10.2.1", + "domexception": "^2.0.1", + "escodegen": "^2.0.0", + "form-data": "^3.0.0", + "html-encoding-sniffer": "^2.0.1", + "http-proxy-agent": "^4.0.1", + "https-proxy-agent": "^5.0.0", + "is-potential-custom-element-name": "^1.0.1", + "nwsapi": "^2.2.0", + "parse5": "6.0.1", + "saxes": "^5.0.1", + "symbol-tree": "^3.2.4", + "tough-cookie": "^4.0.0", + "w3c-hr-time": "^1.0.2", + "w3c-xmlserializer": "^2.0.0", + "webidl-conversions": "^6.1.0", + "whatwg-encoding": "^1.0.5", + "whatwg-mimetype": "^2.3.0", + "whatwg-url": "^8.5.0", + "ws": "^7.4.6", + "xml-name-validator": "^3.0.0" + }, + "dependencies": { + "acorn": { + "version": "8.4.1", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.4.1.tgz", + "integrity": "sha512-asabaBSkEKosYKMITunzX177CXxQ4Q8BSSzMTKD+FefUhipQC70gfW5SiUDhYQ3vk8G+81HqQk7Fv9OXwwn9KA==" + }, + "form-data": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-3.0.1.tgz", + "integrity": "sha512-RHkBKtLWUVwd7SqRIvCZMEvAMoGUp0XU+seQiZejj0COz3RI3hWP4sCv3gZWWLjJTd7rGwcsF5eKZGii0r/hbg==", + "requires": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "mime-types": "^2.1.12" + } + }, + "parse5": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-6.0.1.tgz", + "integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==" + }, + "tough-cookie": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-4.0.0.tgz", + "integrity": 
"sha512-tHdtEpQCMrc1YLrMaqXXcj6AxhYi/xgit6mZu1+EDWUn+qhUf8wMQoFIy9NXuq23zAwtcB0t/MjACGR18pcRbg==", + "requires": { + "psl": "^1.1.33", + "punycode": "^2.1.1", + "universalify": "^0.1.2" + } + } + } + }, "json-schema": { "version": "0.2.3", "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.2.3.tgz", @@ -3473,41 +3582,11 @@ "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" }, - "lodash.assignin": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/lodash.assignin/-/lodash.assignin-4.2.0.tgz", - "integrity": "sha1-uo31+4QesKPoBEIysOJjqNxqKKI=" - }, - "lodash.bind": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/lodash.bind/-/lodash.bind-4.2.1.tgz", - "integrity": "sha1-euMBfpOWIqwxt9fX3LGzTbFpDTU=" - }, "lodash.clonedeep": { "version": "4.5.0", "resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz", "integrity": "sha1-4j8/nE+Pvd6HJSnBBxhXoIblzO8=" }, - "lodash.defaults": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz", - "integrity": "sha1-0JF4cW/+pN3p5ft7N/bwgCJ0WAw=" - }, - "lodash.filter": { - "version": "4.6.0", - "resolved": "https://registry.npmjs.org/lodash.filter/-/lodash.filter-4.6.0.tgz", - "integrity": "sha1-ZosdSYFgOuHMWm+nYBQ+SAtMSs4=" - }, - "lodash.flatten": { - "version": "4.4.0", - "resolved": "https://registry.npmjs.org/lodash.flatten/-/lodash.flatten-4.4.0.tgz", - "integrity": "sha1-8xwiIlqWMtK7+OSt2+8kCqdlph8=" - }, - "lodash.foreach": { - "version": "4.5.0", - "resolved": "https://registry.npmjs.org/lodash.foreach/-/lodash.foreach-4.5.0.tgz", - "integrity": "sha1-Gmo16s5AEoDH8G3d7DUWWrJ+PlM=" - }, "lodash.get": { "version": "4.4.2", "resolved": "https://registry.npmjs.org/lodash.get/-/lodash.get-4.4.2.tgz", @@ -3544,11 +3623,6 @@ "resolved": 
"https://registry.npmjs.org/lodash.isstring/-/lodash.isstring-4.0.1.tgz", "integrity": "sha1-1SfftUVuynzJu5XV2ur4i6VKVFE=" }, - "lodash.map": { - "version": "4.6.0", - "resolved": "https://registry.npmjs.org/lodash.map/-/lodash.map-4.6.0.tgz", - "integrity": "sha1-dx7Hg540c9nEzeKLGTlMNWL09tM=" - }, "lodash.merge": { "version": "4.6.2", "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz", @@ -3559,26 +3633,6 @@ "resolved": "https://registry.npmjs.org/lodash.once/-/lodash.once-4.1.1.tgz", "integrity": "sha1-DdOXEhPHxW34gJd9UEyI+0cal6w=" }, - "lodash.pick": { - "version": "4.4.0", - "resolved": "https://registry.npmjs.org/lodash.pick/-/lodash.pick-4.4.0.tgz", - "integrity": "sha1-UvBWEP/53tQiYRRB7R/BI6AwAbM=" - }, - "lodash.reduce": { - "version": "4.6.0", - "resolved": "https://registry.npmjs.org/lodash.reduce/-/lodash.reduce-4.6.0.tgz", - "integrity": "sha1-8atrg5KZrUj3hKu/R2WW8DuRTTs=" - }, - "lodash.reject": { - "version": "4.6.0", - "resolved": "https://registry.npmjs.org/lodash.reject/-/lodash.reject-4.6.0.tgz", - "integrity": "sha1-gNZJLcFHCGS79YNTO2UfQqn1JBU=" - }, - "lodash.some": { - "version": "4.6.0", - "resolved": "https://registry.npmjs.org/lodash.some/-/lodash.some-4.6.0.tgz", - "integrity": "sha1-G7nzFO9ri63tE7VJFpsqlF62jk0=" - }, "lodash.truncate": { "version": "4.4.2", "resolved": "https://registry.npmjs.org/lodash.truncate/-/lodash.truncate-4.4.2.tgz", @@ -7353,19 +7407,16 @@ "set-blocking": "~2.0.0" } }, - "nth-check": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-1.0.2.tgz", - "integrity": "sha512-WeBOdju8SnzPN5vTUJYxYUxLeXpCaVP5i5e0LF8fg7WORF2Wd7wFX/pk0tYZk7s8T+J7VLy0Da6J1+wCT0AtHg==", - "requires": { - "boolbase": "~1.0.0" - } - }, "number-is-nan": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/number-is-nan/-/number-is-nan-1.0.1.tgz", "integrity": "sha1-CXtgK1NCKlIsGvuHkDGDNpQaAR0=" }, + "nwsapi": { + "version": "2.2.0", + "resolved": 
"https://registry.npmjs.org/nwsapi/-/nwsapi-2.2.0.tgz", + "integrity": "sha512-h2AatdwYH+JHiZpv7pt/gSX1XoRGb7L/qSIeuqA6GwYoF9w1vP1cw42TO0aI2pNyshRK5893hNSl+1//vHK7hQ==" + }, "oauth-sign": { "version": "0.9.0", "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.9.0.tgz", @@ -8101,6 +8152,14 @@ "resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz", "integrity": "sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw==" }, + "saxes": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/saxes/-/saxes-5.0.1.tgz", + "integrity": "sha512-5LBh1Tls8c9xgGjw3QrMwETmTMVk0oFgvrFSvWx62llR2hcEInrKNZ2GZCCuuy2lvWrdl5jhbpeqc5hRYKFOcw==", + "requires": { + "xmlchars": "^2.2.0" + } + }, "security": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/security/-/security-1.0.0.tgz", @@ -8714,6 +8773,11 @@ "resolved": "https://registry.npmjs.org/swagger-schema-official/-/swagger-schema-official-2.0.0-bab6bed.tgz", "integrity": "sha1-cAcEaNbSl3ylI3suUZyn0Gouo/0=" }, + "symbol-tree": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/symbol-tree/-/symbol-tree-3.2.4.tgz", + "integrity": "sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==" + }, "table": { "version": "6.7.1", "resolved": "https://registry.npmjs.org/table/-/table-6.7.1.tgz", @@ -8993,6 +9057,14 @@ "punycode": "^2.1.1" } }, + "tr46": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-2.1.0.tgz", + "integrity": "sha512-15Ih7phfcdP5YxqiB+iDtLoaTz4Nd35+IiAv0kQ5FNKHzXgdWqPoTIqEDDJmXceQt4JZk6lVPT8lnDlPpGDppw==", + "requires": { + "punycode": "^2.1.1" + } + }, "trough": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/trough/-/trough-1.0.5.tgz", @@ -9201,11 +9273,55 @@ "unist-util-stringify-position": "^2.0.0" } }, + "w3c-hr-time": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/w3c-hr-time/-/w3c-hr-time-1.0.2.tgz", + "integrity": 
"sha512-z8P5DvDNjKDoFIHK7q8r8lackT6l+jo/Ye3HOle7l9nICP9lf1Ci25fy9vHd0JOWewkIFzXIEig3TdKT7JQ5fQ==", + "requires": { + "browser-process-hrtime": "^1.0.0" + } + }, + "w3c-xmlserializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/w3c-xmlserializer/-/w3c-xmlserializer-2.0.0.tgz", + "integrity": "sha512-4tzD0mF8iSiMiNs30BiLO3EpfGLZUT2MSX/G+o7ZywDzliWQ3OPtTZ0PTC3B3ca1UAf4cJMHB+2Bf56EriJuRA==", + "requires": { + "xml-name-validator": "^3.0.0" + } + }, "web-namespaces": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/web-namespaces/-/web-namespaces-1.1.4.tgz", "integrity": "sha512-wYxSGajtmoP4WxfejAPIr4l0fVh+jeMXZb08wNc0tMg6xsfZXj3cECqIK0G7ZAqUq0PP8WlMDtaOGVBTAWztNw==" }, + "webidl-conversions": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-6.1.0.tgz", + "integrity": "sha512-qBIvFLGiBpLjfwmYAaHPXsn+ho5xZnGvyGvsarywGNc8VyQJUMHJ8OBKGGrPER0okBeMDaan4mNBlgBROxuI8w==" + }, + "whatwg-encoding": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-1.0.5.tgz", + "integrity": "sha512-b5lim54JOPN9HtzvK9HFXvBma/rnfFeqsic0hSpjtDbVxR3dJKLc+KB4V6GgiGOvl7CY/KNh8rxSo9DKQrnUEw==", + "requires": { + "iconv-lite": "0.4.24" + } + }, + "whatwg-mimetype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-2.3.0.tgz", + "integrity": "sha512-M4yMwr6mAnQz76TbJm914+gPpB/nCwvZbJU28cUD6dR004SAxDLOOSUaB1JDRqLtaOV/vi0IC5lEAGFgrjGv/g==" + }, + "whatwg-url": { + "version": "8.7.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-8.7.0.tgz", + "integrity": "sha512-gAojqb/m9Q8a5IV96E3fHJM70AzCkgt4uXYX2O7EmuyOnLrViCQlsEBmF9UQIu3/aeAIp2U17rtbpZWNntQqdg==", + "requires": { + "lodash": "^4.7.0", + "tr46": "^2.1.0", + "webidl-conversions": "^6.1.0" + } + }, "which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", @@ -9244,8 +9360,7 @@ "word-wrap": { "version": 
"1.2.3", "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz", - "integrity": "sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ==", - "dev": true + "integrity": "sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ==" }, "wrap-ansi": { "version": "5.1.0", @@ -9317,6 +9432,11 @@ "source-map-support": "^0.5.19" } }, + "xml-name-validator": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-3.0.0.tgz", + "integrity": "sha512-A5CUptxDsvxKJEU3yO6DuWBSJz/qizqzJKOMIfUJHETbBw/sFaDxgd6fxm1ewUaM0jZ444Fc5vC5ROYurg/4Pw==" + }, "xml2js": { "version": "0.4.23", "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.4.23.tgz", @@ -9331,6 +9451,11 @@ "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-11.0.1.tgz", "integrity": "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==" }, + "xmlchars": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz", + "integrity": "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==" + }, "xmldom": { "version": "0.6.0", "resolved": "https://registry.npmjs.org/xmldom/-/xmldom-0.6.0.tgz", diff --git a/src/package.json b/src/package.json index c4092869d..8050b167e 100644 --- a/src/package.json +++ b/src/package.json @@ -33,7 +33,6 @@ "async": "^3.2.0", "async-stacktrace": "0.0.2", "channels": "0.0.4", - "cheerio": "0.22.0", "clean-css": "4.2.3", "cookie-parser": "1.4.5", "cross-spawn": "^7.0.3", @@ -47,6 +46,7 @@ "formidable": "1.2.2", "http-errors": "1.8.0", "js-cookie": "^2.2.1", + "jsdom": "^16.7.0", "jsonminify": "0.4.1", "languages4translatewiki": "0.1.3", "lodash.clonedeep": "4.5.0", diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index 18643a944..3d2bd9aa8 100644 --- a/src/static/js/contentcollector.js +++ 
b/src/static/js/contentcollector.js @@ -31,30 +31,7 @@ const Changeset = require('./Changeset'); const hooks = require('./pluginfw/hooks'); const sanitizeUnicode = (s) => UNorm.nfc(s); - -// This file is used both in browsers and with cheerio in Node.js (for importing HTML). Cheerio's -// Node-like objects are not 100% API compatible with the DOM specification; the following functions -// abstract away the differences. - -// .nodeType works with DOM and cheerio 0.22.0, but cheerio 0.22.0 does not provide the Node.*_NODE -// constants so they cannot be used here. -const isElementNode = (n) => n.nodeType === 1; // Node.ELEMENT_NODE -const isTextNode = (n) => n.nodeType === 3; // Node.TEXT_NODE -// .tagName works with DOM and cheerio 0.22.0, but: -// * With DOM, .tagName is an uppercase string. -// * With cheerio 0.22.0, .tagName is a lowercase string. -// For consistency, this function always returns a lowercase string. const tagName = (n) => n.tagName && n.tagName.toLowerCase(); -// .childNodes works with DOM and cheerio 0.22.0, except in cheerio the .childNodes property does -// not exist on text nodes (and maybe other non-element nodes). -const childNodes = (n) => n.childNodes || []; -const getAttribute = (n, a) => { - // .getAttribute() works with DOM but not with cheerio 0.22.0. - if (n.getAttribute != null) return n.getAttribute(a); - // .attribs[] works with cheerio 0.22.0 but not with DOM. 
- if (n.attribs != null) return n.attribs[a]; - return null; -}; // supportedElems are Supported natively within Etherpad and don't require a plugin const supportedElems = new Set([ 'author', @@ -146,17 +123,13 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) let selEnd = [-1, -1]; const _isEmpty = (node, state) => { // consider clean blank lines pasted in IE to be empty - if (childNodes(node).length === 0) return true; - if (childNodes(node).length === 1 && + if (node.childNodes.length === 0) return true; + if (node.childNodes.length === 1 && getAssoc(node, 'shouldBeEmpty') && - // Note: The .innerHTML property exists on DOM Element objects but not on cheerio's - // Element-like objects (cheerio v0.22.0) so this equality check will always be false. - // Cheerio's Element-like objects have no equivalent to .innerHTML. (Cheerio objects have an - // .html() method, but that isn't accessible here.) node.innerHTML === ' ' && !getAssoc(node, 'unpasted')) { if (state) { - const child = childNodes(node)[0]; + const child = node.childNodes[0]; _reachPoint(child, 0, state); _reachPoint(child, 1, state); } @@ -176,7 +149,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) }; const _reachBlockPoint = (nd, idx, state) => { - if (!isTextNode(nd)) _reachPoint(nd, idx, state); + if (nd.nodeType !== nd.TEXT_NODE) _reachPoint(nd, idx, state); }; const _reachPoint = (nd, idx, state) => { @@ -348,8 +321,8 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) const startLine = lines.length() - 1; _reachBlockPoint(node, 0, state); - if (isTextNode(node)) { - const tname = getAttribute(node.parentNode, 'name'); + if (node.nodeType === node.TEXT_NODE) { + const tname = node.parentNode.getAttribute('name'); const context = {cc: this, state, tname, node, text: node.nodeValue}; // Hook functions may either return a string (deprecated) or modify context.text. 
If any hook // function modifies context.text then all returned strings are ignored. If no hook functions @@ -406,7 +379,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) cc.startNewLine(state); } } - } else if (isElementNode(node)) { + } else if (node.nodeType === node.ELEMENT_NODE) { const tname = tagName(node) || ''; if (tname === 'img') { @@ -425,7 +398,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) if (tname === 'br') { this.breakLine = true; - const tvalue = getAttribute(node, 'value'); + const tvalue = node.getAttribute('value'); const [startNewLine = true] = hooks.callAll('collectContentLineBreak', { cc: this, state, @@ -440,8 +413,8 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) } else if (tname === 'script' || tname === 'style') { // ignore } else if (!isEmpty) { - let styl = getAttribute(node, 'style'); - let cls = getAttribute(node, 'class'); + let styl = node.getAttribute('style'); + let cls = node.getAttribute('class'); let isPre = (tname === 'pre'); if ((!isPre) && abrowser && abrowser.safari) { isPre = (styl && /\bwhite-space:\s*pre\b/i.exec(styl)); @@ -488,14 +461,14 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) cc.doAttrib(state, 'strikethrough'); } if (tname === 'ul' || tname === 'ol') { - let type = getAttribute(node, 'class'); + let type = node.getAttribute('class'); const rr = cls && /(?:^| )list-([a-z]+[0-9]+)\b/.exec(cls); // lists do not need to have a type, so before we make a wrong guess // check if we find a better hint within the node's children if (!rr && !type) { - for (const child of childNodes(node)) { + for (const child of node.childNodes) { if (tagName(child) !== 'ul') continue; - type = getAttribute(child, 'class'); + type = child.getAttribute('class'); if (type) break; } } @@ -503,7 +476,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) type = rr[1]; 
} else { if (tname === 'ul') { - const cls = getAttribute(node, 'class'); + const cls = node.getAttribute('class'); if ((type && type.match('indent')) || (cls && cls.match('indent'))) { type = 'indent'; } else { @@ -575,7 +548,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) } } - for (const c of childNodes(node)) { + for (const c of node.childNodes) { cc.collectContent(c, state); } diff --git a/src/tests/backend/specs/contentcollector.js b/src/tests/backend/specs/contentcollector.js index 1739bd7a0..de3dae39c 100644 --- a/src/tests/backend/specs/contentcollector.js +++ b/src/tests/backend/specs/contentcollector.js @@ -11,8 +11,8 @@ const AttributePool = require('../../../static/js/AttributePool'); const assert = require('assert').strict; -const cheerio = require('cheerio'); const contentcollector = require('../../../static/js/contentcollector'); +const jsdom = require('jsdom'); const tests = { nestedLi: { @@ -286,14 +286,13 @@ describe(__filename, function () { it(testObj.description, async function () { this.timeout(250); - const $ = cheerio.load(testObj.html); // Load HTML into Cheerio - const doc = $('body')[0]; // Creates a dom-like representation of HTML + const {window: {document}} = new jsdom.JSDOM(testObj.html); // Create an empty attribute pool const apool = new AttributePool(); // Convert a dom tree into a list of lines and attribute liens // using the content collector object const cc = contentcollector.makeContentCollector(true, null, apool); - cc.collectContent(doc); + cc.collectContent(document.body); const result = cc.finish(); const gotAttributes = result.lineAttribs; const wantAttributes = testObj.wantLineAttribs;