diff --git a/src/node/utils/ImportHtml.js b/src/node/utils/ImportHtml.js index 48188dfd0..79217cb65 100644 --- a/src/node/utils/ImportHtml.js +++ b/src/node/utils/ImportHtml.js @@ -14,22 +14,18 @@ * limitations under the License. */ -var jsdom = require('jsdom-nocontextifiy').jsdom; +var jsdom = require('jsdom').jsdom; var log4js = require('log4js'); var Changeset = require("ep_etherpad-lite/static/js/Changeset"); var contentcollector = require("ep_etherpad-lite/static/js/contentcollector"); +var cheerio = require("cheerio"); function setPadHTML(pad, html, callback) { var apiLogger = log4js.getLogger("ImportHtml"); - // Parse the incoming HTML with jsdom - try{ - var doc = jsdom(html.replace(/>\n+<')); - }catch(e){ - apiLogger.warn("Error importing, possibly caused by malformed HTML"); - var doc = jsdom("
Error during import, possibly malformed HTML
"); - } + var $ = cheerio.load(html); + var doc = $('html')[0]; apiLogger.debug('html:'); apiLogger.debug(html); @@ -38,7 +34,7 @@ function setPadHTML(pad, html, callback) // using the content collector object var cc = contentcollector.makeContentCollector(true, null, pad.pool); try{ // we use a try here because if the HTML is bad it will blow up - cc.collectContent(doc.childNodes[0]); + cc.collectContent(doc); }catch(e){ apiLogger.warn("HTML was not properly formed", e); return; // We don't process the HTML because it was bad.. diff --git a/src/package.json b/src/package.json index 4b0ae82b4..e69d97305 100644 --- a/src/package.json +++ b/src/package.json @@ -25,7 +25,7 @@ "formidable" : "1.0.9", "log4js" : "0.6.6", "nodemailer" : "0.3.x", - "jsdom-nocontextifiy" : "0.2.10", + "cheerio" : "0.18.0", "async-stacktrace" : "0.0.2", "npm" : "1.4.x", "ejs" : "0.6.1", diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index 5e393670f..048ed4915 100644 --- a/src/static/js/contentcollector.js +++ b/src/static/js/contentcollector.js @@ -54,10 +54,14 @@ function makeContentCollector(collectStyles, browser, apool, domInterface, class }, nodeNumChildren: function(n) { + if(n.childNodes == null) return 0; return n.childNodes.length; }, nodeChild: function(n, i) { + if(n.childNodes.item == null){ + return n.childNodes[i]; + } return n.childNodes.item(i); }, nodeProp: function(n, p) @@ -66,6 +70,7 @@ function makeContentCollector(collectStyles, browser, apool, domInterface, class }, nodeAttr: function(n, a) { + if(n.getAttribute == null) return null; return n.getAttribute(a); }, optNodeInnerHTML: function(n)