contentcollector: Document the `dom` object

pull/4685/head
Richard Hansen 2021-01-24 18:03:13 -05:00 committed by John McLear
parent 74bb2f76cc
commit dd7fb1babe
1 changed file with 22 additions and 1 deletion

View File

@ -33,26 +33,47 @@ const hooks = require('./pluginfw/hooks');
const sanitizeUnicode = (s) => UNorm.nfc(s);
const makeContentCollector = (collectStyles, abrowser, apool, className2Author) => {
// This file is used both in browsers and with cheerio in Node.js (for importing HTML). Cheerio's
// Node-like objects are not 100% API compatible with the DOM Node specification; this `dom`
// object abstracts away the differences.
const dom = {
isNodeText: (n) => n.nodeType === 3,
// .nodeType works with DOM and cheerio 0.22.0. Note: Cheerio 0.22.0 does not provide the
// Node.*_NODE constants, so they cannot be used here.
isNodeText: (n) => n.nodeType === 3, // Node.TEXT_NODE
// .tagName works with DOM and cheerio 0.22.0, but:
// * With DOM, .tagName is an uppercase string.
// * With cheerio 0.22.0, .tagName is a lowercase string.
nodeTagName: (n) => n.tagName,
// .nodeValue works with DOM and cheerio 0.22.0.
nodeValue: (n) => n.nodeValue,
// Returns the number of Node children (n.childNodes.length), not the number of Element children
// (n.children.length in DOM).
nodeNumChildren: (n) => {
// .childNodes.length works with DOM and cheerio 0.22.0, except in cheerio the .childNodes
// property does not exist on text nodes (and maybe other non-element nodes).
if (n.childNodes == null) return 0;
return n.childNodes.length;
},
// Returns the i'th Node child (n.childNodes[i]), not the i'th Element child (n.children[i] in
// DOM).
nodeChild: (n, i) => {
if (n.childNodes.item == null) {
// .childNodes[] works with DOM and cheerio 0.22.0.
return n.childNodes[i];
}
// .childNodes.item() works with DOM but not with cheerio 0.22.0.
return n.childNodes.item(i);
},
nodeProp: (n, p) => n[p],
nodeAttr: (n, a) => {
// .getAttribute() works with DOM but not with cheerio 0.22.0.
if (n.getAttribute != null) return n.getAttribute(a);
// .attribs[] works with cheerio 0.22.0 but not with DOM.
if (n.attribs != null) return n.attribs[a];
return null;
},
// .innerHTML works with DOM but not with cheerio 0.22.0. Cheerio's Element-like objects have no
// equivalent. (Cheerio objects have an .html() method, but that isn't accessible here.)
optNodeInnerHTML: (n) => n.innerHTML,
};