tests for spaces (#4594)
parent
a637920e55
commit
040057239e
|
@ -67,7 +67,163 @@ const testImports = {
|
||||||
input: '<html><body><ul class="indent"><li>indent</li><li>indent</ul></body></html>',
|
input: '<html><body><ul class="indent"><li>indent</li><li>indent</ul></body></html>',
|
||||||
expectedHTML: '<!DOCTYPE HTML><html><body><ul class="indent"><li>indent</li><li>indent</ul><br></body></html>',
|
expectedHTML: '<!DOCTYPE HTML><html><body><ul class="indent"><li>indent</li><li>indent</ul><br></body></html>',
|
||||||
expectedText: '\tindent\n\tindent\n\n'
|
expectedText: '\tindent\n\tindent\n\n'
|
||||||
}
|
},
|
||||||
|
lineWithMultipleSpaces: {
|
||||||
|
description: 'Multiple spaces should be collapsed',
|
||||||
|
input: '<html><body>Text with more than one space.<br></body></html>',
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body>Text with more than one space.<br><br></body></html>',
|
||||||
|
expectedText: 'Text with more than one space.\n\n'
|
||||||
|
},
|
||||||
|
lineWithMultipleNonBreakingAndNormalSpaces: {
|
||||||
|
// XXX the HTML between "than" and "one" looks strange
|
||||||
|
description: 'non-breaking space should be preserved, but can be replaced when it',
|
||||||
|
input: '<html><body>Text with more than one space.<br></body></html>',
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body>Text with more than one space.<br><br></body></html>',
|
||||||
|
expectedText: 'Text with more than one space.\n\n'
|
||||||
|
},
|
||||||
|
multiplenbsp: {
|
||||||
|
description: 'Multiple non-breaking space should be preserved',
|
||||||
|
input: '<html><body> <br></body></html>',
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body> <br><br></body></html>',
|
||||||
|
expectedText: ' \n\n'
|
||||||
|
},
|
||||||
|
multipleNonBreakingSpaceBetweenWords: {
|
||||||
|
description: 'A normal space is always inserted before a word',
|
||||||
|
input: '<html><body> word1 word2 word3<br></body></html>',
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body> word1 word2 word3<br><br></body></html>',
|
||||||
|
expectedText: ' word1 word2 word3\n\n'
|
||||||
|
},
|
||||||
|
nonBreakingSpacePreceededBySpaceBetweenWords: {
|
||||||
|
description: 'A non-breaking space preceeded by a normal space',
|
||||||
|
input: '<html><body> word1 word2 word3<br></body></html>',
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body> word1 word2 word3<br><br></body></html>',
|
||||||
|
expectedText: ' word1 word2 word3\n\n'
|
||||||
|
},
|
||||||
|
nonBreakingSpaceFollowededBySpaceBetweenWords: {
|
||||||
|
description: 'A non-breaking space followed by a normal space',
|
||||||
|
input: '<html><body> word1 word2 word3<br></body></html>',
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body> word1 word2 word3<br><br></body></html>',
|
||||||
|
expectedText: ' word1 word2 word3\n\n'
|
||||||
|
},
|
||||||
|
spacesAfterNewline: {
|
||||||
|
description: 'Collapse spaces that follow a newline',
|
||||||
|
input:'<!doctype html><html><body>something<br> something<br></body></html>',
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body>something<br>something<br><br></body></html>',
|
||||||
|
expectedText: 'something\nsomething\n\n'
|
||||||
|
},
|
||||||
|
spacesAfterNewlineP: {
|
||||||
|
description: 'Collapse spaces that follow a paragraph',
|
||||||
|
input:'<!doctype html><html><body>something<p></p> something<br></body></html>',
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body>something<br><br>something<br><br></body></html>',
|
||||||
|
expectedText: 'something\n\nsomething\n\n'
|
||||||
|
},
|
||||||
|
spacesAtEndOfLine: {
|
||||||
|
description: 'Collapse spaces that preceed/follow a newline',
|
||||||
|
input:'<html><body>something <br> something<br></body></html>',
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body>something<br>something<br><br></body></html>',
|
||||||
|
expectedText: 'something\nsomething\n\n'
|
||||||
|
},
|
||||||
|
spacesAtEndOfLineP: {
|
||||||
|
description: 'Collapse spaces that preceed/follow a paragraph',
|
||||||
|
input:'<html><body>something <p></p> something<br></body></html>',
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body>something<br><br>something<br><br></body></html>',
|
||||||
|
expectedText: 'something\n\nsomething\n\n'
|
||||||
|
},
|
||||||
|
nonBreakingSpacesAfterNewlines: {
|
||||||
|
description: 'Don\'t collapse non-breaking spaces that follow a newline',
|
||||||
|
input:'<html><body>something<br> something<br></body></html>',
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body>something<br> something<br><br></body></html>',
|
||||||
|
expectedText: 'something\n something\n\n'
|
||||||
|
},
|
||||||
|
nonBreakingSpacesAfterNewlinesP: {
|
||||||
|
description: 'Don\'t collapse non-breaking spaces that follow a paragraph',
|
||||||
|
input:'<html><body>something<p></p> something<br></body></html>',
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body>something<br><br> something<br><br></body></html>',
|
||||||
|
expectedText: 'something\n\n something\n\n'
|
||||||
|
},
|
||||||
|
collapseSpacesInsideElements: {
|
||||||
|
description: 'Preserve only one space when multiple are present',
|
||||||
|
input: '<html><body>Need <span> more </span> space<i> s </i> !<br></body></html>',
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space<em> s </em>!<br><br></body></html>',
|
||||||
|
expectedText: 'Need more space s !\n\n'
|
||||||
|
},
|
||||||
|
collapseSpacesAcrossNewlines: {
|
||||||
|
description: 'Newlines and multiple spaces across newlines should be collapsed',
|
||||||
|
input: `
|
||||||
|
<html><body>Need
|
||||||
|
<span> more </span>
|
||||||
|
space
|
||||||
|
<i> s </i>
|
||||||
|
!<br></body></html>`,
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space <em>s </em>!<br><br></body></html>',
|
||||||
|
expectedText: 'Need more space s !\n\n'
|
||||||
|
},
|
||||||
|
multipleNewLinesAtBeginning: {
|
||||||
|
description: 'Multiple new lines and paragraphs at the beginning should be preserved',
|
||||||
|
input: '<html><body><br><br><p></p><p></p>first line<br><br>second line<br></body></html>',
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body><br><br><br><br>first line<br><br>second line<br><br></body></html>',
|
||||||
|
expectedText: '\n\n\n\nfirst line\n\nsecond line\n\n'
|
||||||
|
},
|
||||||
|
multiLineParagraph:{
|
||||||
|
description: "A paragraph with multiple lines should not loose spaces when lines are combined",
|
||||||
|
input:`<html><body>
|
||||||
|
<p>
|
||||||
|
а б в г ґ д е є ж з и і ї й к л м н о
|
||||||
|
п р с т у ф х ц ч ш щ ю я ь
|
||||||
|
</p>
|
||||||
|
</body></html>`,
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body>а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь<br><br></body></html>',
|
||||||
|
expectedText: 'а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь\n\n'
|
||||||
|
},
|
||||||
|
multiLineParagraphWithPre:{
|
||||||
|
// XXX why is there &nbsp; before "in"?
|
||||||
|
description: "lines in preformatted text should be kept intact",
|
||||||
|
input:`<html><body>
|
||||||
|
<p>
|
||||||
|
а б в г ґ д е є ж з и і ї й к л м н о<pre>multiple
|
||||||
|
lines
|
||||||
|
in
|
||||||
|
pre
|
||||||
|
</pre></p><p>п р с т у ф х ц ч ш щ ю я
|
||||||
|
ь</p>
|
||||||
|
</body></html>`,
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body>а б в г ґ д е є ж з и і ї й к л м н о<br>multiple<br> lines<br> in<br> pre<br><br>п р с т у ф х ц ч ш щ ю я ь<br><br></body></html>',
|
||||||
|
expectedText: 'а б в г ґ д е є ж з и і ї й к л м н о\nmultiple\n lines\n in\n pre\n\nп р с т у ф х ц ч ш щ ю я ь\n\n'
|
||||||
|
},
|
||||||
|
preIntroducesASpace: {
|
||||||
|
description: "pre should be on a new line not preceeded by a space",
|
||||||
|
input:`<html><body><p>
|
||||||
|
1
|
||||||
|
<pre>preline
|
||||||
|
</pre></p></body></html>`,
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body>1<br>preline<br><br><br></body></html>',
|
||||||
|
expectedText: '1\npreline\n\n\n'
|
||||||
|
},
|
||||||
|
dontDeleteSpaceInsideElements: {
|
||||||
|
description: 'Preserve spaces inside elements',
|
||||||
|
input: '<html><body>Need<span> more </span>space<i> s </i>!<br></body></html>',
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space<em> s </em>!<br><br></body></html>',
|
||||||
|
expectedText: 'Need more space s !\n\n'
|
||||||
|
},
|
||||||
|
dontDeleteSpaceOutsideElements: {
|
||||||
|
description: 'Preserve spaces outside elements',
|
||||||
|
input: '<html><body>Need <span>more</span> space <i>s</i> !<br></body></html>',
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space <em>s</em> !<br><br></body></html>',
|
||||||
|
expectedText: 'Need more space s !\n\n'
|
||||||
|
},
|
||||||
|
dontDeleteSpaceAtEndOfElement: {
|
||||||
|
description: 'Preserve spaces at the end of an element',
|
||||||
|
input: '<html><body>Need <span>more </span>space <i>s </i>!<br></body></html>',
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space <em>s </em>!<br><br></body></html>',
|
||||||
|
expectedText: 'Need more space s !\n\n'
|
||||||
|
},
|
||||||
|
dontDeleteSpaceAtBeginOfElements: {
|
||||||
|
description: 'Preserve spaces at the start of an element',
|
||||||
|
input: '<html><body>Need<span> more</span> space<i> s</i> !<br></body></html>',
|
||||||
|
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space<em> s</em> !<br><br></body></html>',
|
||||||
|
expectedText: 'Need more space s !\n\n'
|
||||||
|
},
|
||||||
|
>>>>>>> 5a47aff2... tests for spaces
|
||||||
};
|
};
|
||||||
|
|
||||||
describe(__filename, function () {
|
describe(__filename, function () {
|
||||||
|
|
|
@ -1,6 +1,14 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
/* eslint-disable max-len */
|
/* eslint-disable max-len */
|
||||||
|
/*
|
||||||
|
* While importexport tests target the `setHTML` API endpoint, which is nearly identical to what happens
|
||||||
|
* when a user manually imports a document via the UI, the contentcollector tests here don't use rehype to process
|
||||||
|
* the document. Rehype removes spaces and newlines where applicable, so the expected results here can
|
||||||
|
* differ from importexport.js.
|
||||||
|
*
|
||||||
|
* If you add tests here, please also add them to importexport.js
|
||||||
|
*/
|
||||||
|
|
||||||
const contentcollector = require('../../../src/static/js/contentcollector');
|
const contentcollector = require('../../../src/static/js/contentcollector');
|
||||||
const AttributePool = require('../../../src/static/js/AttributePool');
|
const AttributePool = require('../../../src/static/js/AttributePool');
|
||||||
|
@ -113,6 +121,156 @@ const tests = {
|
||||||
expectedLineAttribs: ['+5'],
|
expectedLineAttribs: ['+5'],
|
||||||
expectedText: ['empty'],
|
expectedText: ['empty'],
|
||||||
},
|
},
|
||||||
|
lineWithMultipleSpaces: {
|
||||||
|
description: 'Multiple spaces should be preserved',
|
||||||
|
html: '<html><body>Text with more than one space.<br></body></html>',
|
||||||
|
expectedLineAttribs: [ '+10' ],
|
||||||
|
expectedText: ['Text with more than one space.']
|
||||||
|
},
|
||||||
|
lineWithMultipleNonBreakingAndNormalSpaces: {
|
||||||
|
description: 'non-breaking and normal space should be preserved',
|
||||||
|
html: '<html><body>Text with more than one space.<br></body></html>',
|
||||||
|
expectedLineAttribs: [ '+10' ],
|
||||||
|
expectedText: ['Text with more than one space.']
|
||||||
|
},
|
||||||
|
multiplenbsp: {
|
||||||
|
description: 'Multiple nbsp should be preserved',
|
||||||
|
html: '<html><body> <br></body></html>',
|
||||||
|
expectedLineAttribs: [ '+2' ],
|
||||||
|
expectedText: [' ']
|
||||||
|
},
|
||||||
|
multipleNonBreakingSpaceBetweenWords: {
|
||||||
|
description: 'Multiple nbsp between words ',
|
||||||
|
html: '<html><body> word1 word2 word3<br></body></html>',
|
||||||
|
expectedLineAttribs: [ '+m' ],
|
||||||
|
expectedText: [' word1 word2 word3']
|
||||||
|
},
|
||||||
|
nonBreakingSpacePreceededBySpaceBetweenWords: {
|
||||||
|
description: 'A non-breaking space preceeded by a normal space',
|
||||||
|
html: '<html><body> word1 word2 word3<br></body></html>',
|
||||||
|
expectedLineAttribs: [ '+l' ],
|
||||||
|
expectedText: [' word1 word2 word3']
|
||||||
|
},
|
||||||
|
nonBreakingSpaceFollowededBySpaceBetweenWords: {
|
||||||
|
description: 'A non-breaking space followed by a normal space',
|
||||||
|
html: '<html><body> word1 word2 word3<br></body></html>',
|
||||||
|
expectedLineAttribs: [ '+l' ],
|
||||||
|
expectedText: [' word1 word2 word3']
|
||||||
|
},
|
||||||
|
spacesAfterNewline: {
|
||||||
|
description: 'Don\'t collapse spaces that follow a newline',
|
||||||
|
html:'<!doctype html><html><body>something<br> something<br></body></html>',
|
||||||
|
expectedLineAttribs: ['+9', '+m'],
|
||||||
|
expectedText: ['something', ' something']
|
||||||
|
},
|
||||||
|
spacesAfterNewlineP: {
|
||||||
|
description: 'Don\'t collapse spaces that follow a empty paragraph',
|
||||||
|
html:'<!doctype html><html><body>something<p></p> something<br></body></html>',
|
||||||
|
expectedLineAttribs: ['+9', '', '+m'],
|
||||||
|
expectedText: ['something', '', ' something']
|
||||||
|
},
|
||||||
|
spacesAtEndOfLine: {
|
||||||
|
description: 'Don\'t collapse spaces that preceed/follow a newline',
|
||||||
|
html:'<html><body>something <br> something<br></body></html>',
|
||||||
|
expectedLineAttribs: ['+l', '+m'],
|
||||||
|
expectedText: ['something ', ' something']
|
||||||
|
},
|
||||||
|
spacesAtEndOfLineP: {
|
||||||
|
description: 'Don\'t collapse spaces that preceed/follow a empty paragraph',
|
||||||
|
html:'<html><body>something <p></p> something<br></body></html>',
|
||||||
|
expectedLineAttribs: ['+l', '', '+m'],
|
||||||
|
expectedText: ['something ', '', ' something']
|
||||||
|
},
|
||||||
|
nonBreakingSpacesAfterNewlines: {
|
||||||
|
description: 'Don\'t collapse non-breaking spaces that follow a newline',
|
||||||
|
html:'<html><body>something<br> something<br></body></html>',
|
||||||
|
expectedLineAttribs: ['+9', '+c'],
|
||||||
|
expectedText: ['something', ' something']
|
||||||
|
},
|
||||||
|
nonBreakingSpacesAfterNewlinesP: {
|
||||||
|
description: 'Don\'t collapse non-breaking spaces that follow a paragraph',
|
||||||
|
html:'<html><body>something<p></p> something<br></body></html>',
|
||||||
|
expectedLineAttribs: ['+9', '', '+c'],
|
||||||
|
expectedText: ['something', '', ' something']
|
||||||
|
},
|
||||||
|
preserveSpacesInsideElements: {
|
||||||
|
description: 'Preserve all spaces when multiple are present',
|
||||||
|
html: '<html><body>Need <span> more </span> space<i> s </i> !<br></body></html>',
|
||||||
|
expectedLineAttribs: ['+h*0+4+2'],
|
||||||
|
expectedText: ['Need more space s !'],
|
||||||
|
},
|
||||||
|
preserveSpacesAcrossNewlines: {
|
||||||
|
description: 'Newlines and multiple spaces across newlines should be preserved',
|
||||||
|
html: `
|
||||||
|
<html><body>Need
|
||||||
|
<span> more </span>
|
||||||
|
space
|
||||||
|
<i> s </i>
|
||||||
|
!<br></body></html>`,
|
||||||
|
expectedLineAttribs: [ '+19*0+4+b' ],
|
||||||
|
expectedText: [ 'Need more space s !' ]
|
||||||
|
},
|
||||||
|
multipleNewLinesAtBeginning: {
|
||||||
|
description: 'Multiple new lines at the beginning should be preserved',
|
||||||
|
html: '<html><body><br><br><p></p><p></p>first line<br><br>second line<br></body></html>',
|
||||||
|
expectedLineAttribs: ['', '', '', '', '+a', '', '+b'],
|
||||||
|
expectedText: [ '', '', '', '', 'first line', '', 'second line']
|
||||||
|
},
|
||||||
|
multiLineParagraph:{
|
||||||
|
description: "A paragraph with multiple lines should not loose spaces when lines are combined",
|
||||||
|
html:`<html><body><p>
|
||||||
|
а б в г ґ д е є ж з и і ї й к л м н о
|
||||||
|
п р с т у ф х ц ч ш щ ю я ь</p>
|
||||||
|
</body></html>`,
|
||||||
|
expectedLineAttribs: [ '+1t' ],
|
||||||
|
expectedText: ["а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь"]
|
||||||
|
},
|
||||||
|
multiLineParagraphWithPre:{
|
||||||
|
description: "lines in preformatted text should be kept intact",
|
||||||
|
html:`<html><body><p>
|
||||||
|
а б в г ґ д е є ж з и і ї й к л м н о<pre>multiple
|
||||||
|
lines
|
||||||
|
in
|
||||||
|
pre
|
||||||
|
</pre></p><p>п р с т у ф х ц ч ш щ ю я
|
||||||
|
ь</p>
|
||||||
|
</body></html>`,
|
||||||
|
expectedLineAttribs: [ '+11', '+8', '+5', '+2', '+3', '+r' ],
|
||||||
|
expectedText: ['а б в г ґ д е є ж з и і ї й к л м н о', 'multiple', 'lines', 'in', 'pre', 'п р с т у ф х ц ч ш щ ю я ь']
|
||||||
|
},
|
||||||
|
preIntroducesASpace: {
|
||||||
|
description: "pre should be on a new line not preceeded by a space",
|
||||||
|
html:`<html><body><p>
|
||||||
|
1
|
||||||
|
<pre>preline
|
||||||
|
</pre></p></body></html>`,
|
||||||
|
expectedLineAttribs: [ '+6', '+7' ],
|
||||||
|
expectedText: [' 1 ', 'preline']
|
||||||
|
},
|
||||||
|
dontDeleteSpaceInsideElements: {
|
||||||
|
description: 'Preserve spaces on the beginning and end of a element',
|
||||||
|
html: '<html><body>Need<span> more </span>space<i> s </i>!<br></body></html>',
|
||||||
|
expectedLineAttribs: ['+f*0+3+1'],
|
||||||
|
expectedText: ['Need more space s !']
|
||||||
|
},
|
||||||
|
dontDeleteSpaceOutsideElements: {
|
||||||
|
description: 'Preserve spaces outside elements',
|
||||||
|
html: '<html><body>Need <span>more</span> space <i>s</i> !<br></body></html>',
|
||||||
|
expectedLineAttribs: ['+g*0+1+2'],
|
||||||
|
expectedText: ['Need more space s !']
|
||||||
|
},
|
||||||
|
dontDeleteSpaceAtEndOfElement: {
|
||||||
|
description: 'Preserve spaces at the end of an element',
|
||||||
|
html: '<html><body>Need <span>more </span>space <i>s </i>!<br></body></html>',
|
||||||
|
expectedLineAttribs: ['+g*0+2+1'],
|
||||||
|
expectedText: ['Need more space s !']
|
||||||
|
},
|
||||||
|
dontDeleteSpaceAtBeginOfElements: {
|
||||||
|
description: 'Preserve spaces at the start of an element',
|
||||||
|
html: '<html><body>Need<span> more</span> space<i> s</i> !<br></body></html>',
|
||||||
|
expectedLineAttribs: ['+f*0+2+2'],
|
||||||
|
expectedText: ['Need more space s !']
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
describe(__filename, function () {
|
describe(__filename, function () {
|
||||||
|
|
Loading…
Reference in New Issue