tests for spaces (#4594)
parent
a637920e55
commit
040057239e
|
@ -67,7 +67,163 @@ const testImports = {
|
|||
input: '<html><body><ul class="indent"><li>indent</li><li>indent</ul></body></html>',
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body><ul class="indent"><li>indent</li><li>indent</ul><br></body></html>',
|
||||
expectedText: '\tindent\n\tindent\n\n'
|
||||
}
|
||||
},
|
||||
lineWithMultipleSpaces: {
|
||||
description: 'Multiple spaces should be collapsed',
|
||||
input: '<html><body>Text with more than one space.<br></body></html>',
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body>Text with more than one space.<br><br></body></html>',
|
||||
expectedText: 'Text with more than one space.\n\n'
|
||||
},
|
||||
lineWithMultipleNonBreakingAndNormalSpaces: {
|
||||
// XXX the HTML between "than" and "one" looks strange
|
||||
description: 'non-breaking space should be preserved, but can be replaced when it',
|
||||
input: '<html><body>Text with more than one space.<br></body></html>',
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body>Text with more than one space.<br><br></body></html>',
|
||||
expectedText: 'Text with more than one space.\n\n'
|
||||
},
|
||||
multiplenbsp: {
|
||||
description: 'Multiple non-breaking space should be preserved',
|
||||
input: '<html><body> <br></body></html>',
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body> <br><br></body></html>',
|
||||
expectedText: ' \n\n'
|
||||
},
|
||||
multipleNonBreakingSpaceBetweenWords: {
|
||||
description: 'A normal space is always inserted before a word',
|
||||
input: '<html><body> word1 word2 word3<br></body></html>',
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body> word1 word2 word3<br><br></body></html>',
|
||||
expectedText: ' word1 word2 word3\n\n'
|
||||
},
|
||||
nonBreakingSpacePreceededBySpaceBetweenWords: {
|
||||
description: 'A non-breaking space preceeded by a normal space',
|
||||
input: '<html><body> word1 word2 word3<br></body></html>',
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body> word1 word2 word3<br><br></body></html>',
|
||||
expectedText: ' word1 word2 word3\n\n'
|
||||
},
|
||||
nonBreakingSpaceFollowededBySpaceBetweenWords: {
|
||||
description: 'A non-breaking space followed by a normal space',
|
||||
input: '<html><body> word1 word2 word3<br></body></html>',
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body> word1 word2 word3<br><br></body></html>',
|
||||
expectedText: ' word1 word2 word3\n\n'
|
||||
},
|
||||
spacesAfterNewline: {
|
||||
description: 'Collapse spaces that follow a newline',
|
||||
input:'<!doctype html><html><body>something<br> something<br></body></html>',
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body>something<br>something<br><br></body></html>',
|
||||
expectedText: 'something\nsomething\n\n'
|
||||
},
|
||||
spacesAfterNewlineP: {
|
||||
description: 'Collapse spaces that follow a paragraph',
|
||||
input:'<!doctype html><html><body>something<p></p> something<br></body></html>',
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body>something<br><br>something<br><br></body></html>',
|
||||
expectedText: 'something\n\nsomething\n\n'
|
||||
},
|
||||
spacesAtEndOfLine: {
|
||||
description: 'Collapse spaces that preceed/follow a newline',
|
||||
input:'<html><body>something <br> something<br></body></html>',
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body>something<br>something<br><br></body></html>',
|
||||
expectedText: 'something\nsomething\n\n'
|
||||
},
|
||||
spacesAtEndOfLineP: {
|
||||
description: 'Collapse spaces that preceed/follow a paragraph',
|
||||
input:'<html><body>something <p></p> something<br></body></html>',
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body>something<br><br>something<br><br></body></html>',
|
||||
expectedText: 'something\n\nsomething\n\n'
|
||||
},
|
||||
nonBreakingSpacesAfterNewlines: {
|
||||
description: 'Don\'t collapse non-breaking spaces that follow a newline',
|
||||
input:'<html><body>something<br> something<br></body></html>',
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body>something<br> something<br><br></body></html>',
|
||||
expectedText: 'something\n something\n\n'
|
||||
},
|
||||
nonBreakingSpacesAfterNewlinesP: {
|
||||
description: 'Don\'t collapse non-breaking spaces that follow a paragraph',
|
||||
input:'<html><body>something<p></p> something<br></body></html>',
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body>something<br><br> something<br><br></body></html>',
|
||||
expectedText: 'something\n\n something\n\n'
|
||||
},
|
||||
collapseSpacesInsideElements: {
|
||||
description: 'Preserve only one space when multiple are present',
|
||||
input: '<html><body>Need <span> more </span> space<i> s </i> !<br></body></html>',
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space<em> s </em>!<br><br></body></html>',
|
||||
expectedText: 'Need more space s !\n\n'
|
||||
},
|
||||
collapseSpacesAcrossNewlines: {
|
||||
description: 'Newlines and multiple spaces across newlines should be collapsed',
|
||||
input: `
|
||||
<html><body>Need
|
||||
<span> more </span>
|
||||
space
|
||||
<i> s </i>
|
||||
!<br></body></html>`,
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space <em>s </em>!<br><br></body></html>',
|
||||
expectedText: 'Need more space s !\n\n'
|
||||
},
|
||||
multipleNewLinesAtBeginning: {
|
||||
description: 'Multiple new lines and paragraphs at the beginning should be preserved',
|
||||
input: '<html><body><br><br><p></p><p></p>first line<br><br>second line<br></body></html>',
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body><br><br><br><br>first line<br><br>second line<br><br></body></html>',
|
||||
expectedText: '\n\n\n\nfirst line\n\nsecond line\n\n'
|
||||
},
|
||||
multiLineParagraph:{
|
||||
description: "A paragraph with multiple lines should not loose spaces when lines are combined",
|
||||
input:`<html><body>
|
||||
<p>
|
||||
а б в г ґ д е є ж з и і ї й к л м н о
|
||||
п р с т у ф х ц ч ш щ ю я ь
|
||||
</p>
|
||||
</body></html>`,
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body>а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь<br><br></body></html>',
|
||||
expectedText: 'а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь\n\n'
|
||||
},
|
||||
multiLineParagraphWithPre:{
|
||||
// XXX why is there a non-breaking space (&nbsp;) before "in"?
|
||||
description: "lines in preformatted text should be kept intact",
|
||||
input:`<html><body>
|
||||
<p>
|
||||
а б в г ґ д е є ж з и і ї й к л м н о<pre>multiple
|
||||
lines
|
||||
in
|
||||
pre
|
||||
</pre></p><p>п р с т у ф х ц ч ш щ ю я
|
||||
ь</p>
|
||||
</body></html>`,
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body>а б в г ґ д е є ж з и і ї й к л м н о<br>multiple<br> lines<br> in<br> pre<br><br>п р с т у ф х ц ч ш щ ю я ь<br><br></body></html>',
|
||||
expectedText: 'а б в г ґ д е є ж з и і ї й к л м н о\nmultiple\n lines\n in\n pre\n\nп р с т у ф х ц ч ш щ ю я ь\n\n'
|
||||
},
|
||||
preIntroducesASpace: {
|
||||
description: "pre should be on a new line not preceeded by a space",
|
||||
input:`<html><body><p>
|
||||
1
|
||||
<pre>preline
|
||||
</pre></p></body></html>`,
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body>1<br>preline<br><br><br></body></html>',
|
||||
expectedText: '1\npreline\n\n\n'
|
||||
},
|
||||
dontDeleteSpaceInsideElements: {
|
||||
description: 'Preserve spaces inside elements',
|
||||
input: '<html><body>Need<span> more </span>space<i> s </i>!<br></body></html>',
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space<em> s </em>!<br><br></body></html>',
|
||||
expectedText: 'Need more space s !\n\n'
|
||||
},
|
||||
dontDeleteSpaceOutsideElements: {
|
||||
description: 'Preserve spaces outside elements',
|
||||
input: '<html><body>Need <span>more</span> space <i>s</i> !<br></body></html>',
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space <em>s</em> !<br><br></body></html>',
|
||||
expectedText: 'Need more space s !\n\n'
|
||||
},
|
||||
dontDeleteSpaceAtEndOfElement: {
|
||||
description: 'Preserve spaces at the end of an element',
|
||||
input: '<html><body>Need <span>more </span>space <i>s </i>!<br></body></html>',
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space <em>s </em>!<br><br></body></html>',
|
||||
expectedText: 'Need more space s !\n\n'
|
||||
},
|
||||
dontDeleteSpaceAtBeginOfElements: {
|
||||
description: 'Preserve spaces at the start of an element',
|
||||
input: '<html><body>Need<span> more</span> space<i> s</i> !<br></body></html>',
|
||||
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space<em> s</em> !<br><br></body></html>',
|
||||
expectedText: 'Need more space s !\n\n'
|
||||
},
|
||||
|
||||
};
|
||||
|
||||
describe(__filename, function () {
|
||||
|
|
|
@ -1,6 +1,14 @@
|
|||
'use strict';
|
||||
|
||||
/* eslint-disable max-len */
|
||||
/*
|
||||
* While importexport tests target the `setHTML` API endpoint, which is nearly identical to what happens
|
||||
* when a user manually imports a document via the UI, the contentcollector tests here don't use rehype to process
|
||||
* the document. Rehype removes spaces and newlines where applicable, so the expected results here can
|
||||
* differ from importexport.js.
|
||||
*
|
||||
* If you add tests here, please also add them to importexport.js
|
||||
*/
|
||||
|
||||
const contentcollector = require('../../../src/static/js/contentcollector');
|
||||
const AttributePool = require('../../../src/static/js/AttributePool');
|
||||
|
@ -113,6 +121,156 @@ const tests = {
|
|||
expectedLineAttribs: ['+5'],
|
||||
expectedText: ['empty'],
|
||||
},
|
||||
lineWithMultipleSpaces: {
|
||||
description: 'Multiple spaces should be preserved',
|
||||
html: '<html><body>Text with more than one space.<br></body></html>',
|
||||
expectedLineAttribs: [ '+10' ],
|
||||
expectedText: ['Text with more than one space.']
|
||||
},
|
||||
lineWithMultipleNonBreakingAndNormalSpaces: {
|
||||
description: 'non-breaking and normal space should be preserved',
|
||||
html: '<html><body>Text with more than one space.<br></body></html>',
|
||||
expectedLineAttribs: [ '+10' ],
|
||||
expectedText: ['Text with more than one space.']
|
||||
},
|
||||
multiplenbsp: {
|
||||
description: 'Multiple nbsp should be preserved',
|
||||
html: '<html><body> <br></body></html>',
|
||||
expectedLineAttribs: [ '+2' ],
|
||||
expectedText: [' ']
|
||||
},
|
||||
multipleNonBreakingSpaceBetweenWords: {
|
||||
description: 'Multiple nbsp between words ',
|
||||
html: '<html><body> word1 word2 word3<br></body></html>',
|
||||
expectedLineAttribs: [ '+m' ],
|
||||
expectedText: [' word1 word2 word3']
|
||||
},
|
||||
nonBreakingSpacePreceededBySpaceBetweenWords: {
|
||||
description: 'A non-breaking space preceeded by a normal space',
|
||||
html: '<html><body> word1 word2 word3<br></body></html>',
|
||||
expectedLineAttribs: [ '+l' ],
|
||||
expectedText: [' word1 word2 word3']
|
||||
},
|
||||
nonBreakingSpaceFollowededBySpaceBetweenWords: {
|
||||
description: 'A non-breaking space followed by a normal space',
|
||||
html: '<html><body> word1 word2 word3<br></body></html>',
|
||||
expectedLineAttribs: [ '+l' ],
|
||||
expectedText: [' word1 word2 word3']
|
||||
},
|
||||
spacesAfterNewline: {
|
||||
description: 'Don\'t collapse spaces that follow a newline',
|
||||
html:'<!doctype html><html><body>something<br> something<br></body></html>',
|
||||
expectedLineAttribs: ['+9', '+m'],
|
||||
expectedText: ['something', ' something']
|
||||
},
|
||||
spacesAfterNewlineP: {
|
||||
description: 'Don\'t collapse spaces that follow a empty paragraph',
|
||||
html:'<!doctype html><html><body>something<p></p> something<br></body></html>',
|
||||
expectedLineAttribs: ['+9', '', '+m'],
|
||||
expectedText: ['something', '', ' something']
|
||||
},
|
||||
spacesAtEndOfLine: {
|
||||
description: 'Don\'t collapse spaces that preceed/follow a newline',
|
||||
html:'<html><body>something <br> something<br></body></html>',
|
||||
expectedLineAttribs: ['+l', '+m'],
|
||||
expectedText: ['something ', ' something']
|
||||
},
|
||||
spacesAtEndOfLineP: {
|
||||
description: 'Don\'t collapse spaces that preceed/follow a empty paragraph',
|
||||
html:'<html><body>something <p></p> something<br></body></html>',
|
||||
expectedLineAttribs: ['+l', '', '+m'],
|
||||
expectedText: ['something ', '', ' something']
|
||||
},
|
||||
nonBreakingSpacesAfterNewlines: {
|
||||
description: 'Don\'t collapse non-breaking spaces that follow a newline',
|
||||
html:'<html><body>something<br> something<br></body></html>',
|
||||
expectedLineAttribs: ['+9', '+c'],
|
||||
expectedText: ['something', ' something']
|
||||
},
|
||||
nonBreakingSpacesAfterNewlinesP: {
|
||||
description: 'Don\'t collapse non-breaking spaces that follow a paragraph',
|
||||
html:'<html><body>something<p></p> something<br></body></html>',
|
||||
expectedLineAttribs: ['+9', '', '+c'],
|
||||
expectedText: ['something', '', ' something']
|
||||
},
|
||||
preserveSpacesInsideElements: {
|
||||
description: 'Preserve all spaces when multiple are present',
|
||||
html: '<html><body>Need <span> more </span> space<i> s </i> !<br></body></html>',
|
||||
expectedLineAttribs: ['+h*0+4+2'],
|
||||
expectedText: ['Need more space s !'],
|
||||
},
|
||||
preserveSpacesAcrossNewlines: {
|
||||
description: 'Newlines and multiple spaces across newlines should be preserved',
|
||||
html: `
|
||||
<html><body>Need
|
||||
<span> more </span>
|
||||
space
|
||||
<i> s </i>
|
||||
!<br></body></html>`,
|
||||
expectedLineAttribs: [ '+19*0+4+b' ],
|
||||
expectedText: [ 'Need more space s !' ]
|
||||
},
|
||||
multipleNewLinesAtBeginning: {
|
||||
description: 'Multiple new lines at the beginning should be preserved',
|
||||
html: '<html><body><br><br><p></p><p></p>first line<br><br>second line<br></body></html>',
|
||||
expectedLineAttribs: ['', '', '', '', '+a', '', '+b'],
|
||||
expectedText: [ '', '', '', '', 'first line', '', 'second line']
|
||||
},
|
||||
multiLineParagraph:{
|
||||
description: "A paragraph with multiple lines should not loose spaces when lines are combined",
|
||||
html:`<html><body><p>
|
||||
а б в г ґ д е є ж з и і ї й к л м н о
|
||||
п р с т у ф х ц ч ш щ ю я ь</p>
|
||||
</body></html>`,
|
||||
expectedLineAttribs: [ '+1t' ],
|
||||
expectedText: ["а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь"]
|
||||
},
|
||||
multiLineParagraphWithPre:{
|
||||
description: "lines in preformatted text should be kept intact",
|
||||
html:`<html><body><p>
|
||||
а б в г ґ д е є ж з и і ї й к л м н о<pre>multiple
|
||||
lines
|
||||
in
|
||||
pre
|
||||
</pre></p><p>п р с т у ф х ц ч ш щ ю я
|
||||
ь</p>
|
||||
</body></html>`,
|
||||
expectedLineAttribs: [ '+11', '+8', '+5', '+2', '+3', '+r' ],
|
||||
expectedText: ['а б в г ґ д е є ж з и і ї й к л м н о', 'multiple', 'lines', 'in', 'pre', 'п р с т у ф х ц ч ш щ ю я ь']
|
||||
},
|
||||
preIntroducesASpace: {
|
||||
description: "pre should be on a new line not preceeded by a space",
|
||||
html:`<html><body><p>
|
||||
1
|
||||
<pre>preline
|
||||
</pre></p></body></html>`,
|
||||
expectedLineAttribs: [ '+6', '+7' ],
|
||||
expectedText: [' 1 ', 'preline']
|
||||
},
|
||||
dontDeleteSpaceInsideElements: {
|
||||
description: 'Preserve spaces on the beginning and end of a element',
|
||||
html: '<html><body>Need<span> more </span>space<i> s </i>!<br></body></html>',
|
||||
expectedLineAttribs: ['+f*0+3+1'],
|
||||
expectedText: ['Need more space s !']
|
||||
},
|
||||
dontDeleteSpaceOutsideElements: {
|
||||
description: 'Preserve spaces outside elements',
|
||||
html: '<html><body>Need <span>more</span> space <i>s</i> !<br></body></html>',
|
||||
expectedLineAttribs: ['+g*0+1+2'],
|
||||
expectedText: ['Need more space s !']
|
||||
},
|
||||
dontDeleteSpaceAtEndOfElement: {
|
||||
description: 'Preserve spaces at the end of an element',
|
||||
html: '<html><body>Need <span>more </span>space <i>s </i>!<br></body></html>',
|
||||
expectedLineAttribs: ['+g*0+2+1'],
|
||||
expectedText: ['Need more space s !']
|
||||
},
|
||||
dontDeleteSpaceAtBeginOfElements: {
|
||||
description: 'Preserve spaces at the start of an element',
|
||||
html: '<html><body>Need<span> more</span> space<i> s</i> !<br></body></html>',
|
||||
expectedLineAttribs: ['+f*0+2+2'],
|
||||
expectedText: ['Need more space s !']
|
||||
},
|
||||
};
|
||||
|
||||
describe(__filename, function () {
|
||||
|
|
Loading…
Reference in New Issue