Normalize inserted text using UNorm

For some reason, the client was sending the server a Unicode-normalized version of inserted strings. For example, if we inserted the decomposed string 'ä' ('a' followed by U+0308 COMBINING DIAERESIS, i.e. the UTF-8 bytes \x61\xCC\x88) into the document, what would be sent to the server would be the precomposed 'ä' (U+00E4, i.e. the UTF-8 bytes \xC3\xA4). This wouldn't be a problem on its own, but JavaScript reports that the length of the first string is 2, while the length of the second one is 1. So the command that was being sent to the server was 'Z:1>2*0+1$ä', when it should really be 'Z:1>1*0+1$ä'. When the `checkRep` method checks the length of the inserted string, it finds an inconsistency and disconnects the client. We now normalize the inserted string (to NFC, using UNorm) before the command is generated, so the length is always correct.
2012-08-13 17:09:02 +01:00 · 2012-08-13 17:09:02 +01:00 · 699aa299f8
parent 48f1545777
commit 699aa299f8
1 changed files with 2 additions and 1 deletions
--- a/src/static/js/contentcollector.js
+++ b/src/static/js/contentcollector.js
@@ -25,13 +25,14 @@

 var _MAX_LIST_LEVEL = 8;

+var UNorm = require('./unorm');
 var Changeset = require('./Changeset');
 var hooks = require('./pluginfw/hooks');
 var _ = require('./underscore');

 function sanitizeUnicode(s)
 {
-  return s.replace(/[\uffff\ufffe\ufeff\ufdd0-\ufdef\ud800-\udfff]/g, '?');
+  return UNorm.nfc(s).replace(/[\uffff\ufffe\ufeff\ufdd0-\ufdef\ud800-\udfff]/g, '?');
 }

 function makeContentCollector(collectStyles, browser, apool, domInterface, className2Author)