From 2bfc3026d2e549064bd13e496d407169b2755df5 Mon Sep 17 00:00:00 2001 From: Simon Gaeremynck Date: Tue, 20 Oct 2015 19:46:08 +0100 Subject: [PATCH 1/2] Allow LibreOffice to be used when exporting a pad This commit adds support for LibreOffice when exporting a pad to doc, pdf, .. This commit also cleans up some export logic when exporting to txt --- settings.json.template | 6 +- src/node/handler/ExportHandler.js | 80 +++++--------------------- src/node/utils/ExportHtml.js | 31 ++++++----- src/node/utils/LibreOffice.js | 93 +++++++++++++++++++++++++++++++ src/node/utils/Settings.js | 5 ++ 5 files changed, 133 insertions(+), 82 deletions(-) create mode 100644 src/node/utils/LibreOffice.js diff --git a/settings.json.template b/settings.json.template index bfd0c7e66..321d21d6b 100644 --- a/settings.json.template +++ b/settings.json.template @@ -86,10 +86,14 @@ may cause problems during deployment. Set to 0 to disable caching */ "maxAge" : 21600, // 60 * 60 * 6 = 6 hours - /* This is the path to the Abiword executable. Setting it to null, disables abiword. + /* This is the absolute path to the Abiword executable. Setting it to null, disables abiword. Abiword is needed to advanced import/export features of pads*/ "abiword" : null, + /* This is the absolute path to the soffice executable. Setting it to null, disables LibreOffice exporting. + LibreOffice can be used in lieu of Abiword to export pads */ + "soffice" : null, + /* This is the path to the Tidy executable. Setting it to null, disables Tidy. Tidy is used to improve the quality of exported pads*/ "tidyHtml" : null, diff --git a/src/node/handler/ExportHandler.js b/src/node/handler/ExportHandler.js index f861c82eb..0a8089775 100644 --- a/src/node/handler/ExportHandler.js +++ b/src/node/handler/ExportHandler.js @@ -30,9 +30,15 @@ var os = require('os'); var hooks = require("ep_etherpad-lite/static/js/pluginfw/hooks"); var TidyHtml = require('../utils/TidyHtml'); +var convertor = null; + //load abiword only if its enabled if(settings.abiword != null) - var abiword = require("../utils/Abiword"); + convertor = require("../utils/Abiword"); + +// Use LibreOffice if an executable has been defined in the settings +if(settings.soffice != null) + convertor = require("../utils/LibreOffice"); var tempDirectory = "/tmp"; @@ -70,71 +76,11 @@ exports.doExport = function(req, res, padId, type) } else if(type == "txt") { - var txt; - var randNum; - var srcFile, destFile; - - async.series([ - //render the txt document - function(callback) - { - exporttxt.getPadTXTDocument(padId, req.params.rev, false, function(err, _txt) - { - if(ERR(err, callback)) return; - txt = _txt; - callback(); - }); - }, - //decide what to do with the txt export - function(callback) - { - //if this is a txt export, we can send this from here directly - res.send(txt); - callback("stop"); - }, - //send the convert job to abiword - function(callback) - { - //ensure html can be collected by the garbage collector - txt = null; - - destFile = tempDirectory + "/etherpad_export_" + randNum + "." + type; - abiword.convertFile(srcFile, destFile, type, callback); - }, - //send the file - function(callback) - { - res.sendFile(destFile, null, callback); - }, - //clean up temporary files - function(callback) - { - async.parallel([ - function(callback) - { - fs.unlink(srcFile, callback); - }, - function(callback) - { - //100ms delay to accomidate for slow windows fs - if(os.type().indexOf("Windows") > -1) - { - setTimeout(function() - { - fs.unlink(destFile, callback); - }, 100); - } - else - { - fs.unlink(destFile, callback); - } - } - ], callback); - } - ], function(err) + exporttxt.getPadTXTDocument(padId, req.params.rev, false, function(err, txt) { - if(err && err != "stop") ERR(err); - }) + if(ERR(err)) return; + res.send(txt); + }); } else { @@ -183,11 +129,11 @@ exports.doExport = function(req, res, padId, type) TidyHtml.tidy(srcFile, callback); }, - //send the convert job to abiword + //send the convert job to the convertor (abiword, libreoffice, ..) function(callback) { destFile = tempDirectory + "/etherpad_export_" + randNum + "." + type; - abiword.convertFile(srcFile, destFile, type, callback); + convertor.convertFile(srcFile, destFile, type, callback); }, //send the file function(callback) diff --git a/src/node/utils/ExportHtml.js b/src/node/utils/ExportHtml.js index 53469c9b9..fef2508c5 100644 --- a/src/node/utils/ExportHtml.js +++ b/src/node/utils/ExportHtml.js @@ -123,8 +123,8 @@ function getHTMLFromAtext(pad, atext, authorColors) var newLength = props.push(propName); anumMap[a] = newLength -1; - css+=".removed {text-decoration: line-through; " + - "-ms-filter:'progid:DXImageTransform.Microsoft.Alpha(Opacity=80)'; "+ + css+=".removed {text-decoration: line-through; " + + "-ms-filter:'progid:DXImageTransform.Microsoft.Alpha(Opacity=80)'; "+ "filter: alpha(opacity=80); "+ "opacity: 0.8; "+ "}\n"; @@ -287,7 +287,7 @@ function getHTMLFromAtext(pad, atext, authorColors) var s = taker.take(chars); - //removes the characters with the code 12. Don't know where they come + //removes the characters with the code 12. Don't know where they come //from but they break the abiword parser and are completly useless s = s.replace(String.fromCharCode(12), ""); @@ -401,7 +401,7 @@ function getHTMLFromAtext(pad, atext, authorColors) pieces.push('

'); } }*/ - else//means we are getting closer to the lowest level of indentation or are at the same level + else//means we are getting closer to the lowest level of indentation or are at the same level { var toClose = lists.length > 0 ? listLevels[listLevels.length - 2] - line.listLevel : 0 if( toClose > 0){ @@ -455,7 +455,7 @@ function getHTMLFromAtext(pad, atext, authorColors) } } } - + for (var k = lists.length - 1; k >= 0; k--) { if(lists[k][1] == "number") @@ -484,14 +484,17 @@ exports.getPadHTMLDocument = function (padId, revNum, noDocType, callback) stylesForExportCSS += css; }); // Core inclusion of head etc. - var head = - (noDocType ? '' : '\n') + - '\n' + (noDocType ? '' : '\n' + + var head = + (noDocType ? '' : '\n') + + '\n' + (noDocType ? '' : '\n' + '' + Security.escapeHTML(padId) + '\n' + - '\n' + - '\n' + '\n') + + stylesForExportCSS + + '\n' + '\n') + ''; var foot = '\n\n'; diff --git a/src/node/utils/LibreOffice.js b/src/node/utils/LibreOffice.js new file mode 100644 index 000000000..415772456 --- /dev/null +++ b/src/node/utils/LibreOffice.js @@ -0,0 +1,93 @@ +/** + * Controls the communication with LibreOffice + */ + +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS-IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +var async = require("async"); +var fs = require("fs"); +var os = require("os"); +var path = require("path"); +var settings = require("./Settings"); +var spawn = require("child_process").spawn; + +// Conversion tasks will be queued up, so we don't overload the system +var queue = async.queue(doConvertTask, 1); + +/** + * Convert a file from one type to another + * + * @param {String} srcFile The path on disk to convert + * @param {String} destFile The path on disk where the converted file should be stored + * @param {String} type The type to convert into + * @param {Function} callback Standard callback function + */ +exports.convertFile = function(srcFile, destFile, type, callback) { + queue.push({"srcFile": srcFile, "destFile": destFile, "type": type, "callback": callback}); +}; + +function doConvertTask(task, callback) { + var tmpDir = os.tmpdir(); + + async.series([ + // Generate a PDF file with LibreOffice + function(callback) { + var soffice = spawn(settings.soffice, [ + '--headless', + '--invisible', + '--nologo', + '--nolockcheck', + '--convert-to', task.type, + task.srcFile, + '--outdir', tmpDir + ]); + + var stdoutBuffer = ''; + + // Delegate the processing of stdout to another function + soffice.stdout.on('data', function(data) { + stdoutBuffer += data.toString(); + }); + + // Append error messages to the buffer + soffice.stderr.on('data', function(data) { + stdoutBuffer += data.toString(); + }); + + // Throw an exception if libreoffice failed + soffice.on('exit', function(code) { + if (code != 0) { + return callback("LibreOffice died with exit code " + code + " and message: " + stdoutBuffer); + } + + callback(); + }) + }, + + // Move the PDF file to the correct place + function(callback) { + var filename = path.basename(task.srcFile); + var pdfFilename = filename.substr(0, filename.lastIndexOf('.')) + '.' + task.type; + var pdfPath = path.join(tmpDir, pdfFilename); + fs.rename(pdfPath, task.destFile, callback); + } + ], function(err) { + // Invoke the callback for the local queue + callback(); + + // Invoke the callback for the task + task.callback(err); + }); +} diff --git a/src/node/utils/Settings.js b/src/node/utils/Settings.js index 2c2f90bf8..d03e2a6cd 100644 --- a/src/node/utils/Settings.js +++ b/src/node/utils/Settings.js @@ -152,6 +152,11 @@ exports.minify = true; */ exports.abiword = null; +/** + * The path of the libreoffice executable + */ +exports.soffice = null; + /** * The path of the tidy executable */ From 7289e8907028520fcc16d31b67827d44054145e6 Mon Sep 17 00:00:00 2001 From: Simon Gaeremynck Date: Thu, 22 Oct 2015 16:09:44 +0100 Subject: [PATCH 2/2] Use `Etherpad` rather than `Etherpad Lite` when exporting HTML --- src/node/utils/ExportHtml.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/node/utils/ExportHtml.js b/src/node/utils/ExportHtml.js index fef2508c5..ffc7bc58c 100644 --- a/src/node/utils/ExportHtml.js +++ b/src/node/utils/ExportHtml.js @@ -488,9 +488,9 @@ exports.getPadHTMLDocument = function (padId, revNum, noDocType, callback) (noDocType ? '' : '\n') + '\n' + (noDocType ? '' : '\n' + '' + Security.escapeHTML(padId) + '\n' + - '\n' + - '\n' + - '\n' + + '\n' + + '\n' + + '\n' + '\n' + '