view odfcast/convert.py @ 85:c018570c1a2d 1.2

Ignore existing Bookmarks when merging documents
author Frank Koormann <frank.koormann@intevation.de>
date Thu, 02 Apr 2015 15:45:55 +0200
parents 3492c79d83c8
children 4f45ebecb057
line wrap: on
line source
# -*- coding: utf-8 -*-

import logging
import tempfile

from flask import request, Response, json, render_template
from flask.views import MethodView

from py3o.template import Template

from PyPDF2 import PdfFileMerger
from PyPDF2.utils import PyPdfError

from werkzeug.utils import escape

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Target formats accepted by the conversion endpoint (compared in lower case).
ALLOWED_FORMATS = ["pdf", "doc", "docx", "odt"]

# Mimetypes used for the response bodies produced by this module.
PDF_MIMETYPE = "application/pdf"
JSON_MIMETYPE = "application/json"
HTML_MIMETYPE = "text/html"

# Maps a document format name to the mimetype sent back with the document.
MIMETYPES = {
    "odt": "application/vnd.oasis.opendocument.text",
    "doc": "application/msword",
    "docx": (
        "application/vnd.openxmlformats-officedocument"
        ".wordprocessingml.document"
    ),
    "pdf": PDF_MIMETYPE,
}

# Fallback mimetype for formats that have no entry in MIMETYPES.
DEFAULT_MIMETYPE = "application/octet-stream"


class ErrorResponse(Response):
    """Error response whose body is JSON or HTML, chosen per request.

    The body format is selected from the current request's Accept header
    (see ``is_wants_json``); the HTTP status is ``html_error_code``.
    """

    BAD_REQUEST_ERROR_CODE = 400

    def __init__(self, title, error_code, details,
                 html_error_code=BAD_REQUEST_ERROR_CODE):
        """Build the body for the error and initialise the response.

        :param title: short name of the error
        :param error_code: application specific error code placed in the body
        :param details: description of what went wrong
        :param html_error_code: HTTP status of the response (default 400)
        """
        body, body_mimetype = self.get_response_data(
            title, error_code, details)
        super(ErrorResponse, self).__init__(
            response=body, mimetype=body_mimetype, status=html_error_code)

    def json(self, title, error_code, details):
        """Return the error as a ``(json_string, mimetype)`` pair."""
        payload = {
            "error": title,
            "error_code": error_code,
            "details": details,
        }
        return json.dumps(payload), JSON_MIMETYPE

    def html(self, title, error_code, details):
        """Return the error as a ``(html_string, mimetype)`` pair.

        Title and details are escaped before they are put into the markup.
        """
        markup = (
            u'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">\n'
            u'<title>%(code)s %(name)s</title>\n'
            u'<h1>%(name)s</h1>\n'
            u'%(details)s\n'
        )
        values = {
            "code": error_code,
            "name": escape(title),
            "details": escape(details),
        }
        return markup % values, HTML_MIMETYPE

    def get_response_data(self, title, error_code, details):
        """Return ``(body, mimetype)`` in the format the client prefers."""
        render = self.json if self.is_wants_json() else self.html
        return render(title, error_code, details)

    def is_wants_json(self):
        """Tell whether the client prefers JSON over HTML.

        JSON is only chosen when it is the best match of the two mimetypes
        and its quality is strictly higher than that of HTML.
        """
        accepted = request.accept_mimetypes
        best = accepted.best_match([JSON_MIMETYPE, HTML_MIMETYPE])
        if best != JSON_MIMETYPE:
            return False
        return accepted[best] > accepted[HTML_MIMETYPE]


class TemplateErrorResponse(ErrorResponse):
    """Error response titled "TemplateError", sent with HTTP status 500."""

    TEMPLATE_ERROR_CODE = 100

    def __init__(self, details, error_code=TEMPLATE_ERROR_CODE):
        """Create the response.

        :param details: description of the failure
        :param error_code: application error code (default 100)
        """
        parent = super(TemplateErrorResponse, self)
        parent.__init__("TemplateError", error_code, details,
                        html_error_code=500)


class ConversionErrorResponse(ErrorResponse):
    """Error response titled "ConversionError", sent with HTTP status 500."""

    CONVERSION_ERROR_CODE = 200

    def __init__(self, details, error_code=CONVERSION_ERROR_CODE):
        """Create the response.

        :param details: description of the failure
        :param error_code: application error code (default 200)
        """
        parent = super(ConversionErrorResponse, self)
        parent.__init__("ConversionError", error_code, details,
                        html_error_code=500)


class MergeErrorResponse(ErrorResponse):
    """Error response titled "MergeError", sent with HTTP status 500."""

    MERGE_ERROR_CODE = 300

    def __init__(self, details, error_code=MERGE_ERROR_CODE):
        """Create the response.

        :param details: description of the failure
        :param error_code: application error code (default 300)
        """
        parent = super(MergeErrorResponse, self)
        parent.__init__("MergeError", error_code, details,
                        html_error_code=500)


class ConvertView(MethodView):
    """View converting an uploaded document into another format.

    GET renders the upload form.  POST takes the uploaded ``file``, optionally
    renders it as a template with the JSON ``datadict`` form value, converts
    it to the requested ``format`` and returns the resulting document.
    """

    def __init__(self, pyuno_driver_name="", hostname="localhost", port=2001):
        """Load the driver module and create its ``Convertor``.

        :param pyuno_driver_name: dotted name of the module that provides
            the ``Convertor`` class
        :param hostname: passed as first argument to ``Convertor``
        :param port: passed as second argument to ``Convertor``
        """
        driver_module = self._load_driver_module(pyuno_driver_name)
        self.convertor = driver_module.Convertor(hostname, port)

    def _load_driver_module(self, pyuno_driver_name):
        """Import and return the driver module with the given name."""
        return __import__(pyuno_driver_name, globals(), locals(),
                          ["Convertor"])

    def is_format_supported(self, fformat):
        """Return True if ``fformat`` (case-insensitive) is an allowed format.

        Empty or missing values are not supported.
        """
        return bool(fformat and fformat.lower() in ALLOWED_FORMATS)

    def post(self):
        """Convert the uploaded file and return the converted document.

        Returns an ``ErrorResponse`` with status 400 when the upload or the
        format is missing or invalid, a ``TemplateErrorResponse`` when
        rendering the template fails and a ``ConversionErrorResponse`` when
        the conversion fails.
        """
        log.debug("Converting document")
        # .get() instead of [] so that a request without the field is
        # answered with the same structured error as an empty upload.
        ffile = request.files.get('file')
        if ffile is None or not ffile.filename:
            return ErrorResponse(
                "Upload file missing", error_code=101,
                details="Please upload a file for conversion",
                html_error_code=400)

        fformat = request.form.get('format')
        if not self.is_format_supported(fformat):
            return ErrorResponse(
                "Invalid format", error_code=102,
                details="Format %s not allowed" % fformat,
                html_error_code=400)

        # The format is validated case-insensitively, so use the normalised
        # spelling for the mimetype lookup and the "odt" check below as well.
        fformat = fformat.lower()

        datadict = self.get_datadict()

        if datadict:
            log.debug("  with datadict")

        mimetype = self.get_mimetype_for_format(fformat)

        log.debug("  to %s", fformat)

        outfile = self.save_form_file(ffile)

        if datadict:
            tfile = None
            try:
                tfile = tempfile.NamedTemporaryFile()
                t = Template(outfile, tfile, ignore_undefined_variables=True)
                t.render(datadict)
                outfile.close()
                # NOTE(review): presumably rendering leaves the handle
                # positioned behind the written data; rewind so the response
                # streams the document from its beginning.
                tfile.seek(0)
                outfile = tfile
            except Exception as e:
                log.exception("Template error")
                # Release the temporary files instead of leaving them to
                # the garbage collector.
                if tfile is not None:
                    tfile.close()
                outfile.close()
                return TemplateErrorResponse(details=str(e))

        if fformat != "odt":
            try:
                outfile = self.convert(outfile, fformat)
            except Exception as e:
                log.exception("Conversion error")
                # convert() failed, so outfile is still the input file.
                outfile.close()
                return ConversionErrorResponse(details=str(e))

        log.debug("Document converted")
        return Response(outfile, mimetype=mimetype)

    def get(self):
        """Render the conversion form."""
        return render_template("convert.html")

    def save_form_file(self, infile):
        """Copy the upload into a named temporary file and return it.

        The upload is closed and the returned file is rewound.
        """
        outfile = tempfile.NamedTemporaryFile()
        infile.save(outfile)
        infile.close()
        outfile.seek(0)
        return outfile

    def convert(self, infile, fformat):
        """Convert ``infile`` to ``fformat`` and return the result file.

        The convertor is handed the file names of ``infile`` and of a new
        named temporary file.  ``infile`` is closed on success; on failure
        the new temporary file is closed and the exception is re-raised.
        """
        outfile = tempfile.NamedTemporaryFile()

        try:
            self.convertor.convert(infile.name, outfile.name, fformat)
        except Exception:
            outfile.close()
            raise

        infile.close()
        return outfile

    def get_mimetype_for_format(self, fformat):
        """Return the mimetype for ``fformat`` or the default mimetype."""
        return MIMETYPES.get(fformat, DEFAULT_MIMETYPE)

    def get_datadict(self):
        """Return the decoded ``datadict`` form value, or None if not given."""
        raw = request.form.get('datadict')
        if not raw:
            return None
        return json.loads(raw)


class MergeView(MethodView):
    """View merging several uploaded PDF documents into one.

    GET renders the upload form.  POST appends all uploaded files, ordered
    by their form field names, and returns the merged PDF.
    """

    def get(self):
        """Render the merge form."""
        return render_template("merge.html")

    def post(self):
        """Merge the uploaded files and return the resulting PDF.

        A file that cannot be appended aborts the merge with a
        ``MergeErrorResponse`` unless ``ignore_file_errors`` is set, in which
        case the file is skipped.
        """
        log.debug("Merging PDF documents")

        merger = PdfFileMerger()

        ffiles = []

        # allow files to have arbitrary form names
        # order files by their form names
        # lists() is available on both Python 2 and Python 3 versions of
        # werkzeug's MultiDict, iterlists() only on Python 2.
        for _name, uploads in sorted(request.files.lists(),
                                     key=lambda item: item[0].lower()):
            ffiles.extend(uploads)

        for ffile in ffiles:
            try:
                # Existing bookmarks of the documents are not imported.
                merger.append(ffile, import_bookmarks=False)
            except Exception as e:
                log.exception("Error merging file %s" % ffile)
                if self.is_ignore_file_errors():
                    continue
                # Release what has been collected so far before bailing out.
                merger.close()
                return MergeErrorResponse(details=str(e))

        outfile = tempfile.NamedTemporaryFile()

        try:
            merger.write(outfile)
            merger.close()
            outfile.seek(0)
        except PyPdfError as e:
            log.exception("Merge error")
            # The temporary file will not be sent, so drop it right away.
            outfile.close()
            return MergeErrorResponse(details=str(e))

        log.debug("PDF documents merged")
        return Response(outfile, mimetype=PDF_MIMETYPE)

    def is_ignore_file_errors(self):
        """Return a true value if ``ignore_file_errors`` was sent.

        The flag is looked up in the query string first, then in the form
        data; any non-empty value counts as set.
        """
        return request.args.get("ignore_file_errors", False) or \
            request.form.get("ignore_file_errors", False)

class CheckView(MethodView):
    """View checking whether an uploaded PDF document can be merged.

    GET renders the upload form.  POST tries to append the uploaded file to
    a PDF merger and reports whether that worked.
    """

    def get(self):
        """Render the check form."""
        return render_template("check.html")

    def post(self):
        """Check the uploaded file and answer "Okay." if it can be appended.

        Returns an ``ErrorResponse`` with status 400 when no file was
        uploaded and a ``MergeErrorResponse`` when appending fails.
        """
        log.debug("Checking a PDF document's readiness for merging")

        # .get() instead of [] so that a request without the field is
        # answered with the same structured error as an empty upload.
        ffile = request.files.get('file')
        if ffile is None or not ffile.filename:
            return ErrorResponse(
                "Upload file missing", error_code=101,
                details="Please upload a file for conversion",
                html_error_code=400)

        merger = PdfFileMerger()

        try:
            # Existing bookmarks of the document are not imported.
            merger.append(ffile, import_bookmarks=False)
        except Exception as e:
            log.exception("Error merging file %s" % ffile)
            return MergeErrorResponse(details=str(e))
        finally:
            # Close the merger on the error path as well.
            merger.close()

        log.debug("PDF document %s checked." % ffile)
        return Response("Okay.")


class TemplateView(MethodView):
    """View that renders a single, configurable template on GET."""

    # Template rendered when no name is passed to the constructor.
    template_name = ""

    def __init__(self, template_name=None):
        """Optionally override the class level ``template_name``.

        :param template_name: name of the template to render; empty values
            keep the class default
        """
        if not template_name:
            return
        self.template_name = template_name

    def get_template_name(self):
        """Return the name of the template to render."""
        return self.template_name

    def get(self):
        """Render the configured template."""
        name = self.get_template_name()
        return render_template(name)
This site is hosted by Intevation GmbH (Datenschutzerklärung und Impressum | Privacy Policy and Imprint)