view src/converter.cpp @ 70:3438f5cce8cc

Force text codec for html to use UTF-8
author Andre Heinecke <andre.heinecke@intevation.de>
date Thu, 29 Sep 2016 14:41:48 +0200
parents dab9c19252bd
children 7e195eacb0da
line wrap: on
line source
/* Copyright (C) 2016 by ETH Zürich
 * Software engineering by Intevation GmbH
 *
 * This file is Free Software under the GNU GPL (v>=2)
 * and comes with ABSOLUTELY NO WARRANTY!
 * See LICENSE.txt for details.
 */

#include "converter.h"
#include <QDebug>
#include <QRegularExpression>
#include <QRegularExpressionMatch>
#include <QTextDocument>
#include <QPrinter>
#include <QImage>
#include <QPainter>

#include "xlsxdocument.h"
#include "xlsxconditionalformatting.h"

#include "constants.h"

QTXLSX_USE_NAMESPACE

/* Set up a converter thread.
 *
 * input:   name of the file to read; when empty, run() reads standard input.
 * outputs: names of the files to write; when empty, run() writes to
 *          standard output.
 * title:   title for the generated documents; when empty,
 *          convertToXSLX() falls back to DEFAULT_TITLE.
 *
 * Besides storing the arguments this prepares the spreadsheet cell formats
 * and the HTML row templates that mirror them. Templates containing %1 are
 * filled in through QString::arg() by convertToXSLX().
 */
Converter::Converter(const QString &input, const QStringList &outputs,
                     const QString &title):
    QThread(Q_NULLPTR),
    mInput(input),
    mOutputs(outputs),
    mTitle(title)
{
    /* Spreadsheet formats. */
    mTitleFmt.setFontUnderline(Format::FontUnderlineSingle);
    mTitleFmt.setFontSize(18);
    mTitleFmt.setFontName("Calibri");
    mTitleFmt.setFontBold(true);
    mTitleFmt.setVerticalAlignment(Format::AlignTop);


    mQuestionFmt.setFontSize(11);
    mQuestionFmt.setFontName("Calibri");
    mQuestionFmt.setFontBold(true);
    mQuestionFmt.setTopBorderStyle(Format::BorderThin);
    mQuestionFmt.setBottomBorderStyle(Format::BorderThin);
    /* "setTextWarp" (sic) is the name of the setter as used throughout this file. */
    mQuestionFmt.setTextWarp(true);

    mAnswerChoiceFmt.setFontSize(11);
    mAnswerChoiceFmt.setFontName("Calibri");
    mAnswerChoiceFmt.setHorizontalAlignment(Format::AlignLeft);
    mAnswerChoiceFmt.setTextWarp(true);

    /* The remaining formats are derived from the ones above and only
       override what differs. */
    mChoiceTextFmt = mAnswerChoiceFmt;
    mChoiceTextFmt.setVerticalAlignment(Format::AlignVCenter);
    mChoiceTextFmt.setTextWarp(true);

    mChoiceVotesFmt = mChoiceTextFmt;
    mChoiceVotesFmt.setFontSize(10);

    mFreeTextFmt = mQuestionFmt;
    mFreeTextFmt.setFontBold(false);

    mAnswerTextFmt = mQuestionFmt;
    mAnswerTextFmt.setVerticalAlignment(Format::AlignVCenter);
    mAnswerTextFmt.setHorizontalAlignment(Format::AlignLeft);

    /* HTML counterparts of the formats above. */
    mTitleStyle = QStringLiteral("<tr><td colspan='3' style='vertical-align: top;"
                                 "font-weight: bold; text-decoration:underline; font-size: 18pt;'>"
                                 "%1</td></tr><tr/>");
    mQuestionStyle = QStringLiteral("<tr><td colspan='3' style='font-size: 11pt;font-weight: bold;'"
                                    "><hr/>%1<hr/></td></tr>");
    mAnswerChoiceStyle= QStringLiteral("<tr><td colspan='3' style='text-align: left; font-size: 11pt;'>Answer</td></tr>");
    /* Opens a row; the bar cell and mChoiceVotesStyle complete and close it. */
    mChoiceTextStyle= QStringLiteral("<tr><td align='right' style='vertical-align: middle; font-size:11pt;'>%1</td>");
    /* Fixed: the opening <td> was never closed with '>' and was followed by
       a stray </td>; "vertical-algin: center" was a misspelled property with
       an invalid value. */
    mChoiceVotesStyle = QStringLiteral("<td style='text-align: left;vertical-align: middle;"
                                       "font-size:10pt;'>%1</td></tr>");
    /* Fixed: "vertical-algin" typo. */
    mAnswerTextStyle = QStringLiteral("<tr><td colspan='3' style='font-weight: bold;vertical-align: middle;"
                                      "font-size:11pt;'>Answer<hr/></td></tr>");
    /* Fixed: the font size was a dangling fragment outside of any style attribute. */
    mFreeTextStyle = QStringLiteral("<tr><td colspan='3' style='font-size:11pt;'>%1<hr/></td></tr>");
    mEmptyRow = QStringLiteral("<tr style='height: %1px'/>").arg(CHOICE_ROW_HEIGHT);
}

/* Thread entry point: open the input and all outputs, then convert.
 *
 * Reads from mInput, or from standard input when mInput is empty, and
 * writes to every file in mOutputs, or to standard output when mOutputs is
 * empty. Failures are appended to mErrors; if the input or any output
 * cannot be opened, nothing is converted.
 *
 * The output QFile objects are owned by this function and are destroyed
 * before it returns, on success as well as on failure.
 */
void Converter::run()
{
    QFile infile;

    if (mInput.isEmpty()) {
        if (!infile.open(stdin, QIODevice::ReadOnly)) {
            mErrors << tr("Failed to open standard input and no input file provided.");
            return;
        }
    } else {
        infile.setFileName(mInput);
        if (!infile.open(QIODevice::ReadOnly)) {
            mErrors << tr("Failed to open %1 for reading.").arg(mInput);
            return;
        }
    }
    QTextStream instream(&infile);

    QList<QFile*> outfiles;
    bool allOpened = true;

    if (mOutputs.isEmpty()) {
        QFile *outfile = new QFile();
        /* Register the file before opening it so the cleanup below also
           covers the failure case. */
        outfiles << outfile;
        if (!outfile->open(stdout, QIODevice::WriteOnly)) {
            mErrors << tr("Failed to open standard output and no output file provided.");
            allOpened = false;
        }
    }
    foreach (const QString &fileName, mOutputs) {
        QFile *outfile = new QFile();
        outfile->setFileName(fileName);
        outfiles << outfile;
        if (!outfile->open(QIODevice::WriteOnly)) {
            mErrors << tr("Failed to open %1 for writing.").arg(fileName);
            allOpened = false;
            break;
        }
    }

    if (allOpened) {
        convertToXSLX(instream, outfiles);
    }

    /* Single cleanup path: previously every QFile allocated above was
       leaked, both on the early error returns and after the conversion. */
    foreach (QFile *outfile, outfiles) {
        delete outfile;
    }
}

/* Emit one table cell showing a horizontal bar for a percentage value.
 *
 * html:    stream the <td> element referencing the bar image is appended to.
 * percent: share of the bar width that is painted in BAR_COLOR; the rest
 *          is painted white.
 * doc:     document the image is registered with under the name
 *          internal://bar<N>.png, where <N> is percent truncated to int.
 */
static void makeBar(QTextStream &html, double percent, QTextDocument &doc)
{
    const int truncated = static_cast<int>(percent);

    QImage bar(QSize(IMAGE_WIDTH, 25), QImage::Format_RGB32);
    QPainter brush(&bar);
    QRect area = bar.rect();

    if (percent != 0) {
        /* Shrink the rectangle to the filled share, paint it, then move
           its left edge to where the white remainder begins. */
        area.setRight(area.right() / (100. / percent));
        brush.fillRect(area, QColor(BAR_COLOR));
        area.setLeft(area.right());
    }
    qDebug() << "Filled " << area << " with color";

    area.setRight(IMAGE_WIDTH);
    brush.fillRect(area, Qt::white);
    qDebug() << "Filled " << area << " with white";

    const QUrl resource(QStringLiteral("internal://bar%1.png").arg(truncated));
    doc.addResource(QTextDocument::ImageResource, resource, QVariant(bar));

    html << QStringLiteral("<td style='vertical-align: middle'><img src=\"internal://bar%1.png\"/></td>").arg(truncated);
}

/* Replace, in place, every match of exp in str by the match's first
 * capture group.
 *
 * The search restarts from the beginning of str after each replacement,
 * so the loop only ends once exp no longer matches anywhere.
 * NOTE(review): this presumably relies on the replacement never matching
 * exp again - confirm against the patterns passed in.
 */
static void unescapeRegex(QString &str, const QRegularExpression &exp)
{
    for (QRegularExpressionMatch hit = exp.match(str);
         hit.hasMatch();
         hit = exp.match(str)) {
        str.replace(hit.capturedStart(), hit.capturedLength(), hit.captured(1));
    }
}

/* Strip the IMAGE_PATTERN and LATEX_PATTERN wrappers from str in place,
 * in that order, keeping only what each pattern's first group captured.
 */
static void unescapeString(QString &str)
{
    /* Compiled once on first use and shared by all later calls. */
    static const QRegularExpression wrappers[] = {
        QRegularExpression(IMAGE_PATTERN),
        QRegularExpression(LATEX_PATTERN)
    };
    const int wrapperCount = sizeof(wrappers) / sizeof(wrappers[0]);

    for (int i = 0; i < wrapperCount; ++i) {
        unescapeRegex(str, wrappers[i]);
    }
}

/* Parse the questionnaire text read from instream and write the result to
 * every device in outputs.
 *
 * The input is scanned for questions (FIRST_QUESTION_PATTERN for the first
 * one, QUESTION_PATTERN afterwards). Depending on the answer line that
 * follows a question it is handled as
 *   - a choice question (CHOICE_IDENTIFIER): one row per choice with a
 *     percentage bar and a vote count, or
 *   - a free text question (TEXT_IDENTIFIER): one merged row per answer.
 * A spreadsheet and an HTML document are built in parallel.
 *
 * The output format is chosen per device by its lower-cased file name:
 * "*.html" receives the HTML text, "*.pdf" is printed from the HTML through
 * QTextDocument, everything else receives the spreadsheet. Each device is
 * closed afterwards; the caller keeps ownership of the QFile objects.
 *
 * Problems are appended to mErrors. If no question is found the (nearly
 * empty) documents are still written.
 */
void Converter::convertToXSLX(QTextStream& instream, QList<QFile *>outputs)
{
    Document xlsx;
    QTextDocument doc;
    QString htmlString;
    QTextStream html (&htmlString);
    html.setCodec("UTF-8");

    ConditionalFormatting bars;

    bars.addDataBarRule(QColor(0xFF, 0x99, 0x33), ConditionalFormatting::VOT_Num,
                        "0", ConditionalFormatting::VOT_Num, "100", false);

    const double colWidth[] = COLUMN_WIDTHS;
    double sum = 0;

    for (int i = 1; i <= COLUMN_CNT; i++) {
        xlsx.setColumnWidth(i, colWidth[i-1]);
        sum += colWidth[i-1];
    }

    /* For the merged cell wordwrap trick: column 26 (Z) is hidden and about
       as wide as all visible columns together; free text answers are
       mirrored into it further down. */
    xlsx.setColumnWidth(26, sum + 1);
    xlsx.setColumnHidden(26, true);

    int row = 1;
    /* Fixed: the table width was written as width:"100%", which is not a
       valid attribute. */
    html << "<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=UTF-8\"/>"
            "</head><body><table border=\"0\" width=\"100%\">";
    html << QStringLiteral("<tr><th width=\"%1%\"></th>").arg(HTML_COL1_PERCENT);
    html << QStringLiteral("<th width=\"%1%\"></th>").arg(HTML_COL2_PERCENT);
    html << QStringLiteral("<th width=\"%1%\"></th>").arg(HTML_COL3_PERCENT);

    const QString title = mTitle.isEmpty() ? DEFAULT_TITLE : mTitle;
    // Set the title of the questionnaire
    xlsx.write(row++, 1, title, mTitleFmt);
    html << mTitleStyle.arg(title.toHtmlEscaped());
    xlsx.mergeCells("A1:C1");
    xlsx.setRowHeight(1, TITLE_ROW_HEIGHT);

    QString input = instream.readAll();

    QRegularExpression questionEx(QUESTION_PATTERN);
    QRegularExpression choiceEx(CHOICE_PATTERN);
    QRegularExpression choiceAltEx(CHOICE_UNFILLED_PATTERN);
    QRegularExpression freetxtEx(FREETXT_PATTERN);
    QRegularExpression firstQuestionEx(FIRST_QUESTION_PATTERN);

    /* Normalize line endings and drop "#NAME?" lines before matching. */
    input.replace("\r\n", "\n");
    input.replace("\n\r", "\n");
    input.replace("#NAME?\n", "");

    QRegularExpressionMatch match = firstQuestionEx.match(input);
    bool foundSomething = false;
    /* cursor is the offset in input up to which everything has been consumed. */
    int cursor = match.capturedEnd();
    while (match.hasMatch() && cursor != -1) {
        /* We've matched a question pattern. With the answer
           line */
        /* Fixed: this used to read "!match.lastCapturedIndex() == 2", which
           compares a bool with 2 and therefore never triggered. Groups 1
           (question) and 2 (answer line) are what is read below, so require
           that both took part in the match. */
        if (match.lastCapturedIndex() < 2) {
            /* Should not happen without misconfiguration. */
            mErrors << "Internal parser error.";
            return;
        }
        foundSomething = true;
        QString question = match.captured(1).trimmed();
        unescapeString(question);
        const QString answerLine = match.captured(2).trimmed();
        /* Columns 2 and 3 get a blank so the question format is applied
           across the whole row. */
        xlsx.write(row, 2, QString(" "), mQuestionFmt);
        xlsx.write(row, 3, QString(" "), mQuestionFmt);
        xlsx.write(row++, 1, question, mQuestionFmt);
        html << mQuestionStyle.arg(question.toHtmlEscaped());

        if (answerLine == QStringLiteral(CHOICE_IDENTIFIER)) {
            xlsx.setRowHeight(row, CHOICE_ROW_HEIGHT);
            xlsx.write(row++, 1, "Answer", mAnswerChoiceFmt);
            html << mAnswerChoiceStyle;
            int firstChoiceRow = row;
            int lastChoiceRow = row;
            /* Choices with votes and unfilled choices may be interleaved;
               the two loops below alternate through this label until
               neither of them consumes anything more. */
repeat:
            QRegularExpressionMatch choiceMatch = choiceEx.match(input, cursor);
            while (choiceMatch.hasMatch() && choiceMatch.capturedStart() == cursor + 1) {
                /* We use the cursor here to keep track of the state. Only if an answer
                   follows immediately behind the last answer we treat it as valid as
                   otherwise we can't figure out when the next question begins. */
                cursor = choiceMatch.capturedEnd();

                /* Write the values */
                QString choiceName = choiceMatch.captured(1).trimmed();
                if (choiceName.startsWith("=")) {
                    /* Prefix a blank, presumably so the spreadsheet does
                       not treat the text as a formula. */
                    choiceName = " " + choiceName;
                }
                unescapeString(choiceName);
                xlsx.write(row, 1, choiceName, mChoiceTextFmt);
                html << mChoiceTextStyle.arg(choiceName.toHtmlEscaped());
                qDebug() << "Captured for choice: " << choiceMatch.captured(0);
                bool ok;
                QString percentStr = choiceMatch.captured("percent");
                double percent;
                if (percentStr.isEmpty()) {
                    percent = 0;
                    ok = true;
                } else {
                    percent = percentStr.toDouble(&ok);
                }
                if (!ok) {
                    mErrors << "Unparsable number in string: " + choiceMatch.captured();
                    percent = 0;
                    percentStr = QString();
                    /* PercentStr was not a number. */
                }
                makeBar(html, percent, doc);
                /* A zero percentage is written as an empty cell. */
                xlsx.write(row, 2, percent == 0 ? QVariant() : percent);
                const QString numStr = choiceMatch.captured("num");
                const QString numVotesString = QString("%1% | %2 Number of votes").
                           arg(percentStr.isEmpty() ? QStringLiteral("0") : percentStr).
                           arg(numStr.isEmpty() ? QStringLiteral("0") : numStr);
                html << mChoiceVotesStyle.arg(numVotesString.toHtmlEscaped());
                xlsx.write(row, 3, numVotesString, mChoiceVotesFmt);
                xlsx.setRowHeight(row, CHOICE_ROW_HEIGHT);
                /* As long as we can match a choice which is either before the next question
                   or before the end of the document */
                choiceMatch = choiceEx.match(input, cursor);
                row++;
                lastChoiceRow++;
            }
            choiceMatch = choiceAltEx.match(input, cursor);
            bool additionalFound = false;
            while (choiceMatch.hasMatch() && choiceMatch.capturedStart() <= cursor + 1) {
                additionalFound = true;
                QString choice = choiceMatch.captured(1);
                cursor = choiceMatch.capturedEnd();
                /* Alternative answer that is just a list of strings */
                unescapeString(choice);
                qDebug() << "Captured unfilled choice: " << choice;
                /* NOTE(review): the choice text only goes into the HTML
                   here; unlike in the loop above nothing is written to
                   column 1 of the spreadsheet - confirm this is intended. */
                html << mChoiceTextStyle.arg(choice.toHtmlEscaped());
                makeBar(html, 0, doc);
                xlsx.write(row, 2, QVariant());
                const QString numVotesString = QStringLiteral("Keine eingegangenen Antworten");
                html << mChoiceVotesStyle.arg(numVotesString.toHtmlEscaped());
                xlsx.write(row, 3, numVotesString, mChoiceVotesFmt);
                xlsx.setRowHeight(row, CHOICE_ROW_HEIGHT);
                row++;
                lastChoiceRow++;
                choiceMatch = choiceAltEx.match(input, cursor);
                QRegularExpressionMatch realMatch = choiceEx.match(input, cursor);
                if (choiceMatch.hasMatch() && choiceMatch.capturedStart() == realMatch.capturedStart()) {
                    /* We have a real match so back to the other pattern. */
                    break;
                }
            }
            if (additionalFound) {
                goto repeat;
            }
            /* Register the percentage cells of this question for the data
               bar rule created at the top. */
            bars.addRange(QString("B%1:B%2").arg(firstChoiceRow).arg(lastChoiceRow));
//            xlsx.groupRows(firstChoiceRow - 2, lastChoiceRow - 1, false);
        } else if (answerLine == QStringLiteral(TEXT_IDENTIFIER)) {
            QRegularExpressionMatch textMatch = freetxtEx.match(input, cursor);
            xlsx.setRowHeight(row, CHOICE_ROW_HEIGHT);
            xlsx.write(row++, 1, "Answer", mAnswerTextFmt);
            html << mAnswerTextStyle;

            /* To handle the workaround for quotes in answers we store
             * the number of rows and only afterwards create the html rows. */
            int firstFreeRow = row;
            while (textMatch.hasMatch()) {
                if (textMatch.capturedStart() != cursor + 1) {
                    /* The format allows unescaped quotes in the text.
                       This makes a workaround necessary. If there is an
                       unquoted string before the next quoted string, and
                       that unquoted string comes before the next question,
                       we append the unquoted string (in quotes) and the
                       next quoted string to the previous row. */
                    QRegularExpressionMatch nextQuestion = questionEx.match(input, cursor);
                    if (nextQuestion.hasMatch() &&
                        nextQuestion.capturedStart() < textMatch.capturedEnd()) {
                        /* The next question comes before the textMatch so we really have
                           a new question. */
                        break;
                    }
                    const QString lastRow = xlsx.read(row - 1, 26).toString();
                    int unquotedLen = textMatch.capturedStart() - cursor;
                    const QString unquoted = input.mid(cursor, unquotedLen);
                    qDebug() << "Found inner quoted string: " << unquoted;
                    /* Now combine */
                    const QString combined = QString("%1\"%2\"%3").arg(lastRow).
                                                                   arg(unquoted).
                                                                   arg(textMatch.captured(1).trimmed());
                    qDebug() << "Last row: " << lastRow;
                    qDebug() << "Next Question is at: " << nextQuestion.capturedStart();
                    qDebug() << "Text match is: " << textMatch.captured(1).trimmed();
                    qDebug() << "cursor is at: " << cursor;
                    qDebug() << "text match starts at: " << textMatch.capturedStart();
                    /* Overwrite both the hidden copy and the visible cell
                       of the previous answer. */
                    xlsx.write(row - 1, 26, combined, mFreeTextFmt);
                    xlsx.write(row - 1, 1, combined, mFreeTextFmt);
                    cursor = textMatch.capturedEnd();
                    textMatch = freetxtEx.match(input, cursor);
                    continue;
                }
                cursor = textMatch.capturedEnd();

                QString text = textMatch.captured(1).trimmed();
                qDebug() << "Found free text: " << text;
                if (text.startsWith("=")) {
                    text = " " + text;
                }

                /* Merge the cells */
                xlsx.mergeCells(QString("A%1:C%1").arg(row), mFreeTextFmt);

                /* Merged cells ignore wordwrap the following trick is based on:
                   http://excel.tips.net/T003207_Automatic_Row_Height_For_Merged_Cells_with_Text_Wrap.html
                */
                /* Write the values */
                xlsx.write(QString("Z%1").arg(row), text, mFreeTextFmt);
                xlsx.write(row, 1, text, mFreeTextFmt);
                row++;

                textMatch = freetxtEx.match(input, cursor);
            }
            /* Emit the HTML rows from the final cell contents, so answers
               combined by the quote workaround appear combined. */
            for (int i = firstFreeRow; i < row; i++) {
                html << mFreeTextStyle.arg(xlsx.read(i, 1).toString().toHtmlEscaped());
            }
        }
        /* Insert Empty row. */
        xlsx.setRowHeight(row++, CHOICE_ROW_HEIGHT);
        match = questionEx.match(input, cursor);
        cursor = match.capturedEnd();
        html << mEmptyRow;
    }
    xlsx.addConditionalFormatting(bars);

    if (!foundSomething) {
        mErrors << tr("Failed to parse input document.");
    }

    html << "</table></body></html>";
    /* The QTextDocument (used for PDF output) gets the HTML with the bar
       images; the HTML text itself is rewritten below. */
    doc.setHtml(htmlString);

    /* Fixup images for html: replace the image cells written by makeBar()
       with cells carrying a CSS gradient of the same percentage. */
    QRegularExpression htmlRe = QRegularExpression("<td style='vertical-align: middle'><img src=\"internal://bar(\\d+).png\"/></td>");
    htmlString.replace(htmlRe, QStringLiteral("<td style='background:linear-gradient(to right,"
                                              BAR_COLOR ", " BAR_COLOR " \\1%, #ffffff \\1%)'></td>"));

    foreach (QFile *output, outputs) {
        const QString fName = output->fileName().toLower();
        if (fName.endsWith(".html")) {
            QTextStream outstream(output);
            outstream.setCodec("UTF-8");
            outstream << htmlString;
            output->close();
        } else if (fName.endsWith(".pdf")) {
            /* The printer writes to the file by name, so the already
               opened device is closed first. */
            output->close();
            QPrinter printer(QPrinter::PrinterResolution);
            printer.setOutputFormat(QPrinter::PdfFormat);
            printer.setPaperSize(QPrinter::A4);
            printer.setOutputFileName(output->fileName());
            /*
            QPageLayout layout = printer.pageLayout();
            layout.setUnits(QPageLayout::Millimeter);
            layout.setMargins(QMarginsF(20, 20, 20, 20));
            printer.setPageLayout(layout);
            doc.setPageSize(printer.pageRect().size());
            */
            doc.print(&printer);
        } else {
            /* Everything else gets the spreadsheet. */
            if (!xlsx.saveAs(output)) {
                mErrors << tr("Saving the XLSX document failed.");
            }
            output->close();
        }
    }
}
This site is hosted by Intevation GmbH (Datenschutzerklärung und Impressum | Privacy Policy and Imprint)