Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Image extract #261

Merged
merged 5 commits into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ SHELL ?= /bin/bash
endif

#JAR_VERSION := $(shell mvn -q -Dexec.executable="echo" -Dexec.args='$${project.version}' --non-recursive exec:exec -DforceStdout)
JAR_VERSION := 1.96
JAR_VERSION := 1.97
JAR_FILE := mn2pdf-$(JAR_VERSION).jar

all: target/$(JAR_FILE)
Expand Down
10 changes: 5 additions & 5 deletions README.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ You will need the Java Development Kit (JDK) version 8, Update 241 (8u241) or hi

[source,sh]
----
java -Xss5m -Xmx2048m -jar target/mn2pdf-1.96.jar --xml-file <XML-FileName> --xsl-file <XSLT-FileName> --pdf-file <Output-PDF-FileName> [--syntax-highlight]
java -Xss5m -Xmx2048m -jar target/mn2pdf-1.97.jar --xml-file <XML-FileName> --xsl-file <XSLT-FileName> --pdf-file <Output-PDF-FileName> [--syntax-highlight]
----

e.g.

[source,sh]
----
java -Xss5m -Xmx2048m -jar target/mn2pdf-1.96.jar --xml-file tests/G.191.xml --xsl-file tests/itu.recommendation.xsl --pdf-file tests/G.191.pdf
java -Xss5m -Xmx2048m -jar target/mn2pdf-1.97.jar --xml-file tests/G.191.xml --xsl-file tests/itu.recommendation.xsl --pdf-file tests/G.191.pdf
----

=== PDF encryption features
Expand Down Expand Up @@ -100,7 +100,7 @@ Update version in `pom.xml`, e.g.:
----
<groupId>org.metanorma.fop</groupId>
<artifactId>mn2pdf</artifactId>
<version>1.96</version>
<version>1.97</version>
<name>Metanorma XML to PDF converter</name>
----

Expand All @@ -111,8 +111,8 @@ Tag the same version in Git:

[source,sh]
----
git tag v1.96
git push origin v1.96
git tag v1.97
git push origin v1.97
----

Then the corresponding GitHub release will be automatically created at:
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.metanorma.fop</groupId>
<artifactId>mn2pdf</artifactId>
<version>1.96</version>
<version>1.97</version>
<name>Metanorma XML to PDF converter</name>
<packaging>jar</packaging>
<url>https://www.metanorma.org</url>
Expand Down
6 changes: 5 additions & 1 deletion src/main/java/org/metanorma/fop/PDFGenerator.java
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,10 @@ public boolean process() {
logger.info(String.format(OUTPUT_LOG, PDF_OUTPUT, fPDF));
logger.info("");

File fPresentationPartXML = getPresentationPartXML(fXML, fPDF.getParent());
PDFResult pdfResult = PDFResult.PDFResult(fPDF);

//File fPresentationPartXML = getPresentationPartXML(fXML, fPDF.getParent());
File fPresentationPartXML = getPresentationPartXML(fXML, pdfResult.getOutFolder());

sourceXMLDocument = new SourceXMLDocument(fPresentationPartXML);

Expand Down Expand Up @@ -341,6 +344,7 @@ public boolean process() {
}
xsltConverter.deleteTmpXSL();
fontcfg.deleteConfigFile();
pdfResult.flushOutTmpImagesFolder();
}

logger.info("Success!");
Expand Down
64 changes: 64 additions & 0 deletions src/main/java/org/metanorma/fop/PDFResult.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package org.metanorma.fop;

import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Comparator;
import java.util.UUID;

/**
 * Process-wide holder for the locations derived from the output PDF file:
 * the absolute output folder and a uniquely named temporary folder (inside the
 * output folder) for extracted images.
 *
 * <p>The instance is created lazily by the first call to {@link #PDFResult(File)};
 * every later call returns that same instance and ignores its argument.
 * No synchronization is performed.
 */
public class PDFResult {

    private static PDFResult instance = null;

    /** Absolute path of the folder that contains the output PDF. */
    private final String outFolder;

    /** {@code <outFolder>/_tmp_images_<random UUID>}; not created by this class. */
    private final Path outTmpImagesPath;

    /**
     * Resolves the absolute parent folder of {@code pdfFile} and derives the
     * temporary images path from it.
     *
     * @param pdfFile the output PDF file; must not be {@code null}
     * @throws NullPointerException if {@code pdfFile} is {@code null}
     */
    private PDFResult(File pdfFile) {
        if (pdfFile == null) {
            // Same exception type the bare dereference would raise, but with a hint about the cause.
            throw new NullPointerException(
                    "PDFResult is not initialized yet: the first call must pass the output PDF file");
        }
        String parentFolder = pdfFile.getParent();
        if (parentFolder == null) {
            // A bare file name has no parent component; resolve it against the working directory.
            parentFolder = pdfFile.getAbsoluteFile().getParent();
        } else {
            parentFolder = new File(parentFolder).getAbsolutePath();
        }
        outTmpImagesPath = Paths.get(parentFolder, "_tmp_images_" + UUID.randomUUID().toString());
        outFolder = parentFolder;
    }

    /**
     * Returns the single shared instance, creating it from {@code pdfFile} on the first call.
     *
     * @param pdfFile the output PDF file; only used (and required to be non-null) when the
     *                instance does not exist yet, otherwise ignored
     * @return the shared instance
     * @throws NullPointerException if the instance does not exist yet and {@code pdfFile} is {@code null}
     */
    public static PDFResult PDFResult(File pdfFile) {
        if (instance == null) {
            instance = new PDFResult(pdfFile);
        }
        return instance;
    }

    /**
     * @return the absolute path of the folder containing the output PDF
     */
    public String getOutFolder() {
        return outFolder;
    }

    /**
     * @return the path of the temporary images folder (it may not exist on disk)
     */
    public Path getOutTmpImagesPath() {
        return outTmpImagesPath;
    }

    /**
     * Recursively deletes the temporary images folder if it exists.
     *
     * <p>Deletion is best-effort: any failure is printed to standard error and
     * not propagated to the caller.
     */
    public void flushOutTmpImagesFolder() {
        if (!Files.exists(outTmpImagesPath)) {
            return;
        }
        // Files.walk keeps directory handles open until the stream is closed,
        // so it has to be used inside try-with-resources.
        try (java.util.stream.Stream<Path> entries = Files.walk(outTmpImagesPath)) {
            // Reverse order puts children before their parent directories.
            entries.sorted(Comparator.reverseOrder())
                    .map(Path::toFile)
                    .forEach(File::delete);
            Files.deleteIfExists(outTmpImagesPath);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.metanorma.fop.ifhandler;

import org.apache.commons.lang3.StringEscapeUtils;
import org.metanorma.fop.PDFResult;
import org.metanorma.fop.Util;
import org.metanorma.utils.LoggerHelper;
import org.w3c.dom.Document;
Expand All @@ -18,15 +19,20 @@
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.logging.Logger;

/*
* This class is intended for removing the semantic part from Metanorma XML
* This class is intended for:
* - removing the semantic part from Metanorma XML
* - extracting embedded base64 images as binary files into a temporary folder on disk
*/

public class FOPXMLPresentationHandler extends DefaultHandler {
Expand All @@ -39,6 +45,8 @@ public class FOPXMLPresentationHandler extends DefaultHandler {

private StringBuilder sbResult = new StringBuilder();

private String currentElement;

Stack<Character> stackChar = new Stack<>();

Stack<Boolean> skipElements = new Stack<>();
Expand All @@ -51,6 +59,8 @@ public void startDocument() {
@Override
public void startElement(String uri, String lName, String qName, Attributes attr) throws SAXException {

currentElement = qName;

if (qName.startsWith("semantic__") || qName.equals("emf")) {
// skip
skipElements.push(true);
Expand Down Expand Up @@ -82,11 +92,53 @@ private String copyAttributes(Attributes attr) {
StringBuilder sbTmp = new StringBuilder();
for (int i = 0; i < attr.getLength(); i++) {
sbTmp.append(" ");
sbTmp.append(attr.getLocalName(i));
String attrName = attr.getLocalName(i);
String attrValue = attr.getValue(i);
sbTmp.append(attrName);
sbTmp.append("=\"");
String value = StringEscapeUtils.escapeXml(attr.getValue(i));

String value = StringEscapeUtils.escapeXml(attrValue);;

boolean isExtractedImage = false;

if (currentElement.equals("image") && attrName.equals("src") &&
(attrValue.startsWith("data:image/") || attrValue.startsWith("data:application/")) &&
!(attrValue.startsWith("data:image/svg+xml;"))) {
String dataPrefix = "data:image/";
if (attrValue.startsWith("data:application/")) {
dataPrefix = "data:application/";
}
// extract embedded images in base64 to binary format into temporary folder on disk
int startPos = attrValue.indexOf(";base64,") + 8;
String base64data = attrValue.substring(startPos);
byte[] decodedBytes = Base64.getDecoder().decode(base64data);

String imageFormat = attrValue.substring(attrValue.indexOf(dataPrefix) + dataPrefix.length(), attrValue.indexOf(";base64,"));
PDFResult pdfResult = PDFResult.PDFResult(null);
String imageTmpName = UUID.randomUUID().toString() + "." + imageFormat;
Path imagePath = Paths.get(pdfResult.getOutTmpImagesPath().toString(), imageTmpName);
try {
Files.createDirectories(pdfResult.getOutTmpImagesPath());
Files.write(imagePath, decodedBytes);
// relative path to PDF out file
//File imageFile = new File(imagePath.toString());
//String imageFileParentFolder = imageFile.getParentFile().getName();
//value = Paths.get(imageFileParentFolder, imageTmpName).toString();
// absolutepath
value = imagePath.toAbsolutePath().toString();
isExtractedImage = true;
} catch (IOException ex) {
logger.severe("Can't save the image on disk '" + imagePath.toString() + "':");
logger.severe(ex.getMessage());
ex.printStackTrace();
}
}
sbTmp.append(value);
sbTmp.append("\"");

if (isExtractedImage) {
sbTmp.append(" extracted=\"true\"");
}
}
return sbTmp.toString();
}
Expand Down
Loading
Loading