Skip to content

Commit

Permalink
create preview content for tar files (#759)
Browse files Browse the repository at this point in the history
* create preview content for tar files

* Added right logs

* devided extractFile funs into several separated smaller funs

* added comment and removed empty line

* added empty lines and removed unwanted comments

* removed empty line

* used consts

* try incorrect identification level

* log errors and removed unneeded consts

---------

Co-authored-by: milanmajchrak <[email protected]>
  • Loading branch information
Paurikova2 and milanmajchrak authored Oct 17, 2024
1 parent 6c6de87 commit 8d115da
Showing 1 changed file with 131 additions and 59 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.sql.SQLException;
import java.util.ArrayList;
Expand All @@ -33,8 +34,8 @@
import javax.xml.parsers.ParserConfigurationException;

import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.Logger;
import org.dspace.app.rest.Parameter;
Expand Down Expand Up @@ -82,7 +83,8 @@
@Component(MetadataBitstreamWrapperRest.CATEGORY + "." + MetadataBitstreamWrapperRest.NAME)
public class MetadataBitstreamRestRepository extends DSpaceRestRepository<MetadataBitstreamWrapperRest, Integer> {
private static Logger log = org.apache.logging.log4j.LogManager.getLogger(MetadataBitstreamRestRepository.class);

private final String ARCHIVE_TYPE_ZIP = "zip";
private final String ARCHIVE_TYPE_TAR = "tar";
// This constant is used to limit the length of the preview content stored in the database to prevent
// the database from being overloaded with large amounts of data.
private static final int MAX_PREVIEW_COUNT_LENGTH = 2000;
Expand Down Expand Up @@ -329,17 +331,19 @@ private List<FileInfo> processInputStreamToFilePreview(Context context, Bitstrea
} else {
String data = "";
if (bitstream.getFormat(context).getMIMEType().equals("application/zip")) {
data = extractFile(inputStream, "zip");
data = extractFile(inputStream, ARCHIVE_TYPE_ZIP);
try {
fileInfos = FileTreeViewGenerator.parse(data);
} catch (Exception e) {
log.error("Cannot extract file content because: {}", e.getMessage());
}
} else if (bitstream.getFormat(context).getMIMEType().equals("application/x-tar")) {
ArchiveInputStream is = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.TAR,
inputStream);
data = extractFile(is, "tar");
fileInfos = FileTreeViewGenerator.parse(data);
data = extractFile(inputStream, ARCHIVE_TYPE_TAR);
try {
fileInfos = FileTreeViewGenerator.parse(data);
} catch (Exception e) {
log.error("Cannot extract file content because: {}", e.getMessage());
}
}
}
return fileInfos;
Expand Down Expand Up @@ -382,76 +386,108 @@ private String composePreviewURL(Context context, Item item, Bitstream bitstream
return url;
}


/**
* Convert ZIP file into structured String.
* @param inputStream Input stream with ZIP content
* @param fileType ZIP/TAR
* @return structured String
* Creates a temporary file with the appropriate extension based on the specified file type.
* @param fileType the type of file for which to create a temporary file
* @return a Path object representing the temporary file
* @throws IOException if an I/O error occurs while creating the file
*/
public String extractFile(InputStream inputStream, String fileType) {
List<String> filePaths = new ArrayList<>();
Path tempFile = null;
FileSystem zipFileSystem = null;
private Path createTempFile(String fileType) throws IOException {
String extension = ARCHIVE_TYPE_TAR.equals(fileType) ?
String.format(".%s", ARCHIVE_TYPE_TAR) : String.format(".%s", ARCHIVE_TYPE_ZIP);
return Files.createTempFile("temp", extension);
}

try {
switch (fileType) {
case "tar":
tempFile = Files.createTempFile("temp", ".tar");
break;
default:
tempFile = Files.createTempFile("temp", ".zip");
/**
* Adds a file path and its size to the list of file paths.
* If the path represents a directory, appends a "/" to the path.
* @param filePaths the list of file paths to add to
* @param path the file or directory path
* @param size the size of the file or directory
*/
private void addFilePath(List<String> filePaths, String path, long size) {
String fileInfo = (Files.isDirectory(Paths.get(path))) ? path + "/|" + size : path + "|" + size;
filePaths.add(fileInfo);
}

/**
* Processes a TAR file, extracting its entries and adding their paths to the provided list.
* @param filePaths the list to populate with the extracted file paths
* @param tempFile the temporary TAR file to process
* @throws IOException if an I/O error occurs while reading the TAR file
*/
private void processTarFile(List<String> filePaths, Path tempFile) throws IOException {
try (InputStream fi = Files.newInputStream(tempFile);
TarArchiveInputStream tis = new TarArchiveInputStream(fi)) {
TarArchiveEntry entry;
while ((entry = tis.getNextTarEntry()) != null) {
addFilePath(filePaths, entry.getName(), entry.getSize());
}
}
}

Files.copy(inputStream, tempFile, StandardCopyOption.REPLACE_EXISTING);
/**
* Processes a ZIP file, extracting its entries and adding their paths to the provided list.
* @param filePaths the list to populate with the extracted file paths
* @param zipFileSystem the FileSystem object representing the ZIP file
* @throws IOException if an I/O error occurs while reading the ZIP file
*/
private void processZipFile(List<String> filePaths, FileSystem zipFileSystem) throws IOException {
Path root = zipFileSystem.getPath("/");
Files.walk(root).forEach(path -> {
try {
long fileSize = Files.size(path);
addFilePath(filePaths, path.toString().substring(1), fileSize);
} catch (IOException e) {
log.error("An error occurred while getting the size of the zip file.", e);
}
});
}

zipFileSystem = FileSystems.newFileSystem(tempFile, (ClassLoader) null);
Path root = zipFileSystem.getPath("/");
Files.walk(root)
.forEach(path -> {
try {
long fileSize = Files.size(path);
if (Files.isDirectory(path)) {
filePaths.add(path.toString().substring(1) + "/|" + fileSize );
} else {
filePaths.add(path.toString().substring(1) + "|" + fileSize );
}
} catch (IOException e) {
e.printStackTrace();
}
});
} catch (IOException e) {
e.printStackTrace();
} finally {
if (zipFileSystem != null) {
try {
zipFileSystem.close();
} catch (IOException e) {
e.printStackTrace();
}
/**
* Closes the specified FileSystem resource if it is not null.
* @param zipFileSystem the FileSystem to close
*/
private void closeFileSystem(FileSystem zipFileSystem) {
if (Objects.nonNull(zipFileSystem)) {
try {
zipFileSystem.close();
} catch (IOException e) {
log.error("An error occurred while closing the zip file.", e);
}
}
}

if (tempFile != null) {
try {
Files.delete(tempFile);
} catch (IOException e) {
e.printStackTrace();
}
/**
* Deletes the specified temporary file if it is not null.
* @param tempFile the Path object representing the temporary file to delete
*/
private void deleteTempFile(Path tempFile) {
if (Objects.nonNull(tempFile)) {
try {
Files.delete(tempFile);
} catch (IOException e) {
log.error("An error occurred while deleting temp file.", e);
}
}
}

/**
* Builds an XML response string based on the provided list of file paths.
* @param filePaths the list of file paths to include in the XML response
* @return an XML string representation of the file paths
*/
private String buildXmlResponse(List<String> filePaths) {
// Is a folder regex
String folderRegex = "/|\\d+";
Pattern pattern = Pattern.compile(folderRegex);

StringBuilder sb = new StringBuilder();
sb.append(("<root>"));
sb.append("<root>");
Iterator<String> iterator = filePaths.iterator();
int fileCounter = 0;
while ((iterator.hasNext() && fileCounter < maxPreviewCount)) {
while (iterator.hasNext() && fileCounter < maxPreviewCount) {
String filePath = iterator.next();

// Check if the file is a folder
Matcher matcher = pattern.matcher(filePath);
if (!matcher.matches()) {
Expand All @@ -464,10 +500,46 @@ public String extractFile(InputStream inputStream, String fileType) {
if (fileCounter > maxPreviewCount) {
sb.append("<element>...too many files...|0</element>");
}
sb.append(("</root>"));
sb.append("</root>");
return sb.toString();
}

/**
* Extracts files from an InputStream, processes them based on the specified file type (tar or zip),
* and returns an XML representation of the file paths.
* @param inputStream the InputStream containing the file data
* @param fileType the type of file to extract ("tar" or "zip")
* @return an XML string representing the extracted file paths
*/
public String extractFile(InputStream inputStream, String fileType) {
List<String> filePaths = new ArrayList<>();
Path tempFile = null;
FileSystem zipFileSystem = null;

try {
// Create a temporary file based on the file type
tempFile = createTempFile(fileType);

// Copy the input stream to the temporary file
Files.copy(inputStream, tempFile, StandardCopyOption.REPLACE_EXISTING);

// Process the file based on its type
if (ARCHIVE_TYPE_TAR.equals(fileType)) {
processTarFile(filePaths, tempFile);
} else {
zipFileSystem = FileSystems.newFileSystem(tempFile, (ClassLoader) null);
processZipFile(filePaths, zipFileSystem);
}
} catch (IOException e) {
log.error(String.format("An error occurred while extracting file of type %s.", fileType), e);
} finally {
closeFileSystem(zipFileSystem);
deleteTempFile(tempFile);
}

return buildXmlResponse(filePaths);
}

/**
* Read input stream and return content as String
* @param inputStream to read
Expand Down

0 comments on commit 8d115da

Please sign in to comment.