fix(imports) possible fix for imports running out of memory (#29169)
This does a few things:
- Stops reading the uploaded file into a byte[] and passes around a `File` object instead
- Lets the import run either inline (helpful for Glowroot traces) or on the DotSubmitter thread pool, controlled by the config var `IMPORT_CONTENTLETS_ASYNC` (see the sketches after this list)
- Changes the commit granularity to 100, meaning every 100 contentlets a commit is issued and a new transaction is started
- Copies the uploaded CSV file to the temp directory
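
For reference, the byte[]-to-File change boils down to streaming the upload through a charset-aware reader. This is a condensed sketch of the code added in ImportContentletsAction below (the variable names come from that diff, and the preview/import logic is elided):

```java
// Keep the upload as a File and stream it instead of buffering the whole thing in memory.
final Charset charset = importContentletsForm.getLanguage() == -1
        ? Charset.defaultCharset()
        : FileUtil.detectEncodeType(file);
try (Reader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), charset))) {
    final CsvReader csvreader = new CsvReader(reader);
    csvreader.setSafetySwitch(false);
    // ... read the headers and hand off to preview/import as in the diff below
}
```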
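The new async toggle reduces to the pattern below (taken from the ImportContentletsAction diff further down; `runnable` is the import job built there):

```java
// Submit the import to the shared DotSubmitter pool by default; set
// IMPORT_CONTENTLETS_ASYNC=false to run it inline, which is easier to follow in Glowroot traces.
if (Config.getBooleanProperty("IMPORT_CONTENTLETS_ASYNC", true)) {
    DotConcurrentFactory.getInstance().getSubmitter("importContentlets").submit(runnable);
} else {
    runnable.run();
}
```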
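The commit-granularity change lives in ImportUtil, which is not among the files shown on this page, so the snippet below is only a minimal sketch of the intent. It assumes dotCMS's HibernateUtil transaction helpers, and the class, method, and constant names are illustrative rather than the actual code:

```java
import java.util.List;
import java.util.function.Consumer;
import com.dotmarketing.db.HibernateUtil;

// Illustrative sketch: commit every 100 contentlets instead of holding one long transaction.
final class BatchedImportSketch {                        // hypothetical name
    static final int COMMIT_GRANULARITY = 100;           // the PR description states 100

    static void importInBatches(final List<String[]> rows,
                                 final Consumer<String[]> importRow) throws Exception {
        HibernateUtil.startTransaction();
        int processed = 0;
        for (final String[] row : rows) {
            importRow.accept(row);                        // per-row import work (stand-in)
            if (++processed % COMMIT_GRANULARITY == 0) {
                HibernateUtil.closeAndCommitTransaction(); // flush the current batch
                HibernateUtil.startTransaction();          // start a fresh transaction
            }
        }
        HibernateUtil.closeAndCommitTransaction();         // commit any trailing rows
    }
}
```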



ref: #29162

---------

Co-authored-by: erickgonzalez <[email protected]>
wezell and erickgonzalez authored Aug 29, 2024
1 parent 08c7899 commit 0e6f1cf
Showing 6 changed files with 104 additions and 106 deletions.
@@ -1,6 +1,7 @@
package com.dotmarketing.portlets.contentlet.action;

import com.dotcms.business.CloseDBIfOpened;
import com.dotcms.concurrent.DotConcurrentFactory;
import com.dotcms.mock.request.MockAttributeRequest;
import com.dotcms.mock.request.MockHeaderRequest;
import com.dotcms.mock.request.MockSessionRequest;
@@ -20,33 +21,28 @@
import com.dotmarketing.portlets.contentlet.struts.ImportContentletsForm;
import com.dotmarketing.portlets.structure.model.Field;
import com.dotmarketing.portlets.structure.model.Structure;
import com.dotmarketing.util.AdminLogger;
import com.dotmarketing.util.ImportUtil;
import com.dotmarketing.util.Logger;
import com.dotmarketing.util.UtilMethods;
import com.dotmarketing.util.*;
import com.liferay.portal.model.User;
import com.liferay.portal.util.Constants;
import com.liferay.portal.util.PortalUtil;
import com.liferay.portlet.ActionRequestImpl;
import com.liferay.portlet.ActionResponseImpl;
import com.liferay.util.FileUtil;
import com.liferay.util.servlet.SessionMessages;
import com.liferay.util.servlet.UploadPortletRequest;
import java.io.ByteArrayInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.List;
import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.servlet.http.HttpSession;
import org.mozilla.universalchardet.UniversalDetector;

/**
* This action class imports content from csv/text files. The csv file should
@@ -103,17 +99,19 @@ public void processAction(ActionMapping mapping, final ActionForm form, final Po
/*
* We are submitting the file to process
*/
if ((cmd != null) && cmd.equals(com.dotmarketing.util.Constants.PREVIEW)) {
if (com.dotmarketing.util.Constants.PREVIEW.equals(cmd)) {

try {
Logger.debug(this, "Calling Preview Upload Method");

//Validation
UploadPortletRequest uploadReq = PortalUtil.getUploadPortletRequest(req);
byte[] bytes = FileUtil.getBytes(uploadReq.getFile("file"));


File file = uploadReq.getFile("file");
this.detectEncodeType(session, file);
Files.copy(file.toPath(), new File(ConfigUtils.getAssetTempPath()+java.io.File.separator+file.getName()).toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING);
file = Paths.get(ConfigUtils.getAssetTempPath()+java.io.File.separator+file.getName()).toFile();


final ImportContentletsForm importContentletsForm = (ImportContentletsForm) form;
if(importContentletsForm.getStructure().isEmpty()){
@@ -124,7 +122,7 @@ else if(importContentletsForm.getWorkflowActionId().isEmpty()){
SessionMessages.add(req, ERROR, "Workflow-action-type-required");
setForward(req, PORTLET_EXT_CONTENTLET_IMPORT_CONTENTLETS);
}
else if (bytes == null || bytes.length == 0) {
else if (file == null || file.length() < 100) {
SessionMessages.add(req, ERROR, "message.contentlet.file.required");
setForward(req, PORTLET_EXT_CONTENTLET_IMPORT_CONTENTLETS);
} else {
@@ -138,19 +136,14 @@ else if (bytes == null || bytes.length == 0) {
return;
}

try {
Reader reader = null;
CsvReader csvreader = null;
Charset charset = importContentletsForm.getLanguage() == -1 ? Charset.defaultCharset() : FileUtil.detectEncodeType(file);
try(Reader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), charset))){

String[] csvHeaders = null;
int languageCodeHeaderColumn = -1;
int countryCodeHeaderColumn = -1;

if (importContentletsForm.getLanguage() == -1)
reader = new InputStreamReader(new ByteArrayInputStream(bytes), Charset.forName("UTF-8"));
else
reader = new InputStreamReader(new ByteArrayInputStream(bytes));

csvreader = new CsvReader(reader);

CsvReader csvreader = new CsvReader(reader);
csvreader.setSafetySwitch(false);

switch ((int) importContentletsForm.getLanguage()) {
@@ -172,7 +165,7 @@ else if (bytes == null || bytes.length == 0) {
SessionMessages.add(req, ERROR, "message.import.contentlet.csv_headers.required");
setForward(req, PORTLET_EXT_CONTENTLET_IMPORT_CONTENTLETS);
} else {
_generatePreview(0,req, res, config, form, user, bytes, csvHeaders, csvreader, languageCodeHeaderColumn, countryCodeHeaderColumn, reader);
_generatePreview(0,req, res, config, form, user, file, csvHeaders, csvreader, languageCodeHeaderColumn, countryCodeHeaderColumn, reader);
setForward(req, "portlet.ext.contentlet.import_contentlets_preview");
}
} else {
@@ -189,23 +182,19 @@ else if (bytes == null || bytes.length == 0) {
setForward(req, PORTLET_EXT_CONTENTLET_IMPORT_CONTENTLETS);
break;
default:
_generatePreview(0, req, res, config, form, user, bytes, csvHeaders, csvreader, languageCodeHeaderColumn, countryCodeHeaderColumn, reader);
_generatePreview(0, req, res, config, form, user, file, csvHeaders, csvreader, languageCodeHeaderColumn, countryCodeHeaderColumn, reader);
setForward(req, "portlet.ext.contentlet.import_contentlets_preview");
break;
}

csvreader.close();
} catch (Exception e) {
_handleException(e, req);
return;
}
}

} catch (Exception ae) {
_handleException(ae, req);
return;
}
} else if ((cmd != null) && cmd.equals(com.dotmarketing.util.Constants.PUBLISH)) {
} else if (com.dotmarketing.util.Constants.PUBLISH.equals(cmd)) {
AdminLogger.log(ImportContentletsAction.class, "processAction", "Importing Contentlets", user);
Long importIdComplete=(Long)session.getAttribute("importId");
String subcmd = req.getParameter("subcmd");
@@ -218,35 +207,32 @@ else if (bytes == null || bytes.length == 0) {
final HttpServletRequest httpReq = reqImpl.getHttpServletRequest();
final HttpSession httpSession = httpReq.getSession();
final long importId = ImportAuditUtil.createAuditRecord(user.getUserId(), (String)httpSession.getAttribute("fileName"));
Thread t=new Thread() {


Runnable runnable = new Runnable() {

@Override
@CloseDBIfOpened
public void run() {
try {

ImportContentletsForm importContentletsForm = (ImportContentletsForm) form;
File fileToImport = (File) httpSession.getAttribute("file_to_import");
Charset charset = importContentletsForm.getLanguage() == -1
? Charset.defaultCharset()
: FileUtil.detectEncodeType(fileToImport);
try(Reader reader = new BufferedReader(new InputStreamReader(new FileInputStream(fileToImport), charset))){

Logger.debug(this, "Calling Process File Method");

Reader reader;
CsvReader csvreader;


String[] csvHeaders = null;
int languageCodeHeaderColumn = -1;
int countryCodeHeaderColumn = -1;

byte[] bytes = (byte[]) httpSession.getAttribute("file_to_import");
ImportContentletsForm importContentletsForm = (ImportContentletsForm) form;
String eCode = (String) httpSession.getAttribute(ENCODE_TYPE);
if (importContentletsForm.getLanguage() == -1) {
reader = new InputStreamReader(new ByteArrayInputStream(bytes),
Charset.forName("UTF-8"));
}
else if(eCode != null) {
reader = new InputStreamReader(new ByteArrayInputStream(bytes),
Charset.forName(eCode));
}
else {
reader = new InputStreamReader(new ByteArrayInputStream(bytes));
}
csvreader = new CsvReader(reader);


CsvReader csvreader = new CsvReader(reader);
csvreader.setSafetySwitch(false);

if (importContentletsForm.getLanguage() == -1) {
if (csvreader.readHeaders()) {
csvHeaders = csvreader.getHeaders();
@@ -264,7 +250,7 @@ else if(eCode != null) {
}
}

final User user = _getUser(req);


HashMap<String, List<String>> importresults= new HashMap<>();
if(importSession.equals(httpSession.getAttribute("importSession"))){
@@ -273,7 +259,7 @@ else if(eCode != null) {
csvHeaders, csvreader, languageCodeHeaderColumn,
countryCodeHeaderColumn, reader,req);
}

final List<String> counters= importresults.get("counters");
int contentsToImport=0;
for(String counter: counters){
@@ -283,19 +269,20 @@ else if(eCode != null) {
contentsToImport + Integer.parseInt(counterArray[1]);
}
}

final List<String> inodes= importresults.get("lastInode");
if(!inodes.isEmpty()){
ImportAuditUtil.updateAuditRecord(inodes.get(0), contentsToImport, importId,importresults);
}

csvreader.close();


} catch (Exception ae) {
_handleException(ae, req);
return;

} finally{

if(!ImportAuditUtil.cancelledImports.containsKey(importId)){
ImportAuditUtil.setAuditRecordCompleted(importId);
}else{
@@ -304,7 +291,15 @@ else if(eCode != null) {
}
}
};
t.start();


if(Config.getBooleanProperty("IMPORT_CONTENTLETS_ASYNC", true)) {
DotConcurrentFactory.getInstance().getSubmitter("importContentlets").submit(runnable);
}else{
runnable.run();
}


req.setAttribute("previewResults", session.getAttribute("previewResults"));
session.removeAttribute("previewResults");
req.setAttribute("importId", importId);
@@ -337,37 +332,8 @@ else if(eCode != null) {

}

/**
* Detects the character encoding of the uploaded CSV file and stores it in the
* session under the {@code ENCODE_TYPE} attribute.
*
* @param session the current HTTP session where the detected encoding is stored
* @param file the uploaded CSV file to inspect
* @throws IOException if the file cannot be read
*/
private void detectEncodeType(final HttpSession session, final File file) throws IOException {

String encodeType = null;

if (null != file && file.exists()) {
byte[] buf = new byte[4096];
try (InputStream is = Files.newInputStream(file.toPath())){


UniversalDetector detector = new UniversalDetector(null);
int nread;
while ((nread = is.read(buf)) > 0 && !detector.isDone()) {
detector.handleData(buf, 0, nread);
}
detector.dataEnd();
encodeType = detector.getDetectedCharset();
session.setAttribute(ENCODE_TYPE, encodeType);
detector.reset();
}catch (IOException e){
Logger.error(this.getClass(), e.getMessage());
throw e;
}
}
}

/**
* Provides the user a CSV template based on the selected Content Type. This
* assists users in the process of adding new content to dotCMS as it
@@ -459,8 +425,8 @@ private void _downloadCSVTemplate(ActionRequest req, ActionResponse res, Portlet
* the UI.
* @param user
* - The {@link User} performing this action.
* @param bytes
* - The byte array representation of the CSV file.
* @param file
* - The file representation of the CSV file.
* @param csvHeaders
* - The headers that make up the CSV file.
* @param csvreader
@@ -474,12 +440,12 @@ private void _downloadCSVTemplate(ActionRequest req, ActionResponse res, Portlet
* @throws Exception
* An error occurred when analyzing the CSV file.
*/
private void _generatePreview(long importId, ActionRequest req, ActionResponse res, PortletConfig config, ActionForm form, User user, byte[] bytes, String[] csvHeaders, CsvReader csvreader, int languageCodeHeaderColumn, int countryCodeHeaderColumn, Reader reader) throws Exception {
private void _generatePreview(long importId, ActionRequest req, ActionResponse res, PortletConfig config, ActionForm form, User user, File file, String[] csvHeaders, CsvReader csvreader, int languageCodeHeaderColumn, int countryCodeHeaderColumn, Reader reader) throws Exception {
// wraps request to get session object
ActionRequestImpl reqImpl = (ActionRequestImpl) req;
HttpServletRequest httpReq = reqImpl.getHttpServletRequest();
HttpSession session = httpReq.getSession();
httpReq.getSession().setAttribute("file_to_import", bytes);
httpReq.getSession().setAttribute("file_to_import", file);
httpReq.getSession().setAttribute("form_to_import", form);
ImportContentletsForm importForm = (ImportContentletsForm) form;
httpReq.getSession().setAttribute("fileName", importForm.getFileName());
@@ -33,6 +33,7 @@ public abstract class WorkflowCache implements Cachable {
abstract protected List<WorkflowAction> addActions(WorkflowScheme scheme, List<WorkflowAction> actions);
abstract protected List<WorkflowActionClass> addActionClasses(WorkflowAction action, List<WorkflowActionClass> actionClasses);
abstract protected List<WorkflowAction> getActions(WorkflowStep step);

abstract protected List<WorkflowActionClass> getActionClasses(final WorkflowAction action);
abstract protected List<WorkflowAction> getActions(WorkflowScheme scheme);

@@ -165,4 +166,4 @@ public String[] getGroups() {
* @param mapping SystemActionWorkflowActionMapping
*/
public abstract void removeSystemActionWorkflowActionMapping(SystemActionWorkflowActionMapping mapping);
}
}
@@ -532,4 +532,4 @@ public void removeSystemActionWorkflowActionMapping(final SystemActionWorkflowAc
}
}
}
}
}
@@ -717,7 +717,7 @@ public WorkflowAction findAction(String id) throws DotDataException {
db.setSQL(WorkflowSQL.SELECT_ACTION);
db.addParam(id);
try {
return (WorkflowAction) this.convertListToObjects(db.loadObjectResults(),
return (WorkflowAction) this.convertListToObjects(db.loadObjectResults(),
WorkflowAction.class).get(0);
} catch (IndexOutOfBoundsException ioob) {
return null;
@@ -1853,7 +1853,6 @@ public void saveAction(final WorkflowAction action)
final WorkflowScheme proxyScheme = new WorkflowScheme();
proxyScheme.setId(action.getSchemeId());
cache.removeActions(proxyScheme);

// update workflowScheme mod date
final WorkflowScheme scheme = findScheme(action.getSchemeId());
saveScheme(scheme);