From 5fe399492a81d91d66cf927885721dbc0f07efe7 Mon Sep 17 00:00:00 2001 From: Adelaide Nxumalo <27953420+anxumalo@users.noreply.github.com> Date: Thu, 3 Aug 2023 12:01:42 +0100 Subject: [PATCH] [DOCS-7307] Add Document Transformation Engine 2.4 docs - initial commit --- _config.yml | 7 +- _data/toc/transformation-engine.yaml | 20 ++- transformation-engine/2.3/admin/index.md | 29 +++++ transformation-engine/2.3/config/index.md | 134 +++++++++++++++++++++ transformation-engine/2.3/index.md | 19 +++ transformation-engine/2.3/install/index.md | 60 +++++++++ transformation-engine/2.3/install/msi.md | 128 ++++++++++++++++++++ transformation-engine/2.3/install/sdk.md | 37 ++++++ transformation-engine/2.3/support/index.md | 22 ++++ transformation-engine/2.3/using/index.md | 33 +++++ 10 files changed, 487 insertions(+), 2 deletions(-) create mode 100644 transformation-engine/2.3/admin/index.md create mode 100644 transformation-engine/2.3/config/index.md create mode 100644 transformation-engine/2.3/index.md create mode 100644 transformation-engine/2.3/install/index.md create mode 100644 transformation-engine/2.3/install/msi.md create mode 100644 transformation-engine/2.3/install/sdk.md create mode 100644 transformation-engine/2.3/support/index.md create mode 100644 transformation-engine/2.3/using/index.md diff --git a/_config.yml b/_config.yml index 569fc211ed..05aa913de2 100644 --- a/_config.yml +++ b/_config.yml @@ -694,13 +694,18 @@ defaults: toc: "transformation-engine" support: true versions: + - 2.4 - 2.3 - 2.2 - scope: path: "transformation-engine/latest" values: - version: 2.3 + version: 2.4 latest: true + - scope: + path: "transformation-engine/2.3" + values: + version: 2.3 - scope: path: "transformation-engine/2.2" values: diff --git a/_data/toc/transformation-engine.yaml b/_data/toc/transformation-engine.yaml index 6734225004..b7d1ac0af5 100644 --- a/_data/toc/transformation-engine.yaml +++ b/_data/toc/transformation-engine.yaml @@ -1,5 +1,5 @@ # Document 
Transformation Engine -- version: 2.3 +- version: 2.4 pages: - title: 'Introduction' path: '/transformation-engine/latest/' @@ -17,6 +17,24 @@ path: '/transformation-engine/latest/admin/' - title: 'Using' path: '/transformation-engine/latest/using/' +- version: 2.3 + pages: + - title: 'Introduction' + path: '/transformation-engine/2.3/' + - title: 'Install' + pages: + - title: 'Overview' + path: '/transformation-engine/2.3/install/' + - title: 'Install with MSI' + path: '/transformation-engine/2.3/install/msi/' + - title: 'Install the SDK' + path: '/transformation-engine/2.3/install/sdk/' + - title: 'Configure' + path: '/transformation-engine/2.3/config/' + - title: 'Administer' + path: '/transformation-engine/2.3/admin/' + - title: 'Using' + path: '/transformation-engine/2.3/using/' - version: 2.2 pages: - title: 'Introduction' diff --git a/transformation-engine/2.3/admin/index.md b/transformation-engine/2.3/admin/index.md new file mode 100644 index 0000000000..520bd6f0eb --- /dev/null +++ b/transformation-engine/2.3/admin/index.md @@ -0,0 +1,29 @@ +--- +title: Administer the Document Transformation Engine +--- + +The Document Transformation Engine can be integrated with monitoring tools such as Nagios or Hyperic, by using HTTP REST calls. + +The tool should call the Document Transformation Engine URL with a set of parameters and then monitor the response. + +Two calls are available: + +* Connection tester call + + This call is also used by the Alfresco Transformation client to test availability. It checks the transformation service is up and responding. + + 1. URL: `http://:/transformation-backend/service/transform/v1/version` + + 2. HTTP Method: `GET` + + 3. Make sure that you include basic authentication credentials to your call. 
+ +* Transformation execution call + + This call gets an Office file from the Transformation Service to check whether the transformation engine is still functioning (the Transformation Service makes an internal post, but the HTTP method is still a GET call). This can be used for more in-depth monitoring. + + 1. URL: `http://:/transformation-backend/service/transform/v1/available` + + 2. HTTP Method: `GET` + + 3. Make sure that you include basic authentication credentials to your call. diff --git a/transformation-engine/2.3/config/index.md b/transformation-engine/2.3/config/index.md new file mode 100644 index 0000000000..d67f08bf06 --- /dev/null +++ b/transformation-engine/2.3/config/index.md @@ -0,0 +1,134 @@ +--- +title: Configure the Document Transformation Engine +--- + +The standalone Document Transformation Engine can be configured using the Web Console. You only need to change the password of the transformation service. + +1. Open your browser and navigate to `http://:/transformation-server/#/settings` or `https://` if you are using SSL. + +2. Enter your login name and a password. + + By default, the login name is set to `alfresco`, and the password is set to `alfresco`. The login name `alfresco` cannot be changed. + +3. Enter a new password, and then click **Change** to save the password. + + + +## Configure DTE with SSL + +Below is a very basic example of how to configure Secure Sockets Layer (SSL) for DTE. It forms a good starting point for customers with experience and competencies in DevOps. + +1. Edit `C:\Program Files (x86)\TransformationServer\tomcat\conf\server.xml`: + + For example: + + 1. Comment out this connector: + + ```xml + + + + + + ``` + + 2. Uncomment this Connector: + + ```xml + + ``` + +2. Check the REST configuration URL under: `https://:8443/transformation-server/#/settings`: + + This should be set to: `https://:8443`. + +3. 
Edit `alfresco-global.properties`: + + Change `localTransform.transform-dte.url=http://<dte-hostname>:8080/transform-dte` + + to `localTransform.transform-dte.url=https://<dte-hostname>:8443/transform-dte` + +For more information on configuring SSL on Tomcat, see the Tomcat documentation [SSL/TLS Configuration How-To](https://tomcat.apache.org/tomcat-9.0-doc/ssl-howto.html){:target="_blank"}. diff --git a/transformation-engine/2.3/index.md b/transformation-engine/2.3/index.md new file mode 100644 index 0000000000..459b06ff2b --- /dev/null +++ b/transformation-engine/2.3/index.md @@ -0,0 +1,19 @@ +--- +title: Alfresco Document Transformation Engine +--- + +The Document Transformation Engine is a stable, fast, and scalable solution for high-quality transformations of Microsoft Office documents (Word, Excel, and PowerPoint only) to PDF. It is an enterprise alternative to LibreOffice. It is an Alfresco Content Services module that is enabled with a license key. + +The engine features an open architecture and offers the following features: + +* **High quality**: The Document Transformation Engine uses genuine Microsoft Office software to transform Word, Excel, and PowerPoint documents to PDF. This guarantees the handling of the supported file types and pixel-perfect transformations, and it corrects previous layout issues in the Share preview feature. + + The Document Transformation Engine can also be used to convert emails to PDFs. This is a useful feature in conjunction with the Outlook Plugin. + +* **Scalable**: The Document Transformation Engine communicates with Alfresco Content Services using an HTTP REST API, which means that you can scale up by adding multiple instances of the engine and connecting them through a standard HTTP Network Load Balancer. + +* **Stable**: If Microsoft Office can open and transform your document, then so can the Document Transformation Engine. Robust error handling will take care of corrupt and encrypted documents. 
A Web Console shows you a detailed report if there is a problem during transformation, allowing you to correct documents. + +* **Fast**: The Document Transformation Engine is two to three times faster when transforming multi-megabyte Office documents when compared with LibreOffice on the same hardware. + +* **Extensible format support**: The Document Transformation Engine supports the transformation of MS Office formats. diff --git a/transformation-engine/2.3/install/index.md b/transformation-engine/2.3/install/index.md new file mode 100644 index 0000000000..4cba5aec24 --- /dev/null +++ b/transformation-engine/2.3/install/index.md @@ -0,0 +1,60 @@ +--- +title: Installation overview +--- + +The standalone Document Transformation Engine runs on Microsoft Windows and provides file transformations. + +## Prerequisites + +There are a number of important notes to consider when installing the Document Transformation Engine in addition to the [supported platforms]({% link transformation-engine/2.3/support/index.md %}). + +* The Document Transformation Engine requires an installation of [Alfresco Transform Service]({% link transform-service/latest/install/index.md %}). + +* The standalone Document Transformation Engine requires the software components to be installed and available on the same machine. + +* Only install the English versions of Microsoft Windows Server 2012, Microsoft Windows Server 2016 or Microsoft Windows Server 2019, and Microsoft Office because other languages cause encoding issues resulting in unpredictable behavior. + + > **Note:** Although the engine must be configured in English, this has no impact on the transformation language used for documents. + +* Microsoft Office 2016 or 2019 32-bit & 64-bit. + +* To enable the Document Transformation Engine to work with non-English documents you must install the desired Microsoft Office language pack of the language you want to work with. 
+ +* The Document Transformation Engine does not work with Windows non-English regional settings. + +* Make sure that the Windows print spooler service is running. + +### Sizing + +There are a number of recommendations for calculating sizing. You will need: + +* Four high clocked cores per engine, with between 4 GB and 6 GB RAM. If you find that you need more power, it is better to add another engine instance with a similar specification than to upgrade the hardware. The reason for this is that Microsoft Office is not very scalable. + +* Between 10 GB and 15 GB of free space. Storage is not that important, but if you have lots of large files, you should make sure that creating temporary copies of those files will not slow the system down. + +* Gigabit Ethernet. + +* At least one CPU for each concurrent transformation that is expected to be processed by the engine. + +### Disc I/O bandwidth + +Microsoft Office transformations are I/O-heavy, and so on some solutions, I/O contention can be a performance bottleneck. When multiple Word conversions occur in parallel, performance can suffer heavily from poor random read and write speeds. + +## Installation + +The Document Transformation Engine is installed using an `msi` file where you can select to install a T-Engine at the same time. Alternatively you can install the Document Transformation Engine using the `msi` and use Docker Compose to install the T-Engine. See [Install with MSI]({% link transformation-engine/2.3/install/msi.md %}) for more details. There is also an [SDK that can be installed]({% link transformation-engine/2.3/install/sdk.md %}). + +### Set `JAVA_HOME` + +If you're using any JDK which does not set a registry key, you need to manually set the `JAVA_HOME` system variable. This mostly happens when using a `zip` package installation of the JDK. + +1. Locate your JDK installation (it's most likely in a directory such as `C:\Program Files\jdk-11.x.x`). +2. Search for **Advanced system settings**. +3. 
Select **View advanced system settings > Environment Variables**. +4. In the **System variables** section, click **New** (or **User variables** for a single user setting). +5. Add the following settings: + + * Variable name = `JAVA_HOME` + * Variable value = path to the JDK installation (from step 1). + +6. Click **OK** (twice) and finally click **Apply** to save the changes. diff --git a/transformation-engine/2.3/install/msi.md b/transformation-engine/2.3/install/msi.md new file mode 100644 index 0000000000..829830e5d8 --- /dev/null +++ b/transformation-engine/2.3/install/msi.md @@ -0,0 +1,128 @@ +--- +title: Installation +--- + +The standalone Alfresco Document Transformation Engine is installed by using an `.msi` file where you can either: + +* Select to install a T-Engine wrapper from the `.msi`. +* Install a hybrid version by using a Docker Compose file to install the T-Engine. + +In previous versions the installation files were contained within a `.zip` file. This file also contained `.amp` files that enabled you to install the Document Transformation client into Alfresco Content Services. In the current version this is not possible. + +* [Install with MSI](#install-with-msi) +* [Install T-Engine using Docker Compose](#install-t-engine-using-docker-compose) + +## Install with MSI + +> **Note:** When upgrading the Document Transformation Engine, the previous installation must be uninstalled first. +> +> * If your old version is earlier than 1.3.1, use the Control Panel **Uninstall a program** option to remove the old version, and then manually remove the Document Transformation Engine directory. By default, the Document Transformation Engine directory is `C:\Program Files (x86)\Transformation Engine\`. +> * If your old version is 1.3.1 or later, the new Document Transformation Engine MSI prompts you to uninstall the previous version. When the uninstall is complete, you can run the MSI package again to install the new version. 
There is no need to manually remove anything. + +1. Download `alfresco-document-transformation-engine-server-2.3.1.msi` from [Hyland Community](https://community.hyland.com/){:target="_blank"}. + +2. Log in to the Microsoft Windows Server as an administrator. + +3. Double-click the `.msi` installer package, and then click **Next**. + +4. Review the supported software requirements, and then click **Next**. + +5. (Optional) Select DTE T-Engine. + + > **Important:** If you do not intend to use the DTE T-Engine Docker image, you must select this option for DTE to work correctly. + + > **Note:** + > + >* For Alfresco Content Services 7.x, you can only use the T-Engine approach for now. Installing the Alfresco Module Packages (AMP) files is not possible. + >* You can use Content Services 6.x with the T-Engine approach and with the old approach (i.e. installing the AMP files in Content Services). + +6. Click **Next** and the license information screen displays. + +7. Click **Next** and select an installation folder or accept the default folder, and then click **Next**. + +8. Click **Next** to start the installation. + + You will see a progress bar and a command-line window during the installation. The installer will show a confirmation when the installation is finished. + +9. Click **Close** to finish the installation. + +10. Verify that the installation has completed successfully. + + 1. Check the Windows Services in the management console. + + 2. Locate the new service called **Document Transformation Engine**, and check that it is **Started**. + + > **Note:** Each time a file is transformed in Alfresco Content Services, the `.NET` program starts and Microsoft Office tries to check for a Certificate Revocation List (CRL). Depending on the access that the Document Transformation Engine has to the Internet when transforming a file, this check can delay the operation for up to two minutes, and will therefore delay transformation of the file. 
To prevent this, use the Windows server firewall to block internet access for all office binaries. + +11. Add the following property to `alfresco-global.properties`: + + ```bash + localTransform.transform-dte.url=http::8080/transform-dte + ``` + + + +## Install T-Engine using Docker Compose + +To deploy the Document Transformation Engine T-Engine with the Transform Service, you'll need to update your Docker Compose file to include the Document Transformation Engine T-Engine. + +> **Important:** You still need to install the Document Transformation Engine using the `.msi`. + +> **Note:** While Docker Compose is often used for production deployments, the Docker Compose file provided is recommended for development and test environments only. Customers are expected to adapt this file to their own requirements, if they intend to use Docker Compose to deploy a production environment. + +1. Add the Document Transformation Engine T-Engine container to your `docker-compose.yaml` file: + + ```yaml + transform-dte-engine: + image: quay.io/alfresco/transform-dte-engine:1.2.0 + mem_limit: 2g + environment: + JAVA_OPTS: " -Xms256m -Xmx512m -DdteServerUrl=http://:8080/transformation-backend" + ACTIVEMQ_URL: "nio://activemq:61616" + ACTIVEMQ_USER: "admin" + ACTIVEMQ_PASSWORD: "admin" + FILE_STORE_URL: "http://shared-file-store:8099/alfresco/api/-default-/private/sfs/versions/1/file" + ports: + - 8091:8090 + links: + - activemq + ``` + +2. Add the following `JAVA_OPTS` property to the `alfresco` container: + + ```yaml + -DlocalTransform.transform-dte.url=http://transform-dte-engine:8090/ + ``` + +See the Content Services documentation - [T-Engine configuration](https://github.com/Alfresco/acs-packaging/blob/master/docs/creating-a-t-engine.md#t-engine-configuration){:target="_blank"} for more details. For further development, see [Content Transformers and Renditions Extension Points]({% link content-services/latest/develop/repo-ext-points/content-transformers-renditions.md %}). 
diff --git a/transformation-engine/2.3/install/sdk.md b/transformation-engine/2.3/install/sdk.md new file mode 100644 index 0000000000..5029cca3ee --- /dev/null +++ b/transformation-engine/2.3/install/sdk.md @@ -0,0 +1,37 @@ +--- +title: Install the SDK +--- + +Use this information to install the Document Transformation Engine SDK. + +Download the Document Transformation Engine SDK from [Hyland Community](https://community.hyland.com/){:target="_blank"}. This is an executable jar file with all dependencies that works as a command-line client. The executable class is `com.westernacher.transformationserver.demo.DemoClient`. + +To invoke the Document Transformation Engine SDK jar file, use the following syntax: + +```java +java -jar alfresco-document-transformation-engine-sdk-2.3.0-plain.jar -in input.doc -out output.pdf -url http://trafo-url:8080/transformation-server +``` + +An API usage example is available at `com.westernacher.transformationserver.demo.ApiUsageExample`. You can copy, modify, and use this code in your own product. + +A list of the most important file formats is available at `mimetypes.properties`. These file formats have their mime type auto-detected by the file extension. Note that this is not the full list of supported formats. 
+ +The most important source and target formats are: + +Source formats: + +* Most image formats +* Nearly all Microsoft Word, Excel, and PowerPoint formats +* `.eml` and `.msg` Emails + +Target formats: + +* PDF and PDF/A +* SWF +* Most image formats + +Functions that do not work with the SDK: + +* OCR +* Resizing an image, which is necessary to produce thumbnails +* PDF/A as a target format diff --git a/transformation-engine/2.3/support/index.md b/transformation-engine/2.3/support/index.md new file mode 100644 index 0000000000..367c53e384 --- /dev/null +++ b/transformation-engine/2.3/support/index.md @@ -0,0 +1,22 @@ +--- +title: Supported platforms +--- + +The following are the supported platforms for Document Transformation Engine 2.3.1: + +| Version | Notes | +| ------- | ----- | +| Content Services 7.1.x | *Optional.* Use with DTE T-Engine v1.2.0 | +| Content Services 7.0.x | *Optional.* Use with DTE T-Engine v1.2.0 | +| | | +| **Java** | | +| Oracle JDK 11 | | +| | | +| **Microsoft Windows Server** | | +| Microsoft Windows Server 2019 | | +| Microsoft Windows Server 2016 | | +| Microsoft Windows Server 2012 | | +| | | +| **Microsoft Office** | | +| Microsoft Office 2019 32/64 bit | | +| Microsoft Office 2016 32/64 bit | | diff --git a/transformation-engine/2.3/using/index.md b/transformation-engine/2.3/using/index.md new file mode 100644 index 0000000000..52f928c92f --- /dev/null +++ b/transformation-engine/2.3/using/index.md @@ -0,0 +1,33 @@ +--- +title: Using the Document Transformation Engine Web Console +--- + +The Document Transformation Engine is used when you upload files to Alfresco Content Services, and you can see results in the Alfresco Share preview. 
+ +Administrators can view information about the engine and transformation errors using the Web Console which shows: + +* The status of the engine +* A historical view of all the transformations completed +* The number of successful and failed transformations + +**Note:** Only Administrators can access and use the Document Transformation Engine Web Console. + +1. To view the Document Transformation Engine Web Console, open a browser and navigate to `http://:/transformation-server/`, or `https://` if you are using SSL. + + The **Server Status** view is the default view when you open the Web Console. This displays an overview of the health and the memory use of the Document Transformation Engine. + +2. Click **History** view. + + Alternatively, you can go directly to the **History** view by navigating to `http://transformation-server/#/history`. + + The **History** view shows the details of the document transformations. It provides a number of search functions that allow administrators to find transformation problems for specific documents. + +3. You can query the transformation history using the following parameters: + + * Date-time From and To + * File name + * Status + * User name + * Document type From and To + +4. To investigate errors, set the **Outcome** field to **Error**. Hover over the warning sign to view an indication of the problem with the file.