Skip to content

Commit

Permalink
Relax ES checks in /api/v1/probes/startup REST Endpoint (#29947)
Browse files Browse the repository at this point in the history
### Proposed Changes
* Relax ES checks in `/api/v1/probes/startup` REST Endpoint in order to
prevent 503 errors when there's simply an empty or inactive ES Index.
* Update the Postman Tests to reflect the new code changes.

---------

Co-authored-by: Will Ezell <[email protected]>
  • Loading branch information
jcastro-dotcms and wezell authored Sep 11, 2024
1 parent ef1d65d commit 7f140fe
Show file tree
Hide file tree
Showing 5 changed files with 279 additions and 413 deletions.
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
package com.dotcms.rest.api.v1.system.monitor;

import static com.dotcms.content.elasticsearch.business.ESIndexAPI.INDEX_OPERATIONS_TIMEOUT_IN_MS;

import com.dotcms.content.elasticsearch.business.IndiciesInfo;
import com.dotcms.content.elasticsearch.util.RestHighLevelClientProvider;
import com.dotcms.enterprise.cluster.ClusterFactory;
import com.dotcms.exception.ExceptionUtil;
import com.dotcms.util.HttpRequestDataUtil;
import com.dotcms.util.network.IPUtils;
import com.dotmarketing.beans.Host;
import com.dotmarketing.business.APILocator;
import com.dotmarketing.common.db.DotConnect;
import com.dotmarketing.exception.DotRuntimeException;
import com.dotmarketing.util.Config;
import com.dotmarketing.util.ConfigUtils;
import com.dotmarketing.util.Logger;
Expand All @@ -21,104 +17,126 @@
import io.vavr.Tuple;
import io.vavr.Tuple2;
import io.vavr.control.Try;
import org.elasticsearch.client.RequestOptions;

import javax.servlet.http.HttpServletRequest;
import java.io.File;
import java.io.OutputStream;
import java.nio.file.Files;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicReference;
import javax.servlet.http.HttpServletRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.builder.SearchSourceBuilder;


/**
* This class provides several utility methods for checking the status of the different
* subsystems of dotCMS, namely:
* <ul>
* <li>Database server connectivity.</li>
* <li>Elasticsearch server connectivity.</li>
* <li>Caching framework or server connectivity.</li>
* <li>File System access.</li>
* <li>Assets folder write/delete operations.</li>
* </ul>
*
* @author Brent Griffin
* @since Jul 18th, 2018
*/
class MonitorHelper {

final boolean accessGranted ;
final boolean useExtendedFormat;

private static final String[] DEFAULT_IP_ACL_VALUE = new String[]{"127.0.0.1/32", "10.0.0.0/8", "172.16.0.0/12",
"192.168.0.0/16"};


private static final String IPV6_LOCALHOST = "0:0:0:0:0:0:0:1";
private static final String SYSTEM_STATUS_API_IP_ACL = "SYSTEM_STATUS_API_IP_ACL";


private static final long SYSTEM_STATUS_CACHE_RESPONSE_SECONDS = Config.getLongProperty(
"SYSTEM_STATUS_CACHE_RESPONSE_SECONDS", 10);

private static final String[] ACLS_IPS = Config.getStringArrayProperty(SYSTEM_STATUS_API_IP_ACL,
DEFAULT_IP_ACL_VALUE);


static final AtomicReference<Tuple2<Long, MonitorStats>> cachedStats = new AtomicReference<>();
boolean accessGranted = false;
boolean useExtendedFormat = false;

/**
 * Creates a helper bound to a single monitor request.
 *
 * @param request    The current instance of the {@link HttpServletRequest}; it is handed to
 *                   {@link #isAccessGranted(HttpServletRequest)} to compute
 *                   {@code accessGranted}.
 * @param heavyCheck Value stored as {@code useExtendedFormat}.
 */
MonitorHelper(final HttpServletRequest request, final boolean heavyCheck) {
this.useExtendedFormat = heavyCheck;
// Access is evaluated once, at construction time, from the incoming request.
this.accessGranted = isAccessGranted(request);
}

MonitorHelper(final HttpServletRequest request) {
/**
* Determines if the IP address of the request is allowed to access this monitor service. We use
* an ACL list to determine if the user/service accessing the monitor has permission to do so.
* ACL IPs can be defined via the {@code SYSTEM_STATUS_API_IP_ACL} property.
*
* @param request The current instance of the {@link HttpServletRequest}.
*
* @return If the IP address of the request is allowed to access this monitor service, returns
* {@code true}.
*/
boolean isAccessGranted(final HttpServletRequest request){
try {
this.useExtendedFormat = request.getParameter("extended") != null;

// set this.accessGranted
if(IPV6_LOCALHOST.equals(request.getRemoteAddr()) || ACLS_IPS == null || ACLS_IPS.length == 0){
return true;
}

final String clientIP = HttpRequestDataUtil.getIpAddress(request).toString().split(StringPool.SLASH)[1];
if (ACLS_IPS == null || ACLS_IPS.length == 0) {
this.accessGranted = true;
} else {
for (String aclIP : ACLS_IPS) {
if (IPUtils.isIpInCIDR(clientIP, aclIP)) {
this.accessGranted = true;
break;
}

for (final String aclIP : ACLS_IPS) {
if (IPUtils.isIpInCIDR(clientIP, aclIP)) {
return true;
}
}


} catch (Exception e) {
Logger.warnAndDebug(this.getClass(), e.getMessage(), e);
throw new DotRuntimeException(e);
} catch (final Exception e) {
Logger.warnEveryAndDebug(this.getClass(), e, 60000);
}
return false;
}

boolean startedUp() {
/**
* Determines if dotCMS has started up by checking if the {@code dotcms.started.up} system
* property has been set.
*
* @return If dotCMS has started up, returns {@code true}.
*/
boolean isStartedUp() {
    // Only the presence of the system property matters; its value is never inspected.
    final String startedUpFlag = System.getProperty(WebKeys.DOTCMS_STARTED_UP);
    return startedUpFlag != null;
}



/**
* Retrieves the current status of the different subsystems of dotCMS. This method caches the
* response for a period of time defined by the {@code SYSTEM_STATUS_CACHE_RESPONSE_SECONDS}
* property.
*
* @return An instance of {@link MonitorStats} containing the status of the different
* subsystems.
*/
MonitorStats getMonitorStats() {
    // Snapshot the reference once so that the null check, the expiry check and the returned
    // value all refer to the same cache entry, even if another thread replaces or clears the
    // shared reference in between.
    final Tuple2<Long, MonitorStats> cached = cachedStats.get();
    if (cached != null && cached._1 > System.currentTimeMillis()) {
        return cached._2;
    }
    // No entry, or the entry has expired: compute (and possibly cache) fresh stats.
    return getMonitorStatsNoCache();
}


/**
* Retrieves the current status of the different subsystems of dotCMS. If cached monitor stats
* are available, return them instead.
*
* @return An instance of {@link MonitorStats} containing the status of the different
* subsystems.
*/
synchronized MonitorStats getMonitorStatsNoCache() {
// double check
if (cachedStats.get() != null && cachedStats.get()._1 > System.currentTimeMillis()) {
return cachedStats.get()._2;
}



final MonitorStats monitorStats = new MonitorStats();

final IndiciesInfo indiciesInfo = Try.of(()->APILocator.getIndiciesAPI().loadIndicies()).getOrElseThrow(DotRuntimeException::new);

monitorStats.subSystemStats.isDBHealthy = isDBHealthy();
monitorStats.subSystemStats.isLiveIndexHealthy = isIndexHealthy(indiciesInfo.getLive());
monitorStats.subSystemStats.isWorkingIndexHealthy = isIndexHealthy(indiciesInfo.getWorking());
monitorStats.subSystemStats.isCacheHealthy = isCacheHealthy();
monitorStats.subSystemStats.isLocalFileSystemHealthy = isLocalFileSystemHealthy();
monitorStats.subSystemStats.isAssetFileSystemHealthy = isAssetFileSystemHealthy();


monitorStats.serverId = getServerID();
monitorStats.clusterId = getClusterID();


final MonitorStats monitorStats = new MonitorStats
.Builder()
.cacheHealthy(isCacheHealthy())
.assetFSHealthy(isAssetFileSystemHealthy())
.localFSHealthy(isLocalFileSystemHealthy())
.dBHealthy(isDBHealthy())
.esHealthy(canConnectToES())
.build();
// cache a healthy response
if (monitorStats.isDotCMSHealthy()) {
cachedStats.set( Tuple.of(System.currentTimeMillis() + (SYSTEM_STATUS_CACHE_RESPONSE_SECONDS * 1000),
Expand All @@ -127,84 +145,72 @@ synchronized MonitorStats getMonitorStatsNoCache() {
return monitorStats;
}


/**
* Determines if the database server is healthy by executing a simple query.
*
* @return If the database server is healthy, returns {@code true}.
*/
boolean isDBHealthy() {


return Try.of(()->
new DotConnect().setSQL("SELECT count(*) as count FROM (SELECT 1 FROM dot_cluster LIMIT 1) AS t")
new DotConnect().setSQL("SELECT 1 as count")
.loadInt("count"))
.onFailure(e->Logger.warnAndDebug(MonitorHelper.class, "unable to connect to db:" + e.getMessage(),e))
.getOrElse(0) > 0;


}


boolean isIndexHealthy(final String index) {

SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
searchSourceBuilder.query(QueryBuilders.matchAllQuery());
searchSourceBuilder.size(0);
searchSourceBuilder.timeout(TimeValue
.timeValueMillis(INDEX_OPERATIONS_TIMEOUT_IN_MS));
searchSourceBuilder.fetchSource(new String[]{"inode"}, null);
SearchRequest searchRequest = new SearchRequest();
searchRequest.source(searchSourceBuilder);
searchRequest.indices(index);

long totalHits = Try.of(()->
RestHighLevelClientProvider
.getInstance()
.getClient()
.search(searchRequest,RequestOptions.DEFAULT)
.getHits()
.getTotalHits()
.value)
.onFailure(e->Logger.warnAndDebug(MonitorHelper.class, "unable to connect to index:" + e.getMessage(),e))
.getOrElse(0L);

return totalHits > 0;

/**
* Determines if dotCMS can connect to Elasticsearch by pinging the server.
*
* @return If dotCMS can connect to Elasticsearch, returns {@code true}.
*/
boolean canConnectToES() {
    // Any failure while pinging is reported as "not reachable" rather than propagated.
    boolean reachable = false;
    try {
        reachable = RestHighLevelClientProvider
                .getInstance()
                .getClient()
                .ping(RequestOptions.DEFAULT);
    } catch (final Exception e) {
        final String errorMsg = "Unable to connect to ES: " + ExceptionUtil.getErrorMessage(e);
        Logger.warnAndDebug(this.getClass(), errorMsg, e);
    }
    return reachable;
}


/**
* Determines if the cache is healthy by checking if the SYSTEM_HOST identifier is available.
*
* @return If the cache is healthy, returns {@code true}.
*/
boolean isCacheHealthy() {
try {
APILocator.getIdentifierAPI().find(Host.SYSTEM_HOST);
return UtilMethods.isSet(APILocator.getIdentifierAPI().find(Host.SYSTEM_HOST).getId());
}
catch (Exception e){
} catch (final Exception e){
Logger.warnAndDebug(this.getClass(), "unable to find SYSTEM_HOST: " + e.getMessage(), e);
return false;
}

}

/**
* Determines if the local file system is healthy by writing a file to the Dynamic Content Path
* directory.
*
* @return If the local file system is healthy, returns {@code true}.
*/
boolean isLocalFileSystemHealthy() {
    // Run the file system probe against the Dynamic Content Path.
    final FileSystemTest localFsTest = new FileSystemTest(ConfigUtils.getDynamicContentPath());
    return localFsTest.call();
}

/**
* Determines if the asset file system is healthy by writing a file to the Asset Path
* directory.
*
* @return If the asset file system is healthy, returns {@code true}.
*/
boolean isAssetFileSystemHealthy() {
    // Run the file system probe against the Asset Path.
    final FileSystemTest assetFsTest = new FileSystemTest(ConfigUtils.getAssetPath());
    return assetFsTest.call();
}


/**
 * Reads the ID of the current server through the Server API.
 *
 * @return The value returned by {@code APILocator.getServerAPI().readServerId()}.
 */
private String getServerID() {
    final String serverId = APILocator.getServerAPI().readServerId();
    return serverId;
}

/**
 * Reads the ID of the cluster through the {@link ClusterFactory}.
 *
 * @return The value returned by {@code ClusterFactory.getClusterId()}.
 */
private String getClusterID() {
    final String clusterId = ClusterFactory.getClusterId();
    return clusterId;
}

/**
* This class is used to test the health of the file system by writing a file to a given path.
*/
static final class FileSystemTest implements Callable<Boolean> {

final String initialPath;
Expand All @@ -229,10 +235,11 @@ public Boolean call() {
return file.delete();
}
} catch (Exception e) {
Logger.warnAndDebug(this.getClass(), e.getMessage(), e);
Logger.warnAndDebug(this.getClass(), "Unable to write a file to: " + initialPath + " : " +e.getMessage(), e);
return false;
}
return false;
}
}

}
Loading

0 comments on commit 7f140fe

Please sign in to comment.