diff --git a/dotCMS/hotfix_tracking.md b/dotCMS/hotfix_tracking.md index 391e81ddddd0..7a5672ee1838 100644 --- a/dotCMS/hotfix_tracking.md +++ b/dotCMS/hotfix_tracking.md @@ -135,4 +135,5 @@ This maintenance release includes the following code fixes: 128. https://github.com/dotCMS/core/issues/26477 : Search filter can't find/filter images #26477 129. https://github.com/dotCMS/core/issues/27297 : Edit Page: Edit Contentlet Dialog Language Support #27297 130. https://github.com/dotCMS/core/issues/26413 : Template Builder: Container Layout Editing Issue #26413 -131. https://github.com/dotCMS/core/issues/27816 : Content Displacement Bug when Editing Template #27816 \ No newline at end of file +131. https://github.com/dotCMS/core/issues/27816 : Content Displacement Bug when Editing Template #27816 +132. https://github.com/dotCMS/core/issues/28163 : 'alive' and 'startup' healthcheck APIs return 503 on seemingly healthy app #28163 \ No newline at end of file diff --git a/dotCMS/src/main/java/com/dotcms/rest/api/v1/system/monitor/MonitorHelper.java b/dotCMS/src/main/java/com/dotcms/rest/api/v1/system/monitor/MonitorHelper.java index e1961e4d94b7..8ea6239fa5e4 100644 --- a/dotCMS/src/main/java/com/dotcms/rest/api/v1/system/monitor/MonitorHelper.java +++ b/dotCMS/src/main/java/com/dotcms/rest/api/v1/system/monitor/MonitorHelper.java @@ -1,7 +1,7 @@ package com.dotcms.rest.api.v1.system.monitor; -import com.dotcms.concurrent.DotConcurrentFactory; -import com.dotcms.concurrent.DotSubmitter; +import static com.dotcms.content.elasticsearch.business.ESIndexAPI.INDEX_OPERATIONS_TIMEOUT_IN_MS; + import com.dotcms.content.elasticsearch.business.IndiciesInfo; import com.dotcms.content.elasticsearch.util.RestHighLevelClientProvider; import com.dotcms.enterprise.cluster.ClusterFactory; @@ -10,81 +10,58 @@ import com.dotmarketing.beans.Host; import com.dotmarketing.business.APILocator; import com.dotmarketing.common.db.DotConnect; -import com.dotmarketing.db.DbConnectionFactory; import com.dotmarketing.exception.DotRuntimeException; -import com.dotmarketing.portlets.contentlet.model.Contentlet; import com.dotmarketing.util.Config; import com.dotmarketing.util.ConfigUtils; import com.dotmarketing.util.Logger; import com.dotmarketing.util.UUIDUtil; import com.dotmarketing.util.UtilMethods; +import com.dotmarketing.util.WebKeys; import com.liferay.util.StringPool; -import com.rainerhahnekamp.sneakythrow.Sneaky; -import io.vavr.Lazy; import io.vavr.Tuple; import io.vavr.Tuple2; -import net.jodah.failsafe.CircuitBreaker; -import net.jodah.failsafe.Failsafe; +import io.vavr.control.Try; +import java.io.File; +import java.io.OutputStream; +import java.nio.file.Files; +import java.util.concurrent.Callable; +import java.util.concurrent.atomic.AtomicReference; +import javax.servlet.http.HttpServletRequest; import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.RequestOptions; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.builder.SearchSourceBuilder; -import javax.servlet.http.HttpServletRequest; -import javax.ws.rs.InternalServerErrorException; -import java.io.File; -import java.io.OutputStream; -import java.net.UnknownHostException; -import java.nio.file.Files; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; -import static com.dotcms.content.elasticsearch.business.ESIndexAPI.INDEX_OPERATIONS_TIMEOUT_IN_MS; +class MonitorHelper { -class MonitorHelper { - private static final long DEFAULT_LOCAL_FS_TIMEOUT = 1000; - private static final long DEFAULT_CACHE_TIMEOUT = 1000; - private static final long DEFAULT_ASSET_FS_TIMEOUT = 1000; - private static final long DEFAULT_INDEX_TIMEOUT = 1000; - private static final long DEFAULT_DB_TIMEOUT = 1000; - private static final String[] DEFAULT_IP_ACL_VALUE = new String[] {"127.0.0.1/32","10.0.0.0/8","172.16.0.0/12","192.168.0.0/16"}; + private static final String[] DEFAULT_IP_ACL_VALUE = new String[]{"127.0.0.1/32", "10.0.0.0/8", "172.16.0.0/12", + "192.168.0.0/16"}; - private static final String SYSTEM_STATUS_API_IP_ACL = "SYSTEM_STATUS_API_IP_ACL"; - private static final String SYSTEM_STATUS_API_LOCAL_FS_TIMEOUT = "SYSTEM_STATUS_API_LOCAL_FS_TIMEOUT"; - private static final String SYSTEM_STATUS_API_CACHE_TIMEOUT = "SYSTEM_STATUS_API_CACHE_TIMEOUT"; - private static final String SYSTEM_STATUS_API_ASSET_FS_TIMEOUT = "SYSTEM_STATUS_API_ASSET_FS_TIMEOUT"; - private static final String SYSTEM_STATUS_API_INDEX_TIMEOUT = "SYSTEM_STATUS_API_INDEX_TIMEOUT"; - private static final String SYSTEM_STATUS_API_DB_TIMEOUT = "SYSTEM_STATUS_API_DB_TIMEOUT"; + private static final String SYSTEM_STATUS_API_IP_ACL = "SYSTEM_STATUS_API_IP_ACL"; - private static final int SYSTEM_STATUS_CACHE_RESPONSE_SECONDS = Config.getIntProperty("SYSTEM_STATUS_CACHE_RESPONSE_SECONDS",10); - - private static final String[] ACLS_IPS = Config.getStringArrayProperty(SYSTEM_STATUS_API_IP_ACL, DEFAULT_IP_ACL_VALUE); + private static final long SYSTEM_STATUS_CACHE_RESPONSE_SECONDS = Config.getLongProperty( + "SYSTEM_STATUS_CACHE_RESPONSE_SECONDS", 10); - private static final long localFSTimeout = Config.getLongProperty(SYSTEM_STATUS_API_LOCAL_FS_TIMEOUT, DEFAULT_LOCAL_FS_TIMEOUT); - private static final long cacheTimeout = Config.getLongProperty(SYSTEM_STATUS_API_CACHE_TIMEOUT, DEFAULT_CACHE_TIMEOUT); - private static final long assetTimeout = Config.getLongProperty(SYSTEM_STATUS_API_ASSET_FS_TIMEOUT, DEFAULT_ASSET_FS_TIMEOUT); - private static final long indexTimeout = Config.getLongProperty(SYSTEM_STATUS_API_INDEX_TIMEOUT, DEFAULT_INDEX_TIMEOUT); - private static final long dbTimeout = Config.getLongProperty(SYSTEM_STATUS_API_DB_TIMEOUT, DEFAULT_DB_TIMEOUT); + private static final String[] ACLS_IPS = Config.getStringArrayProperty(SYSTEM_STATUS_API_IP_ACL, + DEFAULT_IP_ACL_VALUE); - + + static final AtomicReference> cachedStats = new AtomicReference<>(); boolean accessGranted = false; boolean useExtendedFormat = false; - MonitorHelper(final HttpServletRequest request) throws UnknownHostException { + + MonitorHelper(final HttpServletRequest request) { try { this.useExtendedFormat = request.getParameter("extended") != null; // set this.accessGranted - final String clientIP = HttpRequestDataUtil.getIpAddress(request).toString().split(StringPool.SLASH)[1]; if (ACLS_IPS == null || ACLS_IPS.length == 0) { this.accessGranted = true; @@ -96,234 +73,166 @@ class MonitorHelper { } } } - }catch(Exception e){ + + + } catch (Exception e) { Logger.warnAndDebug(this.getClass(), e.getMessage(), e); throw new DotRuntimeException(e); } } - - static Tuple2 cachedStats=null; - - MonitorStats getMonitorStats() throws Throwable{ - if(cachedStats!=null && cachedStats._1 > System.currentTimeMillis()) { - return cachedStats._2; + boolean startedUp() { + return System.getProperty(WebKeys.DOTCMS_STARTED_UP)!=null; + } + + + + MonitorStats getMonitorStats() { + if (cachedStats.get() != null && cachedStats.get()._1 > System.currentTimeMillis()) { + return cachedStats.get()._2; } return getMonitorStatsNoCache(); } - - MonitorStats getMonitorStatsNoCache() throws Throwable{ + + + synchronized MonitorStats getMonitorStatsNoCache() { + // double check + if (cachedStats.get() != null && cachedStats.get()._1 > System.currentTimeMillis()) { + return cachedStats.get()._2; + } + + final MonitorStats monitorStats = new MonitorStats(); - final IndiciesInfo indiciesInfo = APILocator.getIndiciesAPI().loadIndicies(); + final IndiciesInfo indiciesInfo = Try.of(()->APILocator.getIndiciesAPI().loadIndicies()).getOrElseThrow(DotRuntimeException::new); + + monitorStats.subSystemStats.isDBHealthy = isDBHealthy(); + monitorStats.subSystemStats.isLiveIndexHealthy = isIndexHealthy(indiciesInfo.getLive()); + monitorStats.subSystemStats.isWorkingIndexHealthy = isIndexHealthy(indiciesInfo.getWorking()); + monitorStats.subSystemStats.isCacheHealthy = isCacheHealthy(); + monitorStats.subSystemStats.isLocalFileSystemHealthy = isLocalFileSystemHealthy(); + monitorStats.subSystemStats.isAssetFileSystemHealthy = isAssetFileSystemHealthy(); + + + monitorStats.serverId = getServerID(); + monitorStats.clusterId = getClusterID(); - monitorStats.subSystemStats.isDBHealthy = isDBHealthy(dbTimeout); - monitorStats.subSystemStats.isLiveIndexHealthy = isIndexHealthy(indiciesInfo.getLive(), indexTimeout); - monitorStats.subSystemStats.isWorkingIndexHealthy = isIndexHealthy(indiciesInfo.getWorking(), indexTimeout); - monitorStats.subSystemStats.isCacheHealthy = isCacheHealthy(cacheTimeout); - monitorStats.subSystemStats.isLocalFileSystemHealthy = isLocalFileSystemHealthy(localFSTimeout); - monitorStats.subSystemStats.isAssetFileSystemHealthy = isAssetFileSystemHealthy(assetTimeout); - if (useExtendedFormat) { - monitorStats.serverId = getServerID(assetTimeout); - monitorStats.clusterId = getClusterID(dbTimeout); - } - // cache a healthy response - if(monitorStats.isDotCMSHealthy()) { - cachedStats = Tuple.of(System.currentTimeMillis()+(SYSTEM_STATUS_CACHE_RESPONSE_SECONDS*1000) , monitorStats); + if (monitorStats.isDotCMSHealthy()) { + cachedStats.set( Tuple.of(System.currentTimeMillis() + (SYSTEM_STATUS_CACHE_RESPONSE_SECONDS * 1000), + monitorStats)); } return monitorStats; } - boolean isDBHealthy(final long timeOut) throws Throwable { - - return Failsafe - .with(breaker()) - .withFallback(Boolean.FALSE) - .get(this.failFastBooleanPolicy(timeOut, () -> { - try{ - final DotConnect dc = new DotConnect(); - if(DbConnectionFactory.isPostgres()) { - return dc.setSQL("SELECT count(*) as count FROM (SELECT 1 FROM dot_cluster LIMIT 1) AS t").loadInt("count")>0; - } - else { - return dc.setSQL("SELECT count(*) as count from dot_cluster").loadInt("count")>0; - } - } - catch(Exception e) { - Logger.warn(getClass(), "db connection failing:" + e.getMessage() ); - return false; - } - finally{ - DbConnectionFactory.closeSilently(); - } - })); - } + boolean isDBHealthy() { - boolean isIndexHealthy(final String index, final long timeOut) throws Throwable { - - return Failsafe - .with(breaker()) - .withFallback(Boolean.FALSE) - .get(this.failFastBooleanPolicy(timeOut, () -> { - try{ - - SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); - searchSourceBuilder.query(QueryBuilders.matchAllQuery()); - searchSourceBuilder.size(0); - searchSourceBuilder.timeout(TimeValue - .timeValueMillis(INDEX_OPERATIONS_TIMEOUT_IN_MS)); - searchSourceBuilder.fetchSource(new String[] {"inode"}, null); - SearchRequest searchRequest = new SearchRequest(); - searchRequest.source(searchSourceBuilder); - searchRequest.indices(index); - - final SearchResponse response = Sneaky.sneak(()-> - RestHighLevelClientProvider.getInstance().getClient().search(searchRequest, - RequestOptions.DEFAULT)); - return response.getHits().getTotalHits().value>0; - }catch(Exception e) { - Logger.warn(getClass(), "ES connection failing: " + e.getMessage() ); - return false; - }finally{ - DbConnectionFactory.closeSilently(); - } - })); - } - boolean isCacheHealthy(final long timeOut) throws Throwable { - - return Failsafe - .with(breaker()) - .withFallback(Boolean.FALSE) - .get(this.failFastBooleanPolicy(timeOut, () -> { - try{ - // load system host contentlet - Contentlet con = APILocator.getContentletAPI().findContentletByIdentifier(Host.SYSTEM_HOST,false,APILocator.getLanguageAPI().getDefaultLanguage().getId(),APILocator.systemUser(),false); - return UtilMethods.isSet(con::getIdentifier); - }catch(Exception e) { - Logger.warn(getClass(), "Cache is failing: " + e.getMessage() ); - return false; - }finally{ - DbConnectionFactory.closeSilently(); - } - })); + return Try.of(()-> + new DotConnect().setSQL("SELECT count(*) as count FROM (SELECT 1 FROM dot_cluster LIMIT 1) AS t") + .loadInt("count")) + .onFailure(e->Logger.warnAndDebug(MonitorHelper.class, "unable to connect to db:" + e.getMessage(),e)) + .getOrElse(0) > 0; + } - final class FileSystemTest implements Callable { - final String initialPath; + boolean isIndexHealthy(final String index) { - public FileSystemTest(String initialPath) { - this.initialPath = initialPath.endsWith(File.separator) ? initialPath : initialPath + File.separator; - } + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); + searchSourceBuilder.query(QueryBuilders.matchAllQuery()); + searchSourceBuilder.size(0); + searchSourceBuilder.timeout(TimeValue + .timeValueMillis(INDEX_OPERATIONS_TIMEOUT_IN_MS)); + searchSourceBuilder.fetchSource(new String[]{"inode"}, null); + SearchRequest searchRequest = new SearchRequest(); + searchRequest.source(searchSourceBuilder); + searchRequest.indices(index); - @Override - public Boolean call() throws Exception { - final String uuid=UUIDUtil.uuid(); - final String realPath = initialPath - + "monitor" - + File.separator - + uuid; - final File file = new File(realPath); - if(file.mkdirs() && file.delete() && file.createNewFile()) { - try(OutputStream os = Files.newOutputStream(file.toPath())){ - os.write(uuid.getBytes()); - } - return file.delete(); - } - return false; - } + long totalHits = Try.of(()-> + RestHighLevelClientProvider + .getInstance() + .getClient() + .search(searchRequest,RequestOptions.DEFAULT) + .getHits() + .getTotalHits() + .value) + .onFailure(e->Logger.warnAndDebug(MonitorHelper.class, "unable to connect to index:" + e.getMessage(),e)) + .getOrElse(0L); + return totalHits > 0; } - - boolean isLocalFileSystemHealthy(final long timeOut) throws Throwable { - - return Failsafe - .with(breaker()) - .withFallback(Boolean.FALSE) - .get(this.failFastBooleanPolicy(timeOut, new FileSystemTest(ConfigUtils.getDynamicContentPath()) - )); - } - boolean isAssetFileSystemHealthy(final long timeOut) throws Throwable { + boolean isCacheHealthy() { + try { + APILocator.getIdentifierAPI().find(Host.SYSTEM_HOST); + return UtilMethods.isSet(APILocator.getIdentifierAPI().find(Host.SYSTEM_HOST).getId()); + } + catch (Exception e){ + Logger.warnAndDebug(this.getClass(), "unable to find SYSTEM_HOST: " + e.getMessage(), e); + return false; + } - return Failsafe - .with(breaker()) - .withFallback(Boolean.FALSE) - .get(this.failFastBooleanPolicy(timeOut, new FileSystemTest(ConfigUtils.getAbsoluteAssetsRootPath()) - )); } - - private Callable failFastBooleanPolicy(long thresholdMilliseconds, final Callable callable) throws Throwable{ - return ()-> { - try { - final DotSubmitter executorService = DotConcurrentFactory.getInstance().getSubmitter(DotConcurrentFactory.DOT_SYSTEM_THREAD_POOL); - final Future task = executorService.submit(callable); - return task.get(thresholdMilliseconds, TimeUnit.MILLISECONDS); - } catch (ExecutionException e) { - throw new InternalServerErrorException("Internal exception ", e.getCause()); - } catch (TimeoutException e) { - throw new InternalServerErrorException("Execution aborted, exceeded allowed " + thresholdMilliseconds + " threshold", e.getCause()); - } - }; + + boolean isLocalFileSystemHealthy() { + + return new FileSystemTest(ConfigUtils.getDynamicContentPath()).call(); + } - private Callable failFastStringPolicy(long thresholdMilliseconds, final Callable callable) throws Throwable{ - return ()-> { - try { - final DotSubmitter executorService = DotConcurrentFactory.getInstance().getSubmitter(DotConcurrentFactory.DOT_SYSTEM_THREAD_POOL); - final Future task = executorService.submit(callable); - return task.get(thresholdMilliseconds, TimeUnit.MILLISECONDS); - } catch (ExecutionException e) { - throw new InternalServerErrorException("Internal exception ", e.getCause()); - } catch (TimeoutException e) { - throw new InternalServerErrorException("Execution aborted, exceeded allowed " + thresholdMilliseconds + " threshold", e.getCause()); - } - }; + boolean isAssetFileSystemHealthy() { + + return new FileSystemTest(ConfigUtils.getAssetPath()).call(); + } - private CircuitBreaker breaker(){ - return new CircuitBreaker(); + + private String getServerID() { + return APILocator.getServerAPI().readServerId(); + } - private String getServerID(final long timeOut) throws Throwable{ - return Failsafe - .with(breaker()) - .withFallback("UNKNOWN") - .get(failFastStringPolicy(timeOut, () -> { - String serverID = "UNKNOWN"; - try { - serverID=APILocator.getServerAPI().readServerId(); - } - catch (Throwable t) { - Logger.error(this, "Error - unable to get the serverID", t); - } - return serverID; - })); + private String getClusterID() { + return ClusterFactory.getClusterId(); + } - private String getClusterID(final long timeOut) throws Throwable{ - return Failsafe - .with(breaker()) - .withFallback("UNKNOWN") - .get(failFastStringPolicy(timeOut, () -> { - String clusterID = "UNKNOWN"; - try { - clusterID=ClusterFactory.getClusterId(); - } - catch (Throwable t) { - Logger.error(this, "Error - unable to get the clusterID", t); - } - return clusterID; - })); + static final class FileSystemTest implements Callable { + + final String initialPath; + + public FileSystemTest(String initialPath) { + this.initialPath = initialPath.endsWith(File.separator) ? initialPath : initialPath + File.separator; + } + @Override + public Boolean call() { + final String uuid = UUIDUtil.uuid(); + final String realPath = initialPath + + "monitor" + + File.separator + + uuid; + final File file = new File(realPath); + try { + if (file.mkdirs() && file.delete() && file.createNewFile()) { + try (OutputStream os = Files.newOutputStream(file.toPath())) { + os.write(uuid.getBytes()); + } + return file.delete(); + } + } catch (Exception e) { + Logger.warnAndDebug(this.getClass(), e.getMessage(), e); + return false; + } + return false; + } } -} +} \ No newline at end of file diff --git a/dotCMS/src/main/java/com/dotcms/rest/api/v1/system/monitor/MonitorResource.java b/dotCMS/src/main/java/com/dotcms/rest/api/v1/system/monitor/MonitorResource.java index 116e290998b0..4a422e956361 100644 --- a/dotCMS/src/main/java/com/dotcms/rest/api/v1/system/monitor/MonitorResource.java +++ b/dotCMS/src/main/java/com/dotcms/rest/api/v1/system/monitor/MonitorResource.java @@ -1,167 +1,121 @@ package com.dotcms.rest.api.v1.system.monitor; +import com.dotcms.business.CloseDBIfOpened; +import com.dotcms.rest.annotation.NoCache; +import java.util.Map; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import javax.ws.rs.GET; import javax.ws.rs.Path; import javax.ws.rs.Produces; import javax.ws.rs.core.Context; -import javax.ws.rs.core.GenericEntity; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; -import javax.ws.rs.core.Response.ResponseBuilder; import org.glassfish.jersey.server.JSONP; -import com.dotcms.business.CloseDBIfOpened; -import com.dotcms.rest.annotation.NoCache; -import com.dotmarketing.util.WebKeys; -import com.dotmarketing.util.json.JSONObject; -import com.liferay.util.StringPool; - -import java.util.List; -import java.util.Map; @Path("/v1/{a:system-status|probes}") public class MonitorResource { - private static final int INSUFFICIENT_STORAGE = 507; + private static final int SERVICE_UNAVAILABLE = HttpServletResponse.SC_SERVICE_UNAVAILABLE; private static final int FORBIDDEN = HttpServletResponse.SC_FORBIDDEN; - @NoCache + + /** + * This /startup and /ready probe is heavy - it is intended to report on when dotCMS first comes up + * and can serve traffic. It gives a report on dotCMS and subsystems status + * and should not be run as an alive check + * + * it tests + * 1. dotCMS can connect to the db + * 2. dotCMS can connect to elasticsearch + * 3. dotCMS can write to the file systems, both local and nfs + * 4. that the cache is responding + * 5. that dotCMS has started up. + * + * It is important to note that this resource runs through the CMSFilter and the rules engine before responding. + * + * @param request + * @return + */ @GET @JSONP @Path("/") @Produces(MediaType.APPLICATION_JSON) @CloseDBIfOpened - public Response statusCheck(final @Context HttpServletRequest request) throws Throwable { - + public Response statusCheck(final @Context HttpServletRequest request) { final MonitorHelper helper = new MonitorHelper(request); + if(!helper.accessGranted) { + return Response.status(FORBIDDEN).entity(Map.of()).build(); + } + + if(!helper.startedUp()) { + return Response.status(SERVICE_UNAVAILABLE).build(); + } - ResponseBuilder builder = null; - if (helper.accessGranted) { - final MonitorStats stats = helper.getMonitorStats(); - - if (helper.useExtendedFormat) { - final JSONObject jo = new JSONObject(); - jo.put("serverID", stats.serverId); - jo.put("clusterID", stats.clusterId); - jo.put("dotCMSHealthy", stats.isDotCMSHealthy()); - jo.put("frontendHealthy", stats.isFrontendHealthy()); - jo.put("backendHealthy", stats.isBackendHealthy()); - - final JSONObject subsystems = new JSONObject(); - subsystems.put("dbSelectHealthy", stats.subSystemStats.isDBHealthy); - subsystems.put("indexLiveHealthy", stats.subSystemStats.isLiveIndexHealthy); - subsystems.put("indexWorkingHealthy", stats.subSystemStats.isWorkingIndexHealthy); - subsystems.put("cacheHealthy", stats.subSystemStats.isCacheHealthy); - subsystems.put("localFSHealthy", stats.subSystemStats.isLocalFileSystemHealthy); - subsystems.put("assetFSHealthy", stats.subSystemStats.isAssetFileSystemHealthy); - jo.put("subsystems", subsystems); - - builder = Response.ok(jo.toString(2), MediaType.APPLICATION_JSON); - } else { - if (stats.isDotCMSHealthy()) { - builder = Response.ok(StringPool.BLANK, MediaType.APPLICATION_JSON); - } else if (!stats.isBackendHealthy() && stats.isFrontendHealthy()) { - builder = Response.status(INSUFFICIENT_STORAGE).entity(StringPool.BLANK).type(MediaType.APPLICATION_JSON); - } else { - builder = Response.status(SERVICE_UNAVAILABLE).entity(StringPool.BLANK).type(MediaType.APPLICATION_JSON); - } - } + if(!helper.getMonitorStats().isDotCMSHealthy()) { + return Response.status(SERVICE_UNAVAILABLE).build(); } - else { - // Access is forbidden because IP is not in any range in ACL list - return Response.status(FORBIDDEN).build(); + if(helper.useExtendedFormat) { + return Response.ok(helper.getMonitorStats().toMap()).build(); } + return Response.ok().build(); + - return builder.build(); } - - - /** - * This probe is lightweight - it checks if the server is up (by the time a request gets here it has - * already run through the CMSFilter) and does a quick cache check to insure all is well. - * - * @param request - * @return - * @throws Throwable - */ - @GET - @Path("/alive") - @CloseDBIfOpened - @Produces(MediaType.APPLICATION_JSON) - public Response aliveCheck(final @Context HttpServletRequest request) throws Throwable { - final MonitorHelper helper = new MonitorHelper(request); - if(!helper.accessGranted) { - return Response.status(FORBIDDEN).build(); - } - //try this twice as it is an imperfect test - if(helper.isCacheHealthy(3000)) { - return Response.ok().build(); - } - if(helper.isCacheHealthy(3000)) { - return Response.ok().build(); - } - - return Response.status(SERVICE_UNAVAILABLE).build(); - - } - - - /** - * This probe tests all the dotCMS subsystems and will return either a success or failure based on - * the result. This is a valid readiness check as the request already runs through the CMSFilter - * (url resolution, rules firing) before reaching here. - * - * @param request - * @return - * @throws Throwable - */ + + @NoCache @GET - @Path("/ready") - @CloseDBIfOpened + @JSONP + @Path("/{a:startup|ready}") @Produces(MediaType.APPLICATION_JSON) - public Response readyCheck(final @Context HttpServletRequest request) throws Throwable { + @CloseDBIfOpened + public Response ready(final @Context HttpServletRequest request) { + + return statusCheck(request); - return startup(request); } - + + + + + /** - * This resource tests that dotCMS has started and queries all subsystems before returning an ok. - * + * This /alive probe is lightweight - it checks if the server is up by requesting a common object from + * the dotCMS cache layer twice in a row. By the time a request gets here it has + * already run through the CMSFilter) . + * + * It is intended to be used by kubernetes and load balancers to determine if the server is up. * @param request * @return - * @throws Throwable */ + @GET - @Path("/startup") + @Path("/alive") @CloseDBIfOpened @Produces(MediaType.APPLICATION_JSON) - public Response startup(final @Context HttpServletRequest request) throws Throwable { + public Response aliveCheck(final @Context HttpServletRequest request) { + final MonitorHelper helper = new MonitorHelper(request); if(!helper.accessGranted) { - return Response.status(FORBIDDEN).entity(Map.of()).build(); + return Response.status(FORBIDDEN).build(); } - - // this is set at the end of the InitServlet - if(System.getProperty(WebKeys.DOTCMS_STARTED_UP)==null) { + if(!helper.startedUp()) { return Response.status(SERVICE_UNAVAILABLE).build(); } - - if(!helper.getMonitorStats().isDotCMSHealthy()) { - return Response.status(SERVICE_UNAVAILABLE).build(); + + //try this twice as it is an imperfect test + if(helper.isCacheHealthy() && helper.isCacheHealthy()) { + return Response.ok().build(); } - - return Response.ok().build(); - + + + return Response.status(SERVICE_UNAVAILABLE).build(); + } - - - - -} +} \ No newline at end of file diff --git a/dotCMS/src/main/java/com/dotcms/rest/api/v1/system/monitor/MonitorStats.java b/dotCMS/src/main/java/com/dotcms/rest/api/v1/system/monitor/MonitorStats.java index bf812075ed17..9627c5422776 100644 --- a/dotCMS/src/main/java/com/dotcms/rest/api/v1/system/monitor/MonitorStats.java +++ b/dotCMS/src/main/java/com/dotcms/rest/api/v1/system/monitor/MonitorStats.java @@ -1,25 +1,53 @@ package com.dotcms.rest.api.v1.system.monitor; import com.liferay.util.StringPool; +import java.util.Map; + class MonitorStats { + + final MonitorSubSystemStats subSystemStats = new MonitorSubSystemStats(); String clusterId = StringPool.BLANK; String serverId = StringPool.BLANK; - final MonitorSubSystemStats subSystemStats = new MonitorSubSystemStats(); boolean isDotCMSHealthy() { return isBackendHealthy() && isFrontendHealthy(); } boolean isBackendHealthy() { - return subSystemStats.isDBHealthy && subSystemStats.isLiveIndexHealthy && subSystemStats.isWorkingIndexHealthy && - subSystemStats.isCacheHealthy && subSystemStats.isLocalFileSystemHealthy && subSystemStats.isAssetFileSystemHealthy; + return subSystemStats.isDBHealthy && subSystemStats.isLiveIndexHealthy && subSystemStats.isWorkingIndexHealthy + && + subSystemStats.isCacheHealthy && subSystemStats.isLocalFileSystemHealthy + && subSystemStats.isAssetFileSystemHealthy; } boolean isFrontendHealthy() { return subSystemStats.isDBHealthy && subSystemStats.isLiveIndexHealthy && subSystemStats.isCacheHealthy && subSystemStats.isLocalFileSystemHealthy && subSystemStats.isAssetFileSystemHealthy; } -} + Map toMap() { + + final Map subsystems = Map.of( + "dbSelectHealthy", subSystemStats.isDBHealthy, + "indexLiveHealthy", subSystemStats.isLiveIndexHealthy, + "indexWorkingHealthy", subSystemStats.isWorkingIndexHealthy, + "cacheHealthy", subSystemStats.isCacheHealthy, + "localFSHealthy", subSystemStats.isLocalFileSystemHealthy, + "assetFSHealthy", subSystemStats.isAssetFileSystemHealthy); + + return Map.of( + "serverID", this.serverId, + "clusterID", this.clusterId, + "dotCMSHealthy", this.isDotCMSHealthy(), + "frontendHealthy", this.isFrontendHealthy(), + "backendHealthy", this.isBackendHealthy(), + "subsystems", subsystems); + + + + + } + +} \ No newline at end of file