Skip to content

Commit

Permalink
Issue 29712 analytics collectors layer (#29791)
Browse files Browse the repository at this point in the history
First draft for analytics collectors layer

---------

Co-authored-by: freddyDOTCMS <[email protected]>
Co-authored-by: Jose Castro <[email protected]>
Co-authored-by: freddyDOTCMS <[email protected]>
  • Loading branch information
4 people authored Sep 18, 2024
1 parent b4acc93 commit 2c9e54a
Show file tree
Hide file tree
Showing 33 changed files with 1,318 additions and 41 deletions.
37 changes: 37 additions & 0 deletions dotCMS/src/main/java/com/dotcms/analytics/Util.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package com.dotcms.analytics;

import com.dotmarketing.beans.Host;
import com.dotmarketing.business.APILocator;
import com.dotmarketing.cms.urlmap.UrlMapContext;
import com.dotmarketing.util.Logger;
import com.dotmarketing.util.PageMode;
import io.vavr.control.Try;

import static com.dotcms.exception.ExceptionUtil.getErrorMessage;

/**
 * This utility class exposes common-use methods for the Analytics APIs.
 * <p>It only holds static helpers and cannot be instantiated.</p>
 *
 * @author Jose Castro
 * @since Sep 13th, 2024
 */
public final class Util {

    private Util() {
        // Utility class: static methods only, never instantiated
    }

    /**
     * Based on the specified URL Map Context, determines whether a given incoming URL maps to
     * URL Mapped content or not. Any failure raised by the URL Map API is logged and reported as
     * "not a URL Map" instead of being propagated to the caller.
     *
     * @param urlMapContext context object handed to the URL Map API; its URI is also used in
     *                      the error message when the check fails.
     *
     * @return If the URL maps to URL Mapped content, returns {@code true}. Returns
     * {@code false} when it does not, or when the check could not be performed.
     */
    public static boolean isUrlMap(final UrlMapContext urlMapContext) {
        return Try.of(() -> APILocator.getURLMapAPI().isUrlPattern(urlMapContext))
                // Guard the context here: dereferencing a null context inside the failure
                // handler would throw from the handler itself and bypass the 'false' fallback
                .onFailure(e -> Logger.error(Util.class, String.format("Failed to check for URL Mapped content for page '%s': %s",
                        null != urlMapContext ? urlMapContext.getUri() : null, getErrorMessage(e)), e))
                .getOrElse(false);
    }

}
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
package com.dotcms.analytics.track;

import com.dotcms.analytics.track.collectors.WebEventsCollectorServiceFactory;
import com.dotcms.analytics.track.matchers.FilesRequestMatcher;
import com.dotcms.analytics.track.matchers.PagesAndUrlMapsRequestMatcher;
import com.dotcms.analytics.track.matchers.RequestMatcher;
import com.dotcms.analytics.track.matchers.VanitiesRequestMatcher;
import com.dotcms.filters.interceptor.Result;
import com.dotcms.filters.interceptor.WebInterceptor;
import com.dotcms.jitsu.EventLogSubmitter;
import com.dotcms.util.CollectionsUtils;
import com.dotcms.util.WhiteBlackList;
import com.dotmarketing.util.Config;
import com.dotmarketing.util.Logger;
import com.dotmarketing.util.UUIDUtil;
import com.liferay.util.StringPool;

import javax.servlet.http.HttpServletRequest;
Expand All @@ -25,8 +30,6 @@ public class AnalyticsTrackWebInterceptor implements WebInterceptor {

private final static Map<String, RequestMatcher> requestMatchersMap = new ConcurrentHashMap<>();

private final EventLogSubmitter submitter;

/// private static final String[] DEFAULT_BLACKLISTED_PROPS = new String[]{"^/api/*"};
private static final String[] DEFAULT_BLACKLISTED_PROPS = new String[]{StringPool.BLANK};
private final WhiteBlackList whiteBlackList = new WhiteBlackList.Builder()
Expand All @@ -37,11 +40,10 @@ public class AnalyticsTrackWebInterceptor implements WebInterceptor {

/**
 * Creates the interceptor and hands its default set of request matchers to
 * {@code addRequestMatcher}.
 */
public AnalyticsTrackWebInterceptor() {

// NOTE(review): the hunk header preceding this constructor (-37,11 +40,10) suggests this listing
// interleaves removed and added diff lines. The submitter assignment and the active
// RulesRedirectsRequestMatcher entry look like the removed side -- confirm against the real file.
submitter = new EventLogSubmitter();
addRequestMatcher(
new PagesAndUrlMapsRequestMatcher(),
new FilesRequestMatcher(),
new RulesRedirectsRequestMatcher(),
// new RulesRedirectsRequestMatcher(),
new VanitiesRequestMatcher());
}

Expand All @@ -64,30 +66,39 @@ public static void removeRequestMatcher(final String requestMatcherId) {
requestMatchersMap.remove(requestMatcherId);
}


/**
 * Runs before the request is processed. When the request URI is allowed by the white/black
 * list and a matcher accepts the request for the "before request" phase, the request is tagged
 * with a request id and handed to the next step of the Analytics pipeline.
 *
 * @param request  the current HTTP request
 * @param response the current HTTP response
 *
 * @return always {@link Result#NEXT}
 */
@Override
public Result intercept(final HttpServletRequest request, final HttpServletResponse response) throws IOException {

    // URIs rejected by the white/black list are never tracked
    if (!whiteBlackList.isAllowed(request.getRequestURI())) {
        return Result.NEXT;
    }

    this.anyMatcher(request, response, RequestMatcher::runBeforeRequest).ifPresent(matcher -> {
        addRequestId(request);
        Logger.debug(this, () -> "intercept, Matched: " + matcher.getId() + " request: " + request.getRequestURI());
        fireNext(request, response, matcher);
    });

    return Result.NEXT;
}

/**
 * Makes sure the request carries a {@code "requestId"} attribute: an existing value is left
 * untouched, otherwise a newly generated UUID is stored.
 *
 * @param request the current HTTP request
 */
private void addRequestId(final HttpServletRequest request) {
    final boolean alreadyTagged = request.getAttribute("requestId") != null;
    if (alreadyTagged) {
        return;
    }
    request.setAttribute("requestId", UUIDUtil.uuid());
}

@Override
public boolean afterIntercept(final HttpServletRequest request, final HttpServletResponse response) {

if (whiteBlackList.isAllowed(request.getRequestURI())) {
final Optional<RequestMatcher> matcherOpt = this.anyMatcher(request, response, RequestMatcher::runAfterRequest);
if (matcherOpt.isPresent()) {

addRequestId (request);
Logger.debug(this, () -> "afterIntercept, Matched: " + matcherOpt.get().getId() + " request: " + request.getRequestURI());
//fireNextStep(request, response);
fireNext(request, response, matcherOpt.get());
}
}

Expand All @@ -102,4 +113,19 @@ private Optional<RequestMatcher> anyMatcher(final HttpServletRequest request, fi
.findFirst();
}

/**
 * Fires the next step of the Analytics pipeline: logs the request URI and the matcher id at
 * debug level, then asks the Web Events Collector Service to fire its collectors.
 *
 * @param request        the current HTTP request
 * @param response       the current HTTP response
 * @param requestMatcher the matcher that accepted the request
 */
protected void fireNext(final HttpServletRequest request, final HttpServletResponse response,
                        final RequestMatcher requestMatcher) {

    Logger.debug(this, () -> "fireNext, uri: " + request.getRequestURI()
            + " requestMatcher: " + requestMatcher.getId());

    WebEventsCollectorServiceFactory.getInstance()
            .getWebEventsCollectorService()
            .fireCollectors(request, response, requestMatcher);
}


}
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package com.dotcms.analytics.track.collectors;

import com.dotcms.analytics.track.matchers.VanitiesRequestMatcher;
import com.dotcms.vanityurl.model.CachedVanityUrl;
import com.dotcms.visitor.filter.characteristics.BaseCharacter;
import com.dotmarketing.beans.Host;
import com.dotmarketing.business.APILocator;
import com.dotmarketing.filters.CMSFilter;
import com.dotmarketing.filters.Constants;
import com.dotmarketing.portlets.contentlet.business.HostAPI;
import com.dotmarketing.util.UtilMethods;
import io.vavr.control.Try;

import java.util.HashMap;
import java.util.Map;

/**
 * This asynchronous collector gathers the page/asset information for a request that was matched
 * as a Vanity URL. It resolves the kind of resource the Vanity URL forwards to and delegates the
 * collection to the collector registered for that kind (pages or files), using the forward
 * target as the URI.
 *
 * @author jsanca
 */
public class AsyncVanitiesCollector implements Collector {

    private final HostAPI hostAPI;
    /** Delegates by resource type; only PAGE and FILE have a collector registered. */
    private final Map<CMSFilter.IAm, Collector> match = new HashMap<>();

    /**
     * Creates the collector using the Host API provided by the {@link APILocator}.
     */
    public AsyncVanitiesCollector() {
        this(APILocator.getHostAPI());
    }

    /**
     * Creates the collector with an explicit Host API and registers the page and file delegates.
     *
     * @param hostAPI API used to look up the site of the current request
     */
    public AsyncVanitiesCollector(final HostAPI hostAPI) {

        this.hostAPI = hostAPI;

        match.put(CMSFilter.IAm.PAGE, new PagesCollector());
        match.put(CMSFilter.IAm.FILE, new FilesCollector());
    }

    /**
     * Accepts the context only when the request was matched by the Vanities matcher and the
     * cached Vanity URL stored in the context is set and is a forward.
     *
     * @param collectorContextMap context of the current request
     *
     * @return {@code true} if this collector must run
     */
    @Override
    public boolean test(CollectorContextMap collectorContextMap) {
        final CachedVanityUrl cachedVanityUrl = (CachedVanityUrl) collectorContextMap.get(Constants.VANITY_URL_OBJECT);

        return VanitiesRequestMatcher.VANITIES_MATCHER_ID.equals(collectorContextMap.getRequestMatcher().getId()) &&
                UtilMethods.isSet(cachedVanityUrl) && cachedVanityUrl.isForward();
    }

    /**
     * Resolves the resource type of the Vanity URL forward target and, when a delegate collector
     * is registered for it, lets that delegate fill in the payload using the forward target as
     * the {@code "uri"}. The payload is always flagged with {@code comeFromVanityURL=true}.
     *
     * @param collectorContextMap  context of the current request
     * @param collectorPayloadBean payload to fill in
     *
     * @return the same payload bean that was received
     */
    @Override
    public CollectorPayloadBean collect(final CollectorContextMap collectorContextMap,
                                        final CollectorPayloadBean collectorPayloadBean) {

        // this will be a new event
        final CachedVanityUrl cachedVanityUrl = (CachedVanityUrl) collectorContextMap.get(Constants.VANITY_URL_OBJECT);

        final Host currentHost = (Host) collectorContextMap.get("currentHost");
        final Long languageId = (Long) collectorContextMap.get("langId");

        final Host site = Try.of(() -> this.hostAPI.find(currentHost.getIdentifier(), APILocator.systemUser(),
                false)).get();
        final CMSFilter.IAm whoIAM = BaseCharacter.resolveResourceType(cachedVanityUrl.forwardTo, site, languageId);

        // Only PAGE and FILE have a delegate; any other resolved type yields no collector, so
        // look it up first instead of dereferencing a possibly missing map entry
        final Collector delegate = UtilMethods.isSet(whoIAM) ? match.get(whoIAM) : null;
        if (null != delegate) {

            final CollectorContextMap innerCollectorContextMap = new WrapperCollectorContextMap(collectorContextMap,
                    Map.of("uri", cachedVanityUrl.forwardTo));
            delegate.collect(innerCollectorContextMap, collectorPayloadBean);
        }

        collectorPayloadBean.put("comeFromVanityURL", "true");
        return collectorPayloadBean;
    }

    @Override
    public boolean isAsync() {
        return true;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package com.dotcms.analytics.track.collectors;

import com.dotcms.enterprise.cluster.ClusterFactory;
import com.dotcms.util.FunctionUtils;
import com.dotmarketing.business.APILocator;

import java.time.Instant;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Objects;

/**
 * Collector that adds the basic profile of a request to the payload: request id, UTC time,
 * cluster id, server id and session data. It applies to every request, runs synchronously and
 * does not report itself as an event creator.
 */
public class BasicProfileCollector implements Collector {

    private static final DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS'Z'");

    @Override
    public boolean test(CollectorContextMap collectorContextMap) {
        // every one needs a basic profile
        return true;
    }

    @Override
    public CollectorPayloadBean collect(final CollectorContextMap collectorContextMap,
                                        final CollectorPayloadBean collectorPayloadBean) {

        // Read (and cast) everything up front so a bad context value fails before any write
        final String contextRequestId = (String) collectorContextMap.get("requestId");
        final Long contextTime = (Long) collectorContextMap.get("time");
        final String contextCluster = (String) collectorContextMap.get("cluster");
        final String contextServer = (String) collectorContextMap.get("server");
        final String contextSession = (String) collectorContextMap.get("session");
        final Boolean contextSessionNew = (Boolean) collectorContextMap.get("sessionNew");

        final String utcTime = toUtcTime(contextTime);

        collectorPayloadBean.put("request_id", contextRequestId);
        collectorPayloadBean.put("utc_time", utcTime);
        // Cluster and server ids fall back to the values of the running instance
        collectorPayloadBean.put("cluster", FunctionUtils.getOrDefault(
                Objects.nonNull(contextCluster), () -> contextCluster, ClusterFactory::getClusterId));
        collectorPayloadBean.put("server", FunctionUtils.getOrDefault(
                Objects.nonNull(contextServer), () -> contextServer, () -> APILocator.getServerAPI().readServerId()));
        collectorPayloadBean.put("sessionId", contextSession);
        collectorPayloadBean.put("sessionNew", contextSessionNew);

        return collectorPayloadBean;
    }

    /**
     * Formats the given epoch-millis value in the UTC zone, falling back to the current system
     * time when no value is provided.
     */
    private static String toUtcTime(final Long time) {
        final Long epochMillis = FunctionUtils.getOrDefault(Objects.nonNull(time), () -> time, System::currentTimeMillis);
        final ZonedDateTime utcDateTime = Instant.ofEpochMilli(epochMillis).atZone(ZoneId.of("UTC"));
        return FORMATTER.format(utcDateTime);
    }

    @Override
    public boolean isAsync() {
        return false;
    }

    @Override
    public boolean isEventCreator() {
        return false;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package com.dotcms.analytics.track.collectors;

import com.dotcms.analytics.track.matchers.RequestMatcher;
import com.dotcms.visitor.filter.characteristics.Character;

import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;

/**
 * Context Map backed by the attribute map of a {@link Character} plus a copy of an additional
 * context map. Lookups check the character attributes first.
 *
 * @author jsanca
 */
public class CharacterCollectorContextMap implements CollectorContextMap {

    private final Map<String, Object> contextMap;
    private final RequestMatcher requestMatcher;
    private final Map<String, Serializable> characterMap;

    /**
     * @param character      character whose attribute map is exposed through this context
     * @param requestMatcher matcher associated with the current request
     * @param contextMap     extra entries; they are copied into an internal map
     */
    public CharacterCollectorContextMap(final Character character,
                                        final RequestMatcher requestMatcher,
                                        final Map<String, Object> contextMap) {

        this.characterMap = character.getMap();
        this.requestMatcher = requestMatcher;
        this.contextMap = new HashMap<>(contextMap);
    }

    /**
     * Returns the value stored under the key, giving precedence to the character attributes
     * over the extra context entries.
     *
     * @param key attribute name
     *
     * @return the value, or {@code null} when neither map holds the key
     */
    @Override
    public Object get(final String key) {

        return this.characterMap.containsKey(key)
                ? this.characterMap.get(key)
                : this.contextMap.get(key);
    }

    @Override
    public RequestMatcher getRequestMatcher() {
        return this.requestMatcher;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package com.dotcms.analytics.track.collectors;

/**
 * A collector is a command that contributes information to a {@link CollectorPayloadBean}
 * based on the data available in a {@link CollectorContextMap}.
 *
 * @author jsanca
 */
public interface Collector {

    /**
     * Decides whether this collector applies to the given context.
     *
     * @param collectorContextMap context of the current request
     *
     * @return {@code true} if the collector should run
     */
    boolean test(CollectorContextMap collectorContextMap);

    /**
     * Fires the collector so it can add its data to the payload.
     *
     * @param collectorContextMap  context of the current request
     * @param collectorPayloadBean payload to contribute to
     *
     * @return the resulting {@link CollectorPayloadBean}
     */
    CollectorPayloadBean collect(CollectorContextMap collectorContextMap,
                                 CollectorPayloadBean collectorPayloadBean);

    /**
     * Tells whether the collector should run asynchronously. Defaults to {@code false}.
     *
     * @return {@code true} if the collector should run async
     */
    default boolean isAsync() {
        return false;
    }

    /**
     * Identifier of the collector. Defaults to the name of the implementing class.
     *
     * @return the collector id
     */
    default String getId() {
        return this.getClass().getName();
    }

    /**
     * Tells whether this collector is an event creator. Defaults to {@code true}.
     * NOTE(review): the exact meaning of the flag is defined by whoever consumes it -- confirm.
     *
     * @return {@code true} if the collector is an event creator
     */
    default boolean isEventCreator() {
        return true;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.dotcms.analytics.track.collectors;

import com.dotcms.analytics.track.matchers.RequestMatcher;

/**
 * Read-only view of the data a collector can use while processing a request: values are looked
 * up by key, and the matcher associated with the request is exposed separately.
 */
public interface CollectorContextMap {

/**
 * Looks up a context value.
 * @param key name of the value
 * @return the value stored under the key; NOTE(review): presumably {@code null} when the key is
 * unknown -- confirm against the implementations
 */
Object get(String key);
/**
 * Returns the request matcher associated with this context.
 * NOTE(review): the original author's note says this accessor exists because there is no
 * "previous step" phase available to carry the matcher -- confirm the intended design.
 * @return the {@link RequestMatcher}
 */
RequestMatcher getRequestMatcher();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package com.dotcms.analytics.track.collectors;

import java.io.Serializable;
import java.util.Map;

/**
 * Encapsulates the basic signature for a collector payload bean: a container of
 * {@link Serializable} values addressed by String keys.
 * @author jsanca
 */
public interface CollectorPayloadBean {

/**
 * Stores a value under the given key.
 * @param key   name of the entry
 * @param value value to store
 * @return a {@link CollectorPayloadBean}; NOTE(review): presumably this same bean, to allow
 * chained calls -- confirm against the implementation
 */
CollectorPayloadBean put(String key, Serializable value);
/**
 * Reads the value stored under the given key.
 * @param key name of the entry
 * @return the stored value
 */
Serializable get(String key);
/**
 * Exposes the payload content as a map.
 * @return map of keys to their {@link Serializable} values
 */
Map<String, Serializable> toMap();

/**
 * Combines another payload bean with this one.
 * NOTE(review): merge/override semantics are not visible here -- confirm against the
 * implementation.
 * @param other the payload bean to add
 * @return a {@link CollectorPayloadBean} with the combined content
 */
CollectorPayloadBean add(CollectorPayloadBean other);
}
Loading

0 comments on commit 2c9e54a

Please sign in to comment.