diff --git a/thirdeye/build b/thirdeye/build index d44c58c6d99b..4b93b3ede7c0 100755 --- a/thirdeye/build +++ b/thirdeye/build @@ -1,2 +1,2 @@ #!/bin/bash -mvn clean package install +mvn -T 1C clean compile package diff --git a/thirdeye/pom.xml b/thirdeye/pom.xml index 4ea05ff1ce7b..edd8df4b1789 100644 --- a/thirdeye/pom.xml +++ b/thirdeye/pom.xml @@ -17,6 +17,10 @@ thirdeye-realtime + + UTF-8 + + diff --git a/thirdeye/thirdeye-bootstrap/src/main/java/com/linkedin/thirdeye/bootstrap/ThirdEyeJob.java b/thirdeye/thirdeye-bootstrap/src/main/java/com/linkedin/thirdeye/bootstrap/ThirdEyeJob.java index f613ea105186..11b65bfd408c 100644 --- a/thirdeye/thirdeye-bootstrap/src/main/java/com/linkedin/thirdeye/bootstrap/ThirdEyeJob.java +++ b/thirdeye/thirdeye-bootstrap/src/main/java/com/linkedin/thirdeye/bootstrap/ThirdEyeJob.java @@ -6,13 +6,13 @@ import java.io.InputStream; import java.lang.reflect.Constructor; import java.lang.reflect.Method; +import java.net.URLDecoder; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Properties; -import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -20,6 +20,8 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.fs.RemoteIterator; +import org.joda.time.DateTime; +import org.joda.time.format.ISODateTimeFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -28,7 +30,6 @@ import com.linkedin.thirdeye.bootstrap.aggregation.AggregationJobConstants; import com.linkedin.thirdeye.bootstrap.analysis.AnalysisJobConstants; import com.linkedin.thirdeye.bootstrap.analysis.AnalysisPhaseJob; -import com.linkedin.thirdeye.bootstrap.analysis.AnalysisPhaseStats; import com.linkedin.thirdeye.bootstrap.join.JoinPhaseJob; import com.linkedin.thirdeye.bootstrap.rollup.phase1.RollupPhaseOneConstants; import 
com.linkedin.thirdeye.bootstrap.rollup.phase1.RollupPhaseOneJob; @@ -46,49 +47,70 @@ import com.linkedin.thirdeye.bootstrap.startree.generation.StarTreeGenerationConstants; import com.linkedin.thirdeye.bootstrap.startree.generation.StarTreeGenerationJob; -/* - thirdeye.root={/path/to/user} - thirdeye.collection={collection} - input.time.min={collectionTime} - input.time.max={collectionTime} - input.paths=/path1,/path2,/path3 - - {root}/ - {collection}/ - config.yml - schema.avsc - data_{start}-{end}/ - aggregation/ - rollup/ - phase1/ - phase2/ - phase3/ - phase4/ - startree/ - generation/ - star-tree-{collection}/ - {collection}-tree.bin - bootstrap_phase1/ - bootstrap_phase2/ - task_* - data_{start}-{end}/ - startree/ - bootstrap_phase1/ - bootstrap_phase2/ - task_* - - +/** + * Wrapper to manage Hadoop flows for ThirdEye. + * + *

Config

+ * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Property | Description
thirdeye.flow | One of {@link com.linkedin.thirdeye.bootstrap.ThirdEyeJob.FlowSpec}
thirdeye.flow.schedule | A string describing the flow schedule (used to tag segments)
thirdeye.phase | One of {@link com.linkedin.thirdeye.bootstrap.ThirdEyeJob.PhaseSpec}
thirdeye.root | Root directory on HDFS, under which all collection data is stored
thirdeye.collection | Collection name (data stored at ${thirdeye.root}/${thirdeye.collection})
thirdeye.server.uri | URI prefix for thirdeye server (e.g. http://some-machine:10283)
thirdeye.time.path | A path to a properties file on HDFS containing thirdeye.time.min, thirdeye.time.max
thirdeye.time.min | Manually override thirdeye.time.min from thirdeye.time.path
thirdeye.time.max | Manually override thirdeye.time.max from thirdeye.time.path
*/ public class ThirdEyeJob { private static final Logger LOG = LoggerFactory.getLogger(ThirdEyeJob.class); + private static final String ENCODING = "UTF-8"; private static final String USAGE = "usage: phase_name job.properties"; - private static final String AVRO_SCHEMA = "schema.avsc"; - private static final String TREE_FILE_FORMAT = ".bin"; + private enum FlowSpec + { + BOOTSTRAP, + INCREMENT, + PATCH + } + private enum PhaseSpec { JOIN @@ -106,7 +128,13 @@ String getDescription() } @Override - Properties getJobProperties(Properties inputConfig, String root, String collection, long minTime, long maxTime, String inputPaths) + Properties getJobProperties(Properties inputConfig, + String root, + String collection, + FlowSpec flowSpec, + DateTime minTime, + DateTime maxTime, + String inputPaths) { return inputConfig; } @@ -126,7 +154,13 @@ String getDescription() } @Override - Properties getJobProperties(Properties inputConfig, String root, String collection, long minTime, long maxTime, String inputPaths) + Properties getJobProperties(Properties inputConfig, + String root, + String collection, + FlowSpec flowSpec, + DateTime minTime, + DateTime maxTime, + String inputPaths) { Properties config = new Properties(); config.setProperty(AnalysisJobConstants.ANALYSIS_INPUT_AVRO_SCHEMA.toString(), @@ -156,7 +190,13 @@ String getDescription() } @Override - Properties getJobProperties(Properties inputConfig, String root, String collection, long minTime, long maxTime, String inputPaths) + Properties getJobProperties(Properties inputConfig, + String root, + String collection, + FlowSpec flowSpec, + DateTime minTime, + DateTime maxTime, + String inputPaths) throws Exception { Properties config = new Properties(); @@ -168,7 +208,7 @@ Properties getJobProperties(Properties inputConfig, String root, String collecti config.setProperty(AggregationJobConstants.AGG_INPUT_PATH.toString(), inputPaths); config.setProperty(AggregationJobConstants.AGG_OUTPUT_PATH.toString(), - 
getTimeDir(root, collection, minTime, maxTime) + File.separator + AGGREGATION.getName()); + getTimeDir(root, collection, flowSpec, minTime, maxTime) + File.separator + AGGREGATION.getName()); return config; } @@ -188,16 +228,22 @@ String getDescription() } @Override - Properties getJobProperties(Properties inputConfig, String root, String collection, long minTime, long maxTime, String inputPaths) + Properties getJobProperties(Properties inputConfig, + String root, + String collection, + FlowSpec flowSpec, + DateTime minTime, + DateTime maxTime, + String inputPaths) throws Exception { Properties config = new Properties(); config.setProperty(RollupPhaseOneConstants.ROLLUP_PHASE1_CONFIG_PATH.toString(), getConfigPath(root, collection)); config.setProperty(RollupPhaseOneConstants.ROLLUP_PHASE1_INPUT_PATH.toString(), - getTimeDir(root, collection, minTime, maxTime) + File.separator + AGGREGATION.getName()); + getTimeDir(root, collection, flowSpec, minTime, maxTime) + File.separator + AGGREGATION.getName()); config.setProperty(RollupPhaseOneConstants.ROLLUP_PHASE1_OUTPUT_PATH.toString(), - getTimeDir(root, collection, minTime, maxTime) + File.separator + ROLLUP_PHASE1.getName()); + getTimeDir(root, collection, flowSpec, minTime, maxTime) + File.separator + ROLLUP_PHASE1.getName()); return config; } @@ -217,16 +263,22 @@ String getDescription() } @Override - Properties getJobProperties(Properties inputConfig, String root, String collection, long minTime, long maxTime, String inputPaths) + Properties getJobProperties(Properties inputConfig, + String root, + String collection, + FlowSpec flowSpec, + DateTime minTime, + DateTime maxTime, + String inputPaths) throws Exception { Properties config = new Properties(); config.setProperty(RollupPhaseTwoConstants.ROLLUP_PHASE2_CONFIG_PATH.toString(), getConfigPath(root, collection)); config.setProperty(RollupPhaseTwoConstants.ROLLUP_PHASE2_INPUT_PATH.toString(), - getTimeDir(root, collection, minTime, maxTime) + File.separator + 
ROLLUP_PHASE1.getName() + File.separator + "belowThreshold"); + getTimeDir(root, collection, flowSpec, minTime, maxTime) + File.separator + ROLLUP_PHASE1.getName() + File.separator + "belowThreshold"); config.setProperty(RollupPhaseTwoConstants.ROLLUP_PHASE2_OUTPUT_PATH.toString(), - getTimeDir(root, collection, minTime, maxTime) + File.separator + ROLLUP_PHASE2.getName()); + getTimeDir(root, collection, flowSpec, minTime, maxTime) + File.separator + ROLLUP_PHASE2.getName()); return config; } @@ -246,16 +298,22 @@ String getDescription() } @Override - Properties getJobProperties(Properties inputConfig, String root, String collection, long minTime, long maxTime, String inputPaths) + Properties getJobProperties(Properties inputConfig, + String root, + String collection, + FlowSpec flowSpec, + DateTime minTime, + DateTime maxTime, + String inputPaths) throws Exception { Properties config = new Properties(); config.setProperty(RollupPhaseThreeConstants.ROLLUP_PHASE3_CONFIG_PATH.toString(), getConfigPath(root, collection)); config.setProperty(RollupPhaseThreeConstants.ROLLUP_PHASE3_INPUT_PATH.toString(), - getTimeDir(root, collection, minTime, maxTime) + File.separator + ROLLUP_PHASE2.getName()); + getTimeDir(root, collection, flowSpec, minTime, maxTime) + File.separator + ROLLUP_PHASE2.getName()); config.setProperty(RollupPhaseThreeConstants.ROLLUP_PHASE3_OUTPUT_PATH.toString(), - getTimeDir(root, collection, minTime, maxTime) + File.separator + ROLLUP_PHASE3.getName()); + getTimeDir(root, collection, flowSpec, minTime, maxTime) + File.separator + ROLLUP_PHASE3.getName()); return config; } @@ -275,17 +333,23 @@ String getDescription() } @Override - Properties getJobProperties(Properties inputConfig, String root, String collection, long minTime, long maxTime, String inputPaths) + Properties getJobProperties(Properties inputConfig, + String root, + String collection, + FlowSpec flowSpec, + DateTime minTime, + DateTime maxTime, + String inputPaths) throws Exception { 
Properties config = new Properties(); config.setProperty(RollupPhaseFourConstants.ROLLUP_PHASE4_CONFIG_PATH.toString(), getConfigPath(root, collection)); config.setProperty(RollupPhaseFourConstants.ROLLUP_PHASE4_INPUT_PATH.toString(), - getTimeDir(root, collection, minTime, maxTime) + File.separator + ROLLUP_PHASE3.getName() + "," + - getTimeDir(root, collection, minTime, maxTime) + File.separator + ROLLUP_PHASE1.getName() + File.separator + "aboveThreshold"); + getTimeDir(root, collection, flowSpec, minTime, maxTime) + File.separator + ROLLUP_PHASE3.getName() + "," + + getTimeDir(root, collection, flowSpec, minTime, maxTime) + File.separator + ROLLUP_PHASE1.getName() + File.separator + "aboveThreshold"); config.setProperty(RollupPhaseFourConstants.ROLLUP_PHASE4_OUTPUT_PATH.toString(), - getTimeDir(root, collection, minTime, maxTime) + File.separator + ROLLUP_PHASE4.getName()); + getTimeDir(root, collection, flowSpec, minTime, maxTime) + File.separator + ROLLUP_PHASE4.getName()); return config; } @@ -305,16 +369,22 @@ String getDescription() } @Override - Properties getJobProperties(Properties inputConfig, String root, String collection, long minTime, long maxTime, String inputPaths) + Properties getJobProperties(Properties inputConfig, + String root, + String collection, + FlowSpec flowSpec, + DateTime minTime, + DateTime maxTime, + String inputPaths) throws Exception { Properties config = new Properties(); config.setProperty(StarTreeGenerationConstants.STAR_TREE_GEN_CONFIG_PATH.toString(), getConfigPath(root, collection)); config.setProperty(StarTreeGenerationConstants.STAR_TREE_GEN_INPUT_PATH.toString(), - getTimeDir(root, collection, minTime, maxTime) + File.separator + ROLLUP_PHASE4.getName()); + getTimeDir(root, collection, flowSpec, minTime, maxTime) + File.separator + ROLLUP_PHASE4.getName()); config.setProperty(StarTreeGenerationConstants.STAR_TREE_GEN_OUTPUT_PATH.toString(), - getTimeDir(root, collection, minTime, maxTime) + File.separator + 
STARTREE_GENERATION.getName()); + getTimeDir(root, collection, flowSpec, minTime, maxTime) + File.separator + STARTREE_GENERATION.getName()); return config; } @@ -334,7 +404,13 @@ String getDescription() } @Override - Properties getJobProperties(Properties inputConfig, String root, String collection, long minTime, long maxTime, String inputPaths) throws IOException + Properties getJobProperties(Properties inputConfig, + String root, + String collection, + FlowSpec flowSpec, + DateTime minTime, + DateTime maxTime, + String inputPaths) throws Exception { Properties config = new Properties(); config.setProperty(StarTreeBootstrapPhaseOneConstants.STAR_TREE_BOOTSTRAP_CONFIG_PATH.toString(), @@ -346,7 +422,7 @@ Properties getJobProperties(Properties inputConfig, String root, String collecti config.setProperty(StarTreeBootstrapPhaseOneConstants.STAR_TREE_BOOTSTRAP_INPUT_PATH.toString(), inputPaths); config.setProperty(StarTreeBootstrapPhaseOneConstants.STAR_TREE_BOOTSTRAP_OUTPUT_PATH.toString(), - getTimeDir(root, collection, minTime, maxTime) + File.separator + STARTREE_BOOTSTRAP_PHASE1.getName()); + getTimeDir(root, collection, flowSpec, minTime, maxTime) + File.separator + STARTREE_BOOTSTRAP_PHASE1.getName()); return config; } @@ -366,7 +442,13 @@ String getDescription() } @Override - Properties getJobProperties(Properties inputConfig, String root, String collection, long minTime, long maxTime, String inputPaths) throws IOException + Properties getJobProperties(Properties inputConfig, + String root, + String collection, + FlowSpec flowSpec, + DateTime minTime, + DateTime maxTime, + String inputPaths) throws Exception { Properties config = new Properties(); @@ -375,14 +457,14 @@ Properties getJobProperties(Properties inputConfig, String root, String collecti config.setProperty(StarTreeBootstrapPhaseTwoConstants.STAR_TREE_GENERATION_OUTPUT_PATH.toString(), getLatestTreeDirPath(root, collection) + File.separator + STARTREE_GENERATION.getName()); 
config.setProperty(StarTreeBootstrapPhaseTwoConstants.STAR_TREE_BOOTSTRAP_PHASE2_INPUT_PATH.toString(), - getTimeDir(root, collection, minTime, maxTime) + File.separator + STARTREE_BOOTSTRAP_PHASE1.getName()); + getTimeDir(root, collection, flowSpec, minTime, maxTime) + File.separator + STARTREE_BOOTSTRAP_PHASE1.getName()); config.setProperty(StarTreeBootstrapPhaseTwoConstants.STAR_TREE_BOOTSTRAP_PHASE2_OUTPUT_PATH.toString(), - getTimeDir(root, collection, minTime, maxTime) + File.separator + STARTREE_BOOTSTRAP_PHASE2.getName()); + getTimeDir(root, collection, flowSpec, minTime, maxTime) + File.separator + STARTREE_BOOTSTRAP_PHASE2.getName()); return config; } }, - SERVER_UPDATE + SERVER_PUSH { @Override Class getKlazz() @@ -393,60 +475,49 @@ Class getKlazz() @Override String getDescription() { - return "Pushes metric data from startree_bootstrap_phase2 to thirdeye.server.uri"; + return "Pushes data to thirdeye.server.uri"; } @Override - Properties getJobProperties(Properties inputConfig, String root, String collection, long minTime, long maxTime, String inputPaths) + Properties getJobProperties(Properties inputConfig, String root, String collection, FlowSpec flowSpec, DateTime minTime, DateTime maxTime, String inputPaths) throws Exception { return null; // unused } - }, - SERVER_BOOTSTRAP - { - @Override - Class getKlazz() - { - return null; - } - - @Override - String getDescription() - { - return "Pushes star tree, dimension, and metric data from startree_bootstrap_phase2 to thirdeye.server.uri"; - } - - @Override - Properties getJobProperties(Properties inputConfig, String root, String collection, long minTime, long maxTime, String inputPaths) - { - return null; - } }; abstract Class getKlazz(); abstract String getDescription(); - abstract Properties getJobProperties(Properties inputConfig,String root, String collection, long minTime, long maxTime, String inputPaths) throws Exception; + abstract Properties getJobProperties(Properties inputConfig, + String root, 
+ String collection, + FlowSpec flowSpec, + DateTime minTime, + DateTime maxTime, + String inputPaths) throws Exception; String getName() { return this.name().toLowerCase(); } - String getCollectionDir(String root, String collection) - { - return root == null ? collection : root + File.separator + collection; - } - String getAnalysisPath(String root, String collection) { return getCollectionDir(root, collection) + File.separator + "analysis"; } - String getTimeDir(String root, String collection, long minTime, long maxTime) + String getTimeDir(String root, + String collection, + FlowSpec flowSpec, + DateTime minTime, + DateTime maxTime) throws IOException { - return getCollectionDir(root, collection) + File.separator + "data_" + minTime + "-" + maxTime; + return getCollectionDir(root, collection) + + File.separator + flowSpec.name() + + File.separator + "data_" + + StarTreeConstants.DATE_TIME_FORMATTER.print(minTime) + "_" + + StarTreeConstants.DATE_TIME_FORMATTER.print(maxTime); } String getConfigPath(String root, String collection) @@ -458,52 +529,6 @@ String getSchemaPath(String root, String collection) { return getCollectionDir(root, collection) + File.separator + AVRO_SCHEMA; } - - /* - * Iterates in the data dir's generated in reverse order and returns the path - * of the latest dir which contains tree.bin file. 
- */ - String getLatestTreeDirPath(String root, String collection) throws IOException - { - FileSystem fs = FileSystem.get(new Configuration()); - Path collectionDir = new Path(getCollectionDir(root, collection)); - - PathFilter dataDirFilter = new PathFilter() { - public boolean accept(Path path) { - return path.getName().startsWith("data_"); - } - }; - - Comparator dataDirComparator = new Comparator() { - - public int compare(FileStatus dataDir1, FileStatus dataDir2) { - return getMaxTimeFromPath(dataDir2.getPath().toString()) - getMaxTimeFromPath(dataDir1.getPath().toString()); - } - - private int getMaxTimeFromPath(String path){ - String []tokens = path.split("/"); - String dataDirName = tokens[tokens.length - 1]; - tokens = dataDirName.split("-"); - return Integer.parseInt(tokens[tokens.length-1]); - } - }; - - ListlistFiles = Arrays.asList(fs.listStatus(collectionDir, dataDirFilter)); - Collections.sort(listFiles,dataDirComparator); - for(int i = 0;i< listFiles.size();i++){ - System.out.println(listFiles.get(i).getPath().toString()); - RemoteIterator fileStatusListIterator = fs.listFiles(listFiles.get(i).getPath(), true); - while(fileStatusListIterator.hasNext()){ - LocatedFileStatus fileStatus = fileStatusListIterator.next(); - if(fileStatus.getPath().getName().endsWith(TREE_FILE_FORMAT)){ - return getCollectionDir(root, collection) + File.separator + listFiles.get(i).getPath().getName(); - } - } - } - throw new IllegalStateException("Could not find star tree directory"); - } - - } private static void usage() @@ -567,62 +592,51 @@ public void run() throws Exception String root = getAndCheck(ThirdEyeJobConstants.THIRDEYE_ROOT.getPropertyName(), inputConfig); String collection = getAndCheck(ThirdEyeJobConstants.THIRDEYE_COLLECTION.getPropertyName(), inputConfig); String inputPaths = getAndCheck(ThirdEyeJobConstants.INPUT_PATHS.getPropertyName(), inputConfig); - String NUM_REDUCERS_PROP = 
StarTreeBootstrapPhaseTwoConstants.THIRDEYE_STARTREE_BOOTSTRAP_PHASE2_REDUCERS.name(); - String numberOfReducers = inputConfig.getProperty(NUM_REDUCERS_PROP); - - long minTime = -1; - long maxTime = -1; - - if (!PhaseSpec.ANALYSIS.equals(phaseSpec)) // analysis phase computes these values + FlowSpec flowSpec = FlowSpec.valueOf(getAndCheck(ThirdEyeJobConstants.THIRDEYE_FLOW.getPropertyName(), inputConfig).toUpperCase()); + String numberReducersProp = StarTreeBootstrapPhaseTwoConstants.THIRDEYE_STARTREE_BOOTSTRAP_PHASE2_REDUCERS.name(); + String numberOfReducers = inputConfig.getProperty(numberReducersProp); + + // Get min / max time + DateTime minTime; + DateTime maxTime; + + String minTimeProp + = inputConfig.getProperty(ThirdEyeJobConstants.THIRDEYE_TIME_MIN.getPropertyName()); + String maxTimeProp + = inputConfig.getProperty(ThirdEyeJobConstants.THIRDEYE_TIME_MAX.getPropertyName()); + String timePathProp + = inputConfig.getProperty(ThirdEyeJobConstants.THIRDEYE_TIME_PATH.getPropertyName()); + + if (minTimeProp != null && maxTimeProp != null) // user provided, override + { + minTime = ISODateTimeFormat.dateTimeParser().parseDateTime(minTimeProp); + maxTime = ISODateTimeFormat.dateTimeParser().parseDateTime(maxTimeProp); + } + else if (timePathProp != null) // use path managed by preparation jobs { FileSystem fileSystem = FileSystem.get(new Configuration()); + InputStream inputStream = fileSystem.open(new Path(timePathProp)); - // Load analysis results - InputStream inputStream - = fileSystem.open(new Path(phaseSpec.getAnalysisPath(root, collection), - AnalysisJobConstants.ANALYSIS_FILE_NAME.toString())); - AnalysisPhaseStats stats = AnalysisPhaseStats.fromBytes(IOUtils.toByteArray(inputStream)); + Properties timePathProps = new Properties(); + timePathProps.load(inputStream); inputStream.close(); - // Check input paths - if (!inputPaths.equals(stats.getInputPath())) - { - throw new IllegalStateException("Last analysis was done for input paths " - + 
stats.getInputPath() + " not " + inputPaths); - } + minTimeProp = timePathProps.getProperty(ThirdEyeJobConstants.THIRDEYE_TIME_MIN.getPropertyName()); + maxTimeProp = timePathProps.getProperty(ThirdEyeJobConstants.THIRDEYE_TIME_MAX.getPropertyName()); - minTime = stats.getMinTime(); - maxTime = stats.getMaxTime(); + minTime = ISODateTimeFormat.dateTimeParser().parseDateTime(minTimeProp); + maxTime = ISODateTimeFormat.dateTimeParser().parseDateTime(maxTimeProp); } - - if (PhaseSpec.SERVER_UPDATE.equals(phaseSpec)) + else { - String thirdEyeServerUri = inputConfig.getProperty(ThirdEyeJobConstants.THIRDEYE_SERVER_URI.getPropertyName()); - if (thirdEyeServerUri == null) - { - throw new IllegalArgumentException( - "Must provide " + ThirdEyeJobConstants.THIRDEYE_SERVER_URI.getPropertyName() + " in properties"); - } - - FileSystem fileSystem = FileSystem.get(new Configuration()); - - // Push data (no dimensions) - Path dataPath = new Path(PhaseSpec.STARTREE_BOOTSTRAP_PHASE2.getTimeDir(root, collection, minTime, maxTime) - + File.separator + PhaseSpec.STARTREE_BOOTSTRAP_PHASE2.getName()); - RemoteIterator itr = fileSystem.listFiles(dataPath, false); - while (itr.hasNext()) - { - LocatedFileStatus fileStatus = itr.next(); - if (fileStatus.getPath().getName().startsWith("task_")) - { - InputStream leafData = fileSystem.open(fileStatus.getPath()); - int responseCode = StarTreeJobUtils.pushData(leafData, thirdEyeServerUri, collection, false); - leafData.close(); - LOG.info("Load {} #=> {}", fileStatus.getPath(), responseCode); - } - } + throw new IllegalStateException( + "Must specify either " + + ThirdEyeJobConstants.THIRDEYE_TIME_PATH.getPropertyName() + " or " + + ThirdEyeJobConstants.THIRDEYE_TIME_MIN.getPropertyName() + " and " + + ThirdEyeJobConstants.THIRDEYE_TIME_MAX.getPropertyName()); } - else if (PhaseSpec.SERVER_BOOTSTRAP.equals(phaseSpec)) + + if (PhaseSpec.SERVER_PUSH.equals(phaseSpec)) { String thirdEyeServerUri = 
inputConfig.getProperty(ThirdEyeJobConstants.THIRDEYE_SERVER_URI.getPropertyName()); if (thirdEyeServerUri == null) @@ -633,26 +647,16 @@ else if (PhaseSpec.SERVER_BOOTSTRAP.equals(phaseSpec)) FileSystem fileSystem = FileSystem.get(new Configuration()); - // Push config + // Push config (may 409 but that's okay) Path configPath = new Path(root + File.separator + collection - + File.separator + StarTreeConstants.CONFIG_FILE_NAME); + + File.separator + StarTreeConstants.CONFIG_FILE_NAME); InputStream configData = fileSystem.open(configPath); int responseCode = StarTreeJobUtils.pushConfig(configData, thirdEyeServerUri, collection); configData.close(); LOG.info("Load {} #=> {}", configPath, responseCode); - // Push star tree - Path treePath = new Path(PhaseSpec.STARTREE_GENERATION.getTimeDir(root, collection, minTime, maxTime) - + File.separator + PhaseSpec.STARTREE_GENERATION.getName() - + File.separator + "star-tree-" + collection - + File.separator + collection + "-" + StarTreeConstants.TREE_FILE_NAME); - InputStream treeData = fileSystem.open(treePath); - responseCode = StarTreeJobUtils.pushTree(treeData, thirdEyeServerUri, collection); - treeData.close(); - LOG.info("Load {} #=> {}", treePath, responseCode); - - // Push data (with dimensions) - Path dataPath = new Path(PhaseSpec.STARTREE_BOOTSTRAP_PHASE2.getTimeDir(root, collection, minTime, maxTime) + // Push data + Path dataPath = new Path(PhaseSpec.STARTREE_BOOTSTRAP_PHASE2.getTimeDir(root, collection, flowSpec, minTime, maxTime) + File.separator + PhaseSpec.STARTREE_BOOTSTRAP_PHASE2.getName()); RemoteIterator itr = fileSystem.listFiles(dataPath, false); while (itr.hasNext()) @@ -661,7 +665,13 @@ else if (PhaseSpec.SERVER_BOOTSTRAP.equals(phaseSpec)) if (fileStatus.getPath().getName().startsWith("task_")) { InputStream leafData = fileSystem.open(fileStatus.getPath()); - responseCode = StarTreeJobUtils.pushData(leafData, thirdEyeServerUri, collection, true); + responseCode = StarTreeJobUtils.pushData( + 
leafData, + thirdEyeServerUri, + collection, + minTime, + maxTime, + inputConfig.getProperty(ThirdEyeJobConstants.THIRDEYE_FLOW_SCHEDULE.getPropertyName())); leafData.close(); LOG.info("Load {} #=> {}", fileStatus.getPath(), responseCode); } @@ -670,10 +680,10 @@ else if (PhaseSpec.SERVER_BOOTSTRAP.equals(phaseSpec)) else // Hadoop job { // Construct job properties - Properties jobProperties = phaseSpec.getJobProperties(inputConfig, root, collection, minTime, maxTime, inputPaths); + Properties jobProperties = phaseSpec.getJobProperties(inputConfig, root, collection, flowSpec, minTime, maxTime, inputPaths); if(PhaseSpec.STARTREE_BOOTSTRAP_PHASE2.equals(phaseSpec) && numberOfReducers != null){ - jobProperties.setProperty(NUM_REDUCERS_PROP, numberOfReducers); + jobProperties.setProperty(numberReducersProp, numberOfReducers); } // Instantiate the job Constructor constructor = phaseSpec.getKlazz ().getConstructor(String.class, Properties.class); @@ -699,4 +709,69 @@ public static void main(String[] args) throws Exception config.load(new FileInputStream(args[1])); new ThirdEyeJob(phaseName, config).run(); } + + private static String getCollectionDir(String root, String collection) + { + return root == null ? collection : root + File.separator + collection; + } + + /* + * Iterates in the data dir's generated in reverse order and returns the path + * of the latest dir which contains tree.bin file. 
+ */ + private static String getLatestTreeDirPath(String root, String collection) throws IOException + { + FileSystem fs = FileSystem.get(new Configuration()); + Path bootstrapDir = new Path(getCollectionDir(root, collection), FlowSpec.BOOTSTRAP.name()); + + PathFilter dataDirFilter = new PathFilter() + { + public boolean accept(Path path) + { + return path.getName().startsWith("data_"); + } + }; + + Comparator dataDirComparator = new Comparator() + { + + public int compare(FileStatus dataDir1, FileStatus dataDir2) + { + return (int) (getMaxTimeFromPath(dataDir2.getPath().toString()) - getMaxTimeFromPath(dataDir1.getPath().toString())); + } + + private long getMaxTimeFromPath(String path) + { + try + { + String[] pathTokens = path.split(File.separator); + String[] dataDirTokens = pathTokens[pathTokens.length - 1].split("_"); + String maxTimeString = URLDecoder.decode(dataDirTokens[dataDirTokens.length - 1], ENCODING); + DateTime maxTime = StarTreeConstants.DATE_TIME_FORMATTER.parseDateTime(maxTimeString); + return maxTime.getMillis(); + } + catch (Exception e) + { + throw new IllegalStateException(e); + } + } + }; + + List listFiles = Arrays.asList(fs.listStatus(bootstrapDir, dataDirFilter)); + Collections.sort(listFiles, dataDirComparator); + for (int i = 0; i < listFiles.size(); i++) + { + RemoteIterator fileStatusListIterator = fs.listFiles(listFiles.get(i).getPath(), true); + + while (fileStatusListIterator.hasNext()) + { + LocatedFileStatus fileStatus = fileStatusListIterator.next(); + if (fileStatus.getPath().getName().endsWith(TREE_FILE_FORMAT)) + { + return listFiles.get(i).getPath().toString(); + } + } + } + throw new IllegalStateException("Could not find star tree directory"); + } } diff --git a/thirdeye/thirdeye-bootstrap/src/main/java/com/linkedin/thirdeye/bootstrap/ThirdEyeJobConstants.java b/thirdeye/thirdeye-bootstrap/src/main/java/com/linkedin/thirdeye/bootstrap/ThirdEyeJobConstants.java index f336fc92607b..94db1e8c542b 100644 --- 
a/thirdeye/thirdeye-bootstrap/src/main/java/com/linkedin/thirdeye/bootstrap/ThirdEyeJobConstants.java +++ b/thirdeye/thirdeye-bootstrap/src/main/java/com/linkedin/thirdeye/bootstrap/ThirdEyeJobConstants.java @@ -2,10 +2,15 @@ public enum ThirdEyeJobConstants { + THIRDEYE_FLOW("thirdeye.flow"), + THIRDEYE_FLOW_SCHEDULE("thirdeye.flow.schedule"), THIRDEYE_PHASE("thirdeye.phase"), THIRDEYE_ROOT("thirdeye.root"), THIRDEYE_COLLECTION("thirdeye.collection"), THIRDEYE_SERVER_URI("thirdeye.server.uri"), + THIRDEYE_TIME_PATH("thirdeye.time.path"), + THIRDEYE_TIME_MIN("thirdeye.time.min"), + THIRDEYE_TIME_MAX("thirdeye.time.max"), INPUT_PATHS("input.paths"); private final String propertyName; diff --git a/thirdeye/thirdeye-bootstrap/src/main/java/com/linkedin/thirdeye/bootstrap/startree/StarTreeJobUtils.java b/thirdeye/thirdeye-bootstrap/src/main/java/com/linkedin/thirdeye/bootstrap/startree/StarTreeJobUtils.java index 0ffe33862940..298b49074dbd 100644 --- a/thirdeye/thirdeye-bootstrap/src/main/java/com/linkedin/thirdeye/bootstrap/startree/StarTreeJobUtils.java +++ b/thirdeye/thirdeye-bootstrap/src/main/java/com/linkedin/thirdeye/bootstrap/startree/StarTreeJobUtils.java @@ -6,9 +6,15 @@ import com.linkedin.thirdeye.api.StarTreeRecord; import com.linkedin.thirdeye.api.DimensionKey; import org.apache.commons.compress.utils.IOUtils; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.joda.time.DateTime; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.io.InputStream; +import java.io.ObjectInputStream; import java.net.HttpURLConnection; import java.net.URL; import java.net.URLEncoder; @@ -19,15 +25,51 @@ public class StarTreeJobUtils { + private static final Logger LOG = LoggerFactory.getLogger(StarTreeJobUtils.class); + private static final String ENCODING = "UTF-8"; + + public static String getTreeId(FileSystem fileSystem, Path treePath) throws Exception + { + ObjectInputStream inputStream = 
null; + try + { + inputStream = new ObjectInputStream(fileSystem.open(treePath)); + StarTreeNode root = (StarTreeNode) inputStream.readObject(); + return root.getId().toString(); + } + finally + { + if (inputStream != null) + { + inputStream.close(); + } + } + } + public static int pushConfig(InputStream configData, String thirdEyeUri, String collection) throws IOException { - String url = thirdEyeUri + "/collections/" + URLEncoder.encode(collection, "UTF-8"); + String url = thirdEyeUri + "/collections/" + URLEncoder.encode(collection, ENCODING); return executeHttpPost(configData, url); } - public static int pushTree(InputStream treeData, String thirdEyeUri, String collection) throws IOException + public static int pushTree(InputStream treeData, + String thirdEyeUri, + String collection, + String treeId, + DateTime minTime, + DateTime maxTime, + String schedule) throws IOException { - String url = thirdEyeUri + "/collections/" + URLEncoder.encode(collection, "UTF-8") + "/starTree"; + String url = thirdEyeUri + "/collections/" + URLEncoder.encode(collection, ENCODING) + + "/starTree/" + URLEncoder.encode(treeId, ENCODING) + + "/" + minTime.getMillis() + + "/" + maxTime.getMillis(); + + if (schedule != null) + { + url += "?schedule=" + URLEncoder.encode(schedule, ENCODING); + } + return executeHttpPost(treeData, url); } @@ -37,13 +79,24 @@ public static int pushTree(InputStream treeData, String thirdEyeUri, String coll * @return * The status code of the HTTP response */ - public static int pushData(InputStream leafData, String thirdEyeUri, String collection, boolean includeDimensions) throws IOException + public static int pushData(InputStream leafData, + String thirdEyeUri, + String collection, + DateTime minTime, + DateTime maxTime, + String schedule) throws IOException { - String url = thirdEyeUri + "/collections/" + URLEncoder.encode(collection, "UTF-8") + "/data"; - if (includeDimensions) + String url = thirdEyeUri + "/collections/" + 
URLEncoder.encode(collection, ENCODING) + "/data/" + + minTime.getMillis() + "/" + + maxTime.getMillis(); + + if (schedule != null) { - url += "?includeDimensions=true"; + url += "?schedule=" + URLEncoder.encode(schedule, ENCODING); } + + LOG.info("POST {}", url); + return executeHttpPost(leafData, url); } diff --git a/thirdeye/thirdeye-bootstrap/src/main/java/com/linkedin/thirdeye/bootstrap/startree/bootstrap/phase2/StarTreeBootstrapPhaseTwoJob.java b/thirdeye/thirdeye-bootstrap/src/main/java/com/linkedin/thirdeye/bootstrap/startree/bootstrap/phase2/StarTreeBootstrapPhaseTwoJob.java index e321bb3c11e8..96ff359a1cbb 100644 --- a/thirdeye/thirdeye-bootstrap/src/main/java/com/linkedin/thirdeye/bootstrap/startree/bootstrap/phase2/StarTreeBootstrapPhaseTwoJob.java +++ b/thirdeye/thirdeye-bootstrap/src/main/java/com/linkedin/thirdeye/bootstrap/startree/bootstrap/phase2/StarTreeBootstrapPhaseTwoJob.java @@ -184,6 +184,7 @@ public static class BootstrapPhaseTwoReducer extends private String localOutputDataDir = "./leaf-data-output"; private String hdfsOutputDir; private StarTreeConfig starTreeConfig; + private Path pathToTree; @Override public void setup(Context context) throws IOException, InterruptedException { @@ -210,7 +211,7 @@ public void setup(Context context) throws IOException, InterruptedException { try { collectionName = config.getCollectionName(); - Path pathToTree = new Path(starTreeOutputPath + "/" + "star-tree-" + pathToTree = new Path(starTreeOutputPath + "/" + "star-tree-" + collectionName, collectionName + "-tree.bin"); InputStream is = dfs.open(pathToTree); starTreeRootNode = StarTreePersistanceUtil.loadStarTree(is); @@ -324,7 +325,8 @@ protected void cleanup(Context context) throws IOException, LOG.info("Generating " + leafDataTarGz + " from " + localOutputDataDir); // Combine - FixedBufferUtil.combineDataFiles(new File(localTmpDataDir), new File(localOutputDataDir, "data")); + FixedBufferUtil.combineDataFiles( + dfs.open(pathToTree), new 
File(localTmpDataDir), new File(localOutputDataDir, "data")); // Create tar gz of directory TarGzCompressionUtils.createTarGzOfDirectory(localOutputDataDir, leafDataTarGz); diff --git a/thirdeye/thirdeye-bootstrap/src/test/java/com/linkedin/thirdeye/bootstrap/startree/bootstrap/phase1/TestStarTreeBootstrapPhase1.java b/thirdeye/thirdeye-bootstrap/src/test/java/com/linkedin/thirdeye/bootstrap/startree/bootstrap/phase1/TestStarTreeBootstrapPhase1.java index ecc08347e97e..5f04c68703b0 100644 --- a/thirdeye/thirdeye-bootstrap/src/test/java/com/linkedin/thirdeye/bootstrap/startree/bootstrap/phase1/TestStarTreeBootstrapPhase1.java +++ b/thirdeye/thirdeye-bootstrap/src/test/java/com/linkedin/thirdeye/bootstrap/startree/bootstrap/phase1/TestStarTreeBootstrapPhase1.java @@ -13,6 +13,7 @@ import org.apache.avro.generic.GenericRecord; import org.apache.avro.hadoop.io.AvroSerialization; import org.apache.avro.mapred.AvroKey; +import org.apache.commons.io.FileUtils; import org.apache.commons.math.random.RandomDataImpl; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -23,6 +24,7 @@ import org.apache.hadoop.mrunit.mapreduce.ReduceDriver; import org.apache.hadoop.mrunit.types.Pair; import org.testng.Assert; +import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -169,6 +171,12 @@ public void setUp() throws IOException config.set(StarTreeBootstrapPhaseOneConstants.STAR_TREE_BOOTSTRAP_CONFIG_PATH.toString(), ClassLoader.getSystemResource(CONF_FILE).toString()); } + @AfterClass + public void tearDown() throws IOException + { + FileUtils.forceDelete(new File(".leaf-data.tar.gz.crc")); + } + @Test public void testStarTreeBootstrapPhase1() throws Exception { diff --git a/thirdeye/thirdeye-core/pom.xml b/thirdeye/thirdeye-core/pom.xml index 8c8d023f7310..fcaaffcb4f20 100644 --- a/thirdeye/thirdeye-core/pom.xml +++ b/thirdeye/thirdeye-core/pom.xml @@ -44,11 +44,21 @@ 
jackson-dataformat-yaml 2.4.0 + + com.fasterxml.jackson.datatype + jackson-datatype-joda + 2.4.0 + commons-io commons-io 2.4 + + joda-time + joda-time + 2.3 + org.apache.commons diff --git a/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/api/StarTreeConstants.java b/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/api/StarTreeConstants.java index cf980da3238c..34883b795af1 100644 --- a/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/api/StarTreeConstants.java +++ b/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/api/StarTreeConstants.java @@ -1,5 +1,8 @@ package com.linkedin.thirdeye.api; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; + public final class StarTreeConstants { public static final String STAR = "*"; @@ -12,8 +15,7 @@ public final class StarTreeConstants public static final String CONFIG_FILE_NAME = "config.yml"; public static final String TREE_FILE_NAME = "tree.bin"; - public static final String SCHEMA_FILE_NAME = "schema.avsc"; - public static final String DATA_DIR_NAME = "data"; + public static final String DATA_DIR_PREFIX = "data"; public static final String KAFKA_CONFIG_FILE_NAME = "kafka.yml"; public static final String INDEX_FILE_SUFFIX = ".idx"; @@ -23,4 +25,6 @@ public final class StarTreeConstants public static final String METRIC_STORE = "metricStore"; public static final String DIMENSION_STORE = "dimensionStore"; public static final String DICT_STORE = "dictStore"; + + public static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormat.forPattern("YYYY-MM-dd-HHmmss"); } diff --git a/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/api/StarTreeManager.java b/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/api/StarTreeManager.java index 8053c481e933..558ae916c1bf 100644 --- a/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/api/StarTreeManager.java +++ 
b/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/api/StarTreeManager.java @@ -2,6 +2,7 @@ import java.io.File; import java.io.IOException; +import java.util.Map; import java.util.Set; public interface StarTreeManager @@ -14,22 +15,20 @@ public interface StarTreeManager /** * @return - * The StarTree for a collection. + * The StarTrees for a collection. */ - StarTree getStarTree(String collection); +// Set getStarTrees(String collection); + + /** @return a map of data directory to star tree index for a collection */ + Map getStarTrees(String collection); + + StarTreeConfig getConfig(String collection); /** * Restores a previously constructed tree. */ void restore(File rootDir, String collection) throws Exception; - /** - * Removes and closes a star tree for a collection. - */ - void remove(String collection) throws IOException; - - void open(String collection) throws IOException; - /** * Closes all star trees this manager is managing */ diff --git a/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/StarTreeImpl.java b/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/StarTreeImpl.java index 1aa27e85e92a..39448a31d1fc 100644 --- a/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/StarTreeImpl.java +++ b/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/StarTreeImpl.java @@ -453,4 +453,23 @@ public void getStats(StarTreeNode node, StarTreeStats stats) getStats(node.getStarNode(), stats); } } + + @Override + public boolean equals(Object o) + { + if (!(o instanceof StarTree)) + { + return false; + } + + StarTree starTree = (StarTree) o; + + return root.getId().equals(starTree.getRoot().getId()); + } + + @Override + public int hashCode() + { + return root.hashCode(); + } } diff --git a/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/StarTreeManagerImpl.java b/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/StarTreeManagerImpl.java index 9d783d7c3e5a..87dfeae478dc 100644 
--- a/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/StarTreeManagerImpl.java +++ b/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/StarTreeManagerImpl.java @@ -1,18 +1,32 @@ package com.linkedin.thirdeye.impl; +import static java.nio.file.StandardWatchEventKinds.ENTRY_CREATE; +import static java.nio.file.StandardWatchEventKinds.ENTRY_DELETE; +import static java.nio.file.StandardWatchEventKinds.ENTRY_MODIFY; +import static java.nio.file.StandardWatchEventKinds.OVERFLOW; + import com.linkedin.thirdeye.api.StarTree; import com.linkedin.thirdeye.api.StarTreeConfig; import com.linkedin.thirdeye.api.StarTreeConstants; import com.linkedin.thirdeye.api.StarTreeManager; import com.linkedin.thirdeye.api.StarTreeNode; +import com.linkedin.thirdeye.impl.storage.StorageUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.FileInputStream; +import java.io.FilenameFilter; import java.io.IOException; import java.io.ObjectInputStream; +import java.nio.file.FileSystems; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.WatchEvent; +import java.nio.file.WatchKey; +import java.nio.file.WatchService; import java.util.HashSet; +import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; @@ -20,24 +34,34 @@ public class StarTreeManagerImpl implements StarTreeManager { private static final Logger LOG = LoggerFactory.getLogger(StarTreeManagerImpl.class); + private static final long REFRESH_WAIT_SLEEP_MILLIS = 1000; + private static final long REFRESH_WAIT_TIMEOUT_MILLIS = 30000; - private final ConcurrentMap trees; - private final Set openTrees; + private final ConcurrentMap configs; + private final ConcurrentMap> trees; + private final Set openCollections; public StarTreeManagerImpl() { - this.trees = new ConcurrentHashMap(); - this.openTrees = new HashSet(); + this.configs = new ConcurrentHashMap(); + 
this.trees = new ConcurrentHashMap>(); + this.openCollections = new HashSet(); } @Override public Set getCollections() { - return openTrees; + return openCollections; + } + + @Override + public StarTreeConfig getConfig(String collection) + { + return configs.get(collection); } @Override - public StarTree getStarTree(String collection) + public Map getStarTrees(String collection) { return trees.get(collection); } @@ -50,81 +74,206 @@ public void restore(File rootDir, String collection) throws Exception if (!trees.containsKey(collection)) { LOG.info("Creating new startree for {}", collection); + trees.put(collection, new ConcurrentHashMap()); File collectionDir = new File(rootDir, collection); - // Read tree structure - File treeFile = new File(collectionDir, StarTreeConstants.TREE_FILE_NAME); - ObjectInputStream inputStream = new ObjectInputStream(new FileInputStream(treeFile)); - StarTreeNode root = (StarTreeNode) inputStream.readObject(); + // Data dirs + File[] dataDirs = collectionDir.listFiles(new FilenameFilter() + { + @Override + public boolean accept(File dir, String name) + { + return name.startsWith(StorageUtils.getDataDirPrefix()); + } + }); + + if (dataDirs == null) + { + throw new IllegalArgumentException("No data dirs for collection " + collection); + } // Read config File configFile = new File(collectionDir, StarTreeConstants.CONFIG_FILE_NAME); StarTreeConfig config = StarTreeConfig.decode(new FileInputStream(configFile)); + configs.put(collection, config); + + for (File dataDir : dataDirs) + { + // Read tree structure + File treeFile = new File(dataDir, StarTreeConstants.TREE_FILE_NAME); + ObjectInputStream inputStream = new ObjectInputStream(new FileInputStream(treeFile)); + StarTreeNode root = (StarTreeNode) inputStream.readObject(); - // Create tree - StarTree starTree = new StarTreeImpl(config, new File(collectionDir, StarTreeConstants.DATA_DIR_NAME), root); - trees.put(collection, starTree); + // Create tree + StarTree starTree = new 
StarTreeImpl(config, dataDir, root); + trees.get(collection).put(dataDir, starTree); + starTree.open(); + openCollections.add(collection); + LOG.info("Opened tree {} for collection {}", starTree.getRoot(), collection); + } + + // Register watch on collection dir + DataRefreshWatcher refreshWatcher = new DataRefreshWatcher(config); + refreshWatcher.register(Paths.get(collectionDir.getAbsolutePath())); + Thread watcherThread = new Thread(refreshWatcher); + watcherThread.setDaemon(true); + watcherThread.start(); } } } @Override - public void remove(String collection) throws IOException + public void close(String collection) throws IOException { synchronized (trees) { - StarTree starTree = trees.remove(collection); - if (starTree != null) + Map starTrees = trees.remove(collection); + if (starTrees != null) { - LOG.info("Closing startree for {}", collection); - starTree.close(); + for (StarTree starTree : starTrees.values()) + { + starTree.close(); + } + LOG.info("Closed trees for collection {}", collection); } - openTrees.remove(collection); + openCollections.remove(collection); } } - @Override - public void open(String collection) throws IOException + private class DataRefreshWatcher implements Runnable { - synchronized (trees) - { - if (openTrees.contains(collection)) - { - return; - } + private final StarTreeConfig config; + private final WatchService watchService; + private final ConcurrentMap keys; - StarTree starTree = trees.get(collection); - if (starTree == null) - { - throw new IllegalArgumentException("No star tree for collection " + collection); - } + DataRefreshWatcher(StarTreeConfig config) throws IOException + { + this.config = config; + this.watchService = FileSystems.getDefault().newWatchService(); + this.keys = new ConcurrentHashMap(); + } - starTree.open(); - openTrees.add(collection); - LOG.info("Opened tree for collection {}", collection); + void register(Path dir) throws IOException + { + WatchKey key = dir.register(watchService, ENTRY_CREATE, 
ENTRY_DELETE, ENTRY_MODIFY); + keys.put(key, dir); } - } - @Override - public void close(String collection) throws IOException - { - synchronized (trees) + @Override + @SuppressWarnings("unchecked") + public void run() { - if (!openTrees.contains(collection)) + for (;;) { - return; + WatchKey key = null; + try + { + try + { + key = watchService.take(); + } + catch (InterruptedException e) + { + continue; + } + + Path dir = keys.get(key); + if (dir == null) + { + LOG.error("WatchKey not recognized: {}", key); + continue; + } + + for (WatchEvent event : key.pollEvents()) + { + if (event.kind() == OVERFLOW) + { + LOG.info("Received a overflow event"); + continue; + } + + WatchEvent ev = (WatchEvent) event; + Path path = dir.resolve(ev.context()); + File file = path.toFile(); + + LOG.info("{} {}", ev.kind(), path); + + if (file.getName().startsWith(StorageUtils.getDataDirPrefix())) + { + StorageUtils.waitForModifications(file, REFRESH_WAIT_SLEEP_MILLIS, REFRESH_WAIT_TIMEOUT_MILLIS); + + synchronized (trees) + { + // Read tree structure + File treeFile = new File(file, StarTreeConstants.TREE_FILE_NAME); + ObjectInputStream inputStream = new ObjectInputStream(new FileInputStream(treeFile)); + StarTreeNode root = (StarTreeNode) inputStream.readObject(); + + Map existingTrees = trees.get(config.getCollection()); + if (existingTrees == null) + { + LOG.error("There is a watch on collection {} but no open trees!", config.getCollection()); + } + else + { + // If tree is already open, close it + StarTree existingTree = existingTrees.get(file); + if (existingTree != null) + { + existingTree.close(); + LOG.info("Closed existing tree {} in {}", existingTree.getRoot().getId(), file); + } + + // Create tree + try + { + StarTree starTree = new StarTreeImpl(config, file, root); + starTree.open(); + trees.get(config.getCollection()).put(file, starTree); + LOG.info("Opened tree {} from {}", starTree.getRoot().getId(), file); + } + catch (Exception e) + { + // n.b. 
there may be partial data i.e. during a push; another watch will be fired later + if (LOG.isDebugEnabled()) + { + LOG.debug("{}", e); + } + } + } + } + } + } + } + catch (Exception e) + { + LOG.error("{}", e); + } + + if (key != null) + { + boolean valid = key.reset(); + if (!valid) + { + keys.remove(key); + if (keys.isEmpty()) + { + break; + } + } + } } - StarTree starTree = trees.remove(collection); - if (starTree != null) + try { - starTree.close(); - LOG.info("Closed tree for collection {}", collection); + watchService.close(); + } + catch (IOException e) + { + LOG.warn("Failed to close watcher service ",e); } - - openTrees.remove(collection); } } } diff --git a/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/TarUtils.java b/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/TarUtils.java index c8fb0e17bdb9..f159b56236d8 100644 --- a/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/TarUtils.java +++ b/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/TarUtils.java @@ -18,6 +18,13 @@ public class TarUtils { + public static void extractGzippedTarArchive(InputStream source, + File outputDir, + int stripComponents) throws IOException + { + extractGzippedTarArchive(source, outputDir, stripComponents, null); + } + public static void extractGzippedTarArchive(InputStream source, File outputDir, int stripComponents, diff --git a/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/storage/DataUpdateManager.java b/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/storage/DataUpdateManager.java new file mode 100644 index 000000000000..cd4a755bd8f8 --- /dev/null +++ b/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/storage/DataUpdateManager.java @@ -0,0 +1,123 @@ +package com.linkedin.thirdeye.impl.storage; + +import com.linkedin.thirdeye.api.StarTreeConstants; +import com.linkedin.thirdeye.api.StarTreeNode; +import com.linkedin.thirdeye.impl.TarUtils; +import 
org.apache.commons.io.FileUtils; +import org.joda.time.DateTime; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +public class DataUpdateManager +{ + private static final Logger LOG = LoggerFactory.getLogger(DataUpdateManager.class); + + private final File rootDir; + private final ConcurrentMap collectionLocks; + + public DataUpdateManager(File rootDir) + { + this.rootDir = rootDir; + this.collectionLocks = new ConcurrentHashMap(); + } + + public void deleteCollection(String collection) throws Exception + { + File collectionDir = new File(rootDir, collection); + + if (!collectionDir.isAbsolute()) + { + throw new IllegalArgumentException("Collection dir cannot be relative " + collectionDir); + } + + FileUtils.forceDelete(collectionDir); + } + + public void updateData(String collection, + String schedule, + DateTime minTime, + DateTime maxTime, + byte[] data) throws Exception + { + Lock lock = collectionLocks.get(collection); + if (lock == null) + { + collectionLocks.putIfAbsent(collection, new ReentrantLock()); + lock = collectionLocks.get(collection); + } + + lock.lock(); + LOG.info("Locked collection {} using lock {} for data update", collection, lock); + try + { + File collectionDir = new File(rootDir, collection); + if (!collectionDir.exists()) + { + FileUtils.forceMkdir(collectionDir); + LOG.info("Created {}", collectionDir); + } + + if (schedule.contains("_")) + { + throw new IOException("schedule cannot contain '_'"); + } + + String loadId = "load_" + UUID.randomUUID(); + File tmpDir = new File(new File(rootDir, collection), loadId); + File tmpMetricStore = new File(tmpDir, 
StarTreeConstants.METRIC_STORE); + File tmpDimensionStore = new File(tmpDir, StarTreeConstants.DIMENSION_STORE); + + try + { + // Extract into tmp dir + FileUtils.forceMkdir(tmpDir); + TarUtils.extractGzippedTarArchive(new ByteArrayInputStream(data), tmpDir, 2); + LOG.info("Extracted data into {}", tmpDir); + + // Prefix all files with minTime / maxTime string + StorageUtils.prefixFilesWithTime(tmpMetricStore, schedule, minTime, maxTime); + StorageUtils.prefixFilesWithTime(tmpDimensionStore, schedule, minTime, maxTime); + + // Read tree to get ID + File tmpTreeFile = new File(tmpDir, StarTreeConstants.TREE_FILE_NAME); + ObjectInputStream treeStream = new ObjectInputStream(new FileInputStream(tmpTreeFile)); + StarTreeNode rootNode = (StarTreeNode) treeStream.readObject(); + String treeId = rootNode.getId().toString(); + LOG.info("Tree ID for {} is {}", loadId, treeId); + + // Move into data dir + File dataDir = new File(collectionDir, StorageUtils.getDataDirName(treeId, schedule, minTime, maxTime)); + FileUtils.forceMkdir(dataDir); + StorageUtils.moveAllFiles(tmpDir, dataDir); + LOG.info("Moved files from {} to {}", tmpDir, dataDir); + + // Touch data dir to trigger watch service + if (!dataDir.setLastModified(System.currentTimeMillis())) + { + LOG.warn("setLastModified on dataDir failed - watch service will not be triggered!"); + } + } + finally + { + FileUtils.forceDelete(tmpDir); + LOG.info("Deleted tmp dir {}", tmpDir); + } + } + finally + { + lock.unlock(); + LOG.info("Unlocked collection {} using lock {} for data update", collection, lock); + } + } +} diff --git a/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/storage/FixedBufferUtil.java b/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/storage/FixedBufferUtil.java index 9aebd1edf07e..f18335c2e98f 100644 --- a/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/storage/FixedBufferUtil.java +++ 
b/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/storage/FixedBufferUtil.java @@ -14,6 +14,7 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; import java.io.ObjectOutputStream; import java.io.RandomAccessFile; import java.nio.ByteBuffer; @@ -142,7 +143,7 @@ public static void createLeafBufferFiles(File outputDir, metricFile.close(); } - public static void combineDataFiles(File inputDir, File outputDir) throws IOException + public static void combineDataFiles(InputStream starTree, File inputDir, File outputDir) throws IOException { UUID fileId = UUID.randomUUID(); @@ -153,6 +154,10 @@ public static void combineDataFiles(File inputDir, File outputDir) throws IOExce FileUtils.forceMkdir(dimensionStore); FileUtils.forceMkdir(metricStore); + // Tree + File starTreeFile = new File(outputDir, StarTreeConstants.TREE_FILE_NAME); + FileUtils.copyInputStreamToFile(starTree, starTreeFile); + // Dictionaries File combinedDictionaryFile = new File(dimensionStore, fileId + StarTreeConstants.DICT_FILE_SUFFIX); Map> dictionaryMetadata = combineFiles(new File(inputDir, StarTreeConstants.DICT_STORE), combinedDictionaryFile, false); @@ -185,6 +190,7 @@ public static void combineDataFiles(File inputDir, File outputDir) throws IOExce dimensionIndexEntries.add(new DimensionIndexEntry( nodeId, fileId, dictionaryStartOffset, dictionaryLength, bufferStartOffset, bufferLength)); } + File dimensionIndexFile = new File(dimensionStore, fileId + StarTreeConstants.INDEX_FILE_SUFFIX); writeObjects(dimensionIndexEntries, dimensionIndexFile); @@ -199,8 +205,12 @@ public static void combineDataFiles(File inputDir, File outputDir) throws IOExce long maxTime = entry.getValue().get(3); metricIndexEntries.add(new MetricIndexEntry(nodeId, fileId, startOffset, length, new TimeRange(minTime, maxTime))); } - File metricIndexFile = new File(metricStore, fileId + StarTreeConstants.INDEX_FILE_SUFFIX); - 
writeObjects(metricIndexEntries, metricIndexFile); + + if (!metricIndexEntries.isEmpty()) + { + File metricIndexFile = new File(metricStore, fileId + StarTreeConstants.INDEX_FILE_SUFFIX); + writeObjects(metricIndexEntries, metricIndexFile); + } } private static Map> combineFiles(File inputDir, File outputFile, boolean hasTime) throws IOException diff --git a/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/storage/MetricStoreMutableImpl.java b/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/storage/MetricStoreMutableImpl.java index b9b153d8dd76..00e31e203a9b 100644 --- a/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/storage/MetricStoreMutableImpl.java +++ b/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/storage/MetricStoreMutableImpl.java @@ -11,8 +11,6 @@ import java.util.concurrent.ConcurrentMap; import java.util.concurrent.atomic.AtomicLong; -import sun.reflect.generics.reflectiveObjects.NotImplementedException; - public class MetricStoreMutableImpl implements MetricStore { private final MetricSchema metricSchema; diff --git a/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/storage/StarTreeRecordStoreFactoryDefaultImpl.java b/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/storage/StarTreeRecordStoreFactoryDefaultImpl.java index 8771ce42c544..aeaa9ca80c33 100644 --- a/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/storage/StarTreeRecordStoreFactoryDefaultImpl.java +++ b/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/storage/StarTreeRecordStoreFactoryDefaultImpl.java @@ -1,10 +1,5 @@ package com.linkedin.thirdeye.impl.storage; -import static java.nio.file.StandardWatchEventKinds.ENTRY_CREATE; -import static java.nio.file.StandardWatchEventKinds.ENTRY_DELETE; -import static java.nio.file.StandardWatchEventKinds.ENTRY_MODIFY; -import static java.nio.file.StandardWatchEventKinds.OVERFLOW; - import java.io.ByteArrayInputStream; import 
java.io.File; import java.io.FileFilter; @@ -14,11 +9,6 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.channels.FileChannel; -import java.nio.file.FileSystems; -import java.nio.file.Path; -import java.nio.file.WatchEvent; -import java.nio.file.WatchKey; -import java.nio.file.WatchService; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -121,6 +111,9 @@ public class StarTreeRecordStoreFactoryDefaultImpl implements StarTreeRecordStor // fileId to buffer private final Map metricSegments = new HashMap(); + // fileId to descriptor + private final Map indexDescriptors = new HashMap(); + // index fileId to dimension index entries private final Map> dimensionIndexByFile = new HashMap>(); @@ -191,20 +184,6 @@ public void init(File rootDir, StarTreeConfig starTreeConfig, Properties recordS { loadMetricBuffers(entryGroup); } - - if (!metricStoreMutable) - { - MetricStoreRefreshWatcher refreshWatcher = new MetricStoreRefreshWatcher(); - - Path metricPath = FileSystems.getDefault().getPath(rootDir.getAbsolutePath(), StarTreeConstants.METRIC_STORE); - refreshWatcher.register(metricPath); - LOG.info("Registered watch on {}", metricPath); - - Thread watcherThread = new Thread(refreshWatcher); - watcherThread.setDaemon(true); - watcherThread.start(); - LOG.info("Started file system watcher in {}", rootDir); - } } } @@ -261,26 +240,31 @@ private void loadDimensionIndex(File indexFile) throws IOException { List entries = StorageUtils.readDimensionIndex(indexFile); - UUID fileId = UUID.fromString(indexFile.getName().substring(0, indexFile.getName().lastIndexOf(StarTreeConstants.INDEX_FILE_SUFFIX))); + FileDescriptor fileDescriptor = FileDescriptor.fromString(indexFile.getName(), StarTreeConstants.INDEX_FILE_SUFFIX); - dimensionIndexByFile.put(fileId, new HashSet()); + dimensionIndexByFile.put(fileDescriptor.getId(), new HashSet()); + indexDescriptors.put(fileDescriptor.getId(), fileDescriptor); for (DimensionIndexEntry 
entry : entries) { dimensionIndex.put(entry.getNodeId(), entry); - dimensionIndexByFile.get(fileId).add(entry); + dimensionIndexByFile.get(fileDescriptor.getId()).add(entry); } - LOG.info("Loaded dimension index {}", indexFile); + if (LOG.isDebugEnabled()) + { + LOG.debug("Loaded dimension index {}", indexFile); + } } private void loadMetricIndex(File indexFile) throws IOException { List entries = StorageUtils.readMetricIndex(indexFile); - UUID fileId = getFileId(indexFile.getName(), StarTreeConstants.INDEX_FILE_SUFFIX); + FileDescriptor fileDescriptor = FileDescriptor.fromString(indexFile.getName(), StarTreeConstants.INDEX_FILE_SUFFIX); - metricIndexByFile.put(fileId, new HashSet()); + metricIndexByFile.put(fileDescriptor.getId(), new HashSet()); + indexDescriptors.put(fileDescriptor.getId(), fileDescriptor); for (MetricIndexEntry entry : entries) { @@ -291,10 +275,13 @@ private void loadMetricIndex(File indexFile) throws IOException metricIndex.put(entry.getNodeId(), nodeEntries); } nodeEntries.add(entry); - metricIndexByFile.get(fileId).add(entry); + metricIndexByFile.get(fileDescriptor.getId()).add(entry); } - LOG.info("Loaded metric index {}", indexFile); + if (LOG.isDebugEnabled()) + { + LOG.debug("Loaded metric index {}", indexFile); + } } private void loadDimensionBuffers(Collection indexEntries) throws IOException @@ -302,18 +289,30 @@ private void loadDimensionBuffers(Collection indexEntries) File dimensionStore = new File(rootDir, StarTreeConstants.DIMENSION_STORE); for (DimensionIndexEntry indexEntry : indexEntries) { + FileDescriptor associatedDescriptor = indexDescriptors.get(indexEntry.getFileId()); + if (associatedDescriptor == null) + { + throw new IllegalStateException("No index descriptor for " + indexEntry); + } + if (!dimensionSegments.containsKey(indexEntry.getFileId())) { - File bufferFile = new File(dimensionStore, indexEntry.getFileId().toString() + StarTreeConstants.BUFFER_FILE_SUFFIX); + File bufferFile = new File(dimensionStore, 
associatedDescriptor.toString(StarTreeConstants.BUFFER_FILE_SUFFIX)); dimensionSegments.put(indexEntry.getFileId(), mapBuffer(bufferFile)); - LOG.info("Loaded buffer file {}", bufferFile); + if (LOG.isDebugEnabled()) + { + LOG.debug("Loaded buffer file {}", bufferFile); + } } if (!dictionarySegments.containsKey(indexEntry.getFileId())) { - File bufferFile = new File(dimensionStore, indexEntry.getFileId().toString() + StarTreeConstants.DICT_FILE_SUFFIX); + File bufferFile = new File(dimensionStore, associatedDescriptor.toString(StarTreeConstants.DICT_FILE_SUFFIX)); dictionarySegments.put(indexEntry.getFileId(), mapBuffer(bufferFile)); - LOG.info("Loaded buffer file {}", bufferFile); + if (LOG.isDebugEnabled()) + { + LOG.debug("Loaded buffer file {}", bufferFile); + } } } } @@ -323,11 +322,20 @@ private void loadMetricBuffers(Collection indexEntries) throws File metricStore = new File(rootDir, StarTreeConstants.METRIC_STORE); for (MetricIndexEntry indexEntry : indexEntries) { + FileDescriptor associatedDescriptor = indexDescriptors.get(indexEntry.getFileId()); + if (associatedDescriptor == null) + { + throw new IllegalStateException("No index descriptor for " + indexEntry); + } + if (!metricSegments.containsKey(indexEntry.getFileId())) { - File bufferFile = new File(metricStore, indexEntry.getFileId().toString() + StarTreeConstants.BUFFER_FILE_SUFFIX); + File bufferFile = new File(metricStore, associatedDescriptor.toString(StarTreeConstants.BUFFER_FILE_SUFFIX)); metricSegments.put(indexEntry.getFileId(), mapBuffer(bufferFile)); - LOG.info("Loaded buffer file {}", bufferFile); + if (LOG.isDebugEnabled()) + { + LOG.debug("Loaded buffer file {}", bufferFile); + } } } } @@ -410,205 +418,67 @@ public boolean accept(File file) } }; - private class MetricStoreRefreshWatcher implements Runnable + private static class FileDescriptor { - private final WatchService watchService; - private final Map keys; + private final UUID id; + private final String schedule; + private final 
String minTime; + private final String maxTime; + private final String suffix; - MetricStoreRefreshWatcher() throws IOException + FileDescriptor(String schedule, UUID id, String minTime, String maxTime, String suffix) { - this.watchService = FileSystems.getDefault().newWatchService(); - this.keys = new HashMap(); + this.schedule = schedule; + this.id = id; + this.minTime = minTime; + this.maxTime = maxTime; + this.suffix = suffix; } - void register(Path dir) throws IOException + @Override + public String toString() { - WatchKey key = dir.register(watchService, ENTRY_CREATE, ENTRY_DELETE, ENTRY_MODIFY); - keys.put(key, dir); + return minTime + "_" + maxTime + "_" + id + suffix; } - @Override - @SuppressWarnings("unchecked") - public void run() + public String toString(String alternateSuffix) { - for (;;) - { - try - { - WatchKey key; - try - { - key = watchService.take(); - } - catch (InterruptedException e) - { - continue; - } + return schedule + "_" + minTime + "_" + maxTime + "_" + id + alternateSuffix; + } - Path dir = keys.get(key); - if (dir == null) - { - LOG.error("WatchKey not recognized: {}", key); - continue; - } + public String getSchedule() + { + return schedule; + } - synchronized (sync) - { - for (WatchEvent event : key.pollEvents()) - { - if(event.kind() == OVERFLOW) - { - LOG.info("Recieved a overflow event"); - continue; - } - WatchEvent ev = (WatchEvent) event; - Path path = dir.resolve(ev.context()); - File file = path.toFile(); - - if (LOG.isDebugEnabled()) - { - LOG.debug("{} {}", ev.kind(), path); - } - - if (file.getName().endsWith(StarTreeConstants.INDEX_FILE_SUFFIX)) - { - UUID fileId = getFileId(path.toFile().getName(), StarTreeConstants.INDEX_FILE_SUFFIX); - - // Clear existing index / metric stores for this file (always) - Set indexEntries = metricIndexByFile.remove(fileId); - if (indexEntries != null) - { - for (MetricIndexEntry indexEntry : indexEntries) - { - List indexEntriesByNode = metricIndex.get(indexEntry.getNodeId()); - if 
(indexEntriesByNode != null) - { - indexEntriesByNode.remove(indexEntry); - } - - MetricStoreListener metricStoreListener = metricStoreListeners.get(indexEntry.getNodeId()); - if (metricStoreListener != null) - { - metricStoreListener.notifyDelete(indexEntry.getTimeRange()); - } - } - } - - if (ENTRY_CREATE.equals(event.kind()) || ENTRY_MODIFY.equals(event.kind())) - { - try - { - waitForWriteComplete(file); - loadMetricIndex(file); - - // Notify create if buffer exists too - File bufferFile = new File(path.toFile().getParent(), fileId + StarTreeConstants.BUFFER_FILE_SUFFIX); - if (bufferFile.exists()) - { - waitForWriteComplete(bufferFile); - for (MetricIndexEntry indexEntry : metricIndexByFile.get(fileId)) - { - MetricStoreListener metricStoreListener = metricStoreListeners.get(indexEntry.getNodeId()); - if (metricStoreListener != null) - { - metricStoreListener.notifyCreate(indexEntry.getTimeRange(), getMetricBuffer(indexEntry)); - } - } - LOG.info("Notified of creation of metric index and buffer for {}", fileId); - } - } - catch (Exception e) - { - LOG.warn("Error loading index file {}", path, e); - } - } - else if (ENTRY_DELETE.equals(event.kind())) - { - LOG.info("Deleted metric index for file {}", fileId); - } - - } - else if (file.getName().endsWith(StarTreeConstants.BUFFER_FILE_SUFFIX)) - { - UUID fileId = getFileId(path.toFile().getName(), StarTreeConstants.BUFFER_FILE_SUFFIX); - - if (ENTRY_CREATE.equals(event.kind()) || ENTRY_MODIFY.equals(event.kind())) - { - try - { - waitForWriteComplete(file); - ByteBuffer buffer = mapBuffer(file); - metricSegments.put(fileId, buffer); - LOG.info("Loaded buffer file {}: {}", file, buffer); - - // Touch index file to trigger another event - File indexFile = new File(path.toFile().getParent(), fileId + StarTreeConstants.INDEX_FILE_SUFFIX); - if (indexFile.exists()) - { - indexFile.setLastModified(System.currentTimeMillis()); - } - } - catch (Exception e) - { - LOG.warn("Error loading buffer file {}", file, e); - } - } 
- else if (ENTRY_DELETE.equals(event.kind())) - { - ByteBuffer buffer = metricSegments.remove(fileId); - if (buffer != null) - { - LOG.info("Removed existing buffer file {}", fileId); - } - } - } - else - { - LOG.warn("Unrecognized file type {}", path); - } - } - } + public UUID getId() + { + return id; + } - boolean valid = key.reset(); - if (!valid) - { - keys.remove(key); - if (keys.isEmpty()) - { - break; - } - } - } - catch(Exception e) - { - LOG.error("Error while processing event in Watcher Service ", e); - } - } - try - { - watchService.close(); - } - catch (IOException e) - { - LOG.warn("Failed to close watcher service ",e); - } + public String getMinTime() + { + return minTime; } - } - private static void waitForWriteComplete(File file) throws InterruptedException - { - long startTime = System.currentTimeMillis(); - long fileSize; - do + public String getMaxTime() { - fileSize = file.length(); - Thread.sleep(100); // wait for some writes + return maxTime; } - while (fileSize < file.length() && System.currentTimeMillis() - startTime < 60000); - } - private static UUID getFileId(String fileName, String suffix) - { - return UUID.fromString(fileName.substring(0, fileName.lastIndexOf(suffix))); + public String getSuffix() + { + return suffix; + } + + static FileDescriptor fromString(String fileName, String expectedSuffix) + { + String[] tokens = fileName.split("_"); + String schedule = tokens[0]; + String minTime = tokens[1]; + String maxTime = tokens[2]; + UUID id = UUID.fromString(tokens[3].substring(0, tokens[3].lastIndexOf(expectedSuffix))); + return new FileDescriptor(schedule, id, minTime, maxTime, expectedSuffix); + } } } diff --git a/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/storage/StorageUtils.java b/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/storage/StorageUtils.java index f5dc1b62af23..6971cfba9523 100644 --- a/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/storage/StorageUtils.java +++ 
b/thirdeye/thirdeye-core/src/main/java/com/linkedin/thirdeye/impl/storage/StorageUtils.java @@ -4,16 +4,22 @@ import com.linkedin.thirdeye.api.MetricSpec; import com.linkedin.thirdeye.api.MetricTimeSeries; import com.linkedin.thirdeye.api.StarTreeConfig; +import com.linkedin.thirdeye.api.StarTreeConstants; import com.linkedin.thirdeye.impl.NumberUtils; +import org.apache.commons.io.FileUtils; import org.apache.commons.io.input.CountingInputStream; +import org.joda.time.DateTime; import java.io.File; import java.io.FileInputStream; +import java.io.FilenameFilter; import java.io.IOException; import java.io.ObjectInputStream; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; +import java.util.Comparator; import java.util.List; public class StorageUtils @@ -58,6 +64,116 @@ public static void addToMetricStore(StarTreeConfig config, } } + public static String getDataDirName(String treeId, String schedule, DateTime minTime, DateTime maxTime) + { + return StarTreeConstants.DATA_DIR_PREFIX + + "_" + schedule + + "_" + StarTreeConstants.DATE_TIME_FORMATTER.print(minTime) + + "_" + (maxTime == null ? 
"LATEST" : StarTreeConstants.DATE_TIME_FORMATTER.print(maxTime)) + + "_" + treeId; + } + + public static String getDataDirPrefix() + { + return StarTreeConstants.DATA_DIR_PREFIX; + } + + public static void prefixFilesWithTime(File dir, + String schedule, + DateTime minTime, + DateTime maxTime) throws IOException + { + File[] files = dir.listFiles(); + + if (files != null) + { + for (File file : files) + { + String minTimeComponent = StarTreeConstants.DATE_TIME_FORMATTER.print(minTime); + String maxTimeComponent = StarTreeConstants.DATE_TIME_FORMATTER.print(maxTime); + File renamed = new File( + file.getParent(), schedule + "_" + minTimeComponent + "_" + maxTimeComponent + "_" + file.getName()); + FileUtils.moveFile(file, renamed); + } + } + } + + public static File findLatestDataDir(File collectionDir) + { + File[] dataDirs = collectionDir.listFiles(new FilenameFilter() + { + @Override + public boolean accept(File dir, String name) + { + return name.startsWith(StorageUtils.getDataDirPrefix()); + } + }); + + if (dataDirs == null) + { + return null; + } + + Arrays.sort(dataDirs, new Comparator() + { + @Override + public int compare(File f1, File f2) + { + String[] f1Tokens = f1.getName().split("_"); + String[] f2Tokens = f2.getName().split("_"); + + if ("LATEST".equals(f1Tokens[3])) + { + return -1; + } + else if ("LATEST".equals(f2Tokens[3])) + { + return 1; + } + + DateTime f1MaxTime = StarTreeConstants.DATE_TIME_FORMATTER.parseDateTime(f1Tokens[3]); + DateTime f2MaxTime = StarTreeConstants.DATE_TIME_FORMATTER.parseDateTime(f2Tokens[3]); + + return (int) (f1MaxTime.getMillis() - f2MaxTime.getMillis()); + } + }); + + return dataDirs[dataDirs.length - 1]; + } + + public static void moveAllFiles(File srcDataDir, File dstDataDir) throws IOException + { + // Tree + File srcTreeFile = new File(srcDataDir, StarTreeConstants.TREE_FILE_NAME); + File dstTreeFile = new File(dstDataDir, StarTreeConstants.TREE_FILE_NAME); + if (!dstTreeFile.exists()) + { + 
FileUtils.moveFile(srcTreeFile, dstTreeFile); + } + + // Dimensions + File[] dimensionFiles = new File(srcDataDir, StarTreeConstants.DIMENSION_STORE).listFiles(); + File dstDimensionStore = new File(dstDataDir, StarTreeConstants.DIMENSION_STORE); + if (dimensionFiles != null) + { + for (File file : dimensionFiles) + { + FileUtils.moveFile(file, new File(dstDimensionStore, file.getName())); + } + } + + // Metrics + File[] metricFiles = new File(srcDataDir, StarTreeConstants.METRIC_STORE).listFiles(); + File dstMetricStore = new File(dstDataDir, StarTreeConstants.METRIC_STORE); + if (metricFiles != null) + { + for (File file : metricFiles) + { + FileUtils.moveFile(file, new File(dstMetricStore, file.getName())); + } + } + } + public static List readMetricIndex(File indexFile) throws IOException { List objects = readObjectFile(indexFile); @@ -114,4 +230,28 @@ private static List readObjectFile(File objectFile) throws IOException return objects; } + + /** @return true if file was not modified in sleepMillis before timeoutMillis */ + public static boolean waitForModifications(File file, long sleepMillis, long timeoutMillis) + throws InterruptedException + { + long startTimeMillis = System.currentTimeMillis(); + long lastModified = file.lastModified(); + + do + { + Thread.sleep(sleepMillis); + + long currentLastModified = file.lastModified(); + if (lastModified == currentLastModified) + { + return true; + } + + lastModified = currentLastModified; + } + while (System.currentTimeMillis() - startTimeMillis < timeoutMillis); + + return false; + } } diff --git a/thirdeye/thirdeye-core/src/test/java/com/linkedin/thirdeye/impl/storage/TestStarTreeRecordStoreFactoryFixedImpl.java b/thirdeye/thirdeye-core/src/test/java/com/linkedin/thirdeye/impl/storage/TestStarTreeRecordStoreFactoryFixedImpl.java deleted file mode 100644 index 04bb093eab42..000000000000 --- a/thirdeye/thirdeye-core/src/test/java/com/linkedin/thirdeye/impl/storage/TestStarTreeRecordStoreFactoryFixedImpl.java +++ 
/dev/null @@ -1,28 +0,0 @@ -package com.linkedin.thirdeye.impl.storage; - -import com.linkedin.thirdeye.api.StarTreeConfig; -import com.linkedin.thirdeye.api.StarTreeRecord; -import com.linkedin.thirdeye.api.StarTreeRecordStore; -import com.linkedin.thirdeye.api.StarTreeRecordStoreFactory; - -import java.io.File; -import java.io.FileInputStream; -import java.util.UUID; - -public class TestStarTreeRecordStoreFactoryFixedImpl -{ - public static void main(String[] args) throws Exception - { - StarTreeConfig config = StarTreeConfig.decode(new FileInputStream( - "/Users/gbrandt/IdeaProjects/thirdeye-mirror/thirdeye-server/target/test-classes/abook-config.yml")); - StarTreeRecordStoreFactory factory = new StarTreeRecordStoreFactoryDefaultImpl(); - factory.init(new File("/tmp/thirdeye/abook/data"), config, null); - UUID id = UUID.fromString("c5872fda-501d-415a-9fd0-332fec4a0f2f"); - StarTreeRecordStore recordStore = factory.createRecordStore(id); - - for (StarTreeRecord record : recordStore) - { - System.out.println(record); - } - } -} diff --git a/thirdeye/thirdeye-realtime/pom.xml b/thirdeye/thirdeye-realtime/pom.xml index c4a1a05a1d7a..cfae31a6a59a 100644 --- a/thirdeye/thirdeye-realtime/pom.xml +++ b/thirdeye/thirdeye-realtime/pom.xml @@ -26,6 +26,10 @@ org.mortbay.jetty jetty + + org.jboss.netty + netty + diff --git a/thirdeye/thirdeye-realtime/src/main/java/com/linkedin/thirdeye/realtime/ThirdEyeKafkaConfig.java b/thirdeye/thirdeye-realtime/src/main/java/com/linkedin/thirdeye/realtime/ThirdEyeKafkaConfig.java index 971b0013b5a8..b7d1ccedeb64 100644 --- a/thirdeye/thirdeye-realtime/src/main/java/com/linkedin/thirdeye/realtime/ThirdEyeKafkaConfig.java +++ b/thirdeye/thirdeye-realtime/src/main/java/com/linkedin/thirdeye/realtime/ThirdEyeKafkaConfig.java @@ -1,6 +1,8 @@ package com.linkedin.thirdeye.realtime; import com.fasterxml.jackson.annotation.JsonProperty; +import com.linkedin.thirdeye.api.TimeGranularity; +import org.joda.time.DateTime; import 
java.util.Properties; import java.util.concurrent.TimeUnit; @@ -8,17 +10,17 @@ public class ThirdEyeKafkaConfig { private static final String DEFAULT_DECODER_CLASS = ThirdEyeKafkaDecoderAvroImpl.class.getCanonicalName(); - private static final long DEFAULT_PERSIST_INTERVAL_MILLIS = TimeUnit.MILLISECONDS.convert(5, TimeUnit.MINUTES); + private static final TimeGranularity DEFAULT_PERSIST_INTERVAL = new TimeGranularity(5, TimeUnit.MINUTES); private static final String DEFAULT_GROUP_ID = "THIRDEYE"; - private static final long DEFAULT_START_TIME_MILLIS = 0; // i.e. all time + private static final DateTime DEFAULT_START_TIME = new DateTime(0); private String zkAddress; private String topicName; private String groupId = DEFAULT_GROUP_ID; private String decoderClass = DEFAULT_DECODER_CLASS; - private long persistIntervalMillis = DEFAULT_PERSIST_INTERVAL_MILLIS; - private long startTimeMillis = DEFAULT_START_TIME_MILLIS; + private TimeGranularity persistInterval = DEFAULT_PERSIST_INTERVAL; + private DateTime startTime = DEFAULT_START_TIME; private Properties decoderConfig = new Properties(); private Properties consumerConfig = new Properties(); @@ -98,26 +100,26 @@ public void setGroupId(String groupId) } @JsonProperty - public long getPersistIntervalMillis() + public TimeGranularity getPersistInterval() { - return persistIntervalMillis; + return persistInterval; } @JsonProperty - public void setPersistIntervalMillis(long persistIntervalMillis) + public void setPersistInterval(TimeGranularity persistInterval) { - this.persistIntervalMillis = persistIntervalMillis; + this.persistInterval = persistInterval; } @JsonProperty - public long getStartTimeMillis() + public DateTime getStartTime() { - return startTimeMillis; + return startTime; } @JsonProperty - public void setStartTimeMillis(long startTimeMillis) + public void setStartTime(DateTime startTime) { - this.startTimeMillis = startTimeMillis; + this.startTime = startTime; } } diff --git 
a/thirdeye/thirdeye-realtime/src/main/java/com/linkedin/thirdeye/realtime/ThirdEyeKafkaConsumer.java b/thirdeye/thirdeye-realtime/src/main/java/com/linkedin/thirdeye/realtime/ThirdEyeKafkaConsumer.java index 426cca953d56..28c501498e14 100644 --- a/thirdeye/thirdeye-realtime/src/main/java/com/linkedin/thirdeye/realtime/ThirdEyeKafkaConsumer.java +++ b/thirdeye/thirdeye-realtime/src/main/java/com/linkedin/thirdeye/realtime/ThirdEyeKafkaConsumer.java @@ -10,10 +10,13 @@ import kafka.consumer.KafkaStream; import kafka.javaapi.consumer.ConsumerConnector; import kafka.message.MessageAndMetadata; +import org.apache.commons.io.FileUtils; +import org.joda.time.DateTime; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; +import java.io.IOException; import java.util.Collections; import java.util.List; import java.util.Map; @@ -31,6 +34,7 @@ public class ThirdEyeKafkaConsumer { private static final Logger LOG = LoggerFactory.getLogger(ThirdEyeKafkaConsumer.class); + private static final String TMP_DIR_PREFIX = "kafka_load_"; private final StarTree starTree; private final ThirdEyeKafkaConfig config; @@ -39,15 +43,18 @@ public class ThirdEyeKafkaConsumer private final ScheduledExecutorService persistScheduler; private final MetricRegistry metricRegistry; private final File metricStoreDirectory; + private final File tmpMetricStoreDirectory; private final ConcurrentMap streamStats; private final ReadWriteLock persistLock; + private final String schedule; + private final File kafkaDataDir; public ThirdEyeKafkaConsumer(StarTree starTree, ThirdEyeKafkaConfig config, ExecutorService executorService, ScheduledExecutorService persistScheduler, MetricRegistry metricRegistry, - File rootDir) + File kafkaDataDir) { this.starTree = starTree; this.config = config; @@ -57,11 +64,10 @@ public ThirdEyeKafkaConsumer(StarTree starTree, this.metricRegistry = metricRegistry; this.streamStats = new ConcurrentHashMap(); this.persistLock = new ReentrantReadWriteLock(true); 
- - this.metricStoreDirectory = new File(rootDir.getAbsolutePath() - + File.separator + starTree.getConfig().getCollection() - + File.separator + StarTreeConstants.DATA_DIR_NAME - + File.separator + StarTreeConstants.METRIC_STORE); + this.schedule = config.getPersistInterval().getSize() + "-" + config.getPersistInterval().getUnit(); + this.metricStoreDirectory = new File(kafkaDataDir, StarTreeConstants.METRIC_STORE); + this.tmpMetricStoreDirectory = new File(kafkaDataDir, TMP_DIR_PREFIX + StarTreeConstants.METRIC_STORE); + this.kafkaDataDir = kafkaDataDir; } public Map getStreamStats() @@ -69,6 +75,48 @@ public Map getStreamStats() return streamStats; } + private void doPersist(ThirdEyeKafkaStats stats) throws IOException + { + persistLock.writeLock().lock(); + try + { + long persistTime = System.currentTimeMillis(); + DateTime minTime = new DateTime(stats.getLastPersistTimeMillis().get()); + DateTime maxTime = new DateTime(persistTime); + + if (tmpMetricStoreDirectory.exists()) + { + FileUtils.forceDelete(tmpMetricStoreDirectory); + } + + ThirdEyeKafkaPersistenceUtils.persistMetrics(starTree, tmpMetricStoreDirectory); + prefixFilesWithTime(tmpMetricStoreDirectory, minTime, maxTime, schedule); + moveAllFiles(tmpMetricStoreDirectory, metricStoreDirectory); + + if (tmpMetricStoreDirectory.exists()) + { + FileUtils.forceDelete(tmpMetricStoreDirectory); + } + + stats.getLastPersistTimeMillis().set(persistTime); + starTree.clear(); + + // Trigger watch on collection dir + if (!kafkaDataDir.setLastModified(stats.getLastPersistTimeMillis().get())) + { + LOG.warn("Could not trigger watch on collection dir {}", kafkaDataDir.getParentFile()); + } + } + catch (Exception e) + { + LOG.error("Error persisting data from Kafka", e); + } + finally + { + persistLock.writeLock().unlock(); + } + } + public void start() throws Exception { if (isShutdown.getAndSet(false)) @@ -108,24 +156,17 @@ public void start() throws Exception @Override public void run() { - 
persistLock.writeLock().lock(); try { - ThirdEyeKafkaPersistenceUtils.persistMetrics(starTree, metricStoreDirectory); - stats.getLastPersistTimeMillis().set(System.currentTimeMillis()); - starTree.clear(); + doPersist(stats); consumer.commitOffsets(); } catch (Exception e) { - LOG.error("Error persisting data from Kafka", e); - } - finally - { - persistLock.writeLock().unlock(); + LOG.error("{}", e); } } - }, config.getPersistIntervalMillis(), config.getPersistIntervalMillis(), TimeUnit.MILLISECONDS); + }, config.getPersistInterval().getSize(), config.getPersistInterval().getSize(), config.getPersistInterval().getUnit()); executorService.submit(new Runnable() { @@ -156,7 +197,7 @@ public void run() Collections.min(record.getMetricTimeSeries().getTimeWindowSet()) * starTree.getConfig().getTime().getBucket().getSize(), starTree.getConfig().getTime().getBucket().getUnit()); - if (minTimeMillis < config.getStartTimeMillis()) + if (minTimeMillis < config.getStartTime().getMillis()) { stats.getRecordsSkippedExpired().mark(); continue; @@ -195,23 +236,14 @@ public void run() } // Persist any remaining data we've consumed and commit the offsets - persistLock.writeLock().lock(); try { - persistFuture.cancel(true); - ThirdEyeKafkaPersistenceUtils.persistMetrics(starTree, metricStoreDirectory); - stats.getLastPersistTimeMillis().set(System.currentTimeMillis()); - starTree.clear(); + doPersist(stats); consumer.commitOffsets(); - LOG.info("Persisted all data before shutdown for {}", starTree.getConfig().getCollection()); } catch (Exception e) { - LOG.error("Error persisting data during shutdown for {}", starTree.getConfig().getCollection(), e); - } - finally - { - persistLock.writeLock().unlock(); + LOG.error("{}", e); } } }); @@ -229,4 +261,35 @@ public void shutdown() throws Exception LOG.info("Shutdown kafka consumer for {}", starTree.getConfig().getCollection()); } } + + private static void prefixFilesWithTime(File dir, + DateTime minTime, + DateTime maxTime, + String 
schedule) throws IOException + { + File[] files = dir.listFiles(); + + if (files != null) + { + for (File file : files) + { + String minTimeComponent = StarTreeConstants.DATE_TIME_FORMATTER.print(minTime); + String maxTimeComponent = StarTreeConstants.DATE_TIME_FORMATTER.print(maxTime); + File renamed = new File(file.getParent(), schedule + "_" + minTimeComponent + "_" + maxTimeComponent + "_" + file.getName()); + FileUtils.moveFile(file, renamed); + } + } + } + + private static void moveAllFiles(File srcMetricDir, File dstMetricDir) throws IOException + { + File[] metricFiles = srcMetricDir.listFiles(); + if (metricFiles != null) + { + for (File file : metricFiles) + { + FileUtils.moveFile(file, new File(dstMetricDir, file.getName())); + } + } + } } diff --git a/thirdeye/thirdeye-realtime/src/main/java/com/linkedin/thirdeye/realtime/ThirdEyeKafkaPersistenceUtils.java b/thirdeye/thirdeye-realtime/src/main/java/com/linkedin/thirdeye/realtime/ThirdEyeKafkaPersistenceUtils.java index f60ec8a5fe00..7199cbed6a77 100644 --- a/thirdeye/thirdeye-realtime/src/main/java/com/linkedin/thirdeye/realtime/ThirdEyeKafkaPersistenceUtils.java +++ b/thirdeye/thirdeye-realtime/src/main/java/com/linkedin/thirdeye/realtime/ThirdEyeKafkaPersistenceUtils.java @@ -13,8 +13,12 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; +import java.io.InputStream; +import java.io.ObjectOutputStream; import java.util.HashMap; import java.util.Map; @@ -83,8 +87,13 @@ public void call(StarTreeNode node) LOG.info("Wrote leaf buffers to {}", leafBufferDirectory); // Combine leaf buffers + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ObjectOutputStream treeOutputStream = new ObjectOutputStream(baos); + treeOutputStream.writeObject(starTree.getRoot()); + treeOutputStream.flush(); + InputStream treeInputStream = new ByteArrayInputStream(baos.toByteArray()); 
final File combinedBufferDirectory = new File(THIRDEYE_TMP_DIR, COMBINED_BUFFER_DIRECTORY_NAME); - FixedBufferUtil.combineDataFiles(leafBufferDirectory, combinedBufferDirectory); + FixedBufferUtil.combineDataFiles(treeInputStream, leafBufferDirectory, combinedBufferDirectory); LOG.info("Wrote combined leaf buffers for {}", starTree.getConfig().getCollection()); // Copy metric buffers to metric store diff --git a/thirdeye/thirdeye-realtime/src/main/java/com/linkedin/thirdeye/realtime/ThirdEyeKafkaStats.java b/thirdeye/thirdeye-realtime/src/main/java/com/linkedin/thirdeye/realtime/ThirdEyeKafkaStats.java index 3c68bf18b4e2..8fb2a636614a 100644 --- a/thirdeye/thirdeye-realtime/src/main/java/com/linkedin/thirdeye/realtime/ThirdEyeKafkaStats.java +++ b/thirdeye/thirdeye-realtime/src/main/java/com/linkedin/thirdeye/realtime/ThirdEyeKafkaStats.java @@ -18,7 +18,7 @@ public class ThirdEyeKafkaStats public static final String DATA_TIME_MILLIS = "dataTimeMillis"; public static final String DATA_LAG_MILLIS = "dataLagMillis"; - private final AtomicLong lastPersistTimeMillis = new AtomicLong(-1); + private final AtomicLong lastPersistTimeMillis = new AtomicLong(System.currentTimeMillis()); private final AtomicLong lastConsumedRecordTimeMillis = new AtomicLong(-1); private final AtomicLong dataTimeMillis = new AtomicLong(-1); private final Meter recordsAdded; diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/ThirdEyeApplication.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/ThirdEyeApplication.java index e877f70d7a26..6706cdb8bdf5 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/ThirdEyeApplication.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/ThirdEyeApplication.java @@ -11,7 +11,8 @@ import com.linkedin.thirdeye.api.TimeGranularity; import com.linkedin.thirdeye.healthcheck.CollectionConsistencyHealthCheck; import com.linkedin.thirdeye.impl.StarTreeManagerImpl; -import 
com.linkedin.thirdeye.managed.KafkaConsumerManager; +import com.linkedin.thirdeye.impl.storage.DataUpdateManager; +import com.linkedin.thirdeye.managed.ThirdEyeKafkaConsumerManager; import com.linkedin.thirdeye.resource.AdminResource; import com.linkedin.thirdeye.resource.AggregateResource; import com.linkedin.thirdeye.resource.CollectionsResource; @@ -19,7 +20,6 @@ import com.linkedin.thirdeye.resource.FunnelResource; import com.linkedin.thirdeye.resource.HeatMapResource; import com.linkedin.thirdeye.resource.TimeSeriesResource; -import com.linkedin.thirdeye.task.ExpireTask; import com.linkedin.thirdeye.task.KafkaStartTask; import com.linkedin.thirdeye.task.KafkaStopTask; import com.linkedin.thirdeye.task.ViewDimensionIndexTask; @@ -41,7 +41,6 @@ import org.slf4j.LoggerFactory; import java.io.File; -import java.io.IOException; import java.util.HashSet; import java.util.Set; import java.util.concurrent.ExecutorService; @@ -100,13 +99,16 @@ public void run(final Config config, Environment environment) throws Exception final StarTreeManager starTreeManager = new StarTreeManagerImpl(); - final KafkaConsumerManager kafkaConsumerManager - = new KafkaConsumerManager(starTreeManager, rootDir, kafkaConsumerExecutor, kafkaPersistScheduler, environment.metrics()); + final DataUpdateManager dataUpdateManager = new DataUpdateManager(rootDir); + + final ThirdEyeKafkaConsumerManager kafkaConsumerManager + = new ThirdEyeKafkaConsumerManager(starTreeManager, rootDir, kafkaConsumerExecutor, kafkaPersistScheduler, environment.metrics()); final AnomalyDetectionTaskManager anomalyDetectionTaskManager = new AnomalyDetectionTaskManager(starTreeManager, anomalyDetectionTaskScheduler, - config.getAnomalyDetectionInterval()); + config.getAnomalyDetectionInterval(), + rootDir); environment.lifecycle().manage(anomalyDetectionTaskManager); environment.lifecycle().manage(new Managed() @@ -122,7 +124,6 @@ public void start() throws Exception for (String collection : collections) { 
starTreeManager.restore(rootDir, collection); - starTreeManager.open(collection); } } @@ -141,7 +142,14 @@ public void stop() throws Exception { kafkaConsumerManager.stop(); LOG.info("Stopped kafka consumer manager"); + } + catch (Exception e) + { + LOG.error("{}", e); + } + try + { Set collections = new HashSet(starTreeManager.getCollections()); for (String collection : collections) { @@ -149,9 +157,9 @@ public void stop() throws Exception } LOG.info("Closed star tree manager"); } - catch (IOException e) + catch (Exception e) { - LOG.error("Caught exception while closing StarTree manager {}", e); + LOG.error("{}", e); } } }); @@ -175,18 +183,17 @@ public void stop() throws Exception FunnelResource funnelResource = new FunnelResource(starTreeManager); HeatMapResource heatMapResource = new HeatMapResource(starTreeManager, parallelQueryExecutor); environment.jersey().register(new CollectionsResource( - starTreeManager, environment.metrics(), rootDir)); + starTreeManager, environment.metrics(), dataUpdateManager, rootDir)); environment.jersey().register(new AdminResource()); environment.jersey().register(new AggregateResource(starTreeManager)); environment.jersey().register(timeSeriesResource); environment.jersey().register(funnelResource); environment.jersey().register(heatMapResource); environment.jersey().register(new DashboardResource( - starTreeManager, timeSeriesResource, funnelResource, heatMapResource, config.getFeedbackAddress())); + starTreeManager, timeSeriesResource, funnelResource, heatMapResource, config.getFeedbackAddress())); // Tasks environment.admin().addTask(new RestoreTask(starTreeManager, rootDir)); - environment.admin().addTask(new ExpireTask(starTreeManager, rootDir)); environment.admin().addTask(new ViewTreeTask(starTreeManager)); environment.admin().addTask(new ViewDimensionIndexTask(rootDir)); environment.admin().addTask(new ViewMetricIndexTask(rootDir)); diff --git 
a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/CollectionConsistencyHealthCheck.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/CollectionConsistencyHealthCheck.java index deb4c30a75a6..6adfd6e67fc7 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/CollectionConsistencyHealthCheck.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/CollectionConsistencyHealthCheck.java @@ -2,6 +2,7 @@ import com.codahale.metrics.health.HealthCheck; import com.google.common.base.Joiner; +import com.linkedin.thirdeye.api.StarTree; import com.linkedin.thirdeye.api.StarTreeConstants; import com.linkedin.thirdeye.api.StarTreeManager; import com.linkedin.thirdeye.api.StarTreeNode; @@ -42,119 +43,122 @@ protected Result check() throws Exception { for (String collection : manager.getCollections()) { - // Get leaf nodes - Set leafNodes = new HashSet(); - StarTreeUtils.traverseAndGetLeafNodes(leafNodes, manager.getStarTree(collection).getRoot()); - Map allNodeStats = new HashMap(); - for (StarTreeNode leafNode : leafNodes) + for (StarTree starTree : manager.getStarTrees(collection).values()) { - allNodeStats.put(leafNode.getId(), new NodeStats()); - } - - // Check dimension stores - File dimensionStoreDir = new File(PATH_JOINER.join( - rootDir, collection, StarTreeConstants.DATA_DIR_NAME, StarTreeConstants.DIMENSION_STORE)); - File[] dimensionIndexFiles = dimensionStoreDir.listFiles(INDEX_FILE_FILTER); - if (dimensionIndexFiles != null) - { - for (File dimensionIndexFile : dimensionIndexFiles) + // Get leaf nodes + Set leafNodes = new HashSet(); + StarTreeUtils.traverseAndGetLeafNodes(leafNodes, starTree.getRoot()); + Map allNodeStats = new HashMap(); + for (StarTreeNode leafNode : leafNodes) { - List indexEntries = StorageUtils.readDimensionIndex(dimensionIndexFile); + allNodeStats.put(leafNode.getId(), new NodeStats()); + } - for (DimensionIndexEntry indexEntry : 
indexEntries) + // Check dimension stores + File dimensionStoreDir = new File(PATH_JOINER.join( + rootDir, collection, StarTreeConstants.DATA_DIR_PREFIX, StarTreeConstants.DIMENSION_STORE)); + File[] dimensionIndexFiles = dimensionStoreDir.listFiles(INDEX_FILE_FILTER); + if (dimensionIndexFiles != null) + { + for (File dimensionIndexFile : dimensionIndexFiles) { - NodeStats nodeStats = allNodeStats.get(indexEntry.getNodeId()); + List indexEntries = StorageUtils.readDimensionIndex(dimensionIndexFile); - // Check node in index exists - if (nodeStats == null) + for (DimensionIndexEntry indexEntry : indexEntries) { - throw new IllegalStateException("Found node in dimension index which does not exist in tree: " + - "nodeId=" + indexEntry.getNodeId() + - "; indexFileId=" + indexEntry.getFileId()); - } + NodeStats nodeStats = allNodeStats.get(indexEntry.getNodeId()); + + // Check node in index exists + if (nodeStats == null) + { + throw new IllegalStateException("Found node in dimension index which does not exist in tree: " + + "nodeId=" + indexEntry.getNodeId() + + "; indexFileId=" + indexEntry.getFileId()); + } - nodeStats.incrementDimensionIndexCount(); + nodeStats.incrementDimensionIndexCount(); + } } } - } - // Check metric stores - File metricStoreDir = new File(PATH_JOINER.join( - rootDir, collection, StarTreeConstants.DATA_DIR_NAME, StarTreeConstants.METRIC_STORE)); - File[] metricIndexFiles = metricStoreDir.listFiles(INDEX_FILE_FILTER); - if (metricIndexFiles != null) - { - for (File metricIndexFile : metricIndexFiles) + // Check metric stores + File metricStoreDir = new File(PATH_JOINER.join( + rootDir, collection, StarTreeConstants.DATA_DIR_PREFIX, StarTreeConstants.METRIC_STORE)); + File[] metricIndexFiles = metricStoreDir.listFiles(INDEX_FILE_FILTER); + if (metricIndexFiles != null) { - List indexEntries = StorageUtils.readMetricIndex(metricIndexFile); - - for (MetricIndexEntry indexEntry : indexEntries) + for (File metricIndexFile : metricIndexFiles) { - 
NodeStats nodeStats = allNodeStats.get(indexEntry.getNodeId()); + List indexEntries = StorageUtils.readMetricIndex(metricIndexFile); - // Check node in index exists - if (nodeStats == null) + for (MetricIndexEntry indexEntry : indexEntries) { - throw new IllegalStateException("Found node in metric index which does not exist in tree: " + - "nodeId=" + indexEntry.getNodeId() + - "; indexFileId=" + indexEntry.getFileId()); - } + NodeStats nodeStats = allNodeStats.get(indexEntry.getNodeId()); - nodeStats.incrementMetricIndexCount(); + // Check node in index exists + if (nodeStats == null) + { + throw new IllegalStateException("Found node in metric index which does not exist in tree: " + + "nodeId=" + indexEntry.getNodeId() + + "; indexFileId=" + indexEntry.getFileId()); + } - nodeStats.addTimeRange(indexEntry.getTimeRange()); + nodeStats.incrementMetricIndexCount(); + + nodeStats.addTimeRange(indexEntry.getTimeRange()); + } } } - } - - Integer metricIndexCount = null; - for (StarTreeNode leafNode : leafNodes) - { - NodeStats nodeStats = allNodeStats.get(leafNode.getId()); - if (nodeStats == null) - { - throw new IllegalStateException("No node stats for leaf " + leafNode.getId()); - } + Integer metricIndexCount = null; - if (metricIndexCount == null) + for (StarTreeNode leafNode : leafNodes) { - metricIndexCount = nodeStats.getMetricIndexCount(); - } + NodeStats nodeStats = allNodeStats.get(leafNode.getId()); + if (nodeStats == null) + { + throw new IllegalStateException("No node stats for leaf " + leafNode.getId()); + } - // Check there is one dimension store for each node - if (nodeStats.getDimensionIndexCount() != 1) - { - throw new IllegalStateException("There must be one and only one dimension index for node " + leafNode.getId()); - } + if (metricIndexCount == null) + { + metricIndexCount = nodeStats.getMetricIndexCount(); + } - // Check all nodes have the same number of metric segments - if (metricIndexCount != nodeStats.getMetricIndexCount()) - { - throw new 
IllegalStateException("There are " + nodeStats.getMetricIndexCount() - + " metric index entries for node " + leafNode.getId() - + ", but expected " + metricIndexCount - + ". This probably indicates some segments were lost"); - } + // Check there is one dimension store for each node + if (nodeStats.getDimensionIndexCount() != 1) + { + throw new IllegalStateException("There must be one and only one dimension index for node " + leafNode.getId()); + } - if (leafNode.getRecordStore().getRecordCountEstimate() > 0) - { - // Check the record store max time is the same as that in index - if (!leafNode.getRecordStore().getMaxTime().equals(nodeStats.getMaxTimeInIndex())) + // Check all nodes have the same number of metric segments + if (metricIndexCount != nodeStats.getMetricIndexCount()) { - throw new IllegalStateException("Record store max time differs from that in index: " - + leafNode.getRecordStore().getMaxTime() - + " vs " + nodeStats.getMaxTimeInIndex() - + " for node " + leafNode.getId()); + throw new IllegalStateException("There are " + nodeStats.getMetricIndexCount() + + " metric index entries for node " + leafNode.getId() + + ", but expected " + metricIndexCount + + ". 
This probably indicates some segments were lost"); } - // Check the record store min time is the same as that in index - if (!leafNode.getRecordStore().getMinTime().equals(nodeStats.getMinTimeInIndex())) + if (leafNode.getRecordStore().getRecordCountEstimate() > 0) { - throw new IllegalStateException("Record store min time differs from that in index: " - + leafNode.getRecordStore().getMinTime() - + " vs " + nodeStats.getMinTimeInIndex() - + " for node " + leafNode.getId()); + // Check the record store max time is the same as that in index + if (!leafNode.getRecordStore().getMaxTime().equals(nodeStats.getMaxTimeInIndex())) + { + throw new IllegalStateException("Record store max time differs from that in index: " + + leafNode.getRecordStore().getMaxTime() + + " vs " + nodeStats.getMaxTimeInIndex() + + " for node " + leafNode.getId()); + } + + // Check the record store min time is the same as that in index + if (!leafNode.getRecordStore().getMinTime().equals(nodeStats.getMinTimeInIndex())) + { + throw new IllegalStateException("Record store min time differs from that in index: " + + leafNode.getRecordStore().getMinTime() + + " vs " + nodeStats.getMinTimeInIndex() + + " for node " + leafNode.getId()); + } } } } diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/KafkaConsumerLagHealthCheck.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/KafkaConsumerLagHealthCheck.java index 81cc1f046878..52d84678ab64 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/KafkaConsumerLagHealthCheck.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/KafkaConsumerLagHealthCheck.java @@ -1,7 +1,7 @@ package com.linkedin.thirdeye.healthcheck; import com.codahale.metrics.health.HealthCheck; -import com.linkedin.thirdeye.managed.KafkaConsumerManager; +import com.linkedin.thirdeye.managed.ThirdEyeKafkaConsumerManager; import 
com.linkedin.thirdeye.realtime.ThirdEyeKafkaStats; import java.util.Map; @@ -13,9 +13,9 @@ public class KafkaConsumerLagHealthCheck extends HealthCheck private static final long MINIMUM_ACCEPTABLE_CONSUMER_LAG_MILLIS = TimeUnit.MILLISECONDS.convert(5, TimeUnit.MINUTES); - private final KafkaConsumerManager kafkaConsumerManager; + private final ThirdEyeKafkaConsumerManager kafkaConsumerManager; - public KafkaConsumerLagHealthCheck(KafkaConsumerManager kafkaConsumerManager) + public KafkaConsumerLagHealthCheck(ThirdEyeKafkaConsumerManager kafkaConsumerManager) { this.kafkaConsumerManager = kafkaConsumerManager; } diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/KafkaDataLagHealthCheck.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/KafkaDataLagHealthCheck.java index 7eba6a859b48..db5226a46a4d 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/KafkaDataLagHealthCheck.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/KafkaDataLagHealthCheck.java @@ -1,7 +1,7 @@ package com.linkedin.thirdeye.healthcheck; import com.codahale.metrics.health.HealthCheck; -import com.linkedin.thirdeye.managed.KafkaConsumerManager; +import com.linkedin.thirdeye.managed.ThirdEyeKafkaConsumerManager; import com.linkedin.thirdeye.realtime.ThirdEyeKafkaStats; import java.util.Map; @@ -13,9 +13,9 @@ public class KafkaDataLagHealthCheck extends HealthCheck private static final long MINIMUM_ACCEPTABLE_DATA_LAG_MILLIS = TimeUnit.MILLISECONDS.convert(3, TimeUnit.HOURS); - private final KafkaConsumerManager kafkaConsumerManager; + private final ThirdEyeKafkaConsumerManager kafkaConsumerManager; - public KafkaDataLagHealthCheck(KafkaConsumerManager kafkaConsumerManager) + public KafkaDataLagHealthCheck(ThirdEyeKafkaConsumerManager kafkaConsumerManager) { this.kafkaConsumerManager = kafkaConsumerManager; } diff --git 
a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/StarTreeHealthCheck.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/StarTreeHealthCheck.java index 636927258e3d..10847938d3d6 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/StarTreeHealthCheck.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/StarTreeHealthCheck.java @@ -11,6 +11,7 @@ import com.codahale.metrics.health.HealthCheck; import com.google.common.base.Joiner; +import com.linkedin.thirdeye.api.StarTree; import com.linkedin.thirdeye.api.StarTreeConstants; import com.linkedin.thirdeye.api.StarTreeManager; import com.linkedin.thirdeye.api.StarTreeNode; @@ -38,54 +39,57 @@ protected Result check() throws Exception { for (String collection : manager.getCollections()) { - Map nodeCount = new HashMap(); - Set leafNodes = new HashSet(); - StarTreeUtils.traverseAndGetLeafNodes(leafNodes, manager.getStarTree(collection).getRoot()); - - for (StarTreeNode leafNode : leafNodes) + for (StarTree starTree : manager.getStarTrees(collection).values()) { - nodeCount.put(leafNode.getId(), 0); - } - - // Check dimension stores for extra entries - File dimensionStoreDir = new File(PATH_JOINER.join( - rootDir, collection, StarTreeConstants.DATA_DIR_NAME, StarTreeConstants.DIMENSION_STORE)); - File[] dimensionIndexFiles = dimensionStoreDir.listFiles(INDEX_FILE_FILTER); + Map nodeCount = new HashMap(); + Set leafNodes = new HashSet(); + StarTreeUtils.traverseAndGetLeafNodes(leafNodes, starTree.getRoot()); - if (dimensionIndexFiles != null) - { - for (File dimensionIndexFile : dimensionIndexFiles) + for (StarTreeNode leafNode : leafNodes) { - List indexEntries = StorageUtils.readDimensionIndex(dimensionIndexFile); + nodeCount.put(leafNode.getId(), 0); + } - for (DimensionIndexEntry indexEntry : indexEntries) - { + // Check dimension stores for extra entries + File dimensionStoreDir = new 
File(PATH_JOINER.join( + rootDir, collection, StarTreeConstants.DATA_DIR_PREFIX, StarTreeConstants.DIMENSION_STORE)); + File[] dimensionIndexFiles = dimensionStoreDir.listFiles(INDEX_FILE_FILTER); - Integer count = nodeCount.get(indexEntry.getNodeId()); + if (dimensionIndexFiles != null) + { + for (File dimensionIndexFile : dimensionIndexFiles) + { + List indexEntries = StorageUtils.readDimensionIndex(dimensionIndexFile); - if (count == null) + for (DimensionIndexEntry indexEntry : indexEntries) { - throw new IllegalStateException("Found node in dimension index which does not exist in tree: " + - "nodeId=" + indexEntry.getNodeId() + - "; indexFileId=" + indexEntry.getFileId()); - } - else - { - nodeCount.put(indexEntry.getNodeId(), count + 1); + + Integer count = nodeCount.get(indexEntry.getNodeId()); + + if (count == null) + { + throw new IllegalStateException("Found node in dimension index which does not exist in tree: " + + "nodeId=" + indexEntry.getNodeId() + + "; indexFileId=" + indexEntry.getFileId()); + } + else + { + nodeCount.put(indexEntry.getNodeId(), count + 1); + } } } } - } - // Ensure every leaf node has exactly 1 entry in the dimension indexes - for (StarTreeNode leafNode : leafNodes) - { - if (nodeCount.get(leafNode.getId()) != 1) + // Ensure every leaf node has exactly 1 entry in the dimension indexes + for (StarTreeNode leafNode : leafNodes) { - throw new IllegalStateException("There must be one and only one dimension index for node " + leafNode.getId()); + if (nodeCount.get(leafNode.getId()) != 1) + { + throw new IllegalStateException("There must be one and only one dimension index for node " + leafNode.getId()); + } } - } + } } return Result.healthy(); diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/TimeRangeContiguityHealthCheck.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/TimeRangeContiguityHealthCheck.java index 9de487921442..809e271ff58a 100644 --- 
a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/TimeRangeContiguityHealthCheck.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/TimeRangeContiguityHealthCheck.java @@ -41,7 +41,7 @@ protected Result check() throws Exception List missingRanges = new ArrayList(); File metricStoreDir = new File(PATH_JOINER.join( - rootDir, collection, StarTreeConstants.DATA_DIR_NAME, StarTreeConstants.METRIC_STORE)); + rootDir, collection, StarTreeConstants.DATA_DIR_PREFIX, StarTreeConstants.METRIC_STORE)); File[] metricIndexFiles = metricStoreDir.listFiles(INDEX_FILE_FILTER); if (metricIndexFiles != null) diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/TimeRangeHealthCheck.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/TimeRangeHealthCheck.java index 42281128e4d5..986eaf309747 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/TimeRangeHealthCheck.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/healthcheck/TimeRangeHealthCheck.java @@ -9,6 +9,7 @@ import com.codahale.metrics.health.HealthCheck; import com.google.common.base.Joiner; +import com.linkedin.thirdeye.api.StarTree; import com.linkedin.thirdeye.api.StarTreeCallback; import com.linkedin.thirdeye.api.StarTreeConstants; import com.linkedin.thirdeye.api.StarTreeManager; @@ -42,7 +43,7 @@ protected Result check() throws Exception { final Map> nodeToTimeRangeMap = new HashMap>(); File metricStoreDir = new File(PATH_JOINER.join( - rootDir, collection, StarTreeConstants.DATA_DIR_NAME, StarTreeConstants.METRIC_STORE)); + rootDir, collection, StarTreeConstants.DATA_DIR_PREFIX, StarTreeConstants.METRIC_STORE)); File[] metricIndexFiles = metricStoreDir.listFiles(INDEX_FILE_FILTER); //Create mapping of node id to number of times each timerange appears on that node, from the index files @@ -86,53 +87,56 @@ protected Result check() throws Exception { 
} - // Traverse tree structure and ensure for each node, the time ranges loaded into the metric store are the same as those in the index - manager.getStarTree(collection).eachLeaf(new StarTreeCallback() + // Traverse tree structure and ensure for each node, the time ranges loaded into the metric store are the same as those in the index + for (StarTree starTree : manager.getStarTrees(collection).values()) { + starTree.eachLeaf(new StarTreeCallback() + { - @Override - public void call(StarTreeNode leafNode) { - - Map indexTimeRangeToCount = nodeToTimeRangeMap.get(leafNode.getId()); + @Override + public void call(StarTreeNode leafNode) { - if (indexTimeRangeToCount == null) - { - if (leafNode.getRecordStore().getTimeRangeCount().size() != 0) - { - throw new IllegalStateException("Found node "+ leafNode.getId()+ - " which has no metric segments on disk but has metric segments loaded in memory"); - } - } - else - { - Map nodeTimeRangeToCount = leafNode.getRecordStore().getTimeRangeCount(); + Map indexTimeRangeToCount = nodeToTimeRangeMap.get(leafNode.getId()); - if (indexTimeRangeToCount.size() != nodeTimeRangeToCount.size()) + if (indexTimeRangeToCount == null) { - throw new IllegalStateException("Number of timeranges in metric store are not same as index for node " - +leafNode.getId()); + if (leafNode.getRecordStore().getTimeRangeCount().size() != 0) + { + throw new IllegalStateException("Found node "+ leafNode.getId()+ + " which has no metric segments on disk but has metric segments loaded in memory"); + } } - - for (Map.Entry entry : nodeTimeRangeToCount.entrySet()) + else { - TimeRange nodeTimeRange = entry.getKey(); - Integer nodeCount = entry.getValue(); + Map nodeTimeRangeToCount = leafNode.getRecordStore().getTimeRangeCount(); - Integer indexCount = indexTimeRangeToCount.get(nodeTimeRange); - if (indexCount == null) + if (indexTimeRangeToCount.size() != nodeTimeRangeToCount.size()) { - throw new IllegalStateException("Timerange "+nodeTimeRange.toString() - + 
"exists in metric store but not in index for node "+leafNode.getId()); + throw new IllegalStateException("Number of timeranges in metric store are not same as index for node " + +leafNode.getId()); } - if (indexCount != nodeCount) + + for (Map.Entry entry : nodeTimeRangeToCount.entrySet()) { - throw new IllegalStateException("Timerange "+nodeTimeRange.toString()+" appears "+nodeCount - + " times in metric store but "+indexCount+" times in index, for node "+leafNode.getId()); + TimeRange nodeTimeRange = entry.getKey(); + Integer nodeCount = entry.getValue(); + + Integer indexCount = indexTimeRangeToCount.get(nodeTimeRange); + if (indexCount == null) + { + throw new IllegalStateException("Timerange "+nodeTimeRange.toString() + + "exists in metric store but not in index for node "+leafNode.getId()); + } + if (indexCount != nodeCount) + { + throw new IllegalStateException("Timerange "+nodeTimeRange.toString()+" appears "+nodeCount + + " times in metric store but "+indexCount+" times in index, for node "+leafNode.getId()); + } } } } - } - }); + }); + } } return Result.healthy(); diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/managed/AnomalyDetectionTaskManager.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/managed/AnomalyDetectionTaskManager.java index e9a63a17fa86..da6488c32d5b 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/managed/AnomalyDetectionTaskManager.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/managed/AnomalyDetectionTaskManager.java @@ -7,11 +7,14 @@ import com.linkedin.thirdeye.api.StarTree; import com.linkedin.thirdeye.api.StarTreeManager; import com.linkedin.thirdeye.api.TimeGranularity; +import com.linkedin.thirdeye.impl.storage.StorageUtils; import io.dropwizard.lifecycle.Managed; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.File; import java.util.HashSet; +import java.util.Map; import java.util.Set; import 
java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledFuture; @@ -24,15 +27,18 @@ public class AnomalyDetectionTaskManager implements Managed private final ScheduledExecutorService scheduler; private final TimeGranularity executionInterval; private final Set tasks; + private final File rootDir; public AnomalyDetectionTaskManager(StarTreeManager starTreeManager, ScheduledExecutorService scheduler, - TimeGranularity executionInterval) + TimeGranularity executionInterval, + File rootDir) { this.starTreeManager = starTreeManager; this.scheduler = scheduler; this.executionInterval = executionInterval; this.tasks = new HashSet(); + this.rootDir = rootDir; } @Override @@ -48,14 +54,34 @@ public void start() throws Exception { for (String collection : starTreeManager.getCollections()) { - StarTree starTree = starTreeManager.getStarTree(collection); + Map starTrees = starTreeManager.getStarTrees(collection); + if (starTrees == null) + { + LOG.warn("No star trees available for {}", collection); + continue; + } + + File latestDataDir = StorageUtils.findLatestDataDir(new File(rootDir, collection)); + if (latestDataDir == null) + { + LOG.warn("No latest data dir for {}", collection); + continue; + } + + StarTree starTree = starTrees.get(latestDataDir); + if (starTree == null) + { + LOG.error("Manager does not have star tree for data dir {}", latestDataDir); + continue; + } + String functionClass = starTree.getConfig().getAnomalyDetectionFunctionClass(); if (functionClass != null) { // Function AnomalyDetectionFunction function - = (AnomalyDetectionFunction) Class.forName(functionClass).getConstructor().newInstance(); + = (AnomalyDetectionFunction) Class.forName(functionClass).getConstructor().newInstance(); function.init(starTree.getConfig(), starTree.getConfig().getAnomalyDetectionFunctionConfig()); // Handler @@ -65,21 +91,21 @@ public void start() throws Exception handlerClass = AnomalyResultHandlerLoggerImpl.class.getCanonicalName(); } 
AnomalyResultHandler handler - = (AnomalyResultHandler) Class.forName(handlerClass).getConstructor().newInstance(); + = (AnomalyResultHandler) Class.forName(handlerClass).getConstructor().newInstance(); handler.init(starTree.getConfig(), starTree.getConfig().getAnomalyHandlerConfig()); // Mode AnomalyDetectionTask.Mode mode = starTree.getConfig().getAnomalyDetectionMode() == null - ? AnomalyDetectionTask.Mode.LEAF_PREFIX - : AnomalyDetectionTask.Mode.valueOf(starTree.getConfig().getAnomalyDetectionMode()); + ? AnomalyDetectionTask.Mode.LEAF_PREFIX + : AnomalyDetectionTask.Mode.valueOf(starTree.getConfig().getAnomalyDetectionMode()); LOG.info("Starting anomaly detection for {} using function {} and handler {} at interval of {} {}", collection, functionClass, handlerClass, executionInterval.getSize(), executionInterval.getUnit()); // Task tasks.add(scheduler.scheduleAtFixedRate( - new AnomalyDetectionTask(starTree, function, handler, mode), - 0, executionInterval.getSize(), executionInterval.getUnit())); + new AnomalyDetectionTask(starTree, function, handler, mode), + 0, executionInterval.getSize(), executionInterval.getUnit())); } } } diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/managed/KafkaConsumerManager.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/managed/ThirdEyeKafkaConsumerManager.java similarity index 64% rename from thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/managed/KafkaConsumerManager.java rename to thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/managed/ThirdEyeKafkaConsumerManager.java index 384f7d5cac9e..3c9589be7ed0 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/managed/KafkaConsumerManager.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/managed/ThirdEyeKafkaConsumerManager.java @@ -3,6 +3,7 @@ import com.codahale.metrics.MetricRegistry; import com.fasterxml.jackson.databind.ObjectMapper; import 
com.fasterxml.jackson.dataformat.yaml.YAMLFactory; +import com.fasterxml.jackson.datatype.joda.JodaModule; import com.linkedin.thirdeye.api.StarTree; import com.linkedin.thirdeye.api.StarTreeConfig; import com.linkedin.thirdeye.api.StarTreeConstants; @@ -10,26 +11,37 @@ import com.linkedin.thirdeye.api.StarTreeNode; import com.linkedin.thirdeye.impl.StarTreeImpl; import com.linkedin.thirdeye.impl.storage.StarTreeRecordStoreFactoryDefaultImpl; +import com.linkedin.thirdeye.impl.storage.StorageUtils; import com.linkedin.thirdeye.realtime.ThirdEyeKafkaConfig; import com.linkedin.thirdeye.realtime.ThirdEyeKafkaConsumer; import com.linkedin.thirdeye.realtime.ThirdEyeKafkaStats; import io.dropwizard.lifecycle.Managed; +import org.apache.commons.io.FileUtils; +import org.joda.time.DateTime; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.FileInputStream; +import java.io.FilenameFilter; import java.io.ObjectInputStream; +import java.util.Arrays; +import java.util.Comparator; import java.util.HashMap; import java.util.Map; import java.util.concurrent.ExecutorService; import java.util.concurrent.ScheduledExecutorService; -public class KafkaConsumerManager implements Managed +public class ThirdEyeKafkaConsumerManager implements Managed { - private static final Logger LOG = LoggerFactory.getLogger(KafkaConsumerManager.class); + private static final Logger LOG = LoggerFactory.getLogger(ThirdEyeKafkaConsumerManager.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(new YAMLFactory()); + static + { + OBJECT_MAPPER.registerModule(new JodaModule()); + } + private final StarTreeManager starTreeManager; private final File rootDir; private final ExecutorService executorService; @@ -37,13 +49,11 @@ public class KafkaConsumerManager implements Managed private final MetricRegistry metricRegistry; private final Map kafkaConsumers; - private boolean isShutdown; - - public KafkaConsumerManager(StarTreeManager 
starTreeManager, - File rootDir, - ExecutorService executorService, - ScheduledExecutorService persistScheduler, - MetricRegistry metricRegistry) + public ThirdEyeKafkaConsumerManager(StarTreeManager starTreeManager, + File rootDir, + ExecutorService executorService, + ScheduledExecutorService persistScheduler, + MetricRegistry metricRegistry) { this.starTreeManager = starTreeManager; this.rootDir = rootDir; @@ -136,29 +146,53 @@ public void start(String collection) throws Exception if (kafkaFile.exists()) { StarTreeConfig starTreeConfig - = OBJECT_MAPPER.readValue(new File(new File(rootDir, collection), StarTreeConstants.CONFIG_FILE_NAME), StarTreeConfig.class); + = OBJECT_MAPPER.readValue(new File(collectionDir, StarTreeConstants.CONFIG_FILE_NAME), StarTreeConfig.class); + ThirdEyeKafkaConfig kafkaConfig + = OBJECT_MAPPER.readValue(kafkaFile, ThirdEyeKafkaConfig.class); + starTreeConfig.getRecordStoreFactoryConfig() .setProperty(StarTreeRecordStoreFactoryDefaultImpl.PROP_METRIC_STORE_MUTABLE, "true"); - // Read tree structure - ObjectInputStream inputStream = new ObjectInputStream( - new FileInputStream(new File(collectionDir, StarTreeConstants.TREE_FILE_NAME))); - StarTreeNode root = (StarTreeNode) inputStream.readObject(); - final StarTree mutableStarTree = new StarTreeImpl(starTreeConfig, new File(collectionDir, StarTreeConstants.DATA_DIR_NAME), root); - mutableStarTree.open(); - + // Get tree + File latestDataDir = StorageUtils.findLatestDataDir(collectionDir); + if (latestDataDir == null) + { + throw new IllegalStateException("No available star tree"); + } + File starTreeFile = new File(latestDataDir, StarTreeConstants.TREE_FILE_NAME); + FileInputStream fis = new FileInputStream(new File(latestDataDir, StarTreeConstants.TREE_FILE_NAME)); + ObjectInputStream ois = new ObjectInputStream(fis); + StarTreeNode root = (StarTreeNode) ois.readObject(); + LOG.info("Using tree {} from {} for collection {}", root.getId(), latestDataDir, collection); + + // Create data 
directory for kafka consumer + File kafkaDataDir = new File( + collectionDir, StorageUtils.getDataDirName(root.getId().toString(), "KAFKA", new DateTime(), null)); + FileUtils.forceMkdir(kafkaDataDir); + + // Copy the dimension store and tree + FileUtils.copyFile( + starTreeFile, + new File(kafkaDataDir, StarTreeConstants.TREE_FILE_NAME)); + FileUtils.copyDirectory( + new File(latestDataDir, StarTreeConstants.DIMENSION_STORE), + new File(kafkaDataDir, StarTreeConstants.DIMENSION_STORE)); + LOG.info("Bootstrapped {} with tree / dimension store from {}", kafkaDataDir, latestDataDir); + + // Create and open tree + StarTree mutableTree = new StarTreeImpl(starTreeConfig, kafkaDataDir, root); + mutableTree.open(); + + // Start consumer ThirdEyeKafkaConsumer kafkaConsumer - = new ThirdEyeKafkaConsumer(mutableStarTree, - OBJECT_MAPPER.readValue(kafkaFile, ThirdEyeKafkaConfig.class), + = new ThirdEyeKafkaConsumer(mutableTree, + kafkaConfig, executorService, persistScheduler, metricRegistry, - rootDir); - + kafkaDataDir); kafkaConsumers.put(collection, kafkaConsumer); - kafkaConsumer.start(); - LOG.info("Started kafka consumer for {}", collection); } else diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/AdminResource.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/AdminResource.java index 45bf37154316..db962656f3f4 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/AdminResource.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/AdminResource.java @@ -17,7 +17,6 @@ public class AdminResource { @GET - @Timed public Response returnDefaultDashboard() { return Response.seeOther(URI.create("/dashboard")).build(); @@ -25,7 +24,6 @@ public Response returnDefaultDashboard() @GET @Path("/admin") - @Timed public String sayGood() { return "GOOD"; diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/AggregateResource.java 
b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/AggregateResource.java index 00381c254751..9b3eda38145c 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/AggregateResource.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/AggregateResource.java @@ -4,7 +4,7 @@ import com.linkedin.thirdeye.api.DimensionKey; import com.linkedin.thirdeye.api.MetricSpec; import com.linkedin.thirdeye.api.MetricTimeSeries; -import com.linkedin.thirdeye.api.StarTree; +import com.linkedin.thirdeye.api.StarTreeConfig; import com.linkedin.thirdeye.api.StarTreeManager; import com.linkedin.thirdeye.util.QueryUtils; import com.sun.jersey.api.NotFoundException; @@ -34,7 +34,6 @@ public AggregateResource(StarTreeManager starTreeManager) @GET @Path("/{collection}/{startMillis}/{endMillis}") - @Timed @Produces(MediaType.APPLICATION_JSON) public List getAggregate( @PathParam("collection") String collection, @@ -42,23 +41,24 @@ public List getAggregate( @PathParam("endMillis") Long endMillis, @Context UriInfo uriInfo) { - StarTree starTree = starTreeManager.getStarTree(collection); - if (starTree == null) + StarTreeConfig config = starTreeManager.getConfig(collection); + if (config == null) { throw new NotFoundException("No collection " + collection); } int bucketSize - = starTree.getConfig().getTime().getBucket().getSize(); + = config.getTime().getBucket().getSize(); TimeUnit bucketUnit - = starTree.getConfig().getTime().getBucket().getUnit(); + = config.getTime().getBucket().getUnit(); // Get collection times long start = bucketUnit.convert(startMillis, TimeUnit.MILLISECONDS) / bucketSize; long end = bucketUnit.convert(endMillis, TimeUnit.MILLISECONDS) / bucketSize; long queryStartTime = System.currentTimeMillis(); - Map queryResult = QueryUtils.doQuery(starTree, start, end, uriInfo); + Map queryResult + = QueryUtils.doQuery(starTreeManager.getStarTrees(collection).values(), start, end, uriInfo); long 
queryTimeMillis = System.currentTimeMillis() - queryStartTime; List clientResult = new ArrayList(queryResult.size()); @@ -66,15 +66,15 @@ public List getAggregate( for (Map.Entry entry : queryResult.entrySet()) { Map dimensionValues - = QueryUtils.convertDimensionKey(starTree.getConfig().getDimensions(), entry.getKey()); + = QueryUtils.convertDimensionKey(config.getDimensions(), entry.getKey()); Number[] metricSums = entry.getValue().getMetricSums(); Map metricValues = new HashMap(metricSums.length); - for (int i = 0; i < starTree.getConfig().getMetrics().size(); i++) + for (int i = 0; i < config.getMetrics().size(); i++) { - MetricSpec metricSpec = starTree.getConfig().getMetrics().get(i); + MetricSpec metricSpec = config.getMetrics().get(i); metricValues.put(metricSpec.getName(), metricSums[i]); } diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/CollectionsResource.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/CollectionsResource.java index 877d94232d2e..26e8791c6e43 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/CollectionsResource.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/CollectionsResource.java @@ -3,21 +3,18 @@ import com.codahale.metrics.Gauge; import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.annotation.Timed; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.ImmutableSet; -import com.linkedin.thirdeye.api.StarTree; import com.linkedin.thirdeye.api.StarTreeConfig; import com.linkedin.thirdeye.api.StarTreeConstants; import com.linkedin.thirdeye.api.StarTreeManager; -import com.linkedin.thirdeye.api.StarTreeStats; -import com.linkedin.thirdeye.impl.TarUtils; -import com.linkedin.thirdeye.realtime.ThirdEyeKafkaConfig; +import com.linkedin.thirdeye.impl.storage.DataUpdateManager; import com.sun.jersey.api.ConflictException; import 
com.sun.jersey.api.NotFoundException; import org.apache.commons.io.FileUtils; +import org.joda.time.DateTime; import javax.ws.rs.Consumes; import javax.ws.rs.DELETE; +import javax.ws.rs.DefaultValue; import javax.ws.rs.GET; import javax.ws.rs.POST; import javax.ws.rs.Path; @@ -28,15 +25,12 @@ import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; -import java.io.ObjectOutputStream; import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.Set; import java.util.concurrent.atomic.AtomicLong; @Path("/collections") @@ -48,14 +42,17 @@ public class CollectionsResource private final StarTreeManager manager; private final File rootDir; private final AtomicLong lastPostDataMillis; + private final DataUpdateManager dataUpdateManager; public CollectionsResource(StarTreeManager manager, MetricRegistry metricRegistry, + DataUpdateManager dataUpdateManager, File rootDir) { this.manager = manager; this.rootDir = rootDir; this.lastPostDataMillis = new AtomicLong(-1); + this.dataUpdateManager = dataUpdateManager; // Metric for time we last received a POST to update collection's data metricRegistry.register(MetricRegistry.name(CollectionsResource.class, LAST_POST_DATA_MILLIS), @@ -71,7 +68,6 @@ public Long getValue() @GET - @Timed public List getCollections() { List collections = new ArrayList(manager.getCollections()); @@ -81,44 +77,35 @@ public List getCollections() @GET @Path("/{collection}") - @Timed public StarTreeConfig getConfig(@PathParam("collection") String collection) { - StarTree starTree = manager.getStarTree(collection); - if (starTree == null) + StarTreeConfig config = manager.getConfig(collection); + if (config == null) { - throw new NotFoundException("No tree for collection " + collection); + throw new NotFoundException("No collection " + collection); 
} - return starTree.getConfig(); + return config; } @DELETE @Path("/{collection}") - @Timed - public Response deleteCollection(@PathParam("collection") String collection) throws IOException + public Response deleteCollection(@PathParam("collection") String collection) throws Exception { - StarTree starTree = manager.getStarTree(collection); - if (starTree == null) + StarTreeConfig config = manager.getConfig(collection); + if (config == null) { - throw new NotFoundException("No tree for collection " + collection); + throw new NotFoundException("No collection " + collection); } - manager.remove(collection); - - File collectionDir = new File(rootDir, collection); - - if (!collectionDir.isAbsolute()) - { - throw new WebApplicationException(Response.Status.BAD_REQUEST); - } + manager.close(collection); try { - FileUtils.forceDelete(collectionDir); + dataUpdateManager.deleteCollection(collection); } - catch (FileNotFoundException fe) + catch (FileNotFoundException e) { - throw new NotFoundException("Collection "+collection+" not found"); + throw new NotFoundException(e.getMessage()); } return Response.noContent().build(); @@ -126,7 +113,6 @@ public Response deleteCollection(@PathParam("collection") String collection) thr @POST @Path("/{collection}") - @Timed @Consumes(MediaType.APPLICATION_OCTET_STREAM) public Response postConfig(@PathParam("collection") String collection, byte[] configBytes) throws IOException { @@ -151,7 +137,6 @@ public Response postConfig(@PathParam("collection") String collection, byte[] co @GET @Path("/{collection}/kafkaConfig") - @Timed public byte[] getKafkaConfig(@PathParam("collection") String collection) throws Exception { File kafkaConfigFile = new File(new File(rootDir, collection), StarTreeConstants.KAFKA_CONFIG_FILE_NAME); @@ -169,7 +154,6 @@ public byte[] getKafkaConfig(@PathParam("collection") String collection) throws @POST @Path("/{collection}/kafkaConfig") - @Timed public Response postKafkaConfig(@PathParam("collection") String 
collection, byte[] kafkaConfigBytes) throws Exception { File collectionDir = new File(rootDir, collection); @@ -191,7 +175,6 @@ public Response postKafkaConfig(@PathParam("collection") String collection, byte @DELETE @Path("/{collection}/kafkaConfig") - @Timed public Response deleteKafkaConfig(@PathParam("collection") String collection) throws Exception { File collectionDir = new File(rootDir, collection); @@ -211,95 +194,22 @@ public Response deleteKafkaConfig(@PathParam("collection") String collection) th return Response.noContent().build(); } - @GET - @Path("/{collection}/stats") - @Timed - public StarTreeStats getStats(@PathParam("collection") String collection) - { - StarTree starTree = manager.getStarTree(collection); - if (starTree == null) - { - throw new NotFoundException("No tree for collection " + collection); - } - return starTree.getStats(); - } - - @GET - @Path("/{collection}/starTree") - @Timed - @Produces(MediaType.APPLICATION_OCTET_STREAM) - public Response getStarTree(@PathParam("collection") String collection) throws IOException - { - StarTree starTree = manager.getStarTree(collection); - if (starTree == null) - { - throw new NotFoundException("No tree for collection " + collection); - } - - // Serialize - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - ObjectOutputStream oos = new ObjectOutputStream(baos); - oos.writeObject(starTree.getRoot()); - oos.flush(); - - return Response.ok(baos.toByteArray(), MediaType.APPLICATION_OCTET_STREAM).build(); - } - @POST - @Path("/{collection}/starTree") - @Timed + @Path("/{collection}/data/{minTime}/{maxTime}") @Consumes(MediaType.APPLICATION_OCTET_STREAM) - public Response postStarTree(@PathParam("collection") String collection, byte[] starTreeBytes) throws IOException - { - File collectionDir = new File(rootDir, collection); - if (!collectionDir.exists()) - { - FileUtils.forceMkdir(collectionDir); - } - - File starTreeFile = new File(collectionDir, StarTreeConstants.TREE_FILE_NAME); - - if 
(!starTreeFile.exists()) - { - FileUtils.copyInputStreamToFile(new ByteArrayInputStream(starTreeBytes), starTreeFile); - } - else - { - throw new ConflictException(starTreeFile.getPath()+" already exists. A DELETE of /collections/{collection} is required first"); - } - - return Response.ok().build(); - } - - @POST - @Path("/{collection}/data") @Timed - @Consumes(MediaType.APPLICATION_OCTET_STREAM) public Response postData(@PathParam("collection") String collection, - @QueryParam("includeDimensions") boolean includeDimensions, + @PathParam("minTime") long minTimeMillis, + @PathParam("maxTime") long maxTimeMillis, + @QueryParam("schedule") @DefaultValue("UNKNOWN") String schedule, byte[] dataBytes) throws Exception { - File collectionDir = new File(rootDir, collection); - if (!collectionDir.exists()) - { - FileUtils.forceMkdir(collectionDir); - } - - File dataDir = new File(collectionDir, StarTreeConstants.DATA_DIR_NAME); - if (!dataDir.exists()) - { - FileUtils.forceMkdir(dataDir); - } - - // TODO: This only works for StarTreeRecordStoreFixedImpl - if we want to be generic, record store should do following logic - - // n.b. for partial updates, we will not include dimensions - Set blacklist = includeDimensions ? 
null : ImmutableSet.of(StarTreeConstants.DIMENSION_STORE); - - // Extract into data dir, stripping first two path components - TarUtils.extractGzippedTarArchive(new ByteArrayInputStream(dataBytes), dataDir, 2, blacklist); - - lastPostDataMillis.set(System.currentTimeMillis()); + dataUpdateManager.updateData( + collection, + schedule, + new DateTime(minTimeMillis), + new DateTime(maxTimeMillis), + dataBytes); return Response.ok().build(); } diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/DashboardResource.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/DashboardResource.java index f0fc214e0fbe..88f8d98c91ef 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/DashboardResource.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/DashboardResource.java @@ -1,8 +1,11 @@ package com.linkedin.thirdeye.resource; import com.codahale.metrics.annotation.Timed; +import com.google.common.collect.Range; import com.linkedin.thirdeye.api.StarTree; +import com.linkedin.thirdeye.api.StarTreeConfig; import com.linkedin.thirdeye.api.StarTreeManager; +import com.linkedin.thirdeye.util.QueryUtils; import com.linkedin.thirdeye.views.DefaultDashboardView; import com.linkedin.thirdeye.views.DefaultLandingView; import com.linkedin.thirdeye.views.DefaultSelectionView; @@ -10,6 +13,7 @@ import com.linkedin.thirdeye.views.HeatMapComponentView; import com.linkedin.thirdeye.views.TimeSeriesComponentView; import com.sun.jersey.api.NotFoundException; +import org.joda.time.DateTime; import javax.ws.rs.GET; import javax.ws.rs.Path; @@ -47,7 +51,6 @@ public DashboardResource(StarTreeManager starTreeManager, } @GET - @Timed public DefaultSelectionView getDefaultSelectionView() { List collections = new ArrayList(starTreeManager.getCollections()); @@ -64,16 +67,17 @@ public DefaultSelectionView getDefaultSelectionView() @GET @Path("/{collection}") - @Timed public DefaultLandingView 
getDefaultLandingView(@PathParam("collection") String collection) { - StarTree starTree = starTreeManager.getStarTree(collection); - if (starTree == null) + StarTreeConfig config = starTreeManager.getConfig(collection); + if (config == null) { throw new NotFoundException("No collection " + collection); } - return new DefaultLandingView(starTree.getConfig(), feedbackAddress); + Range dataTimeRange = QueryUtils.getDataTimeRange(starTreeManager.getStarTrees(collection).values()); + + return new DefaultLandingView(config, feedbackAddress, dataTimeRange.lowerEndpoint(), dataTimeRange.upperEndpoint()); } @GET @@ -91,8 +95,8 @@ public DefaultDashboardView getDefaultDashboardView( @PathParam("normalized") String normalized, @Context UriInfo uriInfo) throws Exception { - StarTree starTree = starTreeManager.getStarTree(collection); - if (starTree == null) + StarTreeConfig config = starTreeManager.getConfig(collection); + if (config == null) { throw new NotFoundException("No collection " + collection); } @@ -170,7 +174,9 @@ public DefaultDashboardView getDefaultDashboardView( } } - return new DefaultDashboardView(starTree.getConfig(), + Range dataTimeRange = QueryUtils.getDataTimeRange(starTreeManager.getStarTrees(collection).values()); + + return new DefaultDashboardView(config, metric, endMillis, disabledDimensions, @@ -178,6 +184,8 @@ public DefaultDashboardView getDefaultDashboardView( timeSeriesComponentView, funnelComponentView, heatMapComponentView, - feedbackAddress); + feedbackAddress, + dataTimeRange.lowerEndpoint(), + dataTimeRange.upperEndpoint()); } } diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/FunnelResource.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/FunnelResource.java index 7d6a0accca4d..ea60102be4c6 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/FunnelResource.java +++ 
b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/FunnelResource.java @@ -2,10 +2,10 @@ import com.codahale.metrics.annotation.Timed; import com.linkedin.thirdeye.api.DimensionKey; -import com.linkedin.thirdeye.api.DimensionSpec; import com.linkedin.thirdeye.api.MetricSpec; import com.linkedin.thirdeye.api.MetricTimeSeries; import com.linkedin.thirdeye.api.StarTree; +import com.linkedin.thirdeye.api.StarTreeConfig; import com.linkedin.thirdeye.api.StarTreeManager; import com.linkedin.thirdeye.api.StarTreeStats; import com.linkedin.thirdeye.api.TimeRange; @@ -28,9 +28,11 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.TimeUnit; @Path("/funnel") @@ -46,7 +48,6 @@ public FunnelResource(StarTreeManager starTreeManager) @GET @Path("/{type}/{collection}/{metrics}/{startMillis}/{endMillis}{aggregate:(/aggregate/[^/]+?)?}{movingAverage:(/movingAverage/[^/]+?)?}") - @Timed public FunnelComponentView getFunnelView( @PathParam("type") String type, @PathParam("collection") String collection, @@ -62,7 +63,6 @@ public FunnelComponentView getFunnelView( @GET @Path("/{type}/{collection}/{metrics}/{startMillis}/{endMillis}{aggregate:(/aggregate/[^/]+?)?}{movingAverage:(/movingAverage/[^/]+?)?}") - @Timed @Produces(MediaType.APPLICATION_JSON) public List getFunnelViewJson( @PathParam("type") String type, @@ -74,11 +74,12 @@ public List getFunnelViewJson( @PathParam("movingAverage") String movingAverage, @Context UriInfo uriInfo) throws Exception { - StarTree starTree = starTreeManager.getStarTree(collection); - if (starTree == null) + StarTreeConfig config = starTreeManager.getConfig(collection); + if (config == null) { throw new NotFoundException("No collection " + collection); } + Collection starTrees = starTreeManager.getStarTrees(collection).values(); Funnel.Type funnelType; try @@ -102,7 +103,7 @@ 
public List getFunnelViewJson( // Get top metric spec MetricSpec topMetric = null; Map metricSpecs = new HashMap(); - for (MetricSpec metricSpec : starTree.getConfig().getMetrics()) + for (MetricSpec metricSpec : config.getMetrics()) { if (metricSpec.getName().equals(funnelMetrics.get(0))) { @@ -117,9 +118,9 @@ public List getFunnelViewJson( } int bucketSize - = starTree.getConfig().getTime().getBucket().getSize(); + = config.getTime().getBucket().getSize(); TimeUnit bucketUnit - = starTree.getConfig().getTime().getBucket().getUnit(); + = config.getTime().getBucket().getUnit(); // Should use aggregate? Long aggregateValue = "".equals(aggregate) @@ -142,20 +143,8 @@ public List getFunnelViewJson( end = (end / aggregateValue) * aggregateValue; } - // Check time - StarTreeStats stats = starTree.getStats(); - if (!new TimeRange(stats.getMinTime(), stats.getMaxTime()).contains(new TimeRange(start, end))) - { - throw new NotFoundException( - "Query (" + QueryUtils.getDateTime(start, bucketSize, bucketUnit) + ", " - + QueryUtils.getDateTime(end, bucketSize, bucketUnit) - + ") not in range (" - + QueryUtils.getDateTime(stats.getMinTime(), bucketSize, bucketUnit) - + ", " + QueryUtils.getDateTime(stats.getMaxTime(), bucketSize, bucketUnit) + ")"); - } - - //Check dimensions - String invalidDimension = QueryUtils.checkDimensions(starTree, uriInfo); + //Check dimensions + String invalidDimension = QueryUtils.checkDimensions(config, uriInfo); if (invalidDimension != null) { throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST). 
@@ -166,19 +155,19 @@ public List getFunnelViewJson( Map result; if (movingAverageValue == null && aggregateValue == null) { - result = QueryUtils.doQuery(starTree, start, end, uriInfo); + result = QueryUtils.doQuery(starTrees, start, end, uriInfo); } else if (movingAverageValue != null && aggregateValue == null) { - result = QueryUtils.doQuery(starTree, start - movingAverageValue, end, uriInfo); + result = QueryUtils.doQuery(starTrees, start - movingAverageValue, end, uriInfo); } else if (movingAverageValue == null && aggregateValue != null) { - result = QueryUtils.doQuery(starTree, start, end + aggregateValue, uriInfo); + result = QueryUtils.doQuery(starTrees, start, end + aggregateValue, uriInfo); } else { - result = QueryUtils.doQuery(starTree, start - (movingAverageValue / aggregateValue) * aggregateValue, end + aggregateValue, uriInfo); + result = QueryUtils.doQuery(starTrees, start - (movingAverageValue / aggregateValue) * aggregateValue, end + aggregateValue, uriInfo); } // Compose funnels @@ -263,7 +252,7 @@ else if (movingAverageValue == null && aggregateValue != null) } } - funnels.add(new Funnel(QueryUtils.convertDimensionKey(starTree.getConfig().getDimensions(), entry.getKey()), rows)); + funnels.add(new Funnel(QueryUtils.convertDimensionKey(config.getDimensions(), entry.getKey()), rows)); } return funnels; diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/HeatMapResource.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/HeatMapResource.java index ac370f0f227d..a7066bda2dd1 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/HeatMapResource.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/HeatMapResource.java @@ -6,6 +6,7 @@ import com.linkedin.thirdeye.api.MetricTimeSeries; import com.linkedin.thirdeye.api.MetricType; import com.linkedin.thirdeye.api.StarTree; +import com.linkedin.thirdeye.api.StarTreeConfig; import 
com.linkedin.thirdeye.api.StarTreeManager; import com.linkedin.thirdeye.api.TimeRange; import com.linkedin.thirdeye.heatmap.ContributionDifferenceHeatMap; @@ -29,7 +30,6 @@ import javax.ws.rs.core.Response; import javax.ws.rs.core.UriInfo; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -51,7 +51,6 @@ public HeatMapResource(StarTreeManager starTreeManager, ExecutorService parallel @GET @Path("/{type}/{collection}/{metric}/{startMillis}/{endMillis}{aggregate:(/aggregate/[^/]+?)?}{movingAverage:(/movingAverage/[^/]+?)?}") - @Timed public HeatMapComponentView getHeatMapComponentView( @PathParam("type") String type, @PathParam("collection") String collection, @@ -62,14 +61,19 @@ public HeatMapComponentView getHeatMapComponentView( @PathParam("movingAverage") String movingAverage, final @Context UriInfo uriInfo) throws Exception { + StarTreeConfig config = starTreeManager.getConfig(collection); + if (config == null) + { + throw new NotFoundException("No collection " + collection); + } + return new HeatMapComponentView(getHeatMapComponentViewJson( type, collection, metric, startMillis, endMillis, aggregate, movingAverage, uriInfo), - starTreeManager.getStarTree(collection).getConfig().getDimensions()); + config.getDimensions()); } @GET @Path("/{type}/{collection}/{metric}/{startMillis}/{endMillis}{aggregate:(/aggregate/[^/]+?)?}{movingAverage:(/movingAverage/[^/]+?)?}") - @Timed @Produces(MediaType.APPLICATION_JSON) public Map> getHeatMapComponentViewJson( @PathParam("type") String type, @@ -81,16 +85,16 @@ public Map> getHeatMapComponentViewJson( @PathParam("movingAverage") String movingAverage, final @Context UriInfo uriInfo) throws Exception { - final StarTree starTree = starTreeManager.getStarTree(collection); - if (starTree == null) + final StarTreeConfig config = starTreeManager.getConfig(collection); + if (config == null) { throw new NotFoundException("No collection " + collection); } int bucketSize - = 
starTree.getConfig().getTime().getBucket().getSize(); + = config.getTime().getBucket().getSize(); TimeUnit bucketUnit - = starTree.getConfig().getTime().getBucket().getUnit(); + = config.getTime().getBucket().getUnit(); Long aggregateValue = "".equals(aggregate) ? null @@ -132,7 +136,7 @@ else if (movingAverageValue == null && aggregateValue != null) } //Check dimensions - String invalidDimension = QueryUtils.checkDimensions(starTree, uriInfo); + String invalidDimension = QueryUtils.checkDimensions(config, uriInfo); if (invalidDimension != null) { throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST). @@ -144,10 +148,15 @@ else if (movingAverageValue == null && aggregateValue != null) Map> data = new HashMap>(); - for (DimensionSpec dimension : starTree.getConfig().getDimensions()) + for (DimensionSpec dimension : config.getDimensions()) { - Map timeSeriesByDimensionValue - = QueryUtils.groupByQuery(parallelQueryExecutor, starTree, dimension.getName(), timeRange, uriInfo); + Map timeSeriesByDimensionValue = QueryUtils.groupByQuery( + parallelQueryExecutor, + starTreeManager.getStarTrees(collection).values(), + config, + dimension.getName(), + timeRange, + uriInfo); for (Map.Entry entry : timeSeriesByDimensionValue.entrySet()) { @@ -177,7 +186,7 @@ else if (movingAverageValue == null && aggregateValue != null) // Get metric type MetricType metricType = null; - for (MetricSpec metricSpec : starTree.getConfig().getMetrics()) + for (MetricSpec metricSpec : config.getMetrics()) { if (metricSpec.getName().equals(metric)) { diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/TimeSeriesResource.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/TimeSeriesResource.java index 484678f82132..cc7edd18eb05 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/TimeSeriesResource.java +++ 
b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/resource/TimeSeriesResource.java @@ -2,14 +2,12 @@ import com.codahale.metrics.annotation.Timed; import com.linkedin.thirdeye.api.DimensionKey; -import com.linkedin.thirdeye.api.DimensionSpec; import com.linkedin.thirdeye.api.MetricSpec; import com.linkedin.thirdeye.api.MetricTimeSeries; import com.linkedin.thirdeye.api.MetricType; import com.linkedin.thirdeye.api.StarTree; +import com.linkedin.thirdeye.api.StarTreeConfig; import com.linkedin.thirdeye.api.StarTreeManager; -import com.linkedin.thirdeye.api.StarTreeStats; -import com.linkedin.thirdeye.api.TimeRange; import com.linkedin.thirdeye.impl.MetricTimeSeriesUtils; import com.linkedin.thirdeye.impl.NumberUtils; import com.linkedin.thirdeye.timeseries.FlotTimeSeries; @@ -18,9 +16,6 @@ import com.linkedin.thirdeye.views.TimeSeriesComponentView; import com.sun.jersey.api.NotFoundException; -import org.joda.time.DateTime; -import org.joda.time.DateTimeZone; - import javax.ws.rs.GET; import javax.ws.rs.Path; import javax.ws.rs.PathParam; @@ -33,6 +28,7 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -53,7 +49,6 @@ public TimeSeriesResource(StarTreeManager starTreeManager) @GET @Path("/{collection}/{metrics}/{startMillis}/{endMillis}{aggregate:(/aggregate/[^/]+?)?}{movingAverage:(/movingAverage/[^/]+?)?}{normalized:(/normalized/[^/]+?)?}") - @Timed public TimeSeriesComponentView getTimeSeriesComponentView( @PathParam("collection") String collection, @PathParam("metrics") String metrics, @@ -82,16 +77,15 @@ public List getTimeSeriesComponentViewJson( @PathParam("normalized") String normalized, @Context UriInfo uriInfo) throws Exception { - StarTree starTree = starTreeManager.getStarTree(collection); - if (starTree == null) + StarTreeConfig config = starTreeManager.getConfig(collection); + if (config == null) { throw new 
NotFoundException("No collection " + collection); } + Collection starTrees = starTreeManager.getStarTrees(collection).values(); - int bucketSize - = starTree.getConfig().getTime().getBucket().getSize(); - TimeUnit bucketUnit - = starTree.getConfig().getTime().getBucket().getUnit(); + int bucketSize = config.getTime().getBucket().getSize(); + TimeUnit bucketUnit = config.getTime().getBucket().getUnit(); // Should use aggregate? Long aggregateValue = "".equals(aggregate) @@ -138,20 +132,8 @@ public List getTimeSeriesComponentViewJson( long adjustedStartMillis = TimeUnit.MILLISECONDS.convert(start * bucketSize, bucketUnit); long adjustedEndMillis = TimeUnit.MILLISECONDS.convert(end * bucketSize, bucketUnit); - // Check time - StarTreeStats stats = starTree.getStats(); - if (!new TimeRange(stats.getMinTime(), stats.getMaxTime()).contains(new TimeRange(start, end))) - { - throw new NotFoundException( - "Query (" + QueryUtils.getDateTime(start, bucketSize, bucketUnit) + ", " - + QueryUtils.getDateTime(end, bucketSize, bucketUnit) - + ") not in range (" - + QueryUtils.getDateTime(stats.getMinTime(), bucketSize, bucketUnit) - + ", " + QueryUtils.getDateTime(stats.getMaxTime(), bucketSize, bucketUnit) + ")"); - } - //Check dimensions - String invalidDimension = QueryUtils.checkDimensions(starTree, uriInfo); + String invalidDimension = QueryUtils.checkDimensions(config, uriInfo); if (invalidDimension != null) { throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST). 
@@ -162,19 +144,19 @@ public List getTimeSeriesComponentViewJson( Map result; if (movingAverageValue == null && aggregateValue == null) { - result = QueryUtils.doQuery(starTree, start, end, uriInfo); + result = QueryUtils.doQuery(starTrees, start, end, uriInfo); } else if (movingAverageValue != null && aggregateValue == null) { - result = QueryUtils.doQuery(starTree, start - movingAverageValue, end, uriInfo); + result = QueryUtils.doQuery(starTrees, start - movingAverageValue, end, uriInfo); } else if (movingAverageValue == null && aggregateValue != null) { - result = QueryUtils.doQuery(starTree, start, end + aggregateValue, uriInfo); + result = QueryUtils.doQuery(starTrees, start, end + aggregateValue, uriInfo); } else { - result = QueryUtils.doQuery(starTree, start - (movingAverageValue / aggregateValue) * aggregateValue, end + aggregateValue, uriInfo); + result = QueryUtils.doQuery(starTrees, start - (movingAverageValue / aggregateValue) * aggregateValue, end + aggregateValue, uriInfo); } // Compose result @@ -215,7 +197,7 @@ else if (movingAverageValue == null && aggregateValue != null) } Set allMetrics = new HashSet(); - for (MetricSpec metricSpec : starTree.getConfig().getMetrics()) + for (MetricSpec metricSpec : config.getMetrics()) { allMetrics.add(metricSpec.getName()); } @@ -278,7 +260,7 @@ else if (movingAverageValue == null && aggregateValue != null) flotSeries.add(new FlotTimeSeries( metricName, label, - QueryUtils.convertDimensionKey(starTree.getConfig().getDimensions(), entry.getKey()), + QueryUtils.convertDimensionKey(config.getDimensions(), entry.getKey()), data, adjustedStartMillis, adjustedEndMillis)); diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/ExpireTask.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/ExpireTask.java deleted file mode 100644 index 37f48154acb1..000000000000 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/ExpireTask.java +++ /dev/null @@ -1,145 
+0,0 @@ -package com.linkedin.thirdeye.task; - -import com.google.common.base.Joiner; -import com.google.common.collect.ImmutableMultimap; -import com.linkedin.thirdeye.api.StarTree; -import com.linkedin.thirdeye.api.StarTreeConstants; -import com.linkedin.thirdeye.api.StarTreeManager; -import com.linkedin.thirdeye.impl.storage.MetricIndexEntry; -import com.linkedin.thirdeye.impl.storage.StorageUtils; -import com.sun.jersey.api.NotFoundException; -import io.dropwizard.servlets.tasks.Task; -import org.apache.commons.io.FileUtils; -import org.joda.time.DateTime; -import org.joda.time.DateTimeZone; - -import java.io.File; -import java.io.FileFilter; -import java.io.PrintWriter; -import java.util.Collection; -import java.util.List; -import java.util.concurrent.TimeUnit; - -public class ExpireTask extends Task -{ - private static final Joiner PATH_JOINER = Joiner.on(File.separator); - - private final StarTreeManager manager; - private final File rootDir; - - public ExpireTask(StarTreeManager manager, File rootDir) - { - super("expire"); - this.manager = manager; - this.rootDir = rootDir; - } - - @Override - public void execute(ImmutableMultimap params, PrintWriter printWriter) throws Exception - { - Collection collectionParam = params.get("collection"); - if (collectionParam == null || collectionParam.isEmpty()) - { - throw new IllegalArgumentException("Must provide collection"); - } - String collection = collectionParam.iterator().next(); - - StarTree starTree = manager.getStarTree(collection); - if (starTree == null) - { - throw new NotFoundException("No star tree for collection " + collection); - } - - File metricStoreDir = new File(PATH_JOINER.join( - rootDir.getAbsolutePath(), collection, StarTreeConstants.DATA_DIR_NAME, StarTreeConstants.METRIC_STORE)); - - File[] metricIndexFiles = metricStoreDir.listFiles(new FileFilter() - { - @Override - public boolean accept(File pathname) - { - return pathname.getName().endsWith(StarTreeConstants.INDEX_FILE_SUFFIX); - } - 
}); - - long retentionPeriod - = TimeUnit.MILLISECONDS.convert(starTree.getConfig().getTime().getRetention().getSize(), - starTree.getConfig().getTime().getRetention().getUnit()); - - long oldestValidTime = System.currentTimeMillis() - retentionPeriod; - - if (metricIndexFiles != null) - { - for (File metricIndexFile : metricIndexFiles) - { - List indexEntries = StorageUtils.readMetricIndex(metricIndexFile); - - boolean expired = true; - - Long minTime = null; - Long maxTime = null; - - for (MetricIndexEntry indexEntry : indexEntries) - { - long startTimeMillis - = TimeUnit.MILLISECONDS.convert(indexEntry.getTimeRange().getStart(), - starTree.getConfig().getTime().getBucket().getUnit()); - - long endTimeMillis - = TimeUnit.MILLISECONDS.convert(indexEntry.getTimeRange().getEnd(), - starTree.getConfig().getTime().getBucket().getUnit()); - - if (startTimeMillis >= 0 && (minTime == null || startTimeMillis < minTime)) - { - minTime = startTimeMillis; - } - - if (endTimeMillis >= 0 && (maxTime == null || endTimeMillis > maxTime)) - { - maxTime = endTimeMillis; - } - - if (endTimeMillis >= oldestValidTime) - { - expired = false; - break; - } - } - - // Only delete file if all time ranges in index entry are expired - if (expired) - { - FileUtils.forceDelete(metricIndexFile); - - FileUtils.forceDelete(getBufferFile(metricIndexFile)); - - printWriter.print("Expired metric file ID " + getFileId(metricIndexFile)); - - if (minTime != null && maxTime != null) - { - printWriter.print("("); - printWriter.print(new DateTime(minTime, DateTimeZone.UTC)); - printWriter.print(", "); - printWriter.print(new DateTime(maxTime, DateTimeZone.UTC)); - printWriter.print(")"); - } - - printWriter.println(); - printWriter.flush(); - } - } - } - } - - private static File getBufferFile(File metricIndexFile) - { - return new File(metricIndexFile.getParent(), getFileId(metricIndexFile) + StarTreeConstants.BUFFER_FILE_SUFFIX); - } - - private static String getFileId(File metricIndexFile) - { - 
return metricIndexFile.getName() - .substring(0, metricIndexFile.getName() - .lastIndexOf(StarTreeConstants.INDEX_FILE_SUFFIX)); - } -} diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/KafkaStartTask.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/KafkaStartTask.java index 508cdfaacd4d..8b0d1b9f1b9d 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/KafkaStartTask.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/KafkaStartTask.java @@ -1,7 +1,7 @@ package com.linkedin.thirdeye.task; import com.google.common.collect.ImmutableMultimap; -import com.linkedin.thirdeye.managed.KafkaConsumerManager; +import com.linkedin.thirdeye.managed.ThirdEyeKafkaConsumerManager; import io.dropwizard.servlets.tasks.Task; import java.io.PrintWriter; @@ -9,9 +9,9 @@ public class KafkaStartTask extends Task { - private final KafkaConsumerManager kafkaConsumerManager; + private final ThirdEyeKafkaConsumerManager kafkaConsumerManager; - public KafkaStartTask(KafkaConsumerManager kafkaConsumerManager) + public KafkaStartTask(ThirdEyeKafkaConsumerManager kafkaConsumerManager) { super("kafkaStart"); this.kafkaConsumerManager = kafkaConsumerManager; diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/KafkaStopTask.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/KafkaStopTask.java index 7a15c43bebd7..b7fcde2243ae 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/KafkaStopTask.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/KafkaStopTask.java @@ -1,7 +1,7 @@ package com.linkedin.thirdeye.task; import com.google.common.collect.ImmutableMultimap; -import com.linkedin.thirdeye.managed.KafkaConsumerManager; +import com.linkedin.thirdeye.managed.ThirdEyeKafkaConsumerManager; import io.dropwizard.servlets.tasks.Task; import java.io.PrintWriter; @@ -9,9 +9,9 @@ public class 
KafkaStopTask extends Task { - private final KafkaConsumerManager kafkaConsumerManager; + private final ThirdEyeKafkaConsumerManager kafkaConsumerManager; - public KafkaStopTask(KafkaConsumerManager kafkaConsumerManager) + public KafkaStopTask(ThirdEyeKafkaConsumerManager kafkaConsumerManager) { super("kafkaStop"); this.kafkaConsumerManager = kafkaConsumerManager; diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/RestoreTask.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/RestoreTask.java index e43ffa5df666..1e340056b176 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/RestoreTask.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/RestoreTask.java @@ -30,6 +30,5 @@ public void execute(ImmutableMultimap params, PrintWriter printW } String collection = collectionParam.iterator().next(); manager.restore(rootDir, collection); - manager.open(collection); } } diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/ViewDimensionIndexTask.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/ViewDimensionIndexTask.java index 751f502038d5..93ad17ae463f 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/ViewDimensionIndexTask.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/ViewDimensionIndexTask.java @@ -36,7 +36,7 @@ public void execute(ImmutableMultimap params, PrintWriter printW String collection = collectionParam.iterator().next(); File dimensionStoreDir = new File(PATH_JOINER.join( - rootDir.getAbsolutePath(), collection, StarTreeConstants.DATA_DIR_NAME, StarTreeConstants.DIMENSION_STORE)); + rootDir.getAbsolutePath(), collection, StarTreeConstants.DATA_DIR_PREFIX, StarTreeConstants.DIMENSION_STORE)); File[] dimensionIndexFiles = dimensionStoreDir.listFiles(new FileFilter() { diff --git 
a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/ViewMetricIndexTask.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/ViewMetricIndexTask.java index e7052d0a2031..9465ebdd3257 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/ViewMetricIndexTask.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/ViewMetricIndexTask.java @@ -36,7 +36,7 @@ public void execute(ImmutableMultimap params, PrintWriter printW String collection = collectionParam.iterator().next(); File metricStoreDir = new File(PATH_JOINER.join( - rootDir.getAbsolutePath(), collection, StarTreeConstants.DATA_DIR_NAME, StarTreeConstants.METRIC_STORE)); + rootDir.getAbsolutePath(), collection, StarTreeConstants.DATA_DIR_PREFIX, StarTreeConstants.METRIC_STORE)); File[] metricIndexFiles = metricStoreDir.listFiles(new FileFilter() { diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/ViewTreeTask.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/ViewTreeTask.java index eb5478171815..776d2626fa23 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/ViewTreeTask.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/task/ViewTreeTask.java @@ -2,8 +2,10 @@ import com.google.common.collect.ImmutableMultimap; import com.linkedin.thirdeye.api.StarTree; +import com.linkedin.thirdeye.api.StarTreeConfig; import com.linkedin.thirdeye.api.StarTreeManager; import com.linkedin.thirdeye.impl.StarTreeUtils; +import com.sun.jersey.api.NotFoundException; import io.dropwizard.servlets.tasks.Task; import java.io.PrintWriter; @@ -29,13 +31,16 @@ public void execute(ImmutableMultimap params, PrintWriter printW } String collection = collectionParam.iterator().next(); - StarTree starTree = manager.getStarTree(collection); - if (starTree == null) + StarTreeConfig config = manager.getConfig(collection); + if (config == null) { - throw new 
IllegalArgumentException("No star tree for collection " + collection); + throw new NotFoundException("No collection " + collection); } - StarTreeUtils.printNode(printWriter, starTree.getRoot(), 0); + for (StarTree starTree : manager.getStarTrees(collection).values()) + { + StarTreeUtils.printNode(printWriter, starTree.getRoot(), 0); + } printWriter.flush(); } diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/util/QueryUtils.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/util/QueryUtils.java index 44d03f51fae0..f2036aca4d75 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/util/QueryUtils.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/util/QueryUtils.java @@ -1,12 +1,15 @@ package com.linkedin.thirdeye.util; +import com.google.common.collect.Range; import com.linkedin.thirdeye.api.DimensionKey; import com.linkedin.thirdeye.api.DimensionSpec; import com.linkedin.thirdeye.api.MetricTimeSeries; import com.linkedin.thirdeye.api.StarTree; +import com.linkedin.thirdeye.api.StarTreeConfig; import com.linkedin.thirdeye.api.StarTreeConstants; import com.linkedin.thirdeye.api.StarTreeQuery; import com.linkedin.thirdeye.api.StarTreeStats; +import com.linkedin.thirdeye.api.TimeGranularity; import com.linkedin.thirdeye.api.TimeRange; import com.linkedin.thirdeye.impl.StarTreeUtils; import com.sun.jersey.api.NotFoundException; @@ -16,11 +19,12 @@ import javax.ws.rs.core.UriInfo; -import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; @@ -29,22 +33,35 @@ public class QueryUtils { - public static Map doQuery(StarTree starTree, long start, long end, UriInfo uriInfo) + public static Map doQuery(Collection starTrees, long start, long end, 
UriInfo uriInfo) { - // Expand queries - List queries - = StarTreeUtils.expandQueries(starTree, - UriUtils.createQueryBuilder(starTree, uriInfo) - .setTimeRange(new TimeRange(start, end)).build(starTree.getConfig())); - - // Filter queries - queries = StarTreeUtils.filterQueries(starTree.getConfig(), queries, uriInfo.getQueryParameters()); - - // Do queries - Map result = new HashMap(queries.size()); - for (StarTreeQuery query : queries) + Map result = new HashMap(); + + for (StarTree starTree : starTrees) { - result.put(query.getDimensionKey(), starTree.getTimeSeries(query)); + // Expand queries + List queries + = StarTreeUtils.expandQueries(starTree, + UriUtils.createQueryBuilder(starTree, uriInfo) + .setTimeRange(new TimeRange(start, end)).build(starTree.getConfig())); + + // Filter queries + queries = StarTreeUtils.filterQueries(starTree.getConfig(), queries, uriInfo.getQueryParameters()); + + // Do queries + for (StarTreeQuery query : queries) + { + MetricTimeSeries timeSeries = starTree.getTimeSeries(query); + MetricTimeSeries existingTimeSeries = result.get(query.getDimensionKey()); + if (existingTimeSeries == null) + { + result.put(query.getDimensionKey(), timeSeries); + } + else + { + existingTimeSeries.aggregate(timeSeries); + } + } } return result; @@ -64,54 +81,68 @@ public static Map convertDimensionKey(List dimens public static Map groupByQuery( ExecutorService parallelQueryExecutor, - final StarTree starTree, + final Collection starTrees, + StarTreeConfig config, String dimensionName, TimeRange timeRange, UriInfo uriInfo) throws InterruptedException, ExecutionException { - StarTreeQuery baseQuery = UriUtils.createQueryBuilder(starTree, uriInfo).setTimeRange(timeRange).build(starTree.getConfig()); + Map result = new HashMap(); - // Set target dimension to all - int dimensionIndex = -1; - for (int i = 0; i < starTree.getConfig().getDimensions().size(); i++) + for (final StarTree starTree : starTrees) { - if 
(starTree.getConfig().getDimensions().get(i).getName().equals(dimensionName)) + StarTreeQuery baseQuery = UriUtils.createQueryBuilder(starTree, uriInfo).setTimeRange(timeRange).build(config); + + // Set target dimension to all + int dimensionIndex = -1; + for (int i = 0; i < config.getDimensions().size(); i++) { - baseQuery.getDimensionKey().getDimensionValues()[i] = StarTreeConstants.ALL; - dimensionIndex = i; - break; + if (config.getDimensions().get(i).getName().equals(dimensionName)) + { + baseQuery.getDimensionKey().getDimensionValues()[i] = StarTreeConstants.ALL; + dimensionIndex = i; + break; + } + } + if (dimensionIndex < 0) + { + throw new NotFoundException("No dimension " + dimensionName); } - } - if (dimensionIndex < 0) - { - throw new NotFoundException("No dimension " + dimensionName); - } - // Generate all queries - List queries = StarTreeUtils.expandQueries(starTree, baseQuery); - queries = StarTreeUtils.filterQueries(starTree.getConfig(), queries, uriInfo.getQueryParameters()); + // Generate all queries + List queries = StarTreeUtils.expandQueries(starTree, baseQuery); + queries = StarTreeUtils.filterQueries(config, queries, uriInfo.getQueryParameters()); - // Do queries - Map> futures - = new HashMap>(queries.size()); - for (final StarTreeQuery query : queries) - { - futures.put(query, parallelQueryExecutor.submit(new Callable() + // Do queries + Map> futures + = new HashMap>(queries.size()); + for (final StarTreeQuery query : queries) { - @Override - public MetricTimeSeries call() throws Exception + futures.put(query, parallelQueryExecutor.submit(new Callable() { - return starTree.getTimeSeries(query); - } - })); - } + @Override + public MetricTimeSeries call() throws Exception + { + return starTree.getTimeSeries(query); + } + })); + } - // Compose result - // n.b. all dimension values in results will be distinct because "!" 
used for query - Map result = new HashMap(); - for (Map.Entry> entry : futures.entrySet()) - { - result.put(entry.getKey().getDimensionKey().getDimensionValues()[dimensionIndex], entry.getValue().get()); + // Compose result + // n.b. all dimension values in results will be distinct because "!" used for query + for (Map.Entry> entry : futures.entrySet()) + { + String dimensionValue = entry.getKey().getDimensionKey().getDimensionValues()[dimensionIndex]; + MetricTimeSeries existingTimeSeries = result.get(dimensionValue); + if (existingTimeSeries == null) + { + result.put(dimensionValue, entry.getValue().get()); + } + else + { + existingTimeSeries.aggregate(entry.getValue().get()); + } + } } return result; @@ -122,11 +153,11 @@ public static DateTime getDateTime(long time, long bucketSize, TimeUnit bucketUn return new DateTime(TimeUnit.MILLISECONDS.convert(time * bucketSize, bucketUnit), DateTimeZone.UTC); } - public static String checkDimensions(StarTree starTree, UriInfo uriInfo) + public static String checkDimensions(StarTreeConfig config, UriInfo uriInfo) { List allDimensions = new ArrayList(); - for (DimensionSpec dimensionSpec : starTree.getConfig().getDimensions()) + for (DimensionSpec dimensionSpec : config.getDimensions()) { allDimensions.add(dimensionSpec.getName()); } @@ -149,4 +180,33 @@ public static String checkDimensions(StarTree starTree, UriInfo uriInfo) return null; } + + public static Range getDataTimeRange(Collection starTrees) + { + long globalMinTimeMillis = -1; + long globalMaxTimeMillis = -1; + + if (starTrees != null) + { + for (StarTree starTree : starTrees) + { + TimeGranularity bucket = starTree.getConfig().getTime().getBucket(); + StarTreeStats stats = starTree.getStats(); + long minTimeMillis = TimeUnit.MILLISECONDS.convert(stats.getMinTime() * bucket.getSize(), bucket.getUnit()); + long maxTimeMillis = TimeUnit.MILLISECONDS.convert(stats.getMaxTime() * bucket.getSize(), bucket.getUnit()); + + if (globalMinTimeMillis == -1 || 
minTimeMillis < globalMinTimeMillis) + { + globalMinTimeMillis = minTimeMillis; + } + + if (globalMaxTimeMillis == -1 || maxTimeMillis > globalMaxTimeMillis) + { + globalMaxTimeMillis = maxTimeMillis; + } + } + } + + return Range.closed(new DateTime(globalMinTimeMillis), new DateTime(globalMaxTimeMillis)); + } } diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/views/DefaultDashboardView.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/views/DefaultDashboardView.java index 5662b28433e0..4631e310e5c9 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/views/DefaultDashboardView.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/views/DefaultDashboardView.java @@ -5,6 +5,9 @@ import com.linkedin.thirdeye.funnel.Funnel; import com.linkedin.thirdeye.heatmap.HeatMapCell; import io.dropwizard.views.View; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.joda.time.format.ISODateTimeFormat; import java.io.IOException; import java.util.ArrayList; @@ -22,6 +25,8 @@ public class DefaultDashboardView extends View private final FunnelComponentView funnelComponentView; private final HeatMapComponentView heatMapComponentView; private final String feedbackAddress; + private final DateTime minTime; + private final DateTime maxTime; public DefaultDashboardView(StarTreeConfig config, String metricName, @@ -31,7 +36,9 @@ public DefaultDashboardView(StarTreeConfig config, TimeSeriesComponentView timeSeriesComponentView, FunnelComponentView funnelComponentView, HeatMapComponentView heatMapComponentView, - String feedbackAddress) + String feedbackAddress, + DateTime minTime, + DateTime maxTime) { super("default-dashboard-view.ftl"); this.config = config; @@ -43,6 +50,8 @@ public DefaultDashboardView(StarTreeConfig config, this.funnelComponentView = funnelComponentView; this.heatMapComponentView = heatMapComponentView; this.feedbackAddress = feedbackAddress; + this.minTime = 
minTime; + this.maxTime = maxTime; } public String getCollection() @@ -110,4 +119,14 @@ public List getFunnels() } return null; } + + public String getMaxTime() + { + return maxTime.toDateTime(DateTimeZone.UTC).toString(); + } + + public String getMinTime() + { + return minTime.toDateTime(DateTimeZone.UTC).toString(); + } } diff --git a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/views/DefaultLandingView.java b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/views/DefaultLandingView.java index 9d4247d90a43..439bc6b64935 100644 --- a/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/views/DefaultLandingView.java +++ b/thirdeye/thirdeye-server/src/main/java/com/linkedin/thirdeye/views/DefaultLandingView.java @@ -3,6 +3,8 @@ import com.linkedin.thirdeye.api.MetricSpec; import com.linkedin.thirdeye.api.StarTreeConfig; import io.dropwizard.views.View; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; import java.util.ArrayList; import java.util.List; @@ -11,12 +13,16 @@ public class DefaultLandingView extends View { private final StarTreeConfig config; private final String feedbackAddress; + private final DateTime minTime; + private final DateTime maxTime; - public DefaultLandingView(StarTreeConfig config, String feedbackAddress) + public DefaultLandingView(StarTreeConfig config, String feedbackAddress, DateTime minTime, DateTime maxTime) { super("default-landing-view.ftl"); this.config = config; this.feedbackAddress = feedbackAddress; + this.minTime = minTime; + this.maxTime = maxTime; } public String getCollection() @@ -50,4 +56,14 @@ public List getMetricNames() return metricNames; } + + public String getMaxTime() + { + return maxTime.toDateTime(DateTimeZone.UTC).toString(); + } + + public String getMinTime() + { + return minTime.toDateTime(DateTimeZone.UTC).toString(); + } } diff --git a/thirdeye/thirdeye-server/src/main/resources/assets/stylesheets/dashboard.css 
b/thirdeye/thirdeye-server/src/main/resources/assets/stylesheets/dashboard.css index 89561ec95304..6922c13a4361 100644 --- a/thirdeye/thirdeye-server/src/main/resources/assets/stylesheets/dashboard.css +++ b/thirdeye/thirdeye-server/src/main/resources/assets/stylesheets/dashboard.css @@ -83,3 +83,7 @@ input[type=number] { #selection-area { text-align: center; } + +.nav-time-range { + font-size: 10pt; +} diff --git a/thirdeye/thirdeye-server/src/main/resources/com/linkedin/thirdeye/views/default-dashboard-view.ftl b/thirdeye/thirdeye-server/src/main/resources/com/linkedin/thirdeye/views/default-dashboard-view.ftl index 01cde825ecae..078516700fa0 100644 --- a/thirdeye/thirdeye-server/src/main/resources/com/linkedin/thirdeye/views/default-dashboard-view.ftl +++ b/thirdeye/thirdeye-server/src/main/resources/com/linkedin/thirdeye/views/default-dashboard-view.ftl @@ -1,6 +1,8 @@ + + diff --git a/thirdeye/thirdeye-server/src/main/resources/com/linkedin/thirdeye/views/default-landing-view.ftl b/thirdeye/thirdeye-server/src/main/resources/com/linkedin/thirdeye/views/default-landing-view.ftl index 47c65047b2d1..a14dbbc38b7f 100644 --- a/thirdeye/thirdeye-server/src/main/resources/com/linkedin/thirdeye/views/default-landing-view.ftl +++ b/thirdeye/thirdeye-server/src/main/resources/com/linkedin/thirdeye/views/default-landing-view.ftl @@ -1,6 +1,8 @@ + + diff --git a/thirdeye/thirdeye-server/src/main/resources/com/linkedin/thirdeye/views/default-selection-view.ftl b/thirdeye/thirdeye-server/src/main/resources/com/linkedin/thirdeye/views/default-selection-view.ftl index 0caf456527f3..f5be2a90d220 100644 --- a/thirdeye/thirdeye-server/src/main/resources/com/linkedin/thirdeye/views/default-selection-view.ftl +++ b/thirdeye/thirdeye-server/src/main/resources/com/linkedin/thirdeye/views/default-selection-view.ftl @@ -1,6 +1,8 @@ + + diff --git a/thirdeye/thirdeye-server/src/main/resources/com/linkedin/thirdeye/views/nav-bar-component.ftl 
b/thirdeye/thirdeye-server/src/main/resources/com/linkedin/thirdeye/views/nav-bar-component.ftl index 421d2418c8d7..2a7de3587af5 100644 --- a/thirdeye/thirdeye-server/src/main/resources/com/linkedin/thirdeye/views/nav-bar-component.ftl +++ b/thirdeye/thirdeye-server/src/main/resources/com/linkedin/thirdeye/views/nav-bar-component.ftl @@ -1,6 +1,8 @@ diff --git a/thirdeye/thirdeye-server/src/test/java/com/linkedin/thirdeye/resource/TestCollectionsResource.java b/thirdeye/thirdeye-server/src/test/java/com/linkedin/thirdeye/resource/TestCollectionsResource.java index 9af024a0cfd5..d5121d1b8a27 100644 --- a/thirdeye/thirdeye-server/src/test/java/com/linkedin/thirdeye/resource/TestCollectionsResource.java +++ b/thirdeye/thirdeye-server/src/test/java/com/linkedin/thirdeye/resource/TestCollectionsResource.java @@ -1,17 +1,21 @@ package com.linkedin.thirdeye.resource; import java.io.File; +import java.util.UUID; import static org.mockito.Mockito.*; import com.codahale.metrics.MetricRegistry; import com.linkedin.thirdeye.api.StarTreeConstants; import com.linkedin.thirdeye.api.StarTreeManager; +import com.linkedin.thirdeye.impl.storage.DataUpdateManager; +import com.linkedin.thirdeye.impl.storage.StorageUtils; import com.sun.jersey.api.ConflictException; import javax.ws.rs.core.Response; import org.apache.commons.io.FileUtils; +import org.joda.time.DateTime; import org.testng.Assert; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; @@ -36,7 +40,7 @@ public void beforeMethod() throws Exception try { FileUtils.forceDelete(rootDir); } catch (Exception e) { /* ok */ } try { FileUtils.forceMkdir(rootDir); } catch (Exception e) { /* ok */ } - testCollectionsResource = new CollectionsResource(mockStarTreeManager, mockMetricRegistry, rootDir); + testCollectionsResource = new CollectionsResource(mockStarTreeManager, mockMetricRegistry, new DataUpdateManager(rootDir), rootDir); collection = "dummy"; } @@ -73,31 +77,4 @@ public void 
testPostConfigOverwrite() throws Exception Response postConfigResponse = testCollectionsResource.postConfig(collection, configBytes); } - - @Test - public void testPostStarTree() throws Exception - { - byte[] starTreeBytes = "Dummy star tree file".getBytes(); - Response postStarTreeResponse = testCollectionsResource.postStarTree(collection, starTreeBytes); - Assert.assertEquals(postStarTreeResponse.getStatus(), Response.Status.OK.getStatusCode()); - } - - - @Test(expectedExceptions = ConflictException.class) - public void testPostStarTreeOverwrite() throws Exception - { - File collectionDir = new File(rootDir, collection); - if (!collectionDir.exists()) - { - FileUtils.forceMkdir(collectionDir); - } - - File starTreeFile = new File(collectionDir, StarTreeConstants.TREE_FILE_NAME); - - FileUtils.writeByteArrayToFile(starTreeFile, "Dummy existing star tree file".getBytes()); - - byte[] starTreeBytes = "Dummy star tree file to overwrite".getBytes(); - Response postStarTreeResponse = testCollectionsResource.postStarTree(collection, starTreeBytes); - - } } diff --git a/thirdeye/thirdeye-tools/pom.xml b/thirdeye/thirdeye-tools/pom.xml index 654cc5edcba8..88c5dc40a05f 100644 --- a/thirdeye/thirdeye-tools/pom.xml +++ b/thirdeye/thirdeye-tools/pom.xml @@ -20,11 +20,6 @@ org.apache.httpcomponents httpclient 4.3.3 - - - com.linkedin.thirdeye - thirdeye-bootstrap - 1.0-SNAPSHOT commons-cli @@ -76,7 +71,6 @@ 2.3 true - true *:* diff --git a/thirdeye/thirdeye-tools/src/main/java/com/linkedin/thirdeye/tools/BootstrapPhaseOneOutputDumpTool.java b/thirdeye/thirdeye-tools/src/main/java/com/linkedin/thirdeye/tools/BootstrapPhaseOneOutputDumpTool.java deleted file mode 100644 index e014f0e1317a..000000000000 --- a/thirdeye/thirdeye-tools/src/main/java/com/linkedin/thirdeye/tools/BootstrapPhaseOneOutputDumpTool.java +++ /dev/null @@ -1,107 +0,0 @@ -package com.linkedin.thirdeye.tools; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; - -import 
org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.SequenceFile; -import org.apache.hadoop.io.SequenceFile.Reader; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.Lists; -import com.linkedin.thirdeye.api.MetricSchema; -import com.linkedin.thirdeye.api.MetricTimeSeries; -import com.linkedin.thirdeye.api.MetricType; -import com.linkedin.thirdeye.bootstrap.startree.bootstrap.phase1.BootstrapPhaseMapOutputKey; -import com.linkedin.thirdeye.bootstrap.startree.bootstrap.phase1.BootstrapPhaseMapOutputValue; -import com.linkedin.thirdeye.bootstrap.startree.bootstrap.phase1.StarTreeBootstrapPhaseOneConfig; - -/** - * Utility to read the output of Bootstrap Phase One - * - * @author kgopalak - * - */ -public class BootstrapPhaseOneOutputDumpTool { - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private final StarTreeBootstrapPhaseOneConfig config; - - public BootstrapPhaseOneOutputDumpTool(StarTreeBootstrapPhaseOneConfig config) { - this.config = config; - } - - public static void main(String[] args) throws IOException, - InstantiationException, IllegalAccessException { - // data output directory generated by BootstrapPhaseOneJob - Path dataPath = new Path(args[0]); - // config file used by BootstrapPhaseOneJob - Path configPath = new Path(args[1]); - FileSystem fs = FileSystem.get(new Configuration()); - - // read the config file - FSDataInputStream is = fs.open(configPath); - StarTreeBootstrapPhaseOneConfig config; - config = OBJECT_MAPPER.readValue(is, StarTreeBootstrapPhaseOneConfig.class); - // instantiate BootstrapPhaseOneOutputDumpTool instance - BootstrapPhaseOneOutputDumpTool tool; - tool = 
new BootstrapPhaseOneOutputDumpTool(config); - if (fs.isFile(dataPath)) { - tool.process(dataPath); - } else { - FileStatus[] listStatus = fs.listStatus(dataPath); - for (FileStatus fileStatus : listStatus) { - tool.process(fileStatus.getPath()); - } - } - } - - public void process(Path path) throws IOException, InstantiationException, - IllegalAccessException { - SequenceFile.Reader reader = new SequenceFile.Reader(new Configuration(), - Reader.file(path)); - System.out.println(reader.getKeyClass()); - System.out.println(reader.getValueClassName()); - WritableComparable key = (WritableComparable) reader.getKeyClass() - .newInstance(); - Writable val = (Writable) reader.getValueClass().newInstance(); - ArrayList names = Lists.newArrayList("m1", "m2", "m3", "m4", "m5"); - ArrayList types = Lists.newArrayList(MetricType.INT, - MetricType.INT, MetricType.INT, MetricType.INT, MetricType.INT); - - MetricSchema schema = new MetricSchema(names, types); - int[] metrics = new int[names.size()]; - while (reader.next(key, val)) { - BytesWritable keyWritable = (BytesWritable) key; - BootstrapPhaseMapOutputKey outputKey = BootstrapPhaseMapOutputKey - .fromBytes(keyWritable.getBytes()); - BytesWritable valWritable = (BytesWritable) val; - BootstrapPhaseMapOutputValue outputVal = BootstrapPhaseMapOutputValue - .fromBytes(valWritable.getBytes(), schema); - - System.out.println(outputVal.getDimensionKey()); - MetricTimeSeries metricTimeSeries = outputVal.getMetricTimeSeries(); - for (long timeWindow : metricTimeSeries.getTimeWindowSet()) { - boolean nonZeroMetric = false; - for (int i = 0; i < names.size(); i++) { - String name = names.get(i); - metrics[i] = metricTimeSeries.get(timeWindow, name).intValue(); - if (metrics[i] > 0) { - nonZeroMetric = true; - } - } - // print only if any of the metric is non zero - if (nonZeroMetric) { - System.out.println(timeWindow + ":" + Arrays.toString(metrics)); - } - } - } - } -} diff --git 
a/thirdeye/thirdeye-tools/src/main/java/com/linkedin/thirdeye/tools/BufferViewer.java b/thirdeye/thirdeye-tools/src/main/java/com/linkedin/thirdeye/tools/BufferViewer.java index a318eaf89187..48875a6aaee8 100644 --- a/thirdeye/thirdeye-tools/src/main/java/com/linkedin/thirdeye/tools/BufferViewer.java +++ b/thirdeye/thirdeye-tools/src/main/java/com/linkedin/thirdeye/tools/BufferViewer.java @@ -59,7 +59,7 @@ public static void main(String[] args) throws Exception // Read dimension index - File dataDir = new File(args[0], StarTreeConstants.DATA_DIR_NAME); + File dataDir = new File(args[0], StarTreeConstants.DATA_DIR_PREFIX); File dimensionStoreDir = new File(dataDir, StarTreeConstants.DIMENSION_STORE); diff --git a/thirdeye/thirdeye-tools/src/main/java/com/linkedin/thirdeye/tools/DataLoadTool.java b/thirdeye/thirdeye-tools/src/main/java/com/linkedin/thirdeye/tools/DataLoadTool.java index e4a6631465ce..41ece3193214 100644 --- a/thirdeye/thirdeye-tools/src/main/java/com/linkedin/thirdeye/tools/DataLoadTool.java +++ b/thirdeye/thirdeye-tools/src/main/java/com/linkedin/thirdeye/tools/DataLoadTool.java @@ -3,17 +3,12 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Joiner; -import com.google.common.collect.ImmutableSet; import com.linkedin.thirdeye.api.StarTreeConstants; import com.linkedin.thirdeye.api.TimeRange; -import com.linkedin.thirdeye.impl.TarUtils; -import com.linkedin.thirdeye.impl.storage.MetricIndexEntry; -import com.linkedin.thirdeye.impl.storage.StorageUtils; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Options; -import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.http.HttpHost; import org.apache.http.HttpRequest; @@ -33,6 +28,8 @@ import org.apache.http.impl.client.BasicCredentialsProvider; import 
org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; +import org.joda.time.DateTime; +import org.joda.time.format.ISODateTimeFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -45,20 +42,16 @@ import javax.security.auth.login.LoginContext; import java.io.Console; import java.io.File; -import java.io.FileFilter; -import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; -import java.io.ObjectInputStream; import java.io.OutputStream; import java.net.URI; +import java.net.URLDecoder; import java.net.URLEncoder; import java.security.Principal; import java.security.PrivilegedAction; -import java.util.ArrayList; import java.util.HashSet; -import java.util.List; import java.util.Set; public class DataLoadTool implements Runnable @@ -66,7 +59,6 @@ public class DataLoadTool implements Runnable private static final Logger LOG = LoggerFactory.getLogger(DataLoadTool.class); private static final Joiner URI_JOINER = Joiner.on("/"); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final String ENCODING = "UTF-8"; private static String USAGE = "usage: [opts] hdfsUri thirdEyeUri collection"; @@ -79,15 +71,19 @@ public class DataLoadTool implements Runnable private static String DEFAULT_KRB5_CONF = System.getProperty("user.home") + File.separator + ".krb5.conf"; + private enum Mode + { + BOOTSTRAP, + INCREMENT, + PATCH + } + private final String user; private final String password; private final String collection; private final URI hdfsUri; private final URI thirdEyeUri; - private final boolean includeStarTree; - private final boolean includeConfig; - private final boolean includeDimensions; - private final boolean overwrite; + private final Mode mode; private final TimeRange globalTimeRange; private final HttpHost httpHost; private final HttpClient httpClient; @@ -97,24 +93,18 @@ public DataLoadTool(String user, URI hdfsUri, URI 
thirdEyeUri, String collection, - boolean includeStarTree, - boolean includeConfig, - boolean includeDimensions, - boolean overwrite, + Mode mode, TimeRange timeRange) { this.user = user; this.password = password; this.hdfsUri = hdfsUri; this.thirdEyeUri = thirdEyeUri; - this.includeStarTree = includeStarTree; - this.includeConfig = includeConfig; - this.includeDimensions = includeDimensions; + this.mode = mode; this.globalTimeRange = timeRange; this.collection = collection; this.httpHost = new HttpHost(hdfsUri.getHost(), hdfsUri.getPort(), hdfsUri.getScheme()); this.httpClient = getHttpClient(); - this.overwrite = overwrite; } @Override @@ -122,6 +112,7 @@ public void run() { try { + String uri; HttpRequest hdfsReq; HttpResponse hdfsRes; @@ -129,11 +120,7 @@ public void run() // Construct loader based on URI scheme ThirdEyeLoader thirdEyeLoader; - if ("file".equals(thirdEyeUri.getScheme())) - { - thirdEyeLoader = new ThirdEyeFileLoader(new File(thirdEyeUri.getPath(), collection)); - } - else if ("http".equals(thirdEyeUri.getScheme())) + if ("http".equals(thirdEyeUri.getScheme())) { thirdEyeLoader = new ThirdEyeHttpLoader(new HttpHost(thirdEyeUri.getHost(), thirdEyeUri.getPort()), collection); } @@ -143,11 +130,10 @@ else if ("http".equals(thirdEyeUri.getScheme())) } LOG.info("Loading into {}", thirdEyeUri); - // Get latest time in server - long loadedHighWaterMark = thirdEyeLoader.getLoadedHighWaterMark(); - // Get available times - hdfsReq = new HttpGet(createListTimeRequest()); + uri = createListTimeRequest(); + LOG.info("GET {}", uri); + hdfsReq = new HttpGet(uri); hdfsRes = executePrivileged(loginContext, hdfsReq); if (hdfsRes.getStatusLine().getStatusCode() != 200) { @@ -164,11 +150,11 @@ else if ("http".equals(thirdEyeUri.getScheme())) if (pathSuffix.startsWith("data_")) { - String[] timeRangeTokens - = pathSuffix.substring("data_".length(), pathSuffix.length()) - .split("-"); + String[] pathTokens = pathSuffix.split("_"); + DateTime minTime = 
StarTreeConstants.DATE_TIME_FORMATTER.parseDateTime(pathTokens[1]); + DateTime maxTime = StarTreeConstants.DATE_TIME_FORMATTER.parseDateTime(pathTokens[2]); - TimeRange timeRange = new TimeRange(Long.valueOf(timeRangeTokens[0]), Long.valueOf(timeRangeTokens[1])); + TimeRange timeRange = new TimeRange(minTime.getMillis(), maxTime.getMillis()); if (globalTimeRange.contains(timeRange)) { @@ -183,47 +169,24 @@ else if (!globalTimeRange.isDisjoint(timeRange)) } } - // Remove all loaded time ranges - if (!overwrite) - { - Set filteredTimeRanges = new HashSet(); - - for (TimeRange timeRange : timeRanges) - { - if (timeRange.getStart() > loadedHighWaterMark) - { - filteredTimeRanges.add(timeRange); - } - } - - timeRanges = filteredTimeRanges; - } - // Load config - if (includeConfig) - { - hdfsReq = new HttpGet(createConfigRequest()); - hdfsRes = executePrivileged(loginContext, hdfsReq); - thirdEyeLoader.handleConfig(hdfsRes.getEntity().getContent()); - EntityUtils.consume(hdfsRes.getEntity()); - } + uri = createConfigRequest(); + LOG.info("GET {}", uri); + hdfsReq = new HttpGet(uri); + hdfsRes = executePrivileged(loginContext, hdfsReq); + thirdEyeLoader.handleConfig(hdfsRes.getEntity().getContent()); + EntityUtils.consume(hdfsRes.getEntity()); // Load star tree / data for (TimeRange timeRange : timeRanges) { - if (includeStarTree) - { - hdfsReq = new HttpGet(createStarTreeRequest(timeRange)); - hdfsRes = executePrivileged(loginContext, hdfsReq); - if (hdfsRes.getStatusLine().getStatusCode() == 200) - { - thirdEyeLoader.handleStarTree(hdfsRes.getEntity().getContent()); - } - EntityUtils.consume(hdfsRes.getEntity()); - } + DateTime minTime = new DateTime(timeRange.getStart()); + DateTime maxTime = new DateTime(timeRange.getEnd()); // List data files - hdfsReq = new HttpGet(createListDataRequest(timeRange)); + uri = createListDataRequest(timeRange); + LOG.info("GET {}", uri); + hdfsReq = new HttpGet(uri); hdfsRes = executePrivileged(loginContext, hdfsReq); fileStatuses = 
OBJECT_MAPPER.readTree(hdfsRes.getEntity().getContent()); EntityUtils.consume(hdfsRes.getEntity()); @@ -235,9 +198,12 @@ else if (!globalTimeRange.isDisjoint(timeRange)) if (pathSuffix.startsWith("task_")) { - hdfsReq = new HttpGet(createGetDataRequest(timeRange, pathSuffix)); + uri = createGetDataRequest(timeRange, pathSuffix); + LOG.info("GET {}", uri); + hdfsReq = new HttpGet(uri); hdfsRes = executePrivileged(loginContext, hdfsReq); - thirdEyeLoader.handleData(pathSuffix, hdfsRes.getEntity().getContent(), includeDimensions); + thirdEyeLoader.handleData( + pathSuffix, hdfsRes.getEntity().getContent(), minTime, maxTime, Mode.BOOTSTRAP.equals(mode)); EntityUtils.consume(hdfsRes.getEntity()); } } @@ -251,92 +217,8 @@ else if (!globalTimeRange.isDisjoint(timeRange)) private interface ThirdEyeLoader { - long getLoadedHighWaterMark() throws IOException; void handleConfig(InputStream config) throws IOException; - void handleStarTree(InputStream starTree) throws IOException; - void handleData(String fileName, InputStream data, boolean includeDimensions) throws IOException; - } - - private class ThirdEyeFileLoader implements ThirdEyeLoader - { - private final File collectionDir; - - ThirdEyeFileLoader(File collectionDir) - { - this.collectionDir = collectionDir; - } - - @Override - public long getLoadedHighWaterMark() throws IOException - { - long loadedHighWaterMark = 0; - - File[] metricIndexes = new File(collectionDir + File.separator - + StarTreeConstants.DATA_DIR_NAME + File.separator - + StarTreeConstants.METRIC_STORE).listFiles(new FileFilter() - { - @Override - public boolean accept(File pathname) - { - return pathname.getName().endsWith("idx"); - } - }); - - if (metricIndexes != null) - { - for (File metricIndex : metricIndexes) - { - List indexEntries = StorageUtils.readMetricIndex(metricIndex); - - for (MetricIndexEntry indexEntry : indexEntries) - { - if (indexEntry.getTimeRange().getEnd() > loadedHighWaterMark) - { - loadedHighWaterMark = 
indexEntry.getTimeRange().getEnd(); - } - } - } - } - - return loadedHighWaterMark; - } - - @Override - public void handleConfig(InputStream config) throws IOException - { - if (!collectionDir.exists()) - { - FileUtils.forceMkdir(collectionDir); - } - File configFile = new File(collectionDir, StarTreeConstants.CONFIG_FILE_NAME); - FileUtils.copyInputStreamToFile(config, configFile); - LOG.info("Copied config to file {}", configFile); - } - - @Override - public void handleStarTree(InputStream starTree) throws IOException - { - if (!collectionDir.exists()) - { - FileUtils.forceMkdir(collectionDir); - } - File starTreeFile = new File(collectionDir, StarTreeConstants.TREE_FILE_NAME); - FileUtils.copyInputStreamToFile(starTree, starTreeFile); - LOG.info("Copied star tree to file {}", starTreeFile); - } - - @Override - public void handleData(String fileName, InputStream data, boolean includeDimensions) throws IOException - { - File dataDir = new File(collectionDir, StarTreeConstants.DATA_DIR_NAME); - if (!dataDir.exists()) - { - FileUtils.forceMkdir(dataDir); - } - Set blacklist = includeDimensions ? 
null : ImmutableSet.of(StarTreeConstants.DIMENSION_STORE); - TarUtils.extractGzippedTarArchive(data, dataDir, 2, blacklist); - LOG.info("Copied data from {} to data dir {}", fileName, dataDir); - } + void handleData(String fileName, InputStream data, DateTime minTime, DateTime maxTime, boolean includeDimensions) throws IOException; } private class ThirdEyeHttpLoader implements ThirdEyeLoader @@ -350,38 +232,13 @@ private class ThirdEyeHttpLoader implements ThirdEyeLoader this.collection = collection; } - @Override - public long getLoadedHighWaterMark() throws IOException - { - String uri = "/collections/" + URLEncoder.encode(collection, "UTF-8") + "/stats"; - HttpResponse res = httpClient.execute(host, new HttpGet(uri)); - if (res.getStatusLine().getStatusCode() == 200) - { - JsonNode stats = OBJECT_MAPPER.readTree(res.getEntity().getContent()); - EntityUtils.consume(res.getEntity()); - return stats.get("maxTime").asLong(); - } - return 0; - } - @Override public void handleConfig(InputStream config) throws IOException { String uri = "/collections/" + URLEncoder.encode(collection, "UTF-8"); HttpResponse res = execute(uri, config); - if (res.getStatusLine().getStatusCode() != 200) - { - throw new IOException(res.getStatusLine().toString()); - } - LOG.info("POST {} #=> {}", uri, res.getStatusLine()); - } - - @Override - public void handleStarTree(InputStream starTree) throws IOException - { - String uri = "/collections/" + URLEncoder.encode(collection, "UTF-8") + "/starTree"; - HttpResponse res = execute(uri, starTree); - if (res.getStatusLine().getStatusCode() != 200) + if (res.getStatusLine().getStatusCode() != 200 + && res.getStatusLine().getStatusCode() != 409) // conflict means one already there { throw new IOException(res.getStatusLine().toString()); } @@ -389,9 +246,14 @@ public void handleStarTree(InputStream starTree) throws IOException } @Override - public void handleData(String fileName, InputStream data, boolean includeDimensions) throws IOException + 
public void handleData(String fileName, + InputStream data, + DateTime minTime, + DateTime maxTime, + boolean includeDimensions) throws IOException { - String uri = "/collections/" + URLEncoder.encode(collection, "UTF-8") + "/data"; + String uri = "/collections/" + URLEncoder.encode(collection, "UTF-8") + "/data/" + + minTime.getMillis() + "/" + maxTime.getMillis(); if (includeDimensions) { uri += "?includeDimensions=true"; @@ -419,19 +281,23 @@ private String createListTimeRequest() throws IOException String encodedCollection = URLEncoder.encode(collection, ENCODING); return URI_JOINER.join(WEB_HDFS_PREFIX + hdfsUri.getPath(), - encodedCollection + "?op=LISTSTATUS"); + encodedCollection, + mode + "?op=LISTSTATUS"); } private String createStarTreeRequest(TimeRange timeRange) throws IOException { String encodedCollection = URLEncoder.encode(collection, ENCODING); + String encodedStartTime = StarTreeConstants.DATE_TIME_FORMATTER.print(new DateTime(timeRange.getStart())); + String encodedEndTime = StarTreeConstants.DATE_TIME_FORMATTER.print(new DateTime(timeRange.getEnd())); return URI_JOINER.join(WEB_HDFS_PREFIX + hdfsUri.getPath(), - encodedCollection, - "data_" + timeRange.getStart() + "-" + timeRange.getEnd(), - "startree_generation", - "star-tree-" + encodedCollection, - encodedCollection + "-tree.bin?op=OPEN"); + encodedCollection, + mode, + "data_" + encodedStartTime + "_" + encodedEndTime, + "startree_generation", + "star-tree-" + encodedCollection, + encodedCollection + "-tree.bin?op=OPEN"); } private String createConfigRequest() throws IOException @@ -446,22 +312,28 @@ private String createConfigRequest() throws IOException private String createListDataRequest(TimeRange timeRange) throws IOException { String encodedCollection = URLEncoder.encode(collection, ENCODING); + String encodedStartTime = StarTreeConstants.DATE_TIME_FORMATTER.print(new DateTime(timeRange.getStart())); + String encodedEndTime = StarTreeConstants.DATE_TIME_FORMATTER.print(new 
DateTime(timeRange.getEnd())); return URI_JOINER.join(WEB_HDFS_PREFIX + hdfsUri.getPath(), - encodedCollection, - "data_" + timeRange.getStart() + "-" + timeRange.getEnd(), - "startree_bootstrap_phase2?op=LISTSTATUS"); + encodedCollection, + mode, + "data_" + encodedStartTime + "_" + encodedEndTime, + "startree_bootstrap_phase2?op=LISTSTATUS"); } private String createGetDataRequest(TimeRange timeRange, String pathSuffix) throws IOException { String encodedCollection = URLEncoder.encode(collection, ENCODING); + String encodedStartTime = StarTreeConstants.DATE_TIME_FORMATTER.print(new DateTime(timeRange.getStart())); + String encodedEndTime = StarTreeConstants.DATE_TIME_FORMATTER.print(new DateTime(timeRange.getEnd())); return URI_JOINER.join(WEB_HDFS_PREFIX + hdfsUri.getPath(), - encodedCollection, - "data_" + timeRange.getStart() + "-" + timeRange.getEnd(), - "startree_bootstrap_phase2", - pathSuffix + "?op=OPEN"); + encodedCollection, + mode, + "data_" + encodedStartTime + "_" + encodedEndTime, + "startree_bootstrap_phase2", + pathSuffix + "?op=OPEN"); } private LoginContext login() throws Exception @@ -546,13 +418,10 @@ public static void main(String[] args) throws Exception Options options = new Options(); options.addOption("krb5", true, "Path to krb5.conf file (default: ~/.krb5.conf)"); options.addOption("debug", false, "Debug logging"); - options.addOption("includeStarTree", false, "Copy star tree binary"); - options.addOption("includeConfig", false, "Copy config file"); - options.addOption("includeDimensions", false, "Copy dimension data"); - options.addOption("overwrite", false, "Overwrite any already loaded time ranges"); options.addOption("help", false, "Prints this help message"); options.addOption("minTime", true, "Min time to load"); options.addOption("maxTime", true, "Max time to load"); + options.addOption("mode", true, "Data load mode (BOOTSTRAP, INCREMENT, PATCH)"); CommandLine commandLine = new GnuParser().parse(options, args); @@ -584,22 +453,27 
@@ public static void main(String[] args) throws Exception String password = new String(passwordChars); // Get time range - long minTime = commandLine.hasOption("minTime") - ? Long.valueOf(commandLine.getOptionValue("minTime")) - : 0; - long maxTime = commandLine.hasOption("maxTime") - ? Long.valueOf(commandLine.getOptionValue("maxTime")) - : Long.MAX_VALUE; + DateTime minTime = commandLine.hasOption("minTime") + ? parseDateTime(commandLine.getOptionValue("minTime")) + : new DateTime(0); + DateTime maxTime = commandLine.hasOption("maxTime") + ? parseDateTime(commandLine.getOptionValue("maxTime")) + : new DateTime(Long.MAX_VALUE); + + // Get mode + Mode mode = Mode.valueOf(commandLine.getOptionValue("mode", "BOOTSTRAP").toUpperCase()); new DataLoadTool(user, - password, - URI.create(commandLine.getArgs()[0]), - URI.create(commandLine.getArgs()[1]), - commandLine.getArgs()[2], - commandLine.hasOption("includeStarTree"), - commandLine.hasOption("includeConfig"), - commandLine.hasOption("includeDimensions"), - commandLine.hasOption("overwrite"), - new TimeRange(minTime, maxTime)).run(); + password, + URI.create(commandLine.getArgs()[0]), + URI.create(commandLine.getArgs()[1]), + commandLine.getArgs()[2], + mode, + new TimeRange(minTime.getMillis(), maxTime.getMillis())).run(); + } + + private static DateTime parseDateTime(String dateTime) throws IOException + { + return ISODateTimeFormat.dateTimeParser().parseDateTime(URLDecoder.decode(dateTime, ENCODING)); } }