forked from NationalSecurityAgency/datawave
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
WIP adapt DW PR#2568 to use accumulo PR#4898
These draft changes build on NationalSecurityAgency#2568 with the following differences. * Compute bulkv2 load plans using new unreleased APIs in accumulo PR 4898. * The table splits are loaded at the beginning of writing to rfiles instead of at the end. Not sure about the overall implications of this change on memory use in reducers. The load plan could be computed after the rfile is closed using a new API in 4898 if deferring the loading of tablet splits is desired. * Switches to using accumulo public APIs for writing rfiles instead of internal accumulo methods. Well, public once they are actually released. * The algorithm to compute the load plan does less work per key/value. Should be roughly constant time vs log(N). * Adds a simple SortedList class. The reason this was added is that this code does binary searches on lists, however it was not certain those lists were actually sorted. If a list was not sorted it would not cause exceptions in binary search but could lead to incorrect load plans and lost data. This new SortedList class ensures lists are sorted and allows this assurance to travel around in the code. Maybe this change should be its own PR.
- Loading branch information
1 parent
a2430be
commit 6dd45df
Showing
11 changed files
with
177 additions
and
284 deletions.
There are no files selected for viewing
Submodule datawave
updated
1 files
+2 −1 | src/main/java/datawave/microservice/authorization/preauth/ProxiedEntityX509Filter.java |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
265 changes: 41 additions & 224 deletions
265
...se/ingest-core/src/main/java/datawave/ingest/mapreduce/job/MultiRFileOutputFormatter.java
Large diffs are not rendered by default.
Oops, something went wrong.
77 changes: 77 additions & 0 deletions
77
warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/SortedList.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
package datawave.ingest.mapreduce.job; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Collections; | ||
import java.util.Comparator; | ||
import java.util.List; | ||
|
||
import org.apache.log4j.Logger; | ||
|
||
/** | ||
* Wraps a list that is immutable and verified as sorted. | ||
*/ | ||
public class SortedList<T> { | ||
|
||
private static final Logger log = Logger.getLogger(SortedList.class); | ||
|
||
private final List<T> list; | ||
|
||
private SortedList(List<T> list) { | ||
this.list = list; | ||
} | ||
|
||
public List<T> get() { | ||
return list; | ||
} | ||
|
||
private static final SortedList<?> EMPTY = new SortedList<>(List.of()); | ||
|
||
@SuppressWarnings("unchecked") | ||
public static <T2> SortedList<T2> empty() { | ||
return (SortedList<T2>) EMPTY; | ||
} | ||
|
||
/** | ||
* For a list that is expected to be sorted this will verify it is sorted and if so return an immutable copy of it. If this list is not sorted it will log a | ||
* warning, copy it, sort the copy, and return an immutable version of the copy. | ||
*/ | ||
public static <T2> SortedList<T2> fromSorted(List<T2> list) { | ||
if (list.isEmpty()) { | ||
return empty(); | ||
} | ||
|
||
var copy = List.copyOf(list); | ||
|
||
// verify after copying because nothing can change at this point | ||
boolean isSorted = true; | ||
for (int i = 1; i < copy.size(); i++) { | ||
@SuppressWarnings("unchecked") | ||
var prev = (Comparable<? super T2>) copy.get(i - 1); | ||
if (prev.compareTo(copy.get(i)) > 0) { | ||
isSorted = false; | ||
} | ||
} | ||
|
||
if (isSorted) { | ||
return new SortedList<>(copy); | ||
} else { | ||
log.warn("Input list of size " + copy.size() + " was expected to be sorted but was not", new IllegalArgumentException()); | ||
return fromUnsorted(copy); | ||
} | ||
} | ||
|
||
/** | ||
* Copies a list and sorts the copy returning an immutable version of the copy. | ||
*/ | ||
public static <T2> SortedList<T2> fromUnsorted(List<T2> list) { | ||
if (list.isEmpty()) { | ||
return empty(); | ||
} | ||
|
||
var copy = new ArrayList<>(list); | ||
@SuppressWarnings("unchecked") | ||
var compartor = (Comparator<? super T2>) Comparator.naturalOrder(); | ||
list.sort(compartor); | ||
return new SortedList<>(Collections.unmodifiableList(copy)); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.