Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve OpenSearch permission logic #940

Merged
merged 29 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
1661acd
publicness on encounter/index; catch index-busting exception
naknomum Dec 11, 2024
d409db2
take 1 of N for offloading permissions indexing
naknomum Dec 12, 2024
238059e
take 2: iterate over encounters
naknomum Dec 12, 2024
0378361
base.opensearchUpdate() and friends
naknomum Dec 12, 2024
c038cc9
actually update index documents woohoo
naknomum Dec 12, 2024
4f74818
take1 at improved search permission query
naknomum Dec 12, 2024
0b40acf
[#779] catch bad lat/lon; catch index exceptions; startNum on opensea…
naknomum Dec 12, 2024
3f75d2e
take1 of actual permissions backgroundness
naknomum Dec 12, 2024
a8d28d8
setPermissionsNeeded() in a few places to start
naknomum Dec 13, 2024
8ef476b
submitterUserId in index; smarter viewUsers perm logic; alpha new per…
naknomum Dec 13, 2024
89368c2
userIdsWithViewAccess() no longer abstract on base class, used only f…
naknomum Dec 16, 2024
0e75166
resolve conflicts
naknomum Dec 16, 2024
091a3e3
linting
naknomum Dec 16, 2024
e28aa36
tweaks post-commit
naknomum Dec 16, 2024
ae40133
set these values via properties, now that we have that option
naknomum Dec 16, 2024
9e1deb5
throw exception rather than be forgiving when adding permissions to q…
naknomum Dec 16, 2024
6e75953
resolve conflicts
naknomum Dec 16, 2024
9742308
use the newly available Boolean SystemValue
naknomum Dec 16, 2024
61c8db1
Merge branch 'main' into 785_improve_search_permission_logic
naknomum Dec 16, 2024
ad98174
little more info
naknomum Dec 17, 2024
ea7014c
enc.modified is nice to index
naknomum Dec 17, 2024
1fc60be
name change for clarity
naknomum Dec 17, 2024
9799625
use raw sql instead of Encounter objects, for speed; also some debugg…
naknomum Dec 17, 2024
5a4e4d0
try/catch safety
naknomum Dec 17, 2024
9524b0e
shepherd-less flavor
naknomum Dec 17, 2024
b169ece
collab persistence triggers permissionsNeeded
naknomum Dec 17, 2024
ce8d810
attempt to handle the case where Encounter is first created (so cant …
naknomum Dec 18, 2024
1d06bb8
add a couple try/catch to opensearchIndexPermissions()
naknomum Dec 18, 2024
fa16a54
close query in finally
naknomum Dec 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions config/indices.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@
-- UGH, i guess _IDX (in caps!) should be what we standardize on. that is what datanucleus does.
-- to make matters worse, we have some places we named them explicitely in package.jdo (but those should fix themselves?)

BEGIN;

CREATE INDEX IF NOT EXISTS "MEDIAASSET_PARENTID_idx" ON "MEDIAASSET" ("PARENTID");
CREATE INDEX IF NOT EXISTS "MEDIAASSET_HASHCODE_idx" ON "MEDIAASSET" ("HASHCODE");

CREATE INDEX IF NOT EXISTS "ENCOUNTER_LOCATIONID_idx" ON "ENCOUNTER" ("LOCATIONID");
CREATE INDEX IF NOT EXISTS "ENCOUNTER_STATE_idx" ON "ENCOUNTER" ("STATE");
CREATE INDEX IF NOT EXISTS "ENCOUNTER_MODIFIED_idx" ON "ENCOUNTER" ("MODIFIED");
CREATE INDEX IF NOT EXISTS "ENCOUNTER_INDIVIDUALID_idx" ON "ENCOUNTER" ("INDIVIDUALID");
CREATE INDEX IF NOT EXISTS "ENCOUNTER_DATEINMILLISECONDS_idx" ON "ENCOUNTER" ("DATEINMILLISECONDS");
CREATE INDEX IF NOT EXISTS "ENCOUNTER_DECIMALLATITUDE_idx" ON "ENCOUNTER" ("DECIMALLATITUDE");
Expand Down Expand Up @@ -45,3 +48,5 @@ CREATE INDEX IF NOT EXISTS "TASK_CREATED_IDX" ON "TASK" ("CREATED");

INSERT INTO "RELATIONSHIP" ("RELATIONSHIP_ID", "MARKEDINDIVIDUALNAME1", "MARKEDINDIVIDUALNAME2", "MARKEDINDIVIDUALROLE1", "MARKEDINDIVIDUALROLE2", "TYPE", "STARTTIME", "ENDTIME") VALUES (0, (SELECT "INDIVIDUALID" FROM "MARKEDINDIVIDUAL" ORDER BY random() LIMIT 1), (SELECT "INDIVIDUALID" FROM "MARKEDINDIVIDUAL" ORDER BY random() LIMIT 1), 'placeholder', 'placeholder', 'placeholder-to-prevent-empty-table', 0, 0);

END;

27 changes: 25 additions & 2 deletions src/main/java/org/ecocean/Base.java
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,10 @@
*/
public abstract void addComments(final String newComments);

public abstract List<String> userIdsWithViewAccess(Shepherd myShepherd);
public abstract List<String> userIdsWithEditAccess(Shepherd myShepherd);
// issue 785 makes this no longer necessary; the overrides are left on Occurrence and MarkedIndividual
// for now as reference -- but are not called. they will need to be addressed when these classes are searchable
// public abstract List<String> userIdsWithViewAccess(Shepherd myShepherd);
// public abstract List<String> userIdsWithEditAccess(Shepherd myShepherd);

public abstract String opensearchIndexName();

Expand All @@ -97,6 +99,8 @@ public JSONObject opensearchMapping() {
map.put("version", new org.json.JSONObject("{\"type\": \"long\"}"));
// id should be keyword for the sake of sorting
map.put("id", new org.json.JSONObject("{\"type\": \"keyword\"}"));
map.put("viewUsers", new org.json.JSONObject("{\"type\": \"keyword\"}"));
map.put("editUsers", new org.json.JSONObject("{\"type\": \"keyword\"}"));
return map;
}

Expand Down Expand Up @@ -153,11 +157,24 @@ public void opensearchUnindexDeep()
this.opensearchUnindex();
}

public void opensearchUpdate(final JSONObject updateData)
throws IOException {
if (updateData == null) return;
OpenSearch opensearch = new OpenSearch();

opensearch.indexUpdate(this.opensearchIndexName(), this.getId(), updateData);
}

// should be overridden
public void opensearchDocumentSerializer(JsonGenerator jgen, Shepherd myShepherd)
throws IOException, JsonProcessingException {
jgen.writeStringField("id", this.getId());
jgen.writeNumberField("version", this.getVersion());
jgen.writeNumberField("indexTimestamp", System.currentTimeMillis());

/*
these are no longer computed in the general opensearchIndex() call.
they are too expensive. see Encounter.opensearchIndexPermission()

jgen.writeFieldName("viewUsers");
jgen.writeStartArray();
Expand All @@ -172,6 +189,7 @@ public void opensearchDocumentSerializer(JsonGenerator jgen, Shepherd myShepherd
jgen.writeString(id);
}
jgen.writeEndArray();
*/
}

public void opensearchDocumentSerializer(JsonGenerator jgen)
Expand All @@ -196,6 +214,11 @@ public static JSONObject opensearchQuery(final String indexname, final JSONObjec
return res;
}

// this is so we can call it on Base obj, but really is only needed by [overridden by] Encounter (currently)
public boolean getOpensearchProcessPermissions() {
return false;
}

public static Map<String, Long> getAllVersions(Shepherd myShepherd, String sql) {
Query query = myShepherd.getPM().newQuery("javax.jdo.query.SQL", sql);
Map<String, Long> rtn = new HashMap<String, Long>();
Expand Down
205 changes: 194 additions & 11 deletions src/main/java/org/ecocean/Encounter.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
Expand Down Expand Up @@ -118,6 +119,7 @@ public class Encounter extends Base implements java.io.Serializable {
private Double immunoglobin;
private Boolean sampleTakenForDiet;
private Boolean injured;
private boolean opensearchProcessPermissions = false;

private ArrayList<Observation> observations = new ArrayList<Observation>();

Expand Down Expand Up @@ -3141,25 +3143,31 @@ public boolean isUserOwner(User user) { // the definition of this might change?
return false;
}

@Override public List<String> userIdsWithViewAccess(Shepherd myShepherd) {
// new logic means we only need users who are in collab with submitting user
// and if public, we dont need to do this at all
public List<String> userIdsWithViewAccess(Shepherd myShepherd) {
List<String> ids = new ArrayList<String>();

for (User user : myShepherd.getAllUsers()) {
if ((user.getId() != null) && this.canUserView(user, myShepherd))
ids.add(user.getId());
if (this.isPubliclyReadable()) return ids;
List<Collaboration> collabs = Collaboration.collaborationsForUser(myShepherd,
this.getSubmitterID());
for (Collaboration collab : collabs) {
User user = myShepherd.getUser(collab.getOtherUsername(this.getSubmitterID()));
if (user != null) ids.add(user.getId());
}
return ids;
}

@Override public List<String> userIdsWithEditAccess(Shepherd myShepherd) {
/*
public List<String> userIdsWithEditAccess(Shepherd myShepherd) {
List<String> ids = new ArrayList<String>();

for (User user : myShepherd.getAllUsers()) {
for (User user : myShepherd.getUsersWithUsername()) {
if ((user.getId() != null) && this.canUserEdit(user)) ids.add(user.getId());
}
return ids;
}

*/
public JSONObject sanitizeJson(HttpServletRequest request, JSONObject jobj)
throws JSONException {
boolean fullAccess = this.canUserAccess(request);
Expand Down Expand Up @@ -3853,6 +3861,151 @@ public int hashCode() { // we need this along with equals() for collections meth
return this.getCatalogNumber().hashCode();
}

// sadly, this mess needs to carry on the tradition set up in User.isUsernameAnonymous()
// thanks to the logic in Collaboration.canUserAccessOwnedObject()
public boolean isPubliclyReadable() {
if (!Collaboration.securityEnabled("context0")) return true;
return User.isUsernameAnonymous(this.submitterID);
}

public boolean getOpensearchProcessPermissions() {
return opensearchProcessPermissions;
}

public void setOpensearchProcessPermissions(boolean value) {
opensearchProcessPermissions = value;
}

// wrapper for below, that checks if we really need to be run
public static void opensearchIndexPermissionsBackground(Shepherd myShepherd) {
boolean runIt = false;
Long lastRun = OpenSearch.getPermissionsTimestamp(myShepherd);
long now = System.currentTimeMillis();

if ((lastRun == null) ||
((now - lastRun) > OpenSearch.BACKGROUND_PERMISSIONS_MAX_FORCE_MINUTES * 60000)) {
System.out.println(
"opensearchIndexPermissionsBackground: forced run due to max time since previous");
runIt = true;
}
boolean needed = OpenSearch.getPermissionsNeeded(myShepherd);
if (needed && !runIt) {
System.out.println("opensearchIndexPermissionsBackground: running due to needed=true");
runIt = true;
}
if (!runIt) {
System.out.println("opensearchIndexPermissionsBackground: running not required; done");
return;
}
// i think we should set these first... tho they may not get persisted til after?
OpenSearch.setPermissionsTimestamp(myShepherd);
OpenSearch.setPermissionsNeeded(myShepherd, false);
opensearchIndexPermissions();
System.out.println("opensearchIndexPermissionsBackground: running completed");
}

/* note: there are a great deal of users with *no username* that seem to appear in enc.submitters array.
however, very few (2 out of 5600+) encounters with such .submitters have a blank submitterID value
therefore: submitterID will be assumed to be a required value on users which need to be

this seems further validated by the facts that:
- canUserAccess(user) returns false if no username on user
- a user wihtout a username cant be logged in (and thus cant search)

"admin" users are just ignored entirely, as they will be exempt from the viewUsers criteria during searching.

other than "ownership" (via submitterID), a user can view if they have view or edit collab with
another user. so we frontload *approved* collabs for every user here too.

in terms of "public" encounters, it seems that (based on Collaboration.canUserAccessEncounter()),
encounters with submitterID in (NULL, "public", "", "N/A" [ugh]) is readable by anyone; so we will
skip these from processing as they should be flagged with the boolean isPubliclyReadable in indexing
*/
public static void opensearchIndexPermissions() {
Util.mark("perm start");
long startT = System.currentTimeMillis();
System.out.println("opensearchIndexPermissions(): begin...");
// no security => everything publiclyReadable - saves us work, no?
if (!Collaboration.securityEnabled("context0")) return;
OpenSearch os = new OpenSearch();
Map<String, Set<String> > collab = new HashMap<String, Set<String> >();
Map<String, String> usernameToId = new HashMap<String, String>();
Shepherd myShepherd = new Shepherd("context0");
myShepherd.setAction("Encounter.opensearchIndexPermissions");
myShepherd.beginDBTransaction();
int nonAdminCt = 0;
// it seems as though user.uuid is *required* so we can trust that
for (User user : myShepherd.getUsersWithUsername()) {
usernameToId.put(user.getUsername(), user.getId());
if (user.isAdmin(myShepherd)) continue;
nonAdminCt++;
List<Collaboration> collabsFor = Collaboration.collaborationsForUser(myShepherd,
user.getUsername());
if (Util.collectionIsEmptyOrNull(collabsFor)) continue;
for (Collaboration col : collabsFor) {
if (!col.isApproved() && !col.isEditApproved()) continue;
if (!collab.containsKey(user.getId()))
collab.put(user.getId(), new HashSet<String>());
collab.get(user.getId()).add(col.getOtherUsername(user.getUsername()));
}
}
Util.mark("perm: user build done", startT);
System.out.println("opensearchIndexPermissions(): " + usernameToId.size() +
" total users; " + nonAdminCt + " non-admin; " + collab.size() + " have active collab");
// now iterated over (non-public) encounters
int encCount = 0;
org.json.JSONObject updateData = new org.json.JSONObject();
// we do not need full Encounter objects here to update index docs, so lets do this via sql/fields - much faster
String sql =
"SELECT \"CATALOGNUMBER\", \"SUBMITTERID\" FROM \"ENCOUNTER\" WHERE \"SUBMITTERID\" IS NOT NULL AND \"SUBMITTERID\" != '' AND \"SUBMITTERID\" != 'N/A' AND \"SUBMITTERID\" != 'public'";
Query q = myShepherd.getPM().newQuery("javax.jdo.query.SQL", sql);
List results = (List)q.execute();
Iterator it = results.iterator();
Util.mark("perm: start encs, size=" + results.size(), startT);
while (it.hasNext()) {
Object[] row = (Object[])it.next();
String id = (String)row[0];
String submitterId = (String)row[1];
org.json.JSONArray viewUsers = new org.json.JSONArray();
String uid = usernameToId.get(submitterId);
if (uid == null) {
// see issue 939 for example :(
System.out.println("opensearchIndexPermissions(): WARNING invalid username " +
submitterId + " on enc " + id);
continue;
}
encCount++;
if (encCount % 1000 == 0) Util.mark("enc[" + encCount + "]", startT);
// viewUsers.put(uid); // we no longer do this as we use submitterUserId from regular indexing in query filter
if (collab.containsKey(uid)) {
for (String colUsername : collab.get(uid)) {
String colId = usernameToId.get(colUsername);
if (colId == null) {
System.out.println(
"opensearchIndexPermissions(): WARNING invalid username " +
colUsername + " in collaboration with userId=" + uid);
continue;
}
viewUsers.put(colId);
}
}
if (viewUsers.length() > 0) {
updateData.put("viewUsers", viewUsers);
try {
os.indexUpdate("encounter", id, updateData);
} catch (Exception ex) {
// keeping this quiet cuz it can get noise while index builds
// System.out.println("opensearchIndexPermissions(): WARNING failed to update viewUsers on enc " + enc.getId() + "; likely has not been indexed yet: " + ex);
}
}
}
q.closeAll();
Util.mark("perm: done encs", startT);
myShepherd.rollbackAndClose();
System.out.println("opensearchIndexPermissions(): ...end [" + encCount + " encs; " +
Math.round((System.currentTimeMillis() - startT) / 1000) + "sec]");
}

public static org.json.JSONObject opensearchQuery(final org.json.JSONObject query, int numFrom,
int pageSize, String sort, String sortOrder)
throws IOException {
Expand Down Expand Up @@ -3896,6 +4049,7 @@ public void opensearchDocumentSerializer(JsonGenerator jgen, Shepherd myShepherd
jgen.writeStringField("state", this.getState());
jgen.writeStringField("occurrenceRemarks", this.getOccurrenceRemarks());
jgen.writeStringField("otherCatalogNumbers", this.getOtherCatalogNumbers());
jgen.writeBooleanField("publiclyReadable", this.isPubliclyReadable());

String featuredAssetId = null;
List<MediaAsset> mas = this.getMedia();
Expand All @@ -3906,8 +4060,11 @@ public void opensearchDocumentSerializer(JsonGenerator jgen, Shepherd myShepherd
jgen.writeStartObject();
jgen.writeNumberField("id", ma.getId());
jgen.writeStringField("uuid", ma.getUUID());
java.net.URL url = ma.safeURL(myShepherd);
if (url != null) jgen.writeStringField("url", url.toString());
try {
// historic data might throw IllegalArgumentException: Path not under given root
java.net.URL url = ma.safeURL(myShepherd);
if (url != null) jgen.writeStringField("url", url.toString());
} catch (Exception ex) {}
jgen.writeEndObject();
if (featuredAssetId == null) featuredAssetId = ma.getUUID();
}
Expand All @@ -3917,6 +4074,8 @@ public void opensearchDocumentSerializer(JsonGenerator jgen, Shepherd myShepherd
jgen.writeNullField("assignedUsername");
} else {
jgen.writeStringField("assignedUsername", this.submitterID);
User submitter = this.getSubmitterUser(myShepherd);
if (submitter != null) jgen.writeStringField("submitterUserId", submitter.getId());
}
jgen.writeArrayFieldStart("submitters");
for (String id : this.getAllSubmitterIds(myShepherd)) {
Expand Down Expand Up @@ -4027,7 +4186,8 @@ public void opensearchDocumentSerializer(JsonGenerator jgen, Shepherd myShepherd

Double dlat = this.getDecimalLatitudeAsDouble();
Double dlon = this.getDecimalLongitudeAsDouble();
if ((dlat == null) || (dlon == null)) {
if ((dlat == null) || !Util.isValidDecimalLatitude(dlat) || (dlon == null) ||
!Util.isValidDecimalLongitude(dlon)) {
jgen.writeNullField("locationGeoPoint");
} else {
jgen.writeObjectFieldStart("locationGeoPoint");
Expand Down Expand Up @@ -4127,6 +4287,19 @@ public void opensearchDocumentSerializer(JsonGenerator jgen, Shepherd myShepherd
jgen.writeNumberField(type, bmeas.get(type).getValue());
}
jgen.writeEndObject();
// this gets set on specific single-encounter-only actions, when extra expense is okay
// otherwise this will be computed by permissions backgrounding
if (this.getOpensearchProcessPermissions()) {
System.out.println("opensearchProcessPermissions=true for " + this.getId() +
"; indexing permissions");
jgen.writeFieldName("viewUsers");
jgen.writeStartArray();
for (String id : this.userIdsWithViewAccess(myShepherd)) {
System.out.println("opensearch whhhh: " + id);
jgen.writeString(id);
}
jgen.writeEndArray();
}
}

@Override public long getVersion() {
Expand Down Expand Up @@ -4155,6 +4328,7 @@ public org.json.JSONObject opensearchMapping() {
map.put("taxonomy", keywordType);
map.put("occurrenceId", keywordType);
map.put("state", keywordType);
map.put("submitterUserId", keywordType);

// all case-insensitive keyword-ish types
map.put("locationId", keywordNormalType);
Expand Down Expand Up @@ -4214,7 +4388,13 @@ public static int[] opensearchSyncIndex(Shepherd myShepherd, int stopAfter)
int ct = 0;
for (String id : needIndexing) {
Encounter enc = myShepherd.getEncounter(id);
if (enc != null) os.index(indexName, enc);
try {
if (enc != null) os.index(indexName, enc);
} catch (Exception ex) {
System.out.println("Encounter.opensearchSyncIndex(): index failed " + enc + " => " +
ex.toString());
ex.printStackTrace();
}
if (ct % 500 == 0)
System.out.println("Encounter.opensearchSyncIndex needIndexing: " + ct + "/" +
rtn[0]);
Expand Down Expand Up @@ -4504,6 +4684,7 @@ public void sendCreationEmails(Shepherd myShepherd, String langCode) {
public void opensearchIndexDeep()
throws IOException {
final String encId = this.getId();
final Encounter origEnc = this;
ExecutorService executor = Executors.newFixedThreadPool(4);
Runnable rn = new Runnable() {
public void run() {
Expand All @@ -4513,6 +4694,8 @@ public void run() {
try {
Encounter enc = bgShepherd.getEncounter(encId);
if (enc == null) {
// we use origEnc if we can (especially necessary on initial creation of Encounter)
if (origEnc != null) origEnc.opensearchIndex();
bgShepherd.rollbackAndClose();
executor.shutdown();
return;
Expand Down
Loading
Loading