Skip to content

Commit

Permalink
Fixed regex scanner issues in GraphUpdater service
Browse files Browse the repository at this point in the history
  • Loading branch information
SiddharthaHaldar committed Sep 26, 2024
1 parent dd45b12 commit c5aca98
Show file tree
Hide file tree
Showing 8 changed files with 149 additions and 5,068 deletions.
76 changes: 76 additions & 0 deletions architecture.d2
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Architecture diagram: declares the components inside the `k8s` container,
# two external Git repositories and the client browser, followed by the
# labelled connections between them.
Direction: left
# Container for the in-cluster components; everything declared in here is
# referenced further down with the `k8s.` prefix.
k8s{
# NATS container holding four queue-shaped nodes.
# NOTE(review): the backslash in `EndpointScanner\.xxx` presumably keeps the dot
# as part of the node key instead of acting as a nesting separator — confirm
# against the D2 key syntax.
NATS : NATS {
EventsUpdate : {
shape : queue
}
EndpointScanner\.githubEndpoints : {
shape : queue
}
EndpointScanner\.webEndpoints : {
shape : queue
}
EndpointScanner\.containerEndpoints : {
shape : queue
}
}
# CronJob, GraphUpdater and UI are rendered as images using the given icon URLs.
CronJob : {
shape : image
icon : https://icons.terrastruct.com/essentials%2F033-repeat.svg
}
GraphUpdater : Graph Updater {
shape : image
icon : https://icons.terrastruct.com/azure%2FManagement%20and%20Governance%20Service%20Color%2FResource%20Graph%20Explorer.svg
}
# API and ArangoDB are declared with empty bodies (no shape or icon is set).
API : {

}
ArangoDB : {

}
# Three scanner nodes; each one is connected below to exactly one
# EndpointScanner queue and to the API.
scanner1 : {}
scanner2 : {}
scanner3 : {}
UI : UI Server {
shape : image
icon : https://icons.terrastruct.com/dev%2Freact.svg
}
}
# Nodes declared outside the k8s container.
PhacDnsRepo : PHAC DNS Repo {
shape : image
icon : https://icons.terrastruct.com/dev%2Fgithub.svg
}
ServiceRepo: Service Git Repo{
shape : image
icon : https://icons.terrastruct.com/dev%2Fgithub.svg
}
Client Browser {
shape : image
icon : https://icons.terrastruct.com/tech%2Fbrowser-2.svg
# near : bottom-left
}
# CronJob connections: an arrow from PhacDnsRepo ("scan periodically") and an
# arrow to the EventsUpdate queue ("pub").
k8s.CronJob <- PhacDnsRepo : scan periodically
k8s.CronJob -> k8s.NATS.EventsUpdate : pub
# GraphUpdater connections: ServiceRepo, the EventsUpdate queue ("sub"), the API
# ("update") and the three EndpointScanner queues ("pub"). The "sub" arrow is
# drawn from GraphUpdater towards the queue; the reversed variant is kept
# commented out below.
k8s.GraphUpdater <- ServiceRepo : 1. extract referencing endpoints
k8s.GraphUpdater -> k8s.NATS.EventsUpdate : sub
# k8s.GraphUpdater <- k8s.NATS.EventsUpdate : sub
k8s.GraphUpdater -> k8s.API : update
k8s.GraphUpdater -> k8s.NATS.EndpointScanner\.githubEndpoints : pub
k8s.GraphUpdater -> k8s.NATS.EndpointScanner\.webEndpoints : pub
k8s.GraphUpdater -> k8s.NATS.EndpointScanner\.containerEndpoints : pub
# Scanner connections: one "sub" arrow per scanner to its queue (again drawn
# from the scanner towards the queue; reversed variants are commented out).
k8s.scanner1 -> k8s.NATS.EndpointScanner\.githubEndpoints : sub
k8s.scanner2 -> k8s.NATS.EndpointScanner\.webEndpoints : sub
k8s.scanner3 -> k8s.NATS.EndpointScanner\.containerEndpoints : sub
# k8s.scanner1 <- k8s.NATS.EndpointScanner\.githubEndpoints : sub
# k8s.scanner2 <- k8s.NATS.EndpointScanner\.webEndpoints : sub
# k8s.scanner3 <- k8s.NATS.EndpointScanner\.containerEndpoints : sub
# Each scanner also has an "update" arrow to the API.
k8s.scanner1 -> k8s.API : update
k8s.scanner2 -> k8s.API : update
k8s.scanner3 -> k8s.API : update
# k8s.Scanners -> k8s.API : update
# API and ArangoDB are linked by two separate arrows, labelled Read and Write.
k8s.API <- k8s.ArangoDB : Read
k8s.API -> k8s.ArangoDB : Write

# The UI has a "Read" arrow coming from the API.
k8s.UI <- k8s.API : Read

# Unlabelled arrow from the client browser to the UI server.
Client Browser -> k8s.UI
109 changes: 70 additions & 39 deletions graph-updater/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ process.on('SIGINT', () => process.exit(0))
try{
let endpointEventPayload = await jc.decode(message.data)

console.log('\n**************************************************************')
console.log(`\n****************************** ${new Date()} ********************************`)
console.log(`Recieved from ... ${message.subject}:\n ${JSON.stringify(endpointEventPayload)}`)

let githubEndpoints = [], webEndpoints = [], containerEndpoints = [];
let githubEndpoints = new Set(), webEndpoints = new Set(), containerEndpoints = new Set();

if(endpointEventPayload.endpoint.payloadType &&
endpointEventPayload.endpoint.payloadType == 'service'){
Expand Down Expand Up @@ -81,35 +81,35 @@ process.on('SIGINT', () => process.exit(0))

// Append the web URL for the service
// to the webEndpoints list, such that it can be scanned.
webEndpoints.push(payload.webEndpoint);
webEndpoints.add(payload.webEndpoint);
}

// Every endpoint handler knows how to get its own graph metadata
// from its endpoint.
if(endpointEventPayload.endpoint !== "None"){


const endpointKind = getEndpointKind(endpointEventPayload.endpoint)[0];
// Each kind of endpoint knows how to get its own metadata (i.e. what endpoints are
// related to this endpoint?); polymorphic method getGraphMetadata knows how to
// parse the endpointEventPayload object to extract metadata about related endpoints.
console.log('endpointKind', endpointKind)

// If the kind is github then add the git repo URL to the set for scanning
if(endpointKind == 'github'){
githubEndpoints.add(endpointEventPayload.endpoint);
}

const endpointHandler = getEndpoint(endpointKind);
let newEndpointsWithKind = await endpointHandler.getGraphMetaData(endpointEventPayload);

newEndpointsWithKind = Array.from(newEndpointsWithKind);

//console.log(newEndpointsWithKind);

//Extract only URLS
const newEndpoints = Array.from(newEndpointsWithKind).map((endpoint,idx)=> {

//let jsonString = endpoint.replace('url', '"url"').replace('kind', '"kind"');
//console.log(jsonString,idx);
//const obj = JSON.parse(jsonString);
return endpoint.url;
});

//JSON Stringify each entry in newEndpointsWithKind
//Stringify each entry in newEndpointsWithKind
newEndpointsWithKind = newEndpointsWithKind.map(endpoint => {
return `{url : "${endpoint.url}", kind : "${endpoint.kind}"}`;

Expand All @@ -119,9 +119,6 @@ process.on('SIGINT', () => process.exit(0))
const newEndpointsString = `["${Array.from(newEndpoints).join('", "')}"]`;
const newEndpointsWithKindString= `[${Array.from(newEndpointsWithKind).join(',')}]`

//console.log(newEndpointsString);
//console.log(newEndpointsWithKindString);

// Mutation to add a graph for the new endpoints
const mutation = gql`
mutation {
Expand All @@ -140,40 +137,72 @@ process.on('SIGINT', () => process.exit(0))
// Now that we've written the new graph to the database, we need to query
// the same subgraph since there may be existing nodes in the database
// that require a new scan.
// const query = gql`
// query {
// endpoints(urls: ${newEndpointsString}) {
// url
// }
// }
// `;

// const queryResponse = await graphqlClient.request(query);

// Based on the current approach, the regex matcher finds matching URLs, OLD or NEW.
// This removes the need to fire a query to find existing URLs, because the scanner is going
// to pick up EVERYTHING. If the regex matcher does not pick up on an OLD URL, that means
// that URL is no longer in use and should not be scanned.
const query = gql`
query {
endpoints(urls: ${newEndpointsString}) {
url
}
}
`;

const queryResponse = await graphqlClient.request(query);

const endpointDispatch = {
githubEndpoint: githubEndpoints,
webEndpoint: webEndpoints,
containerEndpoint: containerEndpoints,
}


newEndpointsWithKind.forEach(endpoint => {
const ep = JSON.parse(endpoint.replace('url','"url"').replace('kind','"kind"'));
const kind = ep.kind;
if(kind == 'Github'){
endpointDispatch['githubEndpoint'].add(ep.url);
}
else if(kind == 'Docker'){
endpointDispatch['containerEndpoint'].add(ep.url);
}
else if(kind == 'Database'){
// TODO
}
else if(kind == 'EmailService'){
// TODO
}
else if(kind == 'MessageBroker'){
// TODO
}
else if(kind == 'CloudStorage'){
// TODO
}
});

// // TODO :also graph relation updater needs to know how to figure out what kind of
// // url each endpoint is (e.g. github endpoint, web endpoint, etc.)
for (let i = 0; i < queryResponse.endpoints.length; i++) {
const endpointKinds = getEndpointKind(queryResponse.endpoints[i]["url"]);
for (let j = 0; j < endpointKinds.length; j++) {
if(endpointDispatch[endpointKinds[j]]){
endpointDispatch[endpointKinds[j]].push(queryResponse.endpoints[i]["url"]);
}
}
}
// for (let i = 0; i < queryResponse.endpoints.length; i++) {
// //const endpointKinds = getEndpointKind(queryResponse.endpoints[i]["url"]);
// for (let j = 0; j < endpointKinds.length; j++) {
// if(endpointDispatch[endpointKinds[j]]){
// endpointDispatch[endpointKinds[j]].push(queryResponse.endpoints[i]["url"]);
// }
// }
// }

console.log(endpointDispatch);

// Queue up new endpoints to be analyzed by the appropriate scanners
// await publishToNats(nc, jc, CONTAINER_ENDPOINT_QUEUE, endpointDispatch["containerEndpoint"]);
// console.log("published container endpoint events");
// await publishToNats(nc, jc, WEB_ENDPOINT_QUEUE, endpointDispatch["webEndpoint"]);
// console.log("published web endpoint events");
// await publishToNats(nc, jc, GITHUB_ENDPOINT_QUEUE, endpointDispatch["githubEndpoint"])
// console.log("published github endpoint event");
//Queue up new endpoints to be analyzed by the appropriate scanners
await publishToNats(nc, jc, CONTAINER_ENDPOINT_QUEUE, Array.from(endpointDispatch["containerEndpoint"]));
console.log("published container endpoint events");
await publishToNats(nc, jc, WEB_ENDPOINT_QUEUE, Array.from(endpointDispatch["webEndpoint"]));
console.log("published web endpoint events");
await publishToNats(nc, jc, GITHUB_ENDPOINT_QUEUE, Array.from(endpointDispatch["githubEndpoint"]));
console.log("published github endpoint event");

githubEndpoints.clear()

// TODO: anything under event collectors should not include any extra metadata beyond
// the URL itself, because any given event endpoint won't necessarily include info about
Expand All @@ -183,6 +212,8 @@ process.on('SIGINT', () => process.exit(0))
catch(error){
console.log(error.message);
}
finally {
}
}
})();

Expand Down
2 changes: 1 addition & 1 deletion graph-updater/src/endpoint.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ export function getEndpointKind(url) {
if (url.includes("canada.ca") || url.includes("gc.ca")) {
matches.push(webEndpoint);
}
if (url.includes("github.com")) {
if (url.includes("github.com") || url.includes(".git")) {
matches.push(githubEndpoint);
}

Expand Down
4 changes: 0 additions & 4 deletions graph-updater/src/github-endpoint.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,10 @@ export class GithubEndpoint {

const payloadEndpointKind = getEndpointKind(payload.endpoint)[0];
var kind = payloadEndpointKind.split('E')[0];
console.log(kind);
kind = kind[0].toUpperCase() + kind.substring(1);
console.log(extraEndpoints)
var newEndpoints = new Set([
{url : payload.endpoint, kind : kind},
...extraEndpoints.map(endpoint => {return {url : endpoint.url.replace(/\x00/g, ''), kind :endpoint.kind}})
//`{url : "${payload.endpoint}", kind : "${kind}"}`,
// ...extraEndpoints.map(endpoint => `{url : "${endpoint.url}", kind : "${endpoint.kind}"}`),
]);

return newEndpoints;
Expand Down
1 change: 0 additions & 1 deletion graph-updater/src/scan-endpoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ async function findPatternsInRepo(repoPath) {
'**/.txt/**',
],
});
//await fs.writeFileSync('test', JSON.stringify(filePaths, null, 2));

const allResults = [];
for (const filePath of filePaths) {
Expand Down
Loading

0 comments on commit c5aca98

Please sign in to comment.