Skip to content

Commit

Permalink
finished typegoose migration I guess
Browse files Browse the repository at this point in the history
  • Loading branch information
andrefs committed Oct 18, 2023
1 parent e73700c commit 04b1250
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 63 deletions.
7 changes: 6 additions & 1 deletion src/common/lib/mongoose-validators.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,9 @@ const urlValidator = {
message: (props: any) => `${props.value} is not a valid URL!`,
};

export { urlValidator };
/**
 * Mongoose-style validator for a list of URLs.
 *
 * `validator` passes only when every element of the array satisfies `isValid`
 * (an empty array passes, since `every` is vacuously true).
 * `message` builds the validation error text from the props object handed to
 * it, reading the offending value from `props.value` like `urlValidator` does.
 */
const urlListValidator = {
  validator: (v: string[]) => v.every(isValid),
  // Interpolate the rejected value, not the props object itself — the latter
  // would stringify as "[object Object]".
  message: (props: any) => `One of ${props.value} is not a valid URL!`,
};

export { urlValidator, urlListValidator };
14 changes: 10 additions & 4 deletions src/manager/lib/ProcessManager.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
import { Process, ProcessClass, Resource } from '@derzis/models';
import {
Process,
ProcessClass,
ProcessDocument,
Resource,
} from '@derzis/models';
import express from 'express';
import { create } from 'express-handlebars';
import path from 'path';
Expand Down Expand Up @@ -239,7 +244,7 @@ app.post('/processes', async (req, res) => {
pathHeads.set(domain, pathHeads.get(domain)! + 1);
}

const p = await Process.create({
const p = {
params: {
maxPathLength: req.body.maxPathLength,
maxPathProps: req.body.maxPathProps,
Expand All @@ -256,9 +261,10 @@ app.post('/processes', async (req, res) => {
},
seeds: uniqueSeeds,
pathHeads,
});
};
const process = await Process.create(p);
await Process.startNext();
res.redirect(303, '/processes/' + p.pid);
res.redirect(303, '/processes/' + process.pid);
});

app.get('/processes/last/triples', async (req, res) => {
Expand Down
29 changes: 15 additions & 14 deletions src/models/Domain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,6 @@ import {
} from '@typegoose/typegoose';
const log = createLogger('Domain');

@index({
status: 1,
'crawl.pathHeads': 1,
'crawl.nextAllowed': -1,
})
@index({
'crawl.nextAllowed': -1,
})
@index({
'robots.status': 1,
})
@index({
jobId: 1,
})
class LastWarningClass {
@prop()
public errType!:
Expand Down Expand Up @@ -92,6 +78,21 @@ class CrawlClass {
@prop()
public nextAllowed?: Date;
}

@index({
status: 1,
'crawl.pathHeads': 1,
'crawl.nextAllowed': -1,
})
@index({
'crawl.nextAllowed': -1,
})
@index({
'robots.status': 1,
})
@index({
jobId: 1,
})
class DomainClass {
@prop({ required: true, index: true, unique: true })
public origin!: string;
Expand Down
53 changes: 29 additions & 24 deletions src/models/Path.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { Types, Document } from 'mongoose';
import { urlValidator } from '@derzis/common';
import { urlListValidator, urlValidator } from '@derzis/common';
import {
prop,
index,
Expand All @@ -17,37 +17,22 @@ import {
TripleDocument,
} from '@derzis/models';

@pre<PathClass>('save', function () {
this.nodes.count = this.nodes.elems.length;
this.predicates.count = this.predicates.elems.length;
if (this.predicates.count) {
this.lastPredicate = this.predicates.elems[this.predicates.count - 1];
}
const origin = new URL(this.head.url).origin;
this.head.domain = origin;
})
@index({ processId: 1 })
@index({
'seed.url': 1,
'head.url': 1,
'predicates.count': 1,
})
@index({
'head.url': 1,
'nodes.count': 1,
})
class ResourceCount {
@prop({ default: 0 })
public count!: number;

@prop({ default: [], validate: urlValidator })
@prop({ default: [], validate: urlListValidator })
public elems!: string[];
}
/**
 * Typegoose class with a single required `url` string, checked with
 * `urlValidator`. Used as the type of `PathClass.seed`.
 * NOTE(review): presumably the URL a path was seeded from — confirm against callers.
 */
class SeedClass {
@prop({ required: true, validate: urlValidator })
public url!: string;
}
class HeadClass {
@prop({ required: true, validate: urlValidator })
public url!: string;

@prop({ required: true })
@prop()
public domain!: string;
}

Expand All @@ -65,6 +50,26 @@ type PathSkeleton = Pick<PathClass, 'processId' | 'seed' | 'head'> &
nodes: Pick<ResourceCount, 'elems'>;
};

@pre<PathClass>('save', function () {
this.nodes.count = this.nodes.elems.length;
this.predicates.count = this.predicates.elems.length;
if (this.predicates.count) {
this.lastPredicate = this.predicates.elems[this.predicates.count - 1];
}

const origin = new URL(this.head.url).origin;
this.head.domain = origin;
})
@index({ processId: 1 })
@index({
'seed.url': 1,
'head.url': 1,
'predicates.count': 1,
})
@index({
'head.url': 1,
'nodes.count': 1,
})
class PathClass {
_id!: Types.ObjectId;
createdAt!: Date;
Expand All @@ -73,8 +78,8 @@ class PathClass {
@prop({ required: true })
public processId!: string;

@prop({ required: true, validate: urlValidator })
public seed!: string;
@prop({ required: true })
public seed!: SeedClass;

@prop({ required: true })
public head!: HeadClass;
Expand Down
38 changes: 20 additions & 18 deletions src/models/Process.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,6 @@ import {
Severity,
} from '@typegoose/typegoose';

@index({ status: 1 })
@index({ createdAt: 1 })
@pre<ProcessClass>('save', async function () {
const today = new Date(new Date().setUTCHours(0, 0, 0, 0));

const count = await Process.countDocuments({
createdAt: { $gt: today },
});
if (!this.pid) {
const date = today.toISOString().split('T')[0] + '-' + count;
const word = humanize(date);
this.pid = `${word}-${date}`;
}
if (!this.notification) {
this.notification = {};
}
this.notification.ssePath = `/processes/${this.pid}/events`;
})
class NotificationClass {
@prop()
public email?: string;
Expand All @@ -57,7 +39,26 @@ class ParamsClass {
public blackList?: string[];
}

@index({ status: 1 })
@index({ createdAt: 1 })
@pre<ProcessClass>('save', async function () {
const today = new Date(new Date().setUTCHours(0, 0, 0, 0));
const count = await Process.countDocuments({
createdAt: { $gt: today },
});
if (!this.pid) {
const date = today.toISOString().split('T')[0] + '-' + count;
const word = humanize(date);
this.pid = `${word}-${date}`;
}
if (!this.notification) {
this.notification = {};
}
const ssePath = `/processes/${this.pid}/events`;
this.notification.ssePath = ssePath;
})
class ProcessClass {
_id!: Types.ObjectId;
createdAt!: Date;
updatedAt!: Date;

Expand Down Expand Up @@ -312,6 +313,7 @@ class ProcessClass {
// TODO configurable number of simultaneous processes
public static async startNext(this: ReturnModelType<typeof ProcessClass>) {
const runningProcs = await this.countDocuments({ status: 'running' });

if (!runningProcs) {
const process = await this.findOneAndUpdate(
{ status: 'queued' },
Expand Down
4 changes: 2 additions & 2 deletions src/models/Resource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ import {
Severity,
} from '@typegoose/typegoose';

@index({ url: 1, status: 1 })
@index({ domain: 1, status: 1 })
class CrawlId {
@prop()
public domainTs!: Date;
Expand All @@ -23,6 +21,8 @@ class CrawlId {
public counter!: number;
}

@index({ url: 1, status: 1 })
@index({ domain: 1, status: 1 })
class ResourceClass {
createdAt!: Date;
updatedAt!: Date;
Expand Down

0 comments on commit 04b1250

Please sign in to comment.