From 4dca82dab1a174335b2ff325c261eed205ed5146 Mon Sep 17 00:00:00 2001 From: Eric BREHAULT Date: Wed, 14 Feb 2024 14:54:01 +0100 Subject: [PATCH] Ericbrehault/sc 8739/store origin path for all sync connectors (#14) * avoid failure if config file cannot be read * electron-app/package-lock.json use latest sdk * get file path from gdrive items * prefix path with / * get path from dropbox items * OneDrive: get files from subfolders + get path * set path for foldr and sitemap connectors * set path for Confluence items * lint * fix test * set nuclia sdk version explicitly * disable electron build for now (Forge makes an error) --- .github/workflows/build.yml | 8 +- electron-app/package-lock.json | 38 ++++++--- electron-app/package.json | 2 +- server/package-lock.json | 8 +- server/package.json | 2 +- .../connectors/confluence.connector.ts | 2 +- .../connectors/dropbox.connector.ts | 8 +- .../connectors/folder.connector.ts | 4 +- .../connectors/gdrive.connector.ts | 80 +++++++++++++++++-- .../connectors/onedrive.connector.ts | 58 ++++++++++---- .../connectors/sitemap.connector.ts | 1 + .../connectors/tests/gdrive.connector.spec.js | 1 + server/src/logic/sync/domain/nuclia-cloud.ts | 7 ++ .../file-system.sync.datasource.ts | 7 +- 14 files changed, 179 insertions(+), 47 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 795a480..321047c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -51,7 +51,7 @@ jobs: run: npm run compile working-directory: server - - name: build - env: - CI: false - run: npm run make \ No newline at end of file + # - name: build + # env: + # CI: false + # run: npm run make \ No newline at end of file diff --git a/electron-app/package-lock.json b/electron-app/package-lock.json index 71a8ba5..01c3d69 100644 --- a/electron-app/package-lock.json +++ b/electron-app/package-lock.json @@ -9,7 +9,7 @@ "version": "0.0.1", "license": "MIT", "dependencies": { - "@nuclia/core": "1.9.0", + "@nuclia/core": "^1.11.6", "cheerio": "^1.0.0-rc.12", "commander": "^11.1.0", "compression": "^1.7.4", @@ -1084,9 +1084,9 @@ } }, "node_modules/@nuclia/core": { - "version": "1.9.0", - "resolved": "https://registry.npmjs.org/@nuclia/core/-/core-1.9.0.tgz", - "integrity": "sha512-UwyotxeyuzhMgNgcfwoPW78vWZHef3KCMNupOE5in3mkMlmZp5dj0pgnOWxvkvq6dLjdmcVpmAqpbzoi/+Yl5g==", + "version": "1.11.6", + "resolved": "https://registry.npmjs.org/@nuclia/core/-/core-1.11.6.tgz", + "integrity": "sha512-BJxPU2nEeU1U5Ypp/+mWaek1zGY6OryD4JOf7ICQX9U+Yeok0jrui2+iUlYFKRalBrVcwXajokX4pfXxoeKrUQ==", "peerDependencies": { "rxjs": "^7.8.0" } @@ -4560,11 +4560,18 @@ "node": ">=10.13.0" } }, - "node_modules/ip": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ip/-/ip-2.0.0.tgz", - "integrity": "sha512-WKa+XuLG1A1R0UWhl2+1XQSi+fZWMsYKffMZTTYsiZaUD8k2yDAj5atimTUD2TZkyCkNEeYE5NhFZmupOGtjYQ==", - "dev": true + "node_modules/ip-address": { + "version": "9.0.5", + "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-9.0.5.tgz", + "integrity": "sha512-zHtQzGojZXTwZTHQqra+ETKd4Sn3vgi7uBmlPoXVWZqYvuKmtI0l/VZTjqGmJY9x88GGOaZ9+G9ES8hC4T4X8g==", + "dev": true, + "dependencies": { + "jsbn": "1.1.0", + "sprintf-js": "^1.1.3" + }, + "engines": { + "node": ">= 12" + } }, "node_modules/ipaddr.js": { "version": "1.9.1", @@ -4767,6 +4774,12 @@ "js-yaml": "bin/js-yaml.js" } }, + "node_modules/jsbn": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-1.1.0.tgz", + "integrity": "sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A==", + "dev": true + }, "node_modules/json-buffer": { "version": "3.0.1", "dev": true, @@ -6682,11 +6695,11 @@ "integrity": "sha512-7maUZy1N7uo6+WVEX6psASxtNlKaNVMlGQKkG/63nEDdLOWNbiUMoLK7X4uYoLhQstau72mLgfEWcXcwsaHbYQ==", "dev": true, "dependencies": { - "ip": "^2.0.0", + "ip-address": "^9.0.5", "smart-buffer": "^4.2.0" }, "engines": { - "node": ">= 10.13.0", + "node": ">= 10.0.0", "npm": ">= 3.0.0" } }, @@ -6758,8 +6771,7 @@ "node_modules/sprintf-js": { "version": "1.1.3", "dev": true, - "license": "BSD-3-Clause", - "optional": true + "license": "BSD-3-Clause" }, "node_modules/ssri": { "version": "9.0.1", diff --git a/electron-app/package.json b/electron-app/package.json index 4b6b55e..e7f4adc 100644 --- a/electron-app/package.json +++ b/electron-app/package.json @@ -22,7 +22,7 @@ }, "license": "MIT", "dependencies": { - "@nuclia/core": "1.9.0", + "@nuclia/core": "^1.11.6", "cheerio": "^1.0.0-rc.12", "commander": "^11.1.0", "compression": "^1.7.4", diff --git a/server/package-lock.json b/server/package-lock.json index d2804c0..958aa0f 100644 --- a/server/package-lock.json +++ b/server/package-lock.json @@ -9,7 +9,7 @@ "version": "0.0.1", "license": "MIT", "dependencies": { - "@nuclia/core": "1.9.1", + "@nuclia/core": "^1.11.6", "cheerio": "^1.0.0-rc.12", "commander": "^11.1.0", "compression": "^1.7.4", @@ -561,9 +561,9 @@ } }, "node_modules/@nuclia/core": { - "version": "1.9.1", - "resolved": "https://registry.npmjs.org/@nuclia/core/-/core-1.9.1.tgz", - "integrity": "sha512-mZOtoDmzXf6W6f+fwhFrYKq9Gmflxyr188tqh+W31AhFQXatUP4UftpH0977pH2hAQmiNjG94bXVCdtH5mkO5g==", + "version": "1.11.6", + "resolved": "https://registry.npmjs.org/@nuclia/core/-/core-1.11.6.tgz", + "integrity": "sha512-BJxPU2nEeU1U5Ypp/+mWaek1zGY6OryD4JOf7ICQX9U+Yeok0jrui2+iUlYFKRalBrVcwXajokX4pfXxoeKrUQ==", "peerDependencies": { "rxjs": "^7.8.0" } diff --git a/server/package.json b/server/package.json index b95aacb..c9ff8cc 100644 --- a/server/package.json +++ b/server/package.json @@ -22,7 +22,7 @@ "start-server": "npm run compile && node build/start-file-system-server.js" }, "dependencies": { - "@nuclia/core": "1.9.1", + "@nuclia/core": "^1.11.6", "cheerio": "^1.0.0-rc.12", "commander": "^11.1.0", "compression": "^1.7.4", diff --git a/server/src/logic/connector/infrastructure/connectors/confluence.connector.ts b/server/src/logic/connector/infrastructure/connectors/confluence.connector.ts index edd92c9..7575703 100644 --- a/server/src/logic/connector/infrastructure/connectors/confluence.connector.ts +++ b/server/src/logic/connector/infrastructure/connectors/confluence.connector.ts @@ -163,7 +163,7 @@ export class ConfluenceImpl implements IConnector { return { title: (isFolder ? raw.name : raw.title) || '', originalId: (isFolder ? raw.key : itemOriginalId) || '', - metadata: { type: raw.type || '' }, + metadata: { type: raw.type || '', path: raw._links?.webui || '' }, status: FileStatus.PENDING, uuid: `${raw.id}` || '', isFolder: false, diff --git a/server/src/logic/connector/infrastructure/connectors/dropbox.connector.ts b/server/src/logic/connector/infrastructure/connectors/dropbox.connector.ts index cf60015..8d22f1e 100644 --- a/server/src/logic/connector/infrastructure/connectors/dropbox.connector.ts +++ b/server/src/logic/connector/infrastructure/connectors/dropbox.connector.ts @@ -189,7 +189,9 @@ export class DropboxImpl extends OAuthBaseConnector implements IConnector { return { title: raw.name || '', originalId: (isFolder ? raw.path_lower : raw.id) || '', - metadata: {}, + metadata: { + path: raw.path_lower.split('/').slice(0, -1).join('/'), + }, status: FileStatus.PENDING, uuid: (isFolder ? raw.path_lower : raw.id) || '', modifiedGMT: raw.client_modified, @@ -202,7 +204,9 @@ export class DropboxImpl extends OAuthBaseConnector implements IConnector { return { title: raw.metadata?.metadata?.['name'] || '', originalId: raw.metadata?.metadata?.['id'] || '', - metadata: {}, + metadata: { + path: raw.path_lower.split('/').slice(0, -1).join('/'), + }, status: FileStatus.PENDING, uuid: raw.metadata?.metadata?.['uuid'] || '', isFolder: raw.match_type?.['.tag'] === 'folder', diff --git a/server/src/logic/connector/infrastructure/connectors/folder.connector.ts b/server/src/logic/connector/infrastructure/connectors/folder.connector.ts index dacad5d..a952a05 100644 --- a/server/src/logic/connector/infrastructure/connectors/folder.connector.ts +++ b/server/src/logic/connector/infrastructure/connectors/folder.connector.ts @@ -145,7 +145,9 @@ class FolderImpl implements IConnector { return files.map((file) => ({ title: file.split('/').pop() || '', originalId: file, - metadata: {}, + metadata: { + path: file.split('/').slice(0, -1).join('/'), + }, status: FileStatus.PENDING, uid: '', })); diff --git a/server/src/logic/connector/infrastructure/connectors/gdrive.connector.ts b/server/src/logic/connector/infrastructure/connectors/gdrive.connector.ts index c7e2f15..df347a1 100644 --- a/server/src/logic/connector/infrastructure/connectors/gdrive.connector.ts +++ b/server/src/logic/connector/infrastructure/connectors/gdrive.connector.ts @@ -1,4 +1,4 @@ -import { catchError, concatMap, forkJoin, from, map, Observable, of } from 'rxjs'; +import { Observable, catchError, concatMap, forkJoin, from, map, of, switchMap } from 'rxjs'; import { ConnectorParameters, FileStatus, IConnector, Link, SearchResults, SyncItem } from '../../domain/connector'; import { SourceConnectorDefinition } from '../factory'; @@ -48,7 +48,7 @@ export class GDriveImpl extends OAuthBaseConnector implements IConnector { }); } try { - return forkJoin((folders || []).map((folder) => this._getItems('', folder.uuid))).pipe( + return forkJoin((folders || []).map((folder) => this._getFileItems('', folder.uuid))).pipe( map((results) => { const items = results.reduce( (acc, result) => acc.concat(result.items.filter((item) => item.modifiedGMT && item.modifiedGMT > since)), @@ -73,7 +73,7 @@ export class GDriveImpl extends OAuthBaseConnector implements IConnector { }); } try { - return forkJoin((folders || []).map((folder) => this._getItems('', folder.uuid))).pipe( + return forkJoin((folders || []).map((folder) => this._getFileItems('', folder.uuid))).pipe( map((results) => { const result: { items: SyncItem[] } = { items: [], @@ -96,7 +96,7 @@ export class GDriveImpl extends OAuthBaseConnector implements IConnector { } getFiles(query?: string): Observable { - return this._getItems(query); + return this._getFileItems(query); } isAccessTokenValid(): Observable { @@ -125,6 +125,77 @@ export class GDriveImpl extends OAuthBaseConnector implements IConnector { ); } + private getSubFolders(folders: SearchResults, folderId: string): string[] { + const getChildren = (folderId: string) => { + return folders.items.filter((item) => item.parents?.includes(folderId)).map((item) => item.originalId); + }; + const children = getChildren(folderId); + return children.reduce((acc, child) => [...acc, ...getChildren(child)], children); + } + + private _getFileItems(query = '', folder = ''): Observable { + return this.getFolders().pipe( + switchMap((folders) => { + if (folder) { + const allTargetedFolders = [folder, ...this.getSubFolders(folders, folder)]; + return forkJoin(allTargetedFolders.map((folder) => this._getItems(query, folder, false))).pipe( + map((results) => { + const items = results.reduce((acc, result) => acc.concat(result.items), [] as SyncItem[]); + return { + files: { + items, + }, + folders, + }; + }), + ); + } else { + return this._getItems(query, '', false).pipe(map((results) => ({ files: results, folders }))); + } + }), + map(({ files, folders }) => { + const getFolder = (folderId: string) => { + return folders.items.find((folder) => folder.originalId === folderId); + }; + const parents = folders.items.reduce( + (acc, folder) => { + if (folder.parents) { + acc[folder.originalId] = folder.parents[0]; + } + return acc; + }, + {} as { [key: string]: string }, + ); + const getFolderPath = (folderId: string | undefined) => { + if (!folderId) { + return []; + } + let path: string[] = []; + let currentFolder = getFolder(folderId); + while (currentFolder) { + path = [currentFolder.title, ...path]; + if (!parents[currentFolder.originalId]) { + break; + } + currentFolder = getFolder(parents[currentFolder.originalId]); + } + return path; + }; + const itemsWithPath = files.items.map((item) => ({ + ...item, + metadata: { + ...item.metadata, + path: getFolderPath(item.parents?.[0]).join('/'), + }, + })); + return { + ...files, + items: itemsWithPath, + }; + }), + ); + } + // Script create the tree https://gist.github.com/tanaikech/97b336f04c739ae0181a606eab3dff42 private _getItems( query = '', @@ -148,7 +219,6 @@ export class GDriveImpl extends OAuthBaseConnector implements IConnector { if (nextPage) { path += `&pageToken=${nextPage}`; } - return from( fetch(path, { headers: { diff --git a/server/src/logic/connector/infrastructure/connectors/onedrive.connector.ts b/server/src/logic/connector/infrastructure/connectors/onedrive.connector.ts index 5cae731..040f9af 100644 --- a/server/src/logic/connector/infrastructure/connectors/onedrive.connector.ts +++ b/server/src/logic/connector/infrastructure/connectors/onedrive.connector.ts @@ -124,13 +124,28 @@ export class OneDriveImpl extends OAuthBaseConnector implements IConnector { ); } - private _getItems( + private _getItems(query = '', folder = '', foldersOnly = false, previous?: SearchResults): Observable { + return this._getOneDriveItems(query, folder, foldersOnly, undefined, previous).pipe( + map((res) => { + const items = (res.value || []) + // eslint-disable-next-line @typescript-eslint/no-explicit-any + .filter((item: any) => foldersOnly || !!item.file) + // eslint-disable-next-line @typescript-eslint/no-explicit-any + .map((item: any) => (foldersOnly ? this.mapToSyncItemFolder(item) : this.mapToSyncItem(item))); + return { items }; + }), + ); + } + + private _getOneDriveItems( query = '', folder = '', foldersOnly = false, nextPage?: string, - previous?: SearchResults, - ): Observable { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + previous?: any, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + ): Observable { let path = `https://graph.microsoft.com/v1.0/me/drive/${folder ? `items/${folder}` : 'root'}`; if (query) { path += `/search(q='${query}')`; @@ -167,16 +182,27 @@ export class OneDriveImpl extends OAuthBaseConnector implements IConnector { res['@odata.nextLink'] && res['@odata.nextLink'].includes('&$skiptoken=') ? res?.['@odata.nextLink'].split('&$skiptoken=')[1].split('&')[0] : undefined; - const items = (res.value || []) - // eslint-disable-next-line @typescript-eslint/no-explicit-any - .filter((item: any) => foldersOnly || !!item.file) - // eslint-disable-next-line @typescript-eslint/no-explicit-any - .map((item: any) => (foldersOnly ? this.mapToSyncItemFolder(item) : this.mapToSyncItem(item))); - const results = { - items: [...(previous?.items || []), ...items], - nextPage, - }; - return nextPage ? this._getItems(query, folder, foldersOnly, nextPage, results) : of(results); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const folders: string[] = (res.value || []).filter((item: any) => !!item.folder).map((item: any) => item.id); + const results = { ...res, value: [...(previous?.value || []), ...(res?.value || [])] }; + const currentFolderResults = nextPage + ? this._getOneDriveItems(query, folder, foldersOnly, nextPage, results) + : of(results); + if (folders.length === 0) { + return currentFolderResults; + } else { + return forkJoin([ + currentFolderResults, + ...folders.map((subfolder) => this._getOneDriveItems(query, subfolder, foldersOnly)), + ]).pipe( + map((subresults) => + subresults.reduce( + (acc, subresult) => ({ ...acc, value: [...(acc.value || []), ...(subresult.value || [])] }), + {}, + ), + ), + ); + } } }), ); @@ -189,7 +215,11 @@ export class OneDriveImpl extends OAuthBaseConnector implements IConnector { title: item.name, originalId: item.id, modifiedGMT: item.lastModifiedDateTime, - metadata: { mimeType: item.file.mimeType, downloadLink: item['@microsoft.graph.downloadUrl'] }, + metadata: { + mimeType: item.file.mimeType, + downloadLink: item['@microsoft.graph.downloadUrl'], + path: item.parentReference.path, + }, status: FileStatus.PENDING, }; } diff --git a/server/src/logic/connector/infrastructure/connectors/sitemap.connector.ts b/server/src/logic/connector/infrastructure/connectors/sitemap.connector.ts index 698a5a7..a9e6be9 100644 --- a/server/src/logic/connector/infrastructure/connectors/sitemap.connector.ts +++ b/server/src/logic/connector/infrastructure/connectors/sitemap.connector.ts @@ -78,6 +78,7 @@ class SitemapImpl implements IConnector { originalId: parsedUrl.loc, metadata: { uri: parsedUrl.loc, + path: parsedUrl.loc.replace(/https?:\/\//, ''), lastModified: parsedUrl.lastmod, }, })), diff --git a/server/src/logic/connector/infrastructure/connectors/tests/gdrive.connector.spec.js b/server/src/logic/connector/infrastructure/connectors/tests/gdrive.connector.spec.js index d696928..8a37c46 100644 --- a/server/src/logic/connector/infrastructure/connectors/tests/gdrive.connector.spec.js +++ b/server/src/logic/connector/infrastructure/connectors/tests/gdrive.connector.spec.js @@ -148,6 +148,7 @@ describe('Test last modified', () => { metadata: { needsPdfConversion: 'yes', mimeType: 'application/pdf', + path: '', }, status: FileStatus.PENDING, }, diff --git a/server/src/logic/sync/domain/nuclia-cloud.ts b/server/src/logic/sync/domain/nuclia-cloud.ts index 522f5c3..ef40b76 100644 --- a/server/src/logic/sync/domain/nuclia-cloud.ts +++ b/server/src/logic/sync/domain/nuclia-cloud.ts @@ -66,6 +66,13 @@ export class NucliaCloud { if (data.metadata.labels) { resourceData.usermetadata = { classifications: data.metadata?.labels }; } + if (data.metadata.path) { + let path = data.metadata.path; + if (path && !path.startsWith('/')) { + path = `/${path}`; + } + resourceData.origin = { path }; + } return kb.createResource(resourceData, true).pipe( retry(RETRY_CONFIG), map((data) => kb.getResourceFromData({ id: data.uuid })), diff --git a/server/src/logic/sync/infrastructure/file-system.sync.datasource.ts b/server/src/logic/sync/infrastructure/file-system.sync.datasource.ts index 59c812f..ef62ddc 100644 --- a/server/src/logic/sync/infrastructure/file-system.sync.datasource.ts +++ b/server/src/logic/sync/infrastructure/file-system.sync.datasource.ts @@ -39,7 +39,12 @@ export class FileSystemSyncDatasource implements ISyncDatasource { this.allSyncData = {}; } private loadSyncData = async () => { - this.allSyncData = JSON.parse(await readFile(this.basePath)); + const data = await readFile(this.basePath); + try { + this.allSyncData = JSON.parse(data); + } catch (e) { + console.error(e); + } }; private createSyncFile = async () => {