Skip to content

Commit

Permalink
Merge pull request #2749 from the-library-code/DSpace_detect_duplicat…
Browse files Browse the repository at this point in the history
…es_PR

Basic Duplicate Detection
  • Loading branch information
tdonohue authored Mar 4, 2024
2 parents 061129e + e4a91e7 commit f6f3962
Show file tree
Hide file tree
Showing 29 changed files with 968 additions and 52 deletions.
4 changes: 4 additions & 0 deletions config/config.example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ submission:
# NOTE: interval (in milliseconds) after which the submission is saved automatically
# eg. timer: 5 * (1000 * 60); // 5 minutes
timer: 0
# Always show the duplicate detection section if enabled, even if there are no potential duplicates detected
# (a message will be displayed to indicate no matches were found)
duplicateDetection:
alwaysShowSection: false
icons:
metadata:
# NOTE: example of configuration
Expand Down
2 changes: 2 additions & 0 deletions src/app/core/core.module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ import { NotifyRequestsStatus } from '../item-page/simple/notify-requests-status
import { LdnService } from '../admin/admin-ldn-services/ldn-services-model/ldn-services.model';
import { Itemfilter } from '../admin/admin-ldn-services/ldn-services-model/ldn-service-itemfilters';
import { SubmissionCoarNotifyConfig } from '../submission/sections/section-coar-notify/submission-coar-notify.config';
import { SubmissionDuplicateDataService } from './submission/submission-duplicate-data.service';

/**
* When not in production, endpoint responses can be mocked for testing purposes
Expand Down Expand Up @@ -234,6 +235,7 @@ const PROVIDERS = [
HALEndpointService,
HostWindowService,
ItemDataService,
SubmissionDuplicateDataService,
MetadataService,
ObjectCacheService,
PaginationComponentOptions,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/*
* Object model for the data returned by the REST API to present potential duplicates in a submission section
*/
import { Duplicate } from '../../../shared/object-list/duplicate-data/duplicate.model';

/**
 * Shape of the section data carrying potential duplicates for a submission section.
 */
export interface WorkspaceitemSectionDuplicatesObject {
  /** The potential duplicates for this section; the property may be absent */
  potentialDuplicates?: Duplicate[];
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ import { WorkspaceitemSectionFormObject } from './workspaceitem-section-form.mod
import { WorkspaceitemSectionLicenseObject } from './workspaceitem-section-license.model';
import { WorkspaceitemSectionUploadObject } from './workspaceitem-section-upload.model';
import { WorkspaceitemSectionCcLicenseObject } from './workspaceitem-section-cc-license.model';
import {WorkspaceitemSectionIdentifiersObject} from './workspaceitem-section-identifiers.model';
import { WorkspaceitemSectionIdentifiersObject } from './workspaceitem-section-identifiers.model';
import { WorkspaceitemSectionSherpaPoliciesObject } from './workspaceitem-section-sherpa-policies.model';
import { WorkspaceitemSectionDuplicatesObject } from './workspaceitem-section-duplicates.model';

/**
* An interface to represent submission's section object.
Expand All @@ -25,6 +26,7 @@ export type WorkspaceitemSectionDataType
| WorkspaceitemSectionAccessesObject
| WorkspaceitemSectionSherpaPoliciesObject
| WorkspaceitemSectionIdentifiersObject
| WorkspaceitemSectionDuplicatesObject
| string;


30 changes: 30 additions & 0 deletions src/app/core/submission/submission-duplicate-data.service.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import { SubmissionDuplicateDataService } from './submission-duplicate-data.service';
import { FindListOptions } from '../data/find-list-options.model';
import { RequestParam } from '../cache/models/request-param.model';

/**
* Basic tests for the submission-duplicate-data.service.ts service
*/
describe('SubmissionDuplicateDataService', () => {
  // The service is built with null collaborators: searchBy is replaced by a spy before it is ever reached
  const duplicateDataService = new SubmissionDuplicateDataService(null, null, null, null);

  // findDuplicates must delegate to searchBy using the 'findByItem' search method
  describe('findDuplicates', () => {
    beforeEach(() => {
      spyOn(duplicateDataService, 'searchBy');
    });

    it('should call searchBy with the correct arguments', () => {
      // Options that searchBy is expected to receive: a single uuid=test search parameter
      const expectedOptions = Object.assign(new FindListOptions(), {
        searchParams: [new RequestParam('uuid', 'test')],
      });

      // Look up duplicates for uuid 'test', starting from empty find options
      duplicateDataService.findDuplicates('test', new FindListOptions(), true, true);

      // The uuid must have been added to the options handed over to searchBy('findByItem', ...)
      expect(duplicateDataService.searchBy).toHaveBeenCalledWith('findByItem', expectedOptions, true, true);
    });
  });
});
139 changes: 139 additions & 0 deletions src/app/core/submission/submission-duplicate-data.service.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/* eslint-disable max-classes-per-file */
import { Observable } from 'rxjs';
import { Injectable } from '@angular/core';
import { FollowLinkConfig } from '../../shared/utils/follow-link-config.model';
import { ResponseParsingService } from '../data/parsing.service';
import { RemoteData } from '../data/remote-data';
import { GetRequest } from '../data/request.models';
import { RequestService } from '../data/request.service';
import { GenericConstructor } from '../shared/generic-constructor';
import { HALEndpointService } from '../shared/hal-endpoint.service';
import { SearchResponseParsingService } from '../data/search-response-parsing.service';
import { RemoteDataBuildService } from '../cache/builders/remote-data-build.service';
import { RestRequest } from '../data/rest-request.model';
import { BaseDataService } from '../data/base/base-data.service';
import { FindListOptions } from '../data/find-list-options.model';
import { Duplicate } from '../../shared/object-list/duplicate-data/duplicate.model';
import { PaginatedList } from '../data/paginated-list.model';
import { RequestParam } from '../cache/models/request-param.model';
import { ObjectCacheService } from '../cache/object-cache.service';
import { SearchData, SearchDataImpl } from '../data/base/search-data';
import { DUPLICATE } from '../../shared/object-list/duplicate-data/duplicate.resource-type';
import { dataService } from '../data/base/data-service.decorator';


/**
 * Service that handles search requests for potential duplicate items.
 * This uses the /api/submission/duplicates endpoint to look for other archived or in-progress items (if user
 * has READ permission) that match the item (for the given uuid).
 * Matching is configured in the backend in dspace/config/modules/duplicate-detection.cfg
 * The returned results are small preview 'stubs' of items, and displayed in either a submission section
 * or the workflow pooled/claimed task page.
 */
@Injectable()
@dataService(DUPLICATE)
export class SubmissionDuplicateDataService extends BaseDataService<Duplicate> implements SearchData<Duplicate> {

  /**
   * The ResponseParsingService constructor; defaults to SearchResponseParsingService and can be
   * replaced through setServiceOptions
   */
  private parser: GenericConstructor<ResponseParsingService> = SearchResponseParsingService;

  /**
   * The RestRequest constructor; defaults to GetRequest and can be replaced through setServiceOptions
   */
  private request: GenericConstructor<RestRequest> = GetRequest;

  /**
   * SearchData implementation that searchBy delegates to
   * @private
   */
  private searchData: SearchData<Duplicate>;

  /**
   * Subscription to unsubscribe from in ngOnDestroy.
   * Nothing in this class assigns it, so it stays undefined unless that changes.
   */
  private sub: { unsubscribe(): void } | undefined;

  constructor(
    protected requestService: RequestService,
    protected rdbService: RemoteDataBuildService,
    protected objectCache: ObjectCacheService,
    protected halService: HALEndpointService,
  ) {
    super('duplicates', requestService, rdbService, objectCache, halService);
    this.searchData = new SearchDataImpl(this.linkPath, requestService, rdbService, objectCache, halService, this.responseMsToLive);
  }

  /**
   * Implement the searchBy method to return paginated lists of Duplicate resources.
   * All arguments are passed through unchanged to the wrapped SearchData implementation.
   *
   * @param searchMethod                the search method name
   * @param options                     find list options
   * @param useCachedVersionIfAvailable whether to use cached version if available
   * @param reRequestOnStale            whether to rerequest results on stale
   * @param linksToFollow               links to follow in results
   * @returns the observable produced by the wrapped SearchData implementation
   */
  searchBy(searchMethod: string, options?: FindListOptions, useCachedVersionIfAvailable?: boolean, reRequestOnStale?: boolean, ...linksToFollow: FollowLinkConfig<Duplicate>[]): Observable<RemoteData<PaginatedList<Duplicate>>> {
    return this.searchData.searchBy(searchMethod, options, useCachedVersionIfAvailable, reRequestOnStale, ...linksToFollow);
  }

  /**
   * Helper method to get the duplicates endpoint, resolved by the HAL endpoint service
   * from this service's link path
   * @protected
   */
  protected getEndpoint(): Observable<string> {
    return this.halService.getEndpoint(this.linkPath);
  }

  /**
   * Method to set service options. A falsy argument leaves the corresponding current value untouched.
   *
   * @param parser  the ResponseParsingService constructor to use
   * @param request the RestRequest constructor to use
   */
  setServiceOptions(parser: GenericConstructor<ResponseParsingService>, request: GenericConstructor<RestRequest>) {
    if (parser) {
      this.parser = parser;
    }
    if (request) {
      this.request = request;
    }
  }

  /**
   * Find duplicates for a given item UUID. Locates and returns results from the /api/submission/duplicates/search/findByItem
   * SearchRestMethod, which is why this implements SearchData<Duplicate> and searchBy
   *
   * The given options are copied, never mutated; the uuid search parameter is appended after any
   * search parameters the caller already supplied.
   *
   * @param uuid                        the item UUID
   * @param options                     any find list options e.g. paging
   * @param useCachedVersionIfAvailable whether to use cached version if available
   * @param reRequestOnStale            whether to rerequest results on stale
   * @param linksToFollow               links to follow in results
   * @returns the result of searchBy('findByItem', ...) for the given uuid
   */
  public findDuplicates(uuid: string, options?: FindListOptions, useCachedVersionIfAvailable = true, reRequestOnStale = true, ...linksToFollow: FollowLinkConfig<Duplicate>[]): Observable<RemoteData<PaginatedList<Duplicate>>> {
    // Object.assign skips an undefined source, so this also covers the "no options given" case
    const findListOptions = Object.assign(new FindListOptions(), options);
    findListOptions.searchParams = [...(findListOptions.searchParams || []), new RequestParam('uuid', uuid)];

    // Return actual search/findByItem results
    return this.searchBy('findByItem', findListOptions, useCachedVersionIfAvailable, reRequestOnStale, ...linksToFollow);
  }

  /**
   * Unsubscribe from the subscription, if one was ever set
   */
  ngOnDestroy(): void {
    if (this.sub !== undefined) {
      this.sub.unsubscribe();
    }
  }
}
5 changes: 4 additions & 1 deletion src/app/shared/mocks/submission.mock.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1114,7 +1114,10 @@ export const mockSubmissionState: SubmissionObjectState = Object.assign({}, {
isLoading: false,
isValid: false,
removePending: false
} as any
} as any,
'duplicates': {
potentialDuplicates: []
} as any,
},
isLoading: false,
savePending: false,
Expand Down
57 changes: 57 additions & 0 deletions src/app/shared/object-list/duplicate-data/duplicate.model.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import {autoserialize, deserialize} from 'cerialize';
import { MetadataMap } from '../../../core/shared/metadata.models';
import { HALLink} from '../../../core/shared/hal-link.model';
import { CacheableObject } from '../../../core/cache/cacheable-object.model';
import { DUPLICATE } from './duplicate.resource-type';
import { ResourceType } from '../../../core/shared/resource-type';

/**
* Model of a duplicate preview stub, to be displayed to submitters or reviewers
* if duplicate detection is enabled. The metadata map is configurable in the backend at duplicate-detection.cfg
*
* Fields marked with cerialize's @autoserialize take part in both serialization and deserialization;
* the field marked with @deserialize is only read when deserializing.
*/
export class Duplicate implements CacheableObject {

/**
* The resource type shared by all Duplicate objects (the DUPLICATE constant)
*/
static type = DUPLICATE;

/**
* The item title
*/
@autoserialize
title: string;
/**
* The item uuid
*/
@autoserialize
uuid: string;
/**
* The workflow item ID, if any
*/
@autoserialize
workflowItemId: number;
/**
* The workspace item ID, if any
*/
@autoserialize
workspaceItemId: number;
/**
* The owning collection of the item
*/
@autoserialize
owningCollection: string;
/**
* Metadata for the preview item (e.g. dc.title)
*/
@autoserialize
metadata: MetadataMap;

/**
* The resource type of this object
*/
@autoserialize
type: ResourceType;

/**
* The {@link HALLink}s for the URL that generated this item (in context of search results)
*/
@deserialize
_links: {
self: HALLink;
};
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import { ResourceType } from 'src/app/core/shared/resource-type';

/**
* The resource type for Duplicate preview stubs, created with the type name 'duplicate'.
* It is used as the static type of the Duplicate model and is the argument given to the
* dataService decorator on SubmissionDuplicateDataService.
*
* Needs to be in a separate file to prevent circular
* dependencies in webpack.
*/
export const DUPLICATE = new ResourceType('duplicate');
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,17 @@
[showSubmitter]="showSubmitter"
[badgeContext]="badgeContext"
[workflowItem]="workflowitem$.value"></ds-themed-item-list-preview>

<!-- Duplicate alert: shows the number of detected duplicates, rendered only when that number is above zero -->
<ng-container *ngVar="(duplicates$ | async)?.length as duplicateCount">
  <div *ngIf="duplicateCount > 0" class="row">
    <div class="col-2"></div>
    <div class="col-10">
      <div class="d-flex alert alert-warning w-100">
        {{ duplicateCount }} {{ 'submission.workflow.tasks.duplicates' | translate }}
      </div>
    </div>
  </div>
</ng-container>
<div class="row">
<div [ngClass]="showThumbnails ? 'offset-3 offset-md-2 pl-3' : ''">
<ds-claimed-task-actions [item]="item$.value"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import { Item } from '../../../../core/shared/item.model';
import { ClaimedSearchResultListElementComponent } from './claimed-search-result-list-element.component';
import { ClaimedTask } from '../../../../core/tasks/models/claimed-task-object.model';
import { WorkflowItem } from '../../../../core/submission/models/workflowitem.model';
import { createSuccessfulRemoteDataObject } from '../../../remote-data.utils';
import { createSuccessfulRemoteDataObject, createSuccessfulRemoteDataObject$ } from '../../../remote-data.utils';
import { ClaimedTaskSearchResult } from '../../../object-collection/shared/claimed-task-search-result.model';
import { TruncatableService } from '../../../truncatable/truncatable.service';
import { VarDirective } from '../../../utils/var.directive';
Expand All @@ -28,13 +28,32 @@ import { APP_CONFIG } from '../../../../../config/app-config.interface';
import { environment } from '../../../../../environments/environment';
import { ObjectCacheService } from '../../../../core/cache/object-cache.service';
import { Context } from '../../../../core/shared/context.model';
import { createPaginatedList } from '../../../testing/utils.test';
import { SubmissionDuplicateDataService } from '../../../../core/submission/submission-duplicate-data.service';
import { ConfigurationProperty } from '../../../../core/shared/configuration-property.model';
import { ConfigurationDataService } from '../../../../core/data/configuration-data.service';

// Component under test and its test fixture
let component: ClaimedSearchResultListElementComponent;
let fixture: ComponentFixture<ClaimedSearchResultListElementComponent>;

// Search result object used as test input, with no hit highlights
const mockResultObject: ClaimedTaskSearchResult = new ClaimedTaskSearchResult();
mockResultObject.hitHighlights = {};

// Successful remote data wrapping an empty paginated list
const emptyList = createSuccessfulRemoteDataObject(createPaginatedList([]));

// Spy for ConfigurationDataService: findByPropertyName always answers with a successful
// 'duplicate.enable' property whose only value is 'true'
const configurationDataService = jasmine.createSpyObj('configurationDataService', {
findByPropertyName: createSuccessfulRemoteDataObject$(Object.assign(new ConfigurationProperty(), {
name: 'duplicate.enable',
values: [
'true'
]
}))
});
// Stub for SubmissionDuplicateDataService.
// NOTE(review): findDuplicates answers with a bare {} payload while findListByHref uses the
// empty paginated list above - confirm the component under test does not expect a paginated list here.
const duplicateDataServiceStub = {
findListByHref: () => observableOf(emptyList),
findDuplicates: () => createSuccessfulRemoteDataObject$({}),
};

const item = Object.assign(new Item(), {
bundles: observableOf({}),
metadata: {
Expand Down Expand Up @@ -83,7 +102,9 @@ describe('ClaimedSearchResultListElementComponent', () => {
{ provide: LinkService, useValue: linkService },
{ provide: DSONameService, useClass: DSONameServiceMock },
{ provide: APP_CONFIG, useValue: environment },
{ provide: ObjectCacheService, useValue: objectCacheServiceMock }
{ provide: ObjectCacheService, useValue: objectCacheServiceMock },
{ provide: ConfigurationDataService, useValue: configurationDataService },
{ provide: SubmissionDuplicateDataService, useValue: duplicateDataServiceStub },
],
schemas: [NO_ERRORS_SCHEMA]
}).overrideComponent(ClaimedSearchResultListElementComponent, {
Expand Down
Loading

0 comments on commit f6f3962

Please sign in to comment.