From 3e37ce53612dbc0fd72d098832cc220efdc6ce24 Mon Sep 17 00:00:00 2001 From: ChiaraDipi Date: Sat, 30 Jan 2021 17:56:53 +0100 Subject: [PATCH 01/11] Add basic FileDescParser Elements in fileDesc are temporarily parsed in a generic way. --- src/app/models/evt-models.ts | 10 +++++ src/app/services/xml-parsers/header-parser.ts | 45 +++++++++++++++++++ src/app/services/xml-parsers/index.ts | 9 +++- 3 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 src/app/services/xml-parsers/header-parser.ts diff --git a/src/app/models/evt-models.ts b/src/app/models/evt-models.ts index e42446e86..a54d30bae 100644 --- a/src/app/models/evt-models.ts +++ b/src/app/models/evt-models.ts @@ -772,3 +772,13 @@ export class Dim extends GenericElement { max?: number; gEl?: G[]; } + +export class FileDesc extends GenericElement { + titleStmt: Array>; // TODO: Add specific type when titleStmt is handled + publicationStmt: Array>; // TODO: Add specific type when publicationStmt is handled + sourceDesc: Array>; // TODO: Add specific type when sourceDesc is handled + editionStmt?: Array>; // TODO: Add specific type when editionStmt is handled + extent?: Array>; // TODO: Add specific type when extent is handled + seriesStmt?: Array>; // TODO: Add specific type when seriesStmt is handled + notesStmt?: Array>; // TODO: Add specific type when notesStmt is handled +} diff --git a/src/app/services/xml-parsers/header-parser.ts b/src/app/services/xml-parsers/header-parser.ts new file mode 100644 index 000000000..2e86baa96 --- /dev/null +++ b/src/app/services/xml-parsers/header-parser.ts @@ -0,0 +1,45 @@ +import { isNestedInElem } from 'src/app/utils/dom-utils'; +import { FileDesc, XMLElement } from '../../models/evt-models'; +import { GenericElemParser } from './basic-parsers'; +import { getClass, parseChildren, Parser } from './parser-models'; + +export class FileDescParser extends GenericElemParser implements Parser { + private excludeFromParsing = [ + 'listBibl', + 'listEvent', + 
'listOrg', + 'listPerson', + 'listPlace', + 'listWit', + 'sourceDesc list', + ]; + + parse(xml: XMLElement): FileDesc { + xml = xml.cloneNode(true) as XMLElement; + Array.from(xml.querySelectorAll(this.excludeFromParsing.toString())) + .filter((list) => !isNestedInElem(list, list.tagName)) + .forEach(el => el.remove()); + + const titleStmtEl = xml.querySelector(':scope > titleStmt'); + const editionStmtEl = xml.querySelector(':scope > editionStmt'); + const extentEl = xml.querySelector(':scope > extent'); + const publicationStmtEl = xml.querySelector(':scope > publicationStmt'); + const notesStmtEl = xml.querySelector('notesStmt'); + const seriesStmtEl = xml.querySelector('seriesStmt'); + const sourceDescEl = xml.querySelector('sourceDesc'); + + return { + type: FileDesc, + class: getClass(xml), + content: parseChildren(xml, this.genericParse), + attributes: this.attributeParser.parse(xml), + titleStmt: titleStmtEl ? parseChildren(titleStmtEl, this.genericParse) : [], + editionStmt: editionStmtEl ? parseChildren(editionStmtEl, this.genericParse) : [], + publicationStmt: publicationStmtEl ? parseChildren(publicationStmtEl, this.genericParse) : [], + sourceDesc: sourceDescEl ? parseChildren(sourceDescEl, this.genericParse) : [], + extent: extentEl ? parseChildren(extentEl, this.genericParse) : [], + notesStmt: notesStmtEl ? parseChildren(notesStmtEl, this.genericParse) : [], + seriesStmt: seriesStmtEl ? 
parseChildren(seriesStmtEl, this.genericParse) : [], + }; + } +} diff --git a/src/app/services/xml-parsers/index.ts b/src/app/services/xml-parsers/index.ts index ab4515b57..cbacc3392 100644 --- a/src/app/services/xml-parsers/index.ts +++ b/src/app/services/xml-parsers/index.ts @@ -8,6 +8,7 @@ import { CharParser, GlyphParser, GParser } from './character-declarations-parse import { ChoiceParser } from './choice-parser'; import { SicParser, SurplusParser } from './editorial-parsers'; import { GraphicParser, SurfaceParser, ZoneParser } from './facsimile-parser'; +import { FileDescParser } from './header-parser'; import { AccMatParser, AcquisitionParser, AdditionalParser, AdditionsParser, AdminInfoParser, AltIdentifierParser, BindingDescParser, BindingParser, CollationParser, CollectionParser, ConditionParser, CustEventParser, CustodialHistParser, DecoDescParser, DecoNoteParser, @@ -28,6 +29,7 @@ import { createParser, Parser, ParseResult } from './parser-models'; type AnalysisTags = 'w'; type CoreTags = 'add' | 'choice' | 'del' | 'gap' | 'graphic' | 'head' | 'l' | 'lb' | 'lg' | 'note' | 'p' | 'ptr' | 'sic'; type GaijiTags = 'char' | 'g' | 'glyph'; +type HeaderTags = 'fileDesc'; type MsDescriptionTags = 'accMat' | 'acquisition' | 'additional' | 'additions' | 'adminInfo' | 'altIdentifier' | 'binding' | 'bindingDesc' | 'collation' | 'collection' | 'condition' | 'custEvent' | 'custodialHist' | 'decoDesc' | 'decoNote' | 'depth' | 'dim' | 'dimensions' | 'explicit' | 'filiation' | 'finalRubric' | 'foliation' | @@ -40,7 +42,7 @@ type NamesDatesTags = 'event' | 'geogname' | 'org' | 'orgname' | 'persname' | 'p type TextCritTags = 'app' | 'lem' | 'rdg'; type TranscrTags = 'damage' | 'supplied' | 'surface' | 'surplus' | 'zone'; -type SupportedTagNames = AnalysisTags | CoreTags | GaijiTags | MsDescriptionTags | TextCritTags | TranscrTags | NamesDatesTags; +type SupportedTagNames = AnalysisTags | CoreTags | GaijiTags | HeaderTags | MsDescriptionTags | TextCritTags | TranscrTags | 
NamesDatesTags; const analysisParseF: { [T in AnalysisTags]: Parser } = { w: createParser(WordParser, parse), @@ -68,6 +70,10 @@ const gaijiParseF: { [T in GaijiTags]: Parser } = { glyph: createParser(GlyphParser, parse), }; +const headerParseF: { [T in HeaderTags]: Parser } = { + fileDesc: createParser(FileDescParser, parse), +}; + const msDescriptionParseF: { [T in MsDescriptionTags]: Parser } = { accMat: createParser(AccMatParser, parse), acquisition: createParser(AcquisitionParser, parse), @@ -162,6 +168,7 @@ export const parseF: { [T in SupportedTagNames]: Parser } = { ...analysisParseF, ...coreParseF, ...gaijiParseF, + ...headerParseF, ...namesDatesParseF, ...textCritParseF, ...transcrParseF, From 53de3056f4ac7677229526bffc4b1565c8b3217a Mon Sep 17 00:00:00 2001 From: ChiaraDipi Date: Sat, 30 Jan 2021 17:58:16 +0100 Subject: [PATCH 02/11] Add parsers for titleStmt, respStmt and resp --- src/app/models/evt-models.ts | 24 ++++- src/app/services/xml-parsers/header-parser.ts | 99 +++++++++++++++---- src/app/services/xml-parsers/index.ts | 9 +- src/app/services/xml-parsers/parser-models.ts | 10 +- 4 files changed, 117 insertions(+), 25 deletions(-) diff --git a/src/app/models/evt-models.ts b/src/app/models/evt-models.ts index a54d30bae..3d50dc24c 100644 --- a/src/app/models/evt-models.ts +++ b/src/app/models/evt-models.ts @@ -774,7 +774,7 @@ export class Dim extends GenericElement { } export class FileDesc extends GenericElement { - titleStmt: Array>; // TODO: Add specific type when titleStmt is handled + titleStmt: TitleStmt; publicationStmt: Array>; // TODO: Add specific type when publicationStmt is handled sourceDesc: Array>; // TODO: Add specific type when sourceDesc is handled editionStmt?: Array>; // TODO: Add specific type when editionStmt is handled @@ -782,3 +782,25 @@ export class FileDesc extends GenericElement { seriesStmt?: Array>; // TODO: Add specific type when seriesStmt is handled notesStmt?: Array>; // TODO: Add specific type when notesStmt is 
handled } + +export class TitleStmt extends GenericElement { + titles: Array>; // TODO: Add specific type when title is handled + subtitles: Array>; // TODO: Add specific type when subtitle is handled + authors: Array>; // TODO: Add specific type when author is handled + principals: Array>; // TODO: Add specific type when principal is handled + respStmts: RespStmt[]; + editors: Array>; // TODO: Add specific type when editor is handled + sponsors: Array>; // TODO: Add specific type when sponsor is handled + funders: Array>; // TODO: Add specific type when funder is handled +} + +export class RespStmt extends GenericElement { + responsibility: Resp; + people: Array>; + notes: Note[]; +} + +export class Resp extends GenericElement { + normalizedResp: string; + date: string; +} diff --git a/src/app/services/xml-parsers/header-parser.ts b/src/app/services/xml-parsers/header-parser.ts index 2e86baa96..9538c2a80 100644 --- a/src/app/services/xml-parsers/header-parser.ts +++ b/src/app/services/xml-parsers/header-parser.ts @@ -1,7 +1,76 @@ import { isNestedInElem } from 'src/app/utils/dom-utils'; -import { FileDesc, XMLElement } from '../../models/evt-models'; -import { GenericElemParser } from './basic-parsers'; -import { getClass, parseChildren, Parser } from './parser-models'; +import { FileDesc, GenericElement, NamedEntityRef, Note, Resp, RespStmt, TitleStmt, XMLElement } from '../../models/evt-models'; +import { GenericElemParser, NoteParser, queryAndParseElement, queryAndParseElements } from './basic-parsers'; +import { NamedEntityRefParser } from './named-entity-parsers'; +import { createParser, getClass, parseChildren, Parser } from './parser-models'; + +export class RespParser extends GenericElemParser implements Parser { + parse(xml: XMLElement): Resp { + const attributes = this.attributeParser.parse(xml); + const { ref, when } = attributes; + const normalizedResp = ref?.indexOf('http://') < 0 && ref?.indexOf('https://') < 0 ? `http://${ref}` : ref ?? 
''; + + return { + type: Resp, + class: getClass(xml), + content: parseChildren(xml, this.genericParse, true), + attributes, + normalizedResp, + date: when || '', + }; + } +} + +export class RespStmtParser extends GenericElemParser implements Parser { + private noteParser = createParser(NoteParser, this.genericParse); + private respParser = createParser(RespParser, this.genericParse); + private namedEntityRefParser = createParser(NamedEntityRefParser, this.genericParse); + + parse(xml: XMLElement): RespStmt { + const people = Array.from(xml.querySelectorAll(':scope > name, :scope > orgName, :scope > persName')) + .map(p => { + if (['orgName', 'persName'].includes(p.tagName)) { + return this.namedEntityRefParser.parse(p) as NamedEntityRef; + } + + return this.genericParse(p) as GenericElement; + }); + + return { + type: RespStmt, + class: getClass(xml), + content: parseChildren(xml, this.genericParse, true), + attributes: this.attributeParser.parse(xml), + responsibility: queryAndParseElement(xml, 'resp', this.respParser), + notes: queryAndParseElements(xml, 'note', this.noteParser), + people, + }; + } +} + +export class TitleStmtParser extends GenericElemParser implements Parser { + private genericElemParser = createParser(GenericElemParser, this.genericParse); + private respStmtParser = createParser(RespStmtParser, this.genericParse); + + parse(xml: XMLElement): TitleStmt { + const title = queryAndParseElements(xml, 'title[type="main"]', this.genericElemParser); + + return { + type: TitleStmt, + class: getClass(xml), + content: parseChildren(xml, this.genericParse), + attributes: this.attributeParser.parse(xml), + titles: title.length > 0 ? 
title : queryAndParseElements(xml, 'title:not([type="sub"])', this.genericElemParser), + subtitles: queryAndParseElements(xml, 'title[type="sub"]', this.genericElemParser), + authors: queryAndParseElements(xml, 'author', this.genericElemParser), + editors: queryAndParseElements(xml, 'editor', this.genericElemParser), + sponsors: queryAndParseElements(xml, 'sponsor', this.genericElemParser), + funders: queryAndParseElements(xml, 'funder', this.genericElemParser), + principals: queryAndParseElements(xml, 'principal', this.genericElemParser), + respStmts: queryAndParseElements(xml, 'respStmt', this.respStmtParser), + }; + } +} export class FileDescParser extends GenericElemParser implements Parser { private excludeFromParsing = [ @@ -13,6 +82,8 @@ export class FileDescParser extends GenericElemParser implements Parser !isNestedInElem(list, list.tagName)) .forEach(el => el.remove()); - const titleStmtEl = xml.querySelector(':scope > titleStmt'); - const editionStmtEl = xml.querySelector(':scope > editionStmt'); - const extentEl = xml.querySelector(':scope > extent'); - const publicationStmtEl = xml.querySelector(':scope > publicationStmt'); - const notesStmtEl = xml.querySelector('notesStmt'); - const seriesStmtEl = xml.querySelector('seriesStmt'); - const sourceDescEl = xml.querySelector('sourceDesc'); - return { type: FileDesc, class: getClass(xml), content: parseChildren(xml, this.genericParse), attributes: this.attributeParser.parse(xml), - titleStmt: titleStmtEl ? parseChildren(titleStmtEl, this.genericParse) : [], - editionStmt: editionStmtEl ? parseChildren(editionStmtEl, this.genericParse) : [], - publicationStmt: publicationStmtEl ? parseChildren(publicationStmtEl, this.genericParse) : [], - sourceDesc: sourceDescEl ? parseChildren(sourceDescEl, this.genericParse) : [], - extent: extentEl ? parseChildren(extentEl, this.genericParse) : [], - notesStmt: notesStmtEl ? parseChildren(notesStmtEl, this.genericParse) : [], - seriesStmt: seriesStmtEl ? 
parseChildren(seriesStmtEl, this.genericParse) : [], + titleStmt: queryAndParseElement(xml, 'titleStmt', this.titleStmtParser), + editionStmt: queryAndParseElements(xml, 'editionStmt', this.genericElemParser), + publicationStmt: queryAndParseElements(xml, 'publicationStmt', this.genericElemParser), + sourceDesc: queryAndParseElements(xml, 'sourceDesc', this.genericElemParser), + extent: queryAndParseElements(xml, 'extent', this.genericElemParser), + notesStmt: queryAndParseElements(xml, 'notesStmt', this.genericElemParser), + seriesStmt: queryAndParseElements(xml, 'seriesStmt', this.genericElemParser), }; } } diff --git a/src/app/services/xml-parsers/index.ts b/src/app/services/xml-parsers/index.ts index cbacc3392..09fee373e 100644 --- a/src/app/services/xml-parsers/index.ts +++ b/src/app/services/xml-parsers/index.ts @@ -8,7 +8,7 @@ import { CharParser, GlyphParser, GParser } from './character-declarations-parse import { ChoiceParser } from './choice-parser'; import { SicParser, SurplusParser } from './editorial-parsers'; import { GraphicParser, SurfaceParser, ZoneParser } from './facsimile-parser'; -import { FileDescParser } from './header-parser'; +import { FileDescParser, RespParser, RespStmtParser, TitleStmtParser } from './header-parser'; import { AccMatParser, AcquisitionParser, AdditionalParser, AdditionsParser, AdminInfoParser, AltIdentifierParser, BindingDescParser, BindingParser, CollationParser, CollectionParser, ConditionParser, CustEventParser, CustodialHistParser, DecoDescParser, DecoNoteParser, @@ -27,9 +27,9 @@ import { import { createParser, Parser, ParseResult } from './parser-models'; type AnalysisTags = 'w'; -type CoreTags = 'add' | 'choice' | 'del' | 'gap' | 'graphic' | 'head' | 'l' | 'lb' | 'lg' | 'note' | 'p' | 'ptr' | 'sic'; +type CoreTags = 'add' | 'choice' | 'del' | 'gap' | 'graphic' | 'head' | 'l' | 'lb' | 'lg' | 'note' | 'p' | 'ptr' | 'resp' | 'respStmt' | 'sic'; type GaijiTags = 'char' | 'g' | 'glyph'; -type HeaderTags = 'fileDesc'; 
+type HeaderTags = 'fileDesc' | 'titleStmt'; type MsDescriptionTags = 'accMat' | 'acquisition' | 'additional' | 'additions' | 'adminInfo' | 'altIdentifier' | 'binding' | 'bindingDesc' | 'collation' | 'collection' | 'condition' | 'custEvent' | 'custodialHist' | 'decoDesc' | 'decoNote' | 'depth' | 'dim' | 'dimensions' | 'explicit' | 'filiation' | 'finalRubric' | 'foliation' | @@ -61,6 +61,8 @@ const coreParseF: { [T in CoreTags]: Parser } = { note: createParser(NoteParser, parse), p: createParser(ParagraphParser, parse), ptr: createParser(PtrParser, parse), + resp: createParser(RespParser, parse), + respStmt: createParser(RespStmtParser, parse), sic: createParser(SicParser, parse), }; @@ -72,6 +74,7 @@ const gaijiParseF: { [T in GaijiTags]: Parser } = { const headerParseF: { [T in HeaderTags]: Parser } = { fileDesc: createParser(FileDescParser, parse), + titleStmt: createParser(TitleStmtParser, parse), }; const msDescriptionParseF: { [T in MsDescriptionTags]: Parser } = { diff --git a/src/app/services/xml-parsers/parser-models.ts b/src/app/services/xml-parsers/parser-models.ts index 613dca82e..5db6770e8 100644 --- a/src/app/services/xml-parsers/parser-models.ts +++ b/src/app/services/xml-parsers/parser-models.ts @@ -14,8 +14,8 @@ export function createParser>(c: new (raw: ParseFn) => T, export function getID(xml: XMLElement, prefix: string = '') { return xml.getAttribute('xml:id') || prefix + xpath(xml); } export function getClass(xml: XMLElement) { return xml.tagName ? 
xml.tagName.toLowerCase() : ''; } -export function parseChildren(xml: XMLElement, parseFn: ParseFn) { - return complexElements(xml.childNodes).map(child => parseFn(child as XMLElement)); +export function parseChildren(xml: XMLElement, parseFn: ParseFn, excludeEmptyText?: boolean) { + return complexElements(xml.childNodes, excludeEmptyText).map(child => parseFn(child as XMLElement)); } export function getDefaultN(n: string) { return n || ''; } export function getDefaultAttr(attr: string) { return attr || ''; } @@ -24,4 +24,8 @@ export function unhandledElement(xml: XMLElement, name: string, parseFn: ParseFn return flat(Array.from(xml.querySelectorAll(`:scope > ${name}`)).map(e => parseChildren(e, parseFn))); } -function complexElements(nodes: NodeListOf): ChildNode[] { return Array.from(nodes).filter((n) => n.nodeType !== 8); } +export function complexElements(nodes: NodeListOf, excludeEmptyText?: boolean): ChildNode[] { + const interestingNodes = Array.from(nodes).filter((n) => n.nodeType !== 8); + + return excludeEmptyText ? 
interestingNodes.filter((n) => n.nodeType !== 3 || n.textContent.trim()) : interestingNodes; +} From 2c535edbcd0ce70328a2134c7af3d0155918c37c Mon Sep 17 00:00:00 2001 From: ChiaraDipi Date: Sat, 30 Jan 2021 18:00:13 +0100 Subject: [PATCH 03/11] Add EditionStmt parser --- src/app/models/evt-models.ts | 8 +++++- src/app/services/xml-parsers/header-parser.ts | 25 +++++++++++++++++-- src/app/services/xml-parsers/index.ts | 5 ++-- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/app/models/evt-models.ts b/src/app/models/evt-models.ts index 3d50dc24c..43a831ff1 100644 --- a/src/app/models/evt-models.ts +++ b/src/app/models/evt-models.ts @@ -777,7 +777,7 @@ export class FileDesc extends GenericElement { titleStmt: TitleStmt; publicationStmt: Array>; // TODO: Add specific type when publicationStmt is handled sourceDesc: Array>; // TODO: Add specific type when sourceDesc is handled - editionStmt?: Array>; // TODO: Add specific type when editionStmt is handled + editionStmt?: EditionStmt; extent?: Array>; // TODO: Add specific type when extent is handled seriesStmt?: Array>; // TODO: Add specific type when seriesStmt is handled notesStmt?: Array>; // TODO: Add specific type when notesStmt is handled @@ -804,3 +804,9 @@ export class Resp extends GenericElement { normalizedResp: string; date: string; } + +export class EditionStmt extends GenericElement { + structuredData: boolean; + edition: Array>; // TODO: Add specific type when edition is handled + respStmt: RespStmt[]; +} diff --git a/src/app/services/xml-parsers/header-parser.ts b/src/app/services/xml-parsers/header-parser.ts index 9538c2a80..867cee94f 100644 --- a/src/app/services/xml-parsers/header-parser.ts +++ b/src/app/services/xml-parsers/header-parser.ts @@ -1,5 +1,8 @@ import { isNestedInElem } from 'src/app/utils/dom-utils'; -import { FileDesc, GenericElement, NamedEntityRef, Note, Resp, RespStmt, TitleStmt, XMLElement } from '../../models/evt-models'; +import { + EditionStmt, FileDesc, 
GenericElement, NamedEntityRef, Note, + Resp, RespStmt, TitleStmt, XMLElement, +} from '../../models/evt-models'; import { GenericElemParser, NoteParser, queryAndParseElement, queryAndParseElements } from './basic-parsers'; import { NamedEntityRefParser } from './named-entity-parsers'; import { createParser, getClass, parseChildren, Parser } from './parser-models'; @@ -72,6 +75,23 @@ export class TitleStmtParser extends GenericElemParser implements Parser { + private genericElemParser = createParser(GenericElemParser, this.genericParse); + private respStmtParser = createParser(RespStmtParser, this.genericParse); + + parse(xml: XMLElement): EditionStmt { + return { + type: EditionStmt, + class: getClass(xml), + content: parseChildren(xml, this.genericParse), + attributes: this.attributeParser.parse(xml), + edition: queryAndParseElements(xml, 'edition', this.genericElemParser), + respStmt: queryAndParseElements(xml, 'respStmt', this.respStmtParser), + structuredData: Array.from(xml.children).filter(el => el.tagName === 'p').length !== xml.children.length, + }; + } +} + export class FileDescParser extends GenericElemParser implements Parser { private excludeFromParsing = [ 'listBibl', @@ -84,6 +104,7 @@ export class FileDescParser extends GenericElemParser implements Parser(xml, 'titleStmt', this.titleStmtParser), - editionStmt: queryAndParseElements(xml, 'editionStmt', this.genericElemParser), + editionStmt: queryAndParseElement(xml, 'editionStmt', this.editionStmtParser), publicationStmt: queryAndParseElements(xml, 'publicationStmt', this.genericElemParser), sourceDesc: queryAndParseElements(xml, 'sourceDesc', this.genericElemParser), extent: queryAndParseElements(xml, 'extent', this.genericElemParser), diff --git a/src/app/services/xml-parsers/index.ts b/src/app/services/xml-parsers/index.ts index 09fee373e..7609062db 100644 --- a/src/app/services/xml-parsers/index.ts +++ b/src/app/services/xml-parsers/index.ts @@ -8,7 +8,7 @@ import { CharParser, GlyphParser, 
GParser } from './character-declarations-parse import { ChoiceParser } from './choice-parser'; import { SicParser, SurplusParser } from './editorial-parsers'; import { GraphicParser, SurfaceParser, ZoneParser } from './facsimile-parser'; -import { FileDescParser, RespParser, RespStmtParser, TitleStmtParser } from './header-parser'; +import { EditionStmtParser, FileDescParser, RespParser, RespStmtParser, TitleStmtParser } from './header-parser'; import { AccMatParser, AcquisitionParser, AdditionalParser, AdditionsParser, AdminInfoParser, AltIdentifierParser, BindingDescParser, BindingParser, CollationParser, CollectionParser, ConditionParser, CustEventParser, CustodialHistParser, DecoDescParser, DecoNoteParser, @@ -29,7 +29,7 @@ import { createParser, Parser, ParseResult } from './parser-models'; type AnalysisTags = 'w'; type CoreTags = 'add' | 'choice' | 'del' | 'gap' | 'graphic' | 'head' | 'l' | 'lb' | 'lg' | 'note' | 'p' | 'ptr' | 'resp' | 'respStmt' | 'sic'; type GaijiTags = 'char' | 'g' | 'glyph'; -type HeaderTags = 'fileDesc' | 'titleStmt'; +type HeaderTags = 'editionStmt' | 'fileDesc' | 'titleStmt'; type MsDescriptionTags = 'accMat' | 'acquisition' | 'additional' | 'additions' | 'adminInfo' | 'altIdentifier' | 'binding' | 'bindingDesc' | 'collation' | 'collection' | 'condition' | 'custEvent' | 'custodialHist' | 'decoDesc' | 'decoNote' | 'depth' | 'dim' | 'dimensions' | 'explicit' | 'filiation' | 'finalRubric' | 'foliation' | @@ -73,6 +73,7 @@ const gaijiParseF: { [T in GaijiTags]: Parser } = { }; const headerParseF: { [T in HeaderTags]: Parser } = { + editionStmt: createParser(EditionStmtParser, parse), fileDesc: createParser(FileDescParser, parse), titleStmt: createParser(TitleStmtParser, parse), }; From bdb0508724ba57c3923b4fbcbe65811c40bdfcb5 Mon Sep 17 00:00:00 2001 From: ChiaraDipi Date: Sat, 30 Jan 2021 18:01:05 +0100 Subject: [PATCH 04/11] Add PublicationStmt parser --- src/app/models/evt-models.ts | 15 +++++++++- 
src/app/services/xml-parsers/header-parser.ts | 28 +++++++++++++++++-- src/app/services/xml-parsers/index.ts | 5 ++-- 3 files changed, 43 insertions(+), 5 deletions(-) diff --git a/src/app/models/evt-models.ts b/src/app/models/evt-models.ts index 43a831ff1..4c340e24a 100644 --- a/src/app/models/evt-models.ts +++ b/src/app/models/evt-models.ts @@ -775,7 +775,7 @@ export class Dim extends GenericElement { export class FileDesc extends GenericElement { titleStmt: TitleStmt; - publicationStmt: Array>; // TODO: Add specific type when publicationStmt is handled + publicationStmt: PublicationStmt; sourceDesc: Array>; // TODO: Add specific type when sourceDesc is handled editionStmt?: EditionStmt; extent?: Array>; // TODO: Add specific type when extent is handled @@ -810,3 +810,16 @@ export class EditionStmt extends GenericElement { edition: Array>; // TODO: Add specific type when edition is handled respStmt: RespStmt[]; } + +export class PublicationStmt extends GenericElement { + structuredData: boolean; + publisher: Array>; // TODO: Add specific type when publisher is handled + distributor: Array>; // TODO: Add specific type when distributor is handled + authority: Array>; // TODO: Add specific type when authority is handled + pubPlace: Array>; // TODO: Add specific type when pubPlace is handled + address: Array>; // TODO: Add specific type when address is handled + idno: Array>; // TODO: Add specific type when idno is handled + availability: Array>; // TODO: Add specific type when availability is handled + date: Array>; // TODO: Add specific type when date is handled + licence: Array>; // TODO: Add specific type when licence is handled +} diff --git a/src/app/services/xml-parsers/header-parser.ts b/src/app/services/xml-parsers/header-parser.ts index 867cee94f..beaef9b6f 100644 --- a/src/app/services/xml-parsers/header-parser.ts +++ b/src/app/services/xml-parsers/header-parser.ts @@ -1,7 +1,7 @@ import { isNestedInElem } from 'src/app/utils/dom-utils'; import { 
EditionStmt, FileDesc, GenericElement, NamedEntityRef, Note, - Resp, RespStmt, TitleStmt, XMLElement, + PublicationStmt, Resp, RespStmt, TitleStmt, XMLElement, } from '../../models/evt-models'; import { GenericElemParser, NoteParser, queryAndParseElement, queryAndParseElements } from './basic-parsers'; import { NamedEntityRefParser } from './named-entity-parsers'; @@ -92,6 +92,29 @@ export class EditionStmtParser extends GenericElemParser implements Parser { + private genericElemParser = createParser(GenericElemParser, this.genericParse); + + parse(xml: XMLElement): PublicationStmt { + return { + type: PublicationStmt, + class: getClass(xml), + content: parseChildren(xml, this.genericParse, true), + attributes: this.attributeParser.parse(xml), + structuredData: Array.from(xml.children).filter(el => el.tagName === 'p').length !== xml.children.length, + publisher: queryAndParseElements(xml, 'publisher', this.genericElemParser), + distributor: queryAndParseElements(xml, 'distributor', this.genericElemParser), + authority: queryAndParseElements(xml, 'authority', this.genericElemParser), + pubPlace: queryAndParseElements(xml, 'pubPlace', this.genericElemParser), + address: queryAndParseElements(xml, 'address', this.genericElemParser), + idno: queryAndParseElements(xml, 'idno', this.genericElemParser), + availability: queryAndParseElements(xml, 'availability', this.genericElemParser), + date: queryAndParseElements(xml, 'date', this.genericElemParser), + licence: queryAndParseElements(xml, 'licence', this.genericElemParser), + }; + } +} + export class FileDescParser extends GenericElemParser implements Parser { private excludeFromParsing = [ 'listBibl', @@ -105,6 +128,7 @@ export class FileDescParser extends GenericElemParser implements Parser(xml, 'titleStmt', this.titleStmtParser), editionStmt: queryAndParseElement(xml, 'editionStmt', this.editionStmtParser), - publicationStmt: queryAndParseElements(xml, 'publicationStmt', this.genericElemParser), + publicationStmt: 
queryAndParseElement(xml, 'publicationStmt', this.publicationStmtParser), sourceDesc: queryAndParseElements(xml, 'sourceDesc', this.genericElemParser), extent: queryAndParseElements(xml, 'extent', this.genericElemParser), notesStmt: queryAndParseElements(xml, 'notesStmt', this.genericElemParser), diff --git a/src/app/services/xml-parsers/index.ts b/src/app/services/xml-parsers/index.ts index 7609062db..1fb14f3db 100644 --- a/src/app/services/xml-parsers/index.ts +++ b/src/app/services/xml-parsers/index.ts @@ -8,7 +8,7 @@ import { CharParser, GlyphParser, GParser } from './character-declarations-parse import { ChoiceParser } from './choice-parser'; import { SicParser, SurplusParser } from './editorial-parsers'; import { GraphicParser, SurfaceParser, ZoneParser } from './facsimile-parser'; -import { EditionStmtParser, FileDescParser, RespParser, RespStmtParser, TitleStmtParser } from './header-parser'; +import { EditionStmtParser, FileDescParser, PublicationStmtParser, RespParser, RespStmtParser, TitleStmtParser } from './header-parser'; import { AccMatParser, AcquisitionParser, AdditionalParser, AdditionsParser, AdminInfoParser, AltIdentifierParser, BindingDescParser, BindingParser, CollationParser, CollectionParser, ConditionParser, CustEventParser, CustodialHistParser, DecoDescParser, DecoNoteParser, @@ -29,7 +29,7 @@ import { createParser, Parser, ParseResult } from './parser-models'; type AnalysisTags = 'w'; type CoreTags = 'add' | 'choice' | 'del' | 'gap' | 'graphic' | 'head' | 'l' | 'lb' | 'lg' | 'note' | 'p' | 'ptr' | 'resp' | 'respStmt' | 'sic'; type GaijiTags = 'char' | 'g' | 'glyph'; -type HeaderTags = 'editionStmt' | 'fileDesc' | 'titleStmt'; +type HeaderTags = 'editionStmt' | 'fileDesc' | 'publicationStmt' | 'titleStmt'; type MsDescriptionTags = 'accMat' | 'acquisition' | 'additional' | 'additions' | 'adminInfo' | 'altIdentifier' | 'binding' | 'bindingDesc' | 'collation' | 'collection' | 'condition' | 'custEvent' | 'custodialHist' | 'decoDesc' | 
'decoNote' | 'depth' | 'dim' | 'dimensions' | 'explicit' | 'filiation' | 'finalRubric' | 'foliation' | @@ -75,6 +75,7 @@ const gaijiParseF: { [T in GaijiTags]: Parser } = { const headerParseF: { [T in HeaderTags]: Parser } = { editionStmt: createParser(EditionStmtParser, parse), fileDesc: createParser(FileDescParser, parse), + publicationStmt: createParser(PublicationStmtParser, parse), titleStmt: createParser(TitleStmtParser, parse), }; From 38402ce802644b3ef0c55ab11efb7a3898fb282a Mon Sep 17 00:00:00 2001 From: ChiaraDipi Date: Sat, 30 Jan 2021 18:04:29 +0100 Subject: [PATCH 05/11] Add SeriesStmt parser --- src/app/models/evt-models.ts | 11 +++++++- src/app/services/xml-parsers/header-parser.ts | 25 +++++++++++++++++-- src/app/services/xml-parsers/index.ts | 5 ++-- 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/src/app/models/evt-models.ts b/src/app/models/evt-models.ts index 4c340e24a..654e0f7b4 100644 --- a/src/app/models/evt-models.ts +++ b/src/app/models/evt-models.ts @@ -779,7 +779,7 @@ export class FileDesc extends GenericElement { sourceDesc: Array>; // TODO: Add specific type when sourceDesc is handled editionStmt?: EditionStmt; extent?: Array>; // TODO: Add specific type when extent is handled - seriesStmt?: Array>; // TODO: Add specific type when seriesStmt is handled + seriesStmt?: SeriesStmt; notesStmt?: Array>; // TODO: Add specific type when notesStmt is handled } @@ -823,3 +823,12 @@ export class PublicationStmt extends GenericElement { date: Array>; // TODO: Add specific type when date is handled licence: Array>; // TODO: Add specific type when licence is handled } + +export class SeriesStmt extends GenericElement { + structuredData: boolean; + title: Array>; // TODO: Add specific type when title is handled + idno: Array>; // TODO: Add specific type when idno is handled + respStmt: RespStmt[]; + biblScope: Array>; // TODO: Add specific type when biblScope is handled + editor: Array>; // TODO: Add specific type when editor is 
handled +} diff --git a/src/app/services/xml-parsers/header-parser.ts b/src/app/services/xml-parsers/header-parser.ts index beaef9b6f..23d6d6f50 100644 --- a/src/app/services/xml-parsers/header-parser.ts +++ b/src/app/services/xml-parsers/header-parser.ts @@ -1,7 +1,7 @@ import { isNestedInElem } from 'src/app/utils/dom-utils'; import { EditionStmt, FileDesc, GenericElement, NamedEntityRef, Note, - PublicationStmt, Resp, RespStmt, TitleStmt, XMLElement, + PublicationStmt, Resp, RespStmt, SeriesStmt, TitleStmt, XMLElement, } from '../../models/evt-models'; import { GenericElemParser, NoteParser, queryAndParseElement, queryAndParseElements } from './basic-parsers'; import { NamedEntityRefParser } from './named-entity-parsers'; @@ -115,6 +115,26 @@ export class PublicationStmtParser extends GenericElemParser implements Parser { + private genericElemParser = createParser(GenericElemParser, this.genericParse); + private respStmtParser = createParser(RespStmtParser, this.genericParse); + + parse(xml: XMLElement): SeriesStmt { + return { + type: SeriesStmt, + class: getClass(xml), + content: parseChildren(xml, this.genericParse, true), + attributes: this.attributeParser.parse(xml), + structuredData: Array.from(xml.querySelectorAll(':scope > p')).length === 0, + title: queryAndParseElements(xml, 'title', this.genericElemParser), + idno: queryAndParseElements(xml, 'idno', this.genericElemParser), + respStmt: queryAndParseElements(xml, 'respStmt', this.respStmtParser), + editor: queryAndParseElements(xml, 'editor', this.genericElemParser), + biblScope: queryAndParseElements(xml, 'biblScope', this.genericElemParser), + }; + } +} + export class FileDescParser extends GenericElemParser implements Parser { private excludeFromParsing = [ 'listBibl', @@ -129,6 +149,7 @@ export class FileDescParser extends GenericElemParser implements Parser(xml, 'sourceDesc', this.genericElemParser), extent: queryAndParseElements(xml, 'extent', this.genericElemParser), notesStmt: 
queryAndParseElements(xml, 'notesStmt', this.genericElemParser), - seriesStmt: queryAndParseElements(xml, 'seriesStmt', this.genericElemParser), + seriesStmt: queryAndParseElement(xml, 'seriesStmt', this.seriesStmtParser), }; } } diff --git a/src/app/services/xml-parsers/index.ts b/src/app/services/xml-parsers/index.ts index 1fb14f3db..e8d1b6c92 100644 --- a/src/app/services/xml-parsers/index.ts +++ b/src/app/services/xml-parsers/index.ts @@ -8,7 +8,7 @@ import { CharParser, GlyphParser, GParser } from './character-declarations-parse import { ChoiceParser } from './choice-parser'; import { SicParser, SurplusParser } from './editorial-parsers'; import { GraphicParser, SurfaceParser, ZoneParser } from './facsimile-parser'; -import { EditionStmtParser, FileDescParser, PublicationStmtParser, RespParser, RespStmtParser, TitleStmtParser } from './header-parser'; +import { EditionStmtParser, FileDescParser, PublicationStmtParser, RespParser, RespStmtParser, SeriesStmtParser, TitleStmtParser } from './header-parser'; import { AccMatParser, AcquisitionParser, AdditionalParser, AdditionsParser, AdminInfoParser, AltIdentifierParser, BindingDescParser, BindingParser, CollationParser, CollectionParser, ConditionParser, CustEventParser, CustodialHistParser, DecoDescParser, DecoNoteParser, @@ -29,7 +29,7 @@ import { createParser, Parser, ParseResult } from './parser-models'; type AnalysisTags = 'w'; type CoreTags = 'add' | 'choice' | 'del' | 'gap' | 'graphic' | 'head' | 'l' | 'lb' | 'lg' | 'note' | 'p' | 'ptr' | 'resp' | 'respStmt' | 'sic'; type GaijiTags = 'char' | 'g' | 'glyph'; -type HeaderTags = 'editionStmt' | 'fileDesc' | 'publicationStmt' | 'titleStmt'; +type HeaderTags = 'editionStmt' | 'fileDesc' | 'publicationStmt' | 'seriesStmt' | 'titleStmt'; type MsDescriptionTags = 'accMat' | 'acquisition' | 'additional' | 'additions' | 'adminInfo' | 'altIdentifier' | 'binding' | 'bindingDesc' | 'collation' | 'collection' | 'condition' | 'custEvent' | 'custodialHist' | 'decoDesc' | 
'decoNote' | 'depth' | 'dim' | 'dimensions' | 'explicit' | 'filiation' | 'finalRubric' | 'foliation' | @@ -76,6 +76,7 @@ const headerParseF: { [T in HeaderTags]: Parser } = { editionStmt: createParser(EditionStmtParser, parse), fileDesc: createParser(FileDescParser, parse), publicationStmt: createParser(PublicationStmtParser, parse), + seriesStmt: createParser(SeriesStmtParser, parse), titleStmt: createParser(TitleStmtParser, parse), }; From b741ecd22cbcdbdebe94b9d9d9a5aed3ac0e7af2 Mon Sep 17 00:00:00 2001 From: ChiaraDipi Date: Sat, 30 Jan 2021 18:05:06 +0100 Subject: [PATCH 06/11] Add NotesStmt parser --- src/app/models/evt-models.ts | 7 ++++++- src/app/services/xml-parsers/header-parser.ts | 21 +++++++++++++++++-- src/app/services/xml-parsers/index.ts | 8 +++++-- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/src/app/models/evt-models.ts b/src/app/models/evt-models.ts index 654e0f7b4..8dfb00dab 100644 --- a/src/app/models/evt-models.ts +++ b/src/app/models/evt-models.ts @@ -780,7 +780,7 @@ export class FileDesc extends GenericElement { editionStmt?: EditionStmt; extent?: Array>; // TODO: Add specific type when extent is handled seriesStmt?: SeriesStmt; - notesStmt?: Array>; // TODO: Add specific type when notesStmt is handled + notesStmt?: NotesStmt; } export class TitleStmt extends GenericElement { @@ -832,3 +832,8 @@ export class SeriesStmt extends GenericElement { biblScope: Array>; // TODO: Add specific type when biblScope is handled editor: Array>; // TODO: Add specific type when editor is handled } + +export class NotesStmt extends GenericElement { + notes: Note[]; + relatedItems: Array>; // TODO: Add specific type when relatedItem is handled +} diff --git a/src/app/services/xml-parsers/header-parser.ts b/src/app/services/xml-parsers/header-parser.ts index 23d6d6f50..2a972ef2a 100644 --- a/src/app/services/xml-parsers/header-parser.ts +++ b/src/app/services/xml-parsers/header-parser.ts @@ -1,7 +1,7 @@ import { isNestedInElem } from 
'src/app/utils/dom-utils'; import { EditionStmt, FileDesc, GenericElement, NamedEntityRef, Note, - PublicationStmt, Resp, RespStmt, SeriesStmt, TitleStmt, XMLElement, + NotesStmt, PublicationStmt, Resp, RespStmt, SeriesStmt, TitleStmt, XMLElement, } from '../../models/evt-models'; import { GenericElemParser, NoteParser, queryAndParseElement, queryAndParseElements } from './basic-parsers'; import { NamedEntityRefParser } from './named-entity-parsers'; @@ -135,6 +135,22 @@ export class SeriesStmtParser extends GenericElemParser implements Parser { + private genericElemParser = createParser(GenericElemParser, this.genericParse); + private notesStmt = createParser(NoteParser, this.genericParse); + + parse(xml: XMLElement): NotesStmt { + return { + type: NotesStmt, + class: getClass(xml), + content: parseChildren(xml, this.genericParse, true), + attributes: this.attributeParser.parse(xml), + notes: queryAndParseElements(xml, 'note', this.notesStmt), + relatedItems: queryAndParseElements(xml, 'relatedItem', this.genericElemParser), + }; + } +} + export class FileDescParser extends GenericElemParser implements Parser { private excludeFromParsing = [ 'listBibl', @@ -150,6 +166,7 @@ export class FileDescParser extends GenericElemParser implements Parser(xml, 'publicationStmt', this.publicationStmtParser), sourceDesc: queryAndParseElements(xml, 'sourceDesc', this.genericElemParser), extent: queryAndParseElements(xml, 'extent', this.genericElemParser), - notesStmt: queryAndParseElements(xml, 'notesStmt', this.genericElemParser), + notesStmt: queryAndParseElement(xml, 'notesStmt', this.notesStmtParser), seriesStmt: queryAndParseElement(xml, 'seriesStmt', this.seriesStmtParser), }; } diff --git a/src/app/services/xml-parsers/index.ts b/src/app/services/xml-parsers/index.ts index e8d1b6c92..27dc24251 100644 --- a/src/app/services/xml-parsers/index.ts +++ b/src/app/services/xml-parsers/index.ts @@ -8,7 +8,10 @@ import { CharParser, GlyphParser, GParser } from 
'./character-declarations-parse import { ChoiceParser } from './choice-parser'; import { SicParser, SurplusParser } from './editorial-parsers'; import { GraphicParser, SurfaceParser, ZoneParser } from './facsimile-parser'; -import { EditionStmtParser, FileDescParser, PublicationStmtParser, RespParser, RespStmtParser, SeriesStmtParser, TitleStmtParser } from './header-parser'; +import { + EditionStmtParser, FileDescParser, NotesStmtParser, PublicationStmtParser, RespParser, RespStmtParser, + SeriesStmtParser, TitleStmtParser, +} from './header-parser'; import { AccMatParser, AcquisitionParser, AdditionalParser, AdditionsParser, AdminInfoParser, AltIdentifierParser, BindingDescParser, BindingParser, CollationParser, CollectionParser, ConditionParser, CustEventParser, CustodialHistParser, DecoDescParser, DecoNoteParser, @@ -29,7 +32,7 @@ import { createParser, Parser, ParseResult } from './parser-models'; type AnalysisTags = 'w'; type CoreTags = 'add' | 'choice' | 'del' | 'gap' | 'graphic' | 'head' | 'l' | 'lb' | 'lg' | 'note' | 'p' | 'ptr' | 'resp' | 'respStmt' | 'sic'; type GaijiTags = 'char' | 'g' | 'glyph'; -type HeaderTags = 'editionStmt' | 'fileDesc' | 'publicationStmt' | 'seriesStmt' | 'titleStmt'; +type HeaderTags = 'editionStmt' | 'fileDesc' | 'notesStmt' | 'publicationStmt' | 'seriesStmt' | 'titleStmt'; type MsDescriptionTags = 'accMat' | 'acquisition' | 'additional' | 'additions' | 'adminInfo' | 'altIdentifier' | 'binding' | 'bindingDesc' | 'collation' | 'collection' | 'condition' | 'custEvent' | 'custodialHist' | 'decoDesc' | 'decoNote' | 'depth' | 'dim' | 'dimensions' | 'explicit' | 'filiation' | 'finalRubric' | 'foliation' | @@ -75,6 +78,7 @@ const gaijiParseF: { [T in GaijiTags]: Parser } = { const headerParseF: { [T in HeaderTags]: Parser } = { editionStmt: createParser(EditionStmtParser, parse), fileDesc: createParser(FileDescParser, parse), + notesStmt: createParser(NotesStmtParser, parse), publicationStmt: createParser(PublicationStmtParser, parse), 
seriesStmt: createParser(SeriesStmtParser, parse), titleStmt: createParser(TitleStmtParser, parse), From 2f6816c887bdf89849b14e71810ce12e34a9ee52 Mon Sep 17 00:00:00 2001 From: ChiaraDipi Date: Sat, 30 Jan 2021 18:05:57 +0100 Subject: [PATCH 07/11] Add SourceDesc parser --- src/app/models/evt-models.ts | 12 +++++++- src/app/services/xml-parsers/header-parser.ts | 29 +++++++++++++++++-- src/app/services/xml-parsers/index.ts | 5 ++-- 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/src/app/models/evt-models.ts b/src/app/models/evt-models.ts index 8dfb00dab..1bc93d0f5 100644 --- a/src/app/models/evt-models.ts +++ b/src/app/models/evt-models.ts @@ -776,7 +776,7 @@ export class Dim extends GenericElement { export class FileDesc extends GenericElement { titleStmt: TitleStmt; publicationStmt: PublicationStmt; - sourceDesc: Array>; // TODO: Add specific type when sourceDesc is handled + sourceDesc: SourceDesc; editionStmt?: EditionStmt; extent?: Array>; // TODO: Add specific type when extent is handled seriesStmt?: SeriesStmt; @@ -837,3 +837,13 @@ export class NotesStmt extends GenericElement { notes: Note[]; relatedItems: Array>; // TODO: Add specific type when relatedItem is handled } + +export class SourceDesc extends GenericElement { + structuredData: boolean; + msDesc: MsDesc; + bibl: Array>; // TODO: Add specific type when bibl is handled + biblFull: Array>; // TODO: Add specific type when biblFull is handled + biblStruct: Array>; // TODO: Add specific type when biblStruct is handled + recordingStmt: Array>; // TODO: Add specific type when recordingStmt is handled + scriptStmt: Array>; // TODO: Add specific type when scriptStmt is handled +} diff --git a/src/app/services/xml-parsers/header-parser.ts b/src/app/services/xml-parsers/header-parser.ts index 2a972ef2a..c81c39c4d 100644 --- a/src/app/services/xml-parsers/header-parser.ts +++ b/src/app/services/xml-parsers/header-parser.ts @@ -1,9 +1,10 @@ import { isNestedInElem } from 
'src/app/utils/dom-utils'; import { - EditionStmt, FileDesc, GenericElement, NamedEntityRef, Note, - NotesStmt, PublicationStmt, Resp, RespStmt, SeriesStmt, TitleStmt, XMLElement, + EditionStmt, FileDesc, GenericElement, MsDesc, NamedEntityRef, Note, + NotesStmt, PublicationStmt, Resp, RespStmt, SeriesStmt, SourceDesc, TitleStmt, XMLElement, } from '../../models/evt-models'; import { GenericElemParser, NoteParser, queryAndParseElement, queryAndParseElements } from './basic-parsers'; +import { MsDescParser } from './msdesc-parser'; import { NamedEntityRefParser } from './named-entity-parsers'; import { createParser, getClass, parseChildren, Parser } from './parser-models'; @@ -151,6 +152,27 @@ export class NotesStmtParser extends GenericElemParser implements Parser { + private genericElemParser = createParser(GenericElemParser, this.genericParse); + private msDescParser = createParser(MsDescParser, this.genericParse); + + parse(xml: XMLElement): SourceDesc { + return { + type: SourceDesc, + class: getClass(xml), + content: parseChildren(xml, this.genericParse, true), + attributes: this.attributeParser.parse(xml), + structuredData: Array.from(xml.children).filter(el => el.tagName === 'p').length !== xml.children.length, + msDesc: queryAndParseElement(xml, 'note', this.msDescParser), + bibl: queryAndParseElements(xml, 'bibl', this.genericElemParser), + biblFull: queryAndParseElements(xml, 'biblFull', this.genericElemParser), + biblStruct: queryAndParseElements(xml, 'biblStruct', this.genericElemParser), + recordingStmt: queryAndParseElements(xml, 'recordingStmt', this.genericElemParser), + scriptStmt: queryAndParseElements(xml, 'scriptStmt', this.genericElemParser), + }; + } +} + export class FileDescParser extends GenericElemParser implements Parser { private excludeFromParsing = [ 'listBibl', @@ -167,6 +189,7 @@ export class FileDescParser extends GenericElemParser implements Parser(xml, 'titleStmt', this.titleStmtParser), editionStmt: queryAndParseElement(xml, 
'editionStmt', this.editionStmtParser), publicationStmt: queryAndParseElement(xml, 'publicationStmt', this.publicationStmtParser), - sourceDesc: queryAndParseElements(xml, 'sourceDesc', this.genericElemParser), + sourceDesc: queryAndParseElement(xml, 'sourceDesc', this.sourceDescParser), extent: queryAndParseElements(xml, 'extent', this.genericElemParser), notesStmt: queryAndParseElement(xml, 'notesStmt', this.notesStmtParser), seriesStmt: queryAndParseElement(xml, 'seriesStmt', this.seriesStmtParser), diff --git a/src/app/services/xml-parsers/index.ts b/src/app/services/xml-parsers/index.ts index 27dc24251..5020e4053 100644 --- a/src/app/services/xml-parsers/index.ts +++ b/src/app/services/xml-parsers/index.ts @@ -10,7 +10,7 @@ import { SicParser, SurplusParser } from './editorial-parsers'; import { GraphicParser, SurfaceParser, ZoneParser } from './facsimile-parser'; import { EditionStmtParser, FileDescParser, NotesStmtParser, PublicationStmtParser, RespParser, RespStmtParser, - SeriesStmtParser, TitleStmtParser, + SeriesStmtParser, SourceDescParser, TitleStmtParser, } from './header-parser'; import { AccMatParser, AcquisitionParser, AdditionalParser, AdditionsParser, AdminInfoParser, AltIdentifierParser, BindingDescParser, @@ -32,7 +32,7 @@ import { createParser, Parser, ParseResult } from './parser-models'; type AnalysisTags = 'w'; type CoreTags = 'add' | 'choice' | 'del' | 'gap' | 'graphic' | 'head' | 'l' | 'lb' | 'lg' | 'note' | 'p' | 'ptr' | 'resp' | 'respStmt' | 'sic'; type GaijiTags = 'char' | 'g' | 'glyph'; -type HeaderTags = 'editionStmt' | 'fileDesc' | 'notesStmt' | 'publicationStmt' | 'seriesStmt' | 'titleStmt'; +type HeaderTags = 'editionStmt' | 'fileDesc' | 'notesStmt' | 'publicationStmt' | 'seriesStmt' | 'sourceDesc' | 'titleStmt'; type MsDescriptionTags = 'accMat' | 'acquisition' | 'additional' | 'additions' | 'adminInfo' | 'altIdentifier' | 'binding' | 'bindingDesc' | 'collation' | 'collection' | 'condition' | 'custEvent' | 'custodialHist' | 
'decoDesc' | 'decoNote' | 'depth' | 'dim' | 'dimensions' | 'explicit' | 'filiation' | 'finalRubric' | 'foliation' | @@ -81,6 +81,7 @@ const headerParseF: { [T in HeaderTags]: Parser } = { notesStmt: createParser(NotesStmtParser, parse), publicationStmt: createParser(PublicationStmtParser, parse), seriesStmt: createParser(SeriesStmtParser, parse), + sourceDesc: createParser(SourceDescParser, parse), titleStmt: createParser(TitleStmtParser, parse), }; From 6ad31937032b0fd718bbabdf18f18739c0464ecf Mon Sep 17 00:00:00 2001 From: ChiaraDipi Date: Sat, 30 Jan 2021 18:07:02 +0100 Subject: [PATCH 08/11] Add Extent parser --- src/app/models/evt-models.ts | 4 +++- src/app/services/xml-parsers/header-parser.ts | 15 ++++++++++++--- src/app/services/xml-parsers/index.ts | 5 +++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/app/models/evt-models.ts b/src/app/models/evt-models.ts index 1bc93d0f5..a1653904c 100644 --- a/src/app/models/evt-models.ts +++ b/src/app/models/evt-models.ts @@ -778,7 +778,7 @@ export class FileDesc extends GenericElement { publicationStmt: PublicationStmt; sourceDesc: SourceDesc; editionStmt?: EditionStmt; - extent?: Array>; // TODO: Add specific type when extent is handled + extent?: Extent; seriesStmt?: SeriesStmt; notesStmt?: NotesStmt; } @@ -847,3 +847,5 @@ export class SourceDesc extends GenericElement { recordingStmt: Array>; // TODO: Add specific type when recordingStmt is handled scriptStmt: Array>; // TODO: Add specific type when scriptStmt is handled } + +export class Extent extends GenericElement { } diff --git a/src/app/services/xml-parsers/header-parser.ts b/src/app/services/xml-parsers/header-parser.ts index c81c39c4d..006cd009b 100644 --- a/src/app/services/xml-parsers/header-parser.ts +++ b/src/app/services/xml-parsers/header-parser.ts @@ -1,6 +1,6 @@ import { isNestedInElem } from 'src/app/utils/dom-utils'; import { - EditionStmt, FileDesc, GenericElement, MsDesc, NamedEntityRef, Note, + EditionStmt, Extent, 
FileDesc, GenericElement, MsDesc, NamedEntityRef, Note, NotesStmt, PublicationStmt, Resp, RespStmt, SeriesStmt, SourceDesc, TitleStmt, XMLElement, } from '../../models/evt-models'; import { GenericElemParser, NoteParser, queryAndParseElement, queryAndParseElements } from './basic-parsers'; @@ -173,6 +173,15 @@ export class SourceDescParser extends GenericElemParser implements Parser { + parse(xml: XMLElement): Extent { + return { + ...super.parse(xml), + type: Extent, + }; + } +} + export class FileDescParser extends GenericElemParser implements Parser { private excludeFromParsing = [ 'listBibl', @@ -183,13 +192,13 @@ export class FileDescParser extends GenericElemParser implements Parser(xml, 'editionStmt', this.editionStmtParser), publicationStmt: queryAndParseElement(xml, 'publicationStmt', this.publicationStmtParser), sourceDesc: queryAndParseElement(xml, 'sourceDesc', this.sourceDescParser), - extent: queryAndParseElements(xml, 'extent', this.genericElemParser), + extent: queryAndParseElement(xml, 'extent', this.extentParser), notesStmt: queryAndParseElement(xml, 'notesStmt', this.notesStmtParser), seriesStmt: queryAndParseElement(xml, 'seriesStmt', this.seriesStmtParser), }; diff --git a/src/app/services/xml-parsers/index.ts b/src/app/services/xml-parsers/index.ts index 5020e4053..e9eebdb5d 100644 --- a/src/app/services/xml-parsers/index.ts +++ b/src/app/services/xml-parsers/index.ts @@ -9,7 +9,7 @@ import { ChoiceParser } from './choice-parser'; import { SicParser, SurplusParser } from './editorial-parsers'; import { GraphicParser, SurfaceParser, ZoneParser } from './facsimile-parser'; import { - EditionStmtParser, FileDescParser, NotesStmtParser, PublicationStmtParser, RespParser, RespStmtParser, + EditionStmtParser, ExtentParser, FileDescParser, NotesStmtParser, PublicationStmtParser, RespParser, RespStmtParser, SeriesStmtParser, SourceDescParser, TitleStmtParser, } from './header-parser'; import { @@ -32,7 +32,7 @@ import { createParser, Parser, 
ParseResult } from './parser-models'; type AnalysisTags = 'w'; type CoreTags = 'add' | 'choice' | 'del' | 'gap' | 'graphic' | 'head' | 'l' | 'lb' | 'lg' | 'note' | 'p' | 'ptr' | 'resp' | 'respStmt' | 'sic'; type GaijiTags = 'char' | 'g' | 'glyph'; -type HeaderTags = 'editionStmt' | 'fileDesc' | 'notesStmt' | 'publicationStmt' | 'seriesStmt' | 'sourceDesc' | 'titleStmt'; +type HeaderTags = 'editionStmt' | 'extent' | 'fileDesc' | 'notesStmt' | 'publicationStmt' | 'seriesStmt' | 'sourceDesc' | 'titleStmt'; type MsDescriptionTags = 'accMat' | 'acquisition' | 'additional' | 'additions' | 'adminInfo' | 'altIdentifier' | 'binding' | 'bindingDesc' | 'collation' | 'collection' | 'condition' | 'custEvent' | 'custodialHist' | 'decoDesc' | 'decoNote' | 'depth' | 'dim' | 'dimensions' | 'explicit' | 'filiation' | 'finalRubric' | 'foliation' | @@ -77,6 +77,7 @@ const gaijiParseF: { [T in GaijiTags]: Parser } = { const headerParseF: { [T in HeaderTags]: Parser } = { editionStmt: createParser(EditionStmtParser, parse), + extent: createParser(ExtentParser, parse), fileDesc: createParser(FileDescParser, parse), notesStmt: createParser(NotesStmtParser, parse), publicationStmt: createParser(PublicationStmtParser, parse), From 8bb7d866448554fc6801ac4dada84cba3342785b Mon Sep 17 00:00:00 2001 From: ChiaraDipi Date: Sat, 30 Jan 2021 19:37:52 +0100 Subject: [PATCH 09/11] Add utility function to basic parsers --- src/app/services/xml-parsers/basic-parsers.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/app/services/xml-parsers/basic-parsers.ts b/src/app/services/xml-parsers/basic-parsers.ts index eb8fa2a15..053c5ab43 100644 --- a/src/app/services/xml-parsers/basic-parsers.ts +++ b/src/app/services/xml-parsers/basic-parsers.ts @@ -38,6 +38,10 @@ export class GenericElemParser extends AttrParser implements Parser } } +export class GenericParser extends GenericElemParser { + protected genericElemParser = createParser(GenericElemParser, this.genericParse); +} + export class 
AttributeParser extends EmptyParser implements Parser { parse(data: HTMLElement): Attributes { return Array.from(data.attributes) From 7ea390b5d534a3a6c1d7bff92051dbc1b27704e3 Mon Sep 17 00:00:00 2001 From: ChiaraDipi Date: Sat, 30 Jan 2021 19:38:03 +0100 Subject: [PATCH 10/11] Refactor header-parser --- src/app/services/xml-parsers/header-parser.ts | 110 ++++++------------ 1 file changed, 33 insertions(+), 77 deletions(-) diff --git a/src/app/services/xml-parsers/header-parser.ts b/src/app/services/xml-parsers/header-parser.ts index 006cd009b..540636dcf 100644 --- a/src/app/services/xml-parsers/header-parser.ts +++ b/src/app/services/xml-parsers/header-parser.ts @@ -3,22 +3,19 @@ import { EditionStmt, Extent, FileDesc, GenericElement, MsDesc, NamedEntityRef, Note, NotesStmt, PublicationStmt, Resp, RespStmt, SeriesStmt, SourceDesc, TitleStmt, XMLElement, } from '../../models/evt-models'; -import { GenericElemParser, NoteParser, queryAndParseElement, queryAndParseElements } from './basic-parsers'; +import { GenericElemParser, GenericParser, NoteParser, queryAndParseElement, queryAndParseElements } from './basic-parsers'; import { MsDescParser } from './msdesc-parser'; import { NamedEntityRefParser } from './named-entity-parsers'; -import { createParser, getClass, parseChildren, Parser } from './parser-models'; +import { createParser, Parser } from './parser-models'; export class RespParser extends GenericElemParser implements Parser { parse(xml: XMLElement): Resp { - const attributes = this.attributeParser.parse(xml); - const { ref, when } = attributes; + const { ref, when } = this.attributeParser.parse(xml); const normalizedResp = ref?.indexOf('http://') < 0 && ref?.indexOf('https://') < 0 ? `http://${ref}` : ref ?? 
''; return { + ...super.parse(xml), type: Resp, - class: getClass(xml), - content: parseChildren(xml, this.genericParse, true), - attributes, normalizedResp, date: when || '', }; @@ -26,8 +23,6 @@ export class RespParser extends GenericElemParser implements Parser } export class RespStmtParser extends GenericElemParser implements Parser { - private noteParser = createParser(NoteParser, this.genericParse); - private respParser = createParser(RespParser, this.genericParse); private namedEntityRefParser = createParser(NamedEntityRefParser, this.genericParse); parse(xml: XMLElement): RespStmt { @@ -41,29 +36,22 @@ export class RespStmtParser extends GenericElemParser implements Parser(xml, 'resp', this.respParser), - notes: queryAndParseElements(xml, 'note', this.noteParser), + responsibility: queryAndParseElement(xml, 'resp', createParser(RespParser, this.genericParse)), + notes: queryAndParseElements(xml, 'note', createParser(NoteParser, this.genericParse)), people, }; } } -export class TitleStmtParser extends GenericElemParser implements Parser { - private genericElemParser = createParser(GenericElemParser, this.genericParse); - private respStmtParser = createParser(RespStmtParser, this.genericParse); - +export class TitleStmtParser extends GenericParser implements Parser { parse(xml: XMLElement): TitleStmt { const title = queryAndParseElements(xml, 'title[type="main"]', this.genericElemParser); return { + ...super.parse(xml), type: TitleStmt, - class: getClass(xml), - content: parseChildren(xml, this.genericParse), - attributes: this.attributeParser.parse(xml), titles: title.length > 0 ? 
title : queryAndParseElements(xml, 'title:not([type="sub"])', this.genericElemParser), subtitles: queryAndParseElements(xml, 'title[type="sub"]', this.genericElemParser), authors: queryAndParseElements(xml, 'author', this.genericElemParser), @@ -71,37 +59,28 @@ export class TitleStmtParser extends GenericElemParser implements Parser(xml, 'sponsor', this.genericElemParser), funders: queryAndParseElements(xml, 'funder', this.genericElemParser), principals: queryAndParseElements(xml, 'principal', this.genericElemParser), - respStmts: queryAndParseElements(xml, 'respStmt', this.respStmtParser), + respStmts: queryAndParseElements(xml, 'respStmt', createParser(RespStmtParser, this.genericParse)), }; } } -export class EditionStmtParser extends GenericElemParser implements Parser { - private genericElemParser = createParser(GenericElemParser, this.genericParse); - private respStmtParser = createParser(RespStmtParser, this.genericParse); - +export class EditionStmtParser extends GenericParser implements Parser { parse(xml: XMLElement): EditionStmt { return { + ...super.parse(xml), type: EditionStmt, - class: getClass(xml), - content: parseChildren(xml, this.genericParse), - attributes: this.attributeParser.parse(xml), edition: queryAndParseElements(xml, 'edition', this.genericElemParser), - respStmt: queryAndParseElements(xml, 'respStmt', this.respStmtParser), + respStmt: queryAndParseElements(xml, 'respStmt', createParser(RespStmtParser, this.genericParse)), structuredData: Array.from(xml.children).filter(el => el.tagName === 'p').length !== xml.children.length, }; } } -export class PublicationStmtParser extends GenericElemParser implements Parser { - private genericElemParser = createParser(GenericElemParser, this.genericParse); - +export class PublicationStmtParser extends GenericParser implements Parser { parse(xml: XMLElement): PublicationStmt { return { + ...super.parse(xml), type: PublicationStmt, - class: getClass(xml), - content: parseChildren(xml, 
this.genericParse, true), - attributes: this.attributeParser.parse(xml), structuredData: Array.from(xml.children).filter(el => el.tagName === 'p').length !== xml.children.length, publisher: queryAndParseElements(xml, 'publisher', this.genericElemParser), distributor: queryAndParseElements(xml, 'distributor', this.genericElemParser), @@ -116,54 +95,39 @@ export class PublicationStmtParser extends GenericElemParser implements Parser { - private genericElemParser = createParser(GenericElemParser, this.genericParse); - private respStmtParser = createParser(RespStmtParser, this.genericParse); - +export class SeriesStmtParser extends GenericParser implements Parser { parse(xml: XMLElement): SeriesStmt { return { + ...super.parse(xml), type: SeriesStmt, - class: getClass(xml), - content: parseChildren(xml, this.genericParse, true), - attributes: this.attributeParser.parse(xml), structuredData: Array.from(xml.querySelectorAll(':scope > p')).length === 0, title: queryAndParseElements(xml, 'title', this.genericElemParser), idno: queryAndParseElements(xml, 'idno', this.genericElemParser), - respStmt: queryAndParseElements(xml, 'respStmt', this.respStmtParser), + respStmt: queryAndParseElements(xml, 'respStmt', createParser(RespStmtParser, this.genericParse)), editor: queryAndParseElements(xml, 'editor', this.genericElemParser), biblScope: queryAndParseElements(xml, 'biblScope', this.genericElemParser), }; } } -export class NotesStmtParser extends GenericElemParser implements Parser { - private genericElemParser = createParser(GenericElemParser, this.genericParse); - private notesStmt = createParser(NoteParser, this.genericParse); - +export class NotesStmtParser extends GenericParser implements Parser { parse(xml: XMLElement): NotesStmt { return { + ...super.parse(xml), type: NotesStmt, - class: getClass(xml), - content: parseChildren(xml, this.genericParse, true), - attributes: this.attributeParser.parse(xml), - notes: queryAndParseElements(xml, 'note', this.notesStmt), + 
notes: queryAndParseElements(xml, 'note', createParser(NoteParser, this.genericParse)), relatedItems: queryAndParseElements(xml, 'relatedItem', this.genericElemParser), }; } } -export class SourceDescParser extends GenericElemParser implements Parser { - private genericElemParser = createParser(GenericElemParser, this.genericParse); - private msDescParser = createParser(MsDescParser, this.genericParse); - +export class SourceDescParser extends GenericParser implements Parser { parse(xml: XMLElement): SourceDesc { return { + ...super.parse(xml), type: SourceDesc, - class: getClass(xml), - content: parseChildren(xml, this.genericParse, true), - attributes: this.attributeParser.parse(xml), structuredData: Array.from(xml.children).filter(el => el.tagName === 'p').length !== xml.children.length, - msDesc: queryAndParseElement(xml, 'note', this.msDescParser), + msDesc: queryAndParseElement(xml, 'msDesc', createParser(MsDescParser, this.genericParse)), bibl: queryAndParseElements(xml, 'bibl', this.genericElemParser), biblFull: queryAndParseElements(xml, 'biblFull', this.genericElemParser), biblStruct: queryAndParseElements(xml, 'biblStruct', this.genericElemParser), @@ -192,13 +156,6 @@ export class FileDescParser extends GenericElemParser implements Parser el.remove()); return { + ...super.parse(xml), type: FileDesc, - class: getClass(xml), - content: parseChildren(xml, this.genericParse), - attributes: this.attributeParser.parse(xml), - titleStmt: queryAndParseElement(xml, 'titleStmt', this.titleStmtParser), - editionStmt: queryAndParseElement(xml, 'editionStmt', this.editionStmtParser), - publicationStmt: queryAndParseElement(xml, 'publicationStmt', this.publicationStmtParser), - sourceDesc: queryAndParseElement(xml, 'sourceDesc', this.sourceDescParser), - extent: queryAndParseElement(xml, 'extent', this.extentParser), - notesStmt: queryAndParseElement(xml, 'notesStmt', this.notesStmtParser), - seriesStmt: queryAndParseElement(xml, 'seriesStmt', this.seriesStmtParser), +
titleStmt: queryAndParseElement(xml, 'titleStmt', createParser(TitleStmtParser, this.genericParse)), + editionStmt: queryAndParseElement(xml, 'editionStmt', createParser(EditionStmtParser, this.genericParse)), + publicationStmt: queryAndParseElement( + xml, 'publicationStmt', createParser(PublicationStmtParser, this.genericParse)), + sourceDesc: queryAndParseElement(xml, 'sourceDesc', createParser(SourceDescParser, this.genericParse)), + extent: queryAndParseElement(xml, 'extent', createParser(ExtentParser, this.genericParse)), + notesStmt: queryAndParseElement(xml, 'notesStmt', createParser(NotesStmtParser, this.genericParse)), + seriesStmt: queryAndParseElement(xml, 'seriesStmt', createParser(SeriesStmtParser, this.genericParse)), }; } } From 4097ba2e6ef492b4c3b100a892a89a6ca7818219 Mon Sep 17 00:00:00 2001 From: ChiaraDipi Date: Mon, 1 Feb 2021 09:55:57 +0100 Subject: [PATCH 11/11] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 800ffc5ef..1cd90c6ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Updated to Angular 9 ### Added +- File description data extraction - Critical text pages division - Xi:include support for edition text - Manuscript description data extraction