From 3370dad6df97678256aba497b3b6f27e5d29e8dd Mon Sep 17 00:00:00 2001 From: Geertivp Date: Sun, 10 Nov 2024 20:04:30 +0100 Subject: [PATCH] [IMPR] update create_isbn_edition.py patch extracted from diff to https://github.com/geertivp/Pywikibot/blob/main/create_isbn_edition.py. with the following changes - keep add_claims function - move the outer try/exception loop to amend_isbn_edition call in main() - code cleanups Signed-off-by: xqt Bug: T314942 Change-Id: I34fb2f78f9e383cf8891d80ad96e796a93450273 --- scripts/create_isbn_edition.py | 1425 ++++++++++++++++++++++++-------- 1 file changed, 1078 insertions(+), 347 deletions(-) diff --git a/scripts/create_isbn_edition.py b/scripts/create_isbn_edition.py index 3afac24652..fca0e41a24 100755 --- a/scripts/create_isbn_edition.py +++ b/scripts/create_isbn_edition.py @@ -339,6 +339,8 @@ - https://www.goodreads.com/book/show/203964185-de-nieuwe-wereldeconomie .. versionadded:: 7.7 +.. versionchanged:: 9.6 + several implementation improvements """ # noqa: E501, W505 # # (C) Pywikibot team, 2022-2024 @@ -349,14 +351,16 @@ import os # Operating system import re # Regular expressions (very handy!) -from itertools import islice +import sys # System calls +from contextlib import suppress +from datetime import date from pprint import pformat from typing import Any import pywikibot # API interface to Wikidata -from pywikibot import pagegenerators as pg # Wikidata Query interface from pywikibot.config import verbose_output as verbose from pywikibot.data import api +from pywikibot.tools import first_upper try: @@ -369,258 +373,798 @@ except ImportError as e: unidecode = e -# Initialisation -booklib = 'goob' # Default digital library +# Global variables +# Module name (using the Pywikibot package) +pgmlic = 'MIT License' +creator = 'User:Geertivp' + +MAINLANG = 'en:mul' +MULANG = 'mul' + +# Exit on fatal error (can be disabled with -p; please take care) +exitfatal = True +exitstat = 0 # (default) Exit status + +# Wikibase properties +INSTANCEPROP = 'P31' +AUTHORPROP = 'P50' +EDITORPROP = 'P98' +PROFESSIONPROP = 'P106' +ILLUSTRATORPROP = 'P110' +PUBLISHERPROP = 'P123' +# STYLEPROP = 'P135' +# GENREPROP = 'P136' +# BASEDONPROP = 'P144' +# PREVSERIALPROP = 'P155' +# NEXTSERIALPROP = 'P156' +# PRIZEPROP = 'P166' +# SERIALPROP = 'P179' +# COLLIONPROP = 'P195' +ISBNPROP = 'P212' +ISNIIDPROP = 'P213' +# BNFIDPROP = 'P268' +PLACEPUBPROP = 'P291' +OCLDIDPROP = 'P243' +REFPROP = 'P248' +# EDITIONIDPROP = 'P393' +EDITIONLANGPROP = 'P407' +WIKILANGPROP = 'P424' +# ORIGCOUNTRYPROP = 'P495' +ORCIDIDPROP = 'P496' +PUBYEARPROP = 'P577' +WRITTENWORKPROP = 'P629' +# OPENLIBIDPROP = 'P648' +# TRANSLATORPROP = 'P655' +# PERSONPROP = 'P674' +GOOGLEBOOKIDPROP = 'P675' +# INTARCHIDPROP = 'P724' +EDITIONPROP = 'P747' +# CONTRIBUTORPROP = 'P767' +REFDATEPROP = 'P813' +# STORYLOCPROP = 'P840' +# PRINTEDBYPROP = 'P872' +MAINSUBPROP = 'P921' +# INSPIREDBYPROP = 'P941' +ISBN10PROP = 'P957' +# SUDOCIDPROP = 'P1025' +DEWCLASIDPROP = 'P1036' +# EULIDPROP = 'P1084' +# LIBTHINGIDPROP = 'P1085' +NUMBEROFPAGESPROP = 'P1104' +# LCOCLCCNIDPROP = 'P1144' +# LIBCONGRESSIDPROP = 'P1149' +# BNIDPROP = 'P1143' +# UDCPROP = 'P1190' +# DNBIDPROP = 'P1292' +DESCRIBEDBYPROP = 'P1343' +EDITIONTITLEPROP = 'P1476' +SEQNRPROP = 'P1545' +EDITIONSUBTITLEPROP = 'P1680' +# ASSUMEDAUTHORPROP = 'P1779' +# RSLBOOKIDPROP = 'P1815' +# RSLEDIDPROP = 'P1973' +# GUTENBERGIDPROP = 'P2034' +FASTIDPROP = 'P2163' +# NUMPARTSPROP = 'P2635' +PREFACEBYPROP = 'P2679' +AFTERWORDBYPROP = 'P2680' +GOODREADSIDPROP = 'P2969' +# 
CZLIBIDPROP = 'P3184' +# BABELIOIDPROP = 'P3631' +# ESTCIDPROP = 'P3939' +OCLCWORKIDPROP = 'P5331' +# K10IDPROP = 'P6721' +# CREATIVEWORKTYPE = 'P7937' +LIBCONGEDPROP = 'P8360' +GOODREADSWORKIDPROP = 'P8383' + +# Instances +AUTHORINSTANCE = 'Q482980' +ILLUSTRATORINSTANCE = 'Q15296811' +WRITERINSTANCE = 'Q36180' + +authorprop_list = { + AUTHORPROP, + EDITORPROP, + ILLUSTRATORPROP, + PREFACEBYPROP, + AFTERWORDBYPROP, +} + +# Profession author required +author_profession = { + AUTHORINSTANCE, + ILLUSTRATORINSTANCE, + WRITERINSTANCE, +} + +# List of digital library synonyms +bookliblist = { + '-': 'wiki', + 'dnl': 'dnb', + 'google': 'goob', + 'gb': 'goob', + 'isbn': 'isbndb', + 'kbn': 'kb', + 'wc': 'worldcat', + 'wcat': 'worldcat', + 'wikipedia': 'wiki', + 'wp': 'wiki', +} + +# List of of digital libraries +# You can better run the script repeatedly with difference library sources. +# Content and completeness differs amongst libraryies. +bib_source = { + # database ID - item number - label - default language + 'bnf': ('Q193563', 'Catalogue General (France)', 'fr'), + 'bol': ('Q609913', 'Bol.Com', 'en'), + 'dnb': ('Q27302', 'Deutsche National Library', 'de'), + 'goob': ('Q206033', 'Google Books', 'en'), + # A (paying) api key is needed + 'isbndb': ('Q117793433', 'isbndb.com', 'en'), + 'kb': ('Q1526131', 'Koninklijke Bibliotheek (Nederland)', 'nl'), + # Not implemented in Belgium + # 'kbr': ('Q383931', 'Koninklijke Bibliotheek (België)', 'nl'), + 'loc': ('Q131454', 'Library of Congress (US)', 'en'), + 'mcues': ('Q750403', 'Ministerio de Cultura (Spain)', 'es'), + 'openl': ('Q1201876', 'OpenLibrary.org', 'en'), + 'porbase': ('Q51882885', 'Portugal (urn.porbase.org)', 'pt'), + 'sbn': ('Q576951', 'Servizio Bibliotecario Nazionale (Italië)', 'it'), + 'wiki': ('Q121093616', 'Wikipedia.org', 'en'), + 'worldcat': ('Q76630151', 'WorldCat (worldcat2)', 'en'), +} + +# Remap obsolete or non-standard language codes +langcode = { + 'dut': 'nl', + 'eng': 'en', + 'frans': 'fr', + 'fre': 'fr', + 'iw': 'he', + 'nld': 'nl', +} + +# Statement property target validation rules +propreqobjectprop = { + # Main subject statement requires an object with FAST ID property + MAINSUBPROP: {FASTIDPROP}, +} # ISBN number: 10 or 13 digits with optional dashes (-) -ISBNRE = re.compile(r'[0-9-]{10,17}') +# or DOI number with 10-prefix +ISBNRE = re.compile(r'[0-9–-]{10,17}') +NAMEREVRE = re.compile(r',(\s*.*)*$') # Reverse lastname, firstname PROPRE = re.compile(r'P[0-9]+') # Wikidata P-number QSUFFRE = re.compile(r'Q[0-9]+') # Wikidata Q-number +# Remove trailing () suffix (keep only the base label) +SUFFRE = re.compile(r'\s*[(].*[)]$') -# Other statements are added via command line parameters +# Required statement for edition +# Additional statements can be added via command line parameters target = { - 'P31': 'Q3331189', # Is an instance of an edition + INSTANCEPROP: 'Q3331189', # Is an instance of an edition + # other statements to add } -# Statement property and instance validation rules +# Instance validation rules for properties propreqinst = { - 'P50': 'Q5', # Author requires human - # Publisher requires publisher - 'P123': {'Q2085381', 'Q1114515', 'Q1320047'}, + AUTHORPROP: {'Q5'}, # Author requires human # Edition language requires at least one of (living, natural) language - 'P407': {'Q34770', 'Q33742', 'Q1288568'}, + EDITIONLANGPROP: {'Q34770', 'Q33742', 'Q1288568'}, + # Is an instance of an edition + INSTANCEPROP: {'Q24017414'}, + # Publisher requires type of publisher + PUBLISHERPROP: {'Q41298', 'Q479716', 
'Q1114515', 'Q1320047', 'Q2085381'}, + # Written work (requires list) + WRITTENWORKPROP: ['Q47461344', 'Q7725634'], } -mainlang = os.getenv('LANG', 'en')[:2] # Default description language +# Wikidata transaction comment +transcmt = '#pwb Create ISBN edition' + + +def fatal_error(errcode, errtext): + """A fatal error has occurred. + + Print the error message, and exit with an error code. + """ + global exitstat -# Connect to database -transcmt = '#pwb Create ISBN edition' # Wikidata transaction comment + exitstat = max(exitstat, errcode) + pywikibot.critical(errtext) + if exitfatal: # unless we ignore fatal errors + sys.exit(exitstat) + else: + pywikibot.warning('Proceed after fatal error') + + +def get_item_header(header: str | list[str]) -> str: + """Get the item header (label, description, alias in user language). + + :param header: item label, description, or alias language list + :return: label, description, or alias in the first available language + """ + # Return one of the preferred labels + for lang in main_languages: + if lang in header: + return header[lang] + + # Return any other available label + for lang in header: + return header[lang] + + return '-' + + +def get_item_header_lang(header: str | list[str], lang: str) -> str: + """Get the item header (label, description, alias in user language). + + :param header: item label, description, or alias language list + :param lang: language code + :return: label, description, or alias in the first available language + """ + # Try to get any explicit language code + if lang in header: + return header[lang] + + return get_item_header(header) + + +def get_item_page(qnumber) -> pywikibot.ItemPage: + """Get the item; handle redirects.""" + if isinstance(qnumber, str): + item = pywikibot.ItemPage(repo, qnumber) + try: + item.get() + except pywikibot.exceptions.IsRedirectPageError: + # Resolve a single redirect error + item = item.getRedirectTarget() + label = get_item_header(item.labels) + pywikibot.warning( + f'Item {label} ({qnumber}) redirects to {item.getID()}') + qnumber = item.getID() + else: + item = qnumber + qnumber = item.getID() + + while item.isRedirectPage(): + # Should fix the sitelinks + item = item.getRedirectTarget() + label = get_item_header(item.labels) + pywikibot.warning( + f'Item {label} ({qnumber}) redirects to {item.getID()}') + qnumber = item.getID() + + return item + + +def get_language_preferences() -> list[str]: + """Get the list of preferred languages. + + Uses environment variables LANG, LC_ALL, and LANGUAGE, 'en' is + always appended. + + .. seealso:: + - :wiki:`List_of_ISO_639-1_codes + + :Return: List of ISO 639-1 language codes with strings delimited by + ':'. + """ + # See also: + # https://www.gnu.org/software/gettext/manual/html_node/Locale-Environment-Variables.html + mainlang = os.getenv('LANGUAGE', + os.getenv('LC_ALL', + os.getenv('LANG', MAINLANG))).split(':') + main_languages = [lang.split('_')[0] for lang in mainlang] + + # Cleanup language list (remove non-ISO codes) + for lang in main_languages: + if len(lang) > 3: + main_languages.remove(lang) + + for lang in MAINLANG.split(':'): + if lang not in main_languages: + main_languages.append(lang) + + return main_languages + + +def item_is_in_list(statement_list: list, itemlist: list[str]) -> str: + """Verify if statement list contains at least one item from the itemlist. 
+ + param statement_list: Statement list + param itemlist: List of values (string) + return: Matching or empty string + """ + for seq in statement_list: + with suppress(AttributeError): # Ignore NoneType error + isinlist = seq.getTarget().getID() + if isinlist in itemlist: + return isinlist + return '' + + +def item_has_label(item, label: str) -> str: + """Verify if the item has a label. + + :param item: Item + :param label: Item label + :return: Matching string + """ + label = unidecode(label).casefold() + for lang in item.labels: + if unidecode(item.labels[lang]).casefold() == label: + return item.labels[lang] + + for lang in item.aliases: + for seq in item.aliases[lang]: + if unidecode(seq).casefold() == label: + return seq + + return '' # Must return "False" when no label + + +def is_in_value_list(statement_list: list, valuelist: list[str]) -> bool: + """Verify if statement list contains at least one value from the valuelist. + + :param statement_list: Statement list of values + :param valuelist: List of values + :return: True when match, False otherwise + """ + for seq in statement_list: + if seq.getTarget() in valuelist: + return True + return False -def is_in_list(statement_list, checklist: list[str]) -> bool: - """Verify if statement list contains at least one item from the checklist. +def get_canon_name(baselabel: str) -> str: + """Get standardised name. - :param statement_list: Statement list - :param checklist: List of values - :Returns: True when match + :param baselabel: input label """ - return any(seq.getTarget().getID() in checklist for seq in statement_list) + suffix = SUFFRE.search(baselabel) # Remove () suffix, if any + if suffix: + baselabel = baselabel[:suffix.start()] # Get canonical form + colonloc = baselabel.find(':') + commaloc = NAMEREVRE.search(baselabel) -def get_item_list(item_name: str, instance_id): + # Reorder "lastname, firstname" and concatenate with space + if colonloc < 0 and commaloc: + baselabel = (baselabel[commaloc.start() + 1:] + + ' ' + baselabel[:commaloc.start()]) + baselabel = baselabel.replace(',', ' ') # Remove remaining "," + + # Remove redundant spaces + baselabel = ' '.join(baselabel.split()) + return baselabel + + +def get_item_list(item_name: str, + instance_id: str | set[str] | list[str]) -> list[str]: """Get list of items by name, belonging to an instance (list). + Normally there should have one single best match. The caller should + take care of homonyms. + + .. seealso:: + https://www.wikidata.org/w/api.php?action=help&modules=wbsearchentities + :param item_name: Item name (case sensitive) - :param instance_id: Instance ID (string, set, or list) - :Returns: Set of items (Q-numbers) + :param instance_id: Instance ID + :return: Set of items (Q-numbers) """ + pywikibot.debug(f'Search label: {item_name.encode("utf-8")}') item_list = set() # Empty set + # TODO: try to us search_entities instead? 
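# === Illustrative sketch (editor's addition, not part of the patch) ===
# The TODO above suggests DataSite.search_entities(), which wraps the same
# wbsearchentities module that the hand-built api.Request() below calls.
# A minimal sketch, assuming a current Pywikibot release; the keys of the
# returned dictionaries (here 'id' and 'label') may vary by version:
import pywikibot

repo = pywikibot.Site('wikidata')
for entity in repo.search_entities('De Leuvense Vaart', 'nl', total=20):
    print(entity['id'], entity.get('label'))
# === end sketch ===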
params = { 'action': 'wbsearchentities', 'format': 'json', 'type': 'item', - 'strictlanguage': False, # All languages are searched, but labels are in native language + 'strictlanguage': False, 'language': mainlang, + 'uselang': mainlang, # (primary) Search language 'search': item_name, # Get item list from label + 'limit': 20, # Should be reasonable value } request = api.Request(site=repo, parameters=params) result = request.submit() + pywikibot.debug(result) if 'search' in result: + # Ignore accents and case + item_name_canon = unidecode(item_name).casefold() + + # Loop though items for res in result['search']: - item = pywikibot.ItemPage(repo, res['id']) - item.get(get_redirect=True) - if 'P31' in item.claims: - for seq in item.claims['P31']: # Loop through instances - # Matching instance - if seq.getTarget().getID() in instance_id: - for lang in item.labels: # Search all languages - # Ignore label case and accents - if (unidecode(item_name.lower()) - == unidecode(item.labels[lang].lower())): - item_list.add(item.getID()) # Label math - for lang in item.aliases: - # Case sensitive for aliases - if item_name in item.aliases[lang]: - item_list.add(item.getID()) # Alias match - return item_list - - -def amend_isbn_edition(isbn_number: str) -> None: - """Amend ISBN registration. - - Amend Wikidata, by registering the ISBN-13 data via P212, - depending on the data obtained from the digital library. - - :param isbn_number: ISBN number (10 or 13 digits with optional hyphens) + item = get_item_page(res['id']) + + # Matching instance + if INSTANCEPROP not in item.claims \ + or not item_is_in_list(item.claims[INSTANCEPROP], instance_id): + continue + + # Search all languages, ignore label case and accents + for lang in item.labels: + if (item_name_canon + == unidecode(item.labels[lang].casefold())): + item_list.add(item.getID()) # Label math + break + + for lang in item.aliases: + for seq in item.aliases[lang]: + if item_name_canon == unidecode(seq).casefold(): + item_list.add(item.getID()) # Alias match + break + + pywikibot.log(item_list) + # Convert set to list + return list(item_list) + + +def get_item_with_prop_value(prop: str, propval: str) -> list[str]: + """Get list of items that have a property/value statement. + + .. seealso:: :api:`Search` + + :param prop: Property ID + :param propval: Property value + :return: List of items (Q-numbers) + """ + srsearch = f'{prop}:{propval}' + pywikibot.debug(f'Search statement: {srsearch}') + item_name_canon = unidecode(propval).casefold() + item_list = set() + # TODO: use APISite.search instead? 
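# === Illustrative sketch (editor's addition, not part of the patch) ===
# The TODO above refers to APISite.search(); the full-text statement query
# issued by the raw list=search request below can also be expressed as a
# page generator whose page titles are the matching Q-numbers. A sketch,
# assuming the default CirrusSearch text backend is in use:
import pywikibot

repo = pywikibot.Site('wikidata')
for page in repo.search('P212:978-94-028-1317-3', namespaces=[0], total=50):
    print(page.title())  # Q-number of a matching edition item
# === end sketch ===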
+ params = { + 'action': 'query', # Statement search + 'list': 'search', + 'srsearch': srsearch, + 'srwhat': 'text', + 'format': 'json', + 'srlimit': 50, # Should be reasonable value + } + request = api.Request(site=repo, parameters=params) + result = request.submit() + # https://www.wikidata.org/w/api.php?action=query&list=search&srwhat=text&srsearch=P212:978-94-028-1317-3 + # https://www.wikidata.org/w/index.php?search=P212:978-94-028-1317-3 + + if 'query' in result and 'search' in result['query']: + # Loop though items + for row in result['query']['search']: + qnumber = row['title'] + item = get_item_page(qnumber) + + if prop not in item.claims: + continue + + for seq in item.claims[prop]: + if unidecode(seq.getTarget()).casefold() == item_name_canon: + item_list.add(item.getID()) # Found match + break + + # Convert set to list + pywikibot.log(item_list) + return sorted(item_list) + + +def amend_isbn_edition(isbn_number: str) -> int: + """Amend ISBN registration in Wikidata. + + It is registering the ISBN-13 data via P212, depending on the data + obtained from the digital library. + + :param isbn_number: ISBN number (10 or 13 digits with optional + hyphens) + :return: Return status which is: + + * 0: Amended (found or created) + * 1: Not found + * 2: Ambiguous + * 3: Other error """ isbn_number = isbn_number.strip() if not isbn_number: - return # Do nothing when the ISBN number is missing + return 3 # Do nothing when the ISBN number is missing + # Some digital library services raise failure try: isbn_data = isbnlib.meta(isbn_number, service=booklib) - # {'ISBN-13': '9789042925564', - # 'Title': 'De Leuvense Vaart - Van De Vaartkom Tot Wijgmaal. ' - # 'Aspecten Uit De Industriele Geschiedenis Van Leuven', - # 'Authors': ['A. Cresens'], - # 'Publisher': 'Peeters Pub & Booksellers', - # 'Year': '2012', - # 'Language': 'nl'} + # { + # 'ISBN-13': '9789042925564', + # 'Title': 'De Leuvense Vaart - Van De Vaartkom Tot Wijgmaal. ' + # 'Aspecten Uit De Industriele Geschiedenis Van Leuven', + # 'Authors': ['A. 
Cresens'], + # 'Publisher': 'Peeters Pub & Booksellers', + # 'Year': '2012', + # 'Language': 'nl', + # } + except isbnlib._exceptions.NotRecognizedServiceError as error: + fatal_error(4, f'{error}\n pip install isbnlib-xxx') + except isbnlib._exceptions.NotValidISBNError as error: + pywikibot.error(error) + return 1 except Exception as error: # When the book is unknown the function returns - pywikibot.error(error) - return + pywikibot.error(f'{isbn_number} not found\n{error}') + return 1 - if len(isbn_data) < 6: - pywikibot.error(f'Unknown or incomplete digital library registration ' - f'for {isbn_number}') - return + # Others return an empty result + if not isbn_data: + pywikibot.error( + f'Unknown ISBN book number {isbnlib.mask(isbn_number)}') + return 1 # Show the raw results + # Can be very useful in troubleshooting if verbose: pywikibot.info('\n' + pformat(isbn_data)) - add_claims(isbn_data) + return add_claims(isbn_data) -def add_claims(isbn_data: dict[str, Any]) -> None: # noqa: C901 +def add_claims(isbn_data: dict[str, Any]) -> int: # noqa: C901 """Inspect isbn_data and add claims if possible.""" + global proptyx + # targetx is not global (to allow for language specific editions) + # Get the book language from the ISBN book reference booklang = mainlang # Default language if isbn_data['Language']: - booklang = isbn_data['Language'].strip() - if booklang == 'iw': # Obsolete codes - booklang = 'he' - lang_list = list(get_item_list(booklang, propreqinst['P407'])) + # Get the book language from the ISBN book number + # Can overwrite the default language + booklang = isbn_data['Language'].strip().lower() + + # Replace obsolete or non-standard codes + if booklang in langcode: + booklang = langcode[booklang] + + # Get Wikidata language code + lang_list = get_item_list(booklang, propreqinst[EDITIONLANGPROP]) + + # Hardcoded parameter + if 'Q3504110' in lang_list: # Somebody insisted on this disturbing value + lang_list.remove('Q3504110') # Remove duplicate "En" language + + if not lang_list: + # Can' t store unknown language (need to update mapping table...) + pywikibot.error(f'Unknown language {booklang}'.format(booklang)) + return 3 + + if len(lang_list) != 1: + # Ambiguous language + pywikibot.warning(f'Ambiguous language {booklang}') + return 3 + + # Set edition language item number + target[EDITIONLANGPROP] = lang_list[0] + + # Require short Wikipedia language code + if len(booklang) > 3: + # Get best ranked language item + lang = get_item_page(lang_list[0]) + + # Get official language code + if WIKILANGPROP in lang.claims: + booklang = lang.claims[WIKILANGPROP][0].getTarget() - if not lang_list: - pywikibot.warning('Unknown language ' + booklang) - return + # Get edition title + edition_title = isbn_data['Title'].strip() - if len(lang_list) != 1: - pywikibot.warning('Ambiguous language ' + booklang) - return + # Split (sub)title with first matching delimiter + # By priority of parsing strings: + for seq in ['|', '. ', ' - ', ': ', '; ', ', ']: + titles = edition_title.split(seq) + if len(titles) > 1: + break - target['P407'] = lang_list[0] + if verbose: # Print (sub)title(s) + pywikibot.info('\n' + pformat(titles)) # Get formatted ISBN number isbn_number = isbn_data['ISBN-13'] # Numeric format isbn_fmtd = isbnlib.mask(isbn_number) # Canonical format pywikibot.info(isbn_fmtd) # First one - # Get (sub)title when there is a dot - titles = isbn_data['Title'].split('. ') # goob is using a '.' 
- if len(titles) == 1: - titles = isbn_data['Title'].split(': ') # Extract subtitle - if len(titles) == 1: - titles = isbn_data['Title'].split(' - ') # Extract subtitle + # Get main title and subtitle objectname = titles[0].strip() - subtitle = '' + subtitle = '' # If there was no delimiter, there is no subtitle if len(titles) > 1: - subtitle = titles[1].strip() - - # pywikibot.info book titles - pywikibot.debug(objectname) - pywikibot.debug(subtitle) # Optional - - # print subsequent subtitles, when available - for title in islice(titles, 2, None): - # Not stored in Wikidata... - pywikibot.debug(title.strip()) - - # Search the ISBN number in Wikidata both canonical and numeric - # P212 should have canonical hyphenated format - isbn_query = f"""# Get ISBN number -SELECT ?item WHERE {{ - VALUES ?isbn_number {{ - "{isbn_fmtd}" - "{isbn_number}" - }} - ?item wdt:P212 ?isbn_number. -}} -""" - - pywikibot.info(isbn_query) - generator = pg.WikidataSPARQLPageGenerator(isbn_query, site=repo) - - # Main loop for all DISTINCT items - rescnt = 0 - for rescnt, item in enumerate(generator, start=1): - qnumber = item.getID() - pywikibot.warning(f'Found item: {qnumber}') + # Redundant "subtitles" are ignored + subtitle = first_upper(titles[1].strip()) + + # Get formatted ISBN number + isbn_number = isbn_data['ISBN-13'] # Numeric format + isbn_fmtd = isbnlib.mask(isbn_number) # Canonical format (with "-") + pywikibot.log(isbn_fmtd) + + # Search the ISBN number both in canonical and numeric format + qnumber_list = get_item_with_prop_value(ISBNPROP, isbn_fmtd) + qnumber_list += get_item_with_prop_value(ISBNPROP, isbn_number) + + # Get addional data from the digital library + # This could fail with + # ISBNLibHTTPError('403 Are you making many requests?') + # Handle ISBN classification + # pwb create_isbn_edition - de P407 Q188 978-3-8376-5645-9 Q113460204 + # { + # 'owi': '11103651812', + # 'oclc': '1260160983', + # 'lcc': 'TK5105.8882', + # 'ddc': '300', + # 'fast': { + # '1175035': 'Wikis (Computer science)', + # '1795979': 'Wikipedia', + # '1122877': 'Social sciences' + # } + # } + isbn_classify = {} + try: + isbn_classify = isbnlib.classify(isbn_number) + except Exception as error: + pywikibot.error(f'Classify error, {error}') + else: + pywikibot.info('\n' + pformat(isbn_classify)) + + # Note that only older works have an ISBN10 number + isbn10_number = '' + isbn10_fmtd = '' + + # Take care of ISBNLibHTTPError + # (classify is more important than obsolete ISBN-10) + # ISBNs were not used before 1966 + # Since 2007, new ISBNs are only issued in the ISBN-13 format + if isbn_fmtd.startswith('978-'): + try: + # Returns empty string for non-978 numbers + isbn10_number = isbnlib.to_isbn10(isbn_number) + if isbn10_number: + isbn10_fmtd = isbnlib.mask(isbn10_number) + pywikibot.info(f'ISBN 10: {isbn10_fmtd}') + qnumber_list += get_item_with_prop_value(ISBN10PROP, + isbn10_fmtd) + qnumber_list += get_item_with_prop_value(ISBN10PROP, + isbn10_number) + except Exception as error: + pywikibot.error(f'ISBN 10 error, {error}') + + qnumber_list = sorted(set(qnumber_list)) # Get unique values # Create or amend the item - if rescnt == 1: - item.get(get_redirect=True) # Update item - elif not rescnt: - label = {booklang: objectname} + + if not qnumber_list: + # Create the edition + label = {MULANG: objectname} item = pywikibot.ItemPage(repo) # Create item item.editEntity({'labels': label}, summary=transcmt) + qnumber = item.getID() # Get new item number + status = 'Created' + elif len(qnumber_list) == 1: + qnumber = 
qnumber_list[0] + item = get_item_page(qnumber) qnumber = item.getID() - pywikibot.warning(f'Creating item: {qnumber}') + + # Update item only if edition, or instance is missing + if (INSTANCEPROP in item.claims + and not item_is_in_list(item.claims[INSTANCEPROP], + [target[INSTANCEPROP]])): + pywikibot.error( + f'Item {qnumber} {isbn_fmtd} is not an edition; not updated') + return 3 + + # Add missing book label for book language + if MULANG not in item.labels: + item.labels[MULANG] = objectname + item.editEntity({'labels': item.labels}, summary=transcmt) + status = 'Found' else: - pywikibot.critical(f'Ambiguous ISBN number {isbn_fmtd}') - return + pywikibot.error(f'Ambiguous ISBN number {isbn_fmtd}, ' + f'{qnumber_list} not updated') + return 2 - # Add all P/Q values - # Make sure that labels are known in the native language - pywikibot.debug(target) + pywikibot.warning(f'{status} item: P212:{isbn_fmtd} ({qnumber}) ' + f'language {booklang} ({target[EDITIONLANGPROP]}) ' + f'{get_item_header_lang(item.labels, booklang)}') - # Register statements + # Register missing statements + pywikibot.debug(target) for propty in target: if propty not in item.claims: if propty not in proptyx: proptyx[propty] = pywikibot.PropertyPage(repo, propty) - targetx[propty] = pywikibot.ItemPage(repo, target[propty]) - try: - pywikibot.warning( - f'Add {proptyx[propty].labels[booklang]} ' - f'({propty}): {targetx[propty].labels[booklang]} ' - f'({target[propty]})' - ) - except: # noqa: B001, E722, H201 - pywikibot.warning(f'Add {propty}:{target[propty]}') + # Target could get overwritten locally + targetx[propty] = pywikibot.ItemPage(repo, target[propty]) claim = pywikibot.Claim(repo, propty) claim.setTarget(targetx[propty]) - item.addClaim(claim, bot=True, summary=transcmt) + item.addClaim(claim, bot=wdbotflag, summary=transcmt) + pywikibot.warning( + f'Add {get_item_header_lang(proptyx[propty].labels, booklang)}' + f':{get_item_header_lang(targetx[propty].labels, booklang)} ' + f'({propty}:{target[propty]})' + ) + + # Set source reference + if booklib in bib_sourcex: + # A source reference can be only used once + # Expected error: + # "The provided Claim instance is already used in an entity" + # TODO: This error is sometimes raised without reason + try: + claim.addSources(booklib_ref, summary=transcmt) + except ValueError as error: + pywikibot.error(f'Source reference error, {error}') + + if (DESCRIBEDBYPROP not in item.claims + or not item_is_in_list(item.claims[DESCRIBEDBYPROP], + [bib_source[booklib][0]])): + claim = pywikibot.Claim(repo, DESCRIBEDBYPROP) + claim.setTarget(bib_sourcex[booklib]) + item.addClaim(claim, bot=wdbotflag, summary=transcmt) + pywikibot.warning( + f'Add described by:{booklib} - {bib_source[booklib][1]} ' + f'({DESCRIBEDBYPROP}:{bib_source[booklib][0]})' + ) - # Set formatted ISBN number - if 'P212' not in item.claims: - pywikibot.warning(f'Add ISBN number (P212): {isbn_fmtd}') - claim = pywikibot.Claim(repo, 'P212') + if ISBNPROP not in item.claims: + # Create formatted ISBN-13 number + claim = pywikibot.Claim(repo, ISBNPROP) claim.setTarget(isbn_fmtd) - item.addClaim(claim, bot=True, summary=transcmt) + item.addClaim(claim, bot=wdbotflag, summary=transcmt) + pywikibot.warning(f'Add ISBN number ({ISBNPROP}) {isbn_fmtd}') + else: + for seq in item.claims[ISBNPROP]: + # Update unformatted to formatted ISBN-13 + if seq.getTarget() == isbn_number: + seq.changeTarget(isbn_fmtd, bot=wdbotflag, summary=transcmt) + pywikibot.warning( + f'Set formatted ISBN number ({ISBNPROP}): {isbn_fmtd}') 
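# === Illustrative sketch (editor's addition, not part of the patch) ===
# The ISBN-13 handling above and the ISBN-10 block below rely on isbnlib
# helpers to switch between the numeric and the hyphenated ("canonical")
# form; a standalone example (printed values are indicative only):
import isbnlib

isbn13 = isbnlib.canonical('978-94-028-1317-3')  # numeric: 9789402813173
print(isbnlib.mask(isbn13))                      # hyphenated ISBN-13
isbn10 = isbnlib.to_isbn10(isbn13)               # empty for non-978 prefixes
if isbn10:
    print(isbnlib.mask(isbn10))                  # hyphenated ISBN-10
# === end sketch ===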
+ + if not isbn10_fmtd: + pass + elif ISBN10PROP in item.claims: + for seq in item.claims[ISBN10PROP]: + # Update unformatted to formatted ISBN-10 + if seq.getTarget() == isbn10_number: + seq.changeTarget(isbn10_fmtd, bot=wdbotflag, summary=transcmt) + pywikibot.warning('Set formatted ISBN-10 number ' + f'({ISBN10PROP}): {isbn10_fmtd}') + else: + # Create ISBN-10 number + claim = pywikibot.Claim(repo, ISBN10PROP) + claim.setTarget(isbn10_fmtd) + item.addClaim(claim, bot=wdbotflag, summary=transcmt) + pywikibot.warning(f'Add ISBN-10 number ({ISBN10PROP}) {isbn10_fmtd}') # Title - if 'P1476' not in item.claims: - pywikibot.warning(f'Add Title (P1476): {objectname}') - claim = pywikibot.Claim(repo, 'P1476') + if EDITIONTITLEPROP not in item.claims: + claim = pywikibot.Claim(repo, EDITIONTITLEPROP) claim.setTarget( pywikibot.WbMonolingualText(text=objectname, language=booklang)) - item.addClaim(claim, bot=True, summary=transcmt) + item.addClaim(claim, bot=wdbotflag, summary=transcmt) + pywikibot.warning(f'Add Title ({EDITIONTITLEPROP}) {objectname}') # Subtitle - if subtitle and 'P1680' not in item.claims: - pywikibot.warning(f'Add Subtitle (P1680): {subtitle}') - claim = pywikibot.Claim(repo, 'P1680') + if subtitle and EDITIONSUBTITLEPROP not in item.claims: + claim = pywikibot.Claim(repo, EDITIONSUBTITLEPROP) claim.setTarget( pywikibot.WbMonolingualText(text=subtitle, language=booklang)) - item.addClaim(claim, bot=True, summary=transcmt) + item.addClaim(claim, bot=wdbotflag, summary=transcmt) + pywikibot.warning(f'Add Subtitle ({EDITIONSUBTITLEPROP}): {subtitle}') # Date of publication pub_year = isbn_data['Year'] - if pub_year and 'P577' not in item.claims: - pywikibot.warning( - f"Add Year of publication (P577): {isbn_data['Year']}") - claim = pywikibot.Claim(repo, 'P577') + if pub_year and PUBYEARPROP not in item.claims: + claim = pywikibot.Claim(repo, PUBYEARPROP) claim.setTarget(pywikibot.WbTime(year=int(pub_year), precision='year')) - item.addClaim(claim, bot=True, summary=transcmt) + item.addClaim(claim, bot=wdbotflag, summary=transcmt) + pywikibot.warning( + f'Add Year of publication ({PUBYEARPROP}): {isbn_data["Year"]}') - # Get the author list + # Set the author list author_cnt = 0 for author_name in isbn_data['Authors']: author_name = author_name.strip() @@ -628,131 +1172,220 @@ def add_claims(isbn_data: dict[str, Any]) -> None: # noqa: C901 continue author_cnt += 1 - author_list = list(get_item_list(author_name, propreqinst['P50'])) + # Reorder "lastname, firstname" and concatenate with space + author_name = get_canon_name(author_name) + author_list = get_item_list(author_name, propreqinst[AUTHORPROP]) if len(author_list) == 1: add_author = True - if 'P50' in item.claims: - for seq in item.claims['P50']: - if seq.getTarget().getID() in author_list: + author_item = get_item_page(author_list[0]) + + if (PROFESSIONPROP not in author_item.claims + or not item_is_in_list(author_item.claims[PROFESSIONPROP], + author_profession)): + # Add profession:author statement + claim = pywikibot.Claim(repo, PROFESSIONPROP) + claim.setTarget(target_author) + author_item.addClaim( + claim, bot=wdbotflag, + summary=f'{transcmt} {PROFESSIONPROP}:{AUTHORINSTANCE}') + pywikibot.warning( + 'Add profession:author ' + f'({PROFESSIONPROP}:{AUTHORINSTANCE}) to ' + f'{author_name} ({author_list[0]})' + ) + + # Possibly found as author? + # Possibly found as editor? + # Possibly found as illustrator/photographer? 
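# === Illustrative sketch (editor's addition, not part of the patch) ===
# The loop below only adds a missing series-ordinal qualifier (P1545) to an
# existing author statement; building such a statement from scratch follows
# the usual Claim/addClaim/addQualifier pattern. The item numbers are
# placeholders (Q113460204 is the example edition item quoted elsewhere in
# the patch), and running this performs live edits on Wikidata:
import pywikibot

repo = pywikibot.Site('wikidata')
edition = pywikibot.ItemPage(repo, 'Q113460204')  # placeholder edition item
author = pywikibot.ItemPage(repo, 'Q42')          # placeholder author item

claim = pywikibot.Claim(repo, 'P50')              # author
claim.setTarget(author)
edition.addClaim(claim, summary='#pwb Create ISBN edition')

qualifier = pywikibot.Claim(repo, 'P1545')        # series ordinal
qualifier.setTarget('1')
claim.addQualifier(qualifier, summary='#pwb Create ISBN edition')
# === end sketch ===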
+ for prop in authorprop_list: + if prop not in item.claims: + continue + + for claim in item.claims[prop]: + book_author = claim.getTarget() + if book_author.getID() == author_list[0]: + # Add missing sequence number + if SEQNRPROP not in claim.qualifiers: + qualifier = pywikibot.Claim(repo, SEQNRPROP) + qualifier.setTarget(str(author_cnt)) + claim.addQualifier(qualifier, bot=wdbotflag, + summary=transcmt) add_author = False break - if add_author: - pywikibot.warning(f'Add author {author_cnt} (P50): ' - f'{author_name} ({author_list[0]})') - claim = pywikibot.Claim(repo, 'P50') - claim.setTarget(pywikibot.ItemPage(repo, author_list[0])) - item.addClaim(claim, bot=True, summary=transcmt) + elif item_has_label(book_author, author_name): + pywikibot.warning( + f'Edition has conflicting author ({prop}) ' + f'{author_name} ({book_author.getID()})' + ) + add_author = False + break - qualifier = pywikibot.Claim(repo, 'P1545') + if add_author: + claim = pywikibot.Claim(repo, AUTHORPROP) + claim.setTarget(author_item) + item.addClaim(claim, bot=wdbotflag, summary=transcmt) + pywikibot.warning(f'Add author {author_cnt}:{author_name} ' + f'({AUTHORPROP}:{author_list[0]})') + + # Add sequence number + qualifier = pywikibot.Claim(repo, SEQNRPROP) qualifier.setTarget(str(author_cnt)) - claim.addQualifier(qualifier, summary=transcmt) - elif not author_list: - pywikibot.warning(f'Unknown author: {author_name}') + claim.addQualifier(qualifier, bot=wdbotflag, summary=transcmt) + elif author_list: + pywikibot.error(f'Ambiguous author: {author_name}') else: - pywikibot.warning(f'Ambiguous author: {author_name}') + pywikibot.error(f'Unknown author: {author_name}') - # Get the publisher + # Set the publisher publisher_name = isbn_data['Publisher'].strip() if publisher_name: - publisher_list = list( - get_item_list(publisher_name, propreqinst['P123'])) + publisher_list = get_item_list(get_canon_name(publisher_name), + propreqinst[PUBLISHERPROP]) if len(publisher_list) == 1: - if 'P123' not in item.claims: - pywikibot.warning(f'Add publisher (P123): {publisher_name} ' - f'({publisher_list[0]})') - claim = pywikibot.Claim(repo, 'P123') - claim.setTarget(pywikibot.ItemPage(repo, publisher_list[0])) - item.addClaim(claim, bot=True, summary=transcmt) - elif not publisher_list: - pywikibot.warning('Unknown publisher: ' + publisher_name) + if (PUBLISHERPROP not in item.claims + or not item_is_in_list(item.claims[PUBLISHERPROP], + publisher_list)): + claim = pywikibot.Claim(repo, PUBLISHERPROP) + claim.setTarget(get_item_page(publisher_list[0])) + item.addClaim(claim, bot=wdbotflag, summary=transcmt) + pywikibot.warning(f'Add publisher: {publisher_name} ' + f'({PUBLISHERPROP}:{publisher_list[0]})') + elif publisher_list: + pywikibot.error(f'Ambiguous publisher: {publisher_name} ' + f'({publisher_list})') else: - pywikibot.warning('Ambiguous publisher: ' + publisher_name) + pywikibot.error(f'Unknown publisher: {publisher_name}') - # Get addional data from the digital library - isbn_cover = isbnlib.cover(isbn_number) - isbn_editions = isbnlib.editions(isbn_number, service='merge') - isbn_doi = isbnlib.doi(isbn_number) - isbn_info = isbnlib.info(isbn_number) - - if verbose: - pywikibot.info() - pywikibot.info(isbn_info) - pywikibot.info(isbn_doi) - pywikibot.info(isbn_editions) + # Amend Written work relationship (one to many relationship) + if WRITTENWORKPROP in item.claims: + work = item.claims[WRITTENWORKPROP][0].getTarget() + if len(item.claims[WRITTENWORKPROP]) > 1: # Many to many (error) + pywikibot.error(f'Written 
work {work.getID()} is not unique') + else: + # Enhance data quality for Written work + if ISBNPROP in work.claims: + pywikibot.error(f'Written work {work.getID()} must not have' + ' an ISBN number') + + # Add written work instance + if (INSTANCEPROP not in work.claims + or not item_is_in_list(work.claims[INSTANCEPROP], + propreqinst[WRITTENWORKPROP])): + claim = pywikibot.Claim(repo, INSTANCEPROP) + claim.setTarget(get_item_page(propreqinst[WRITTENWORKPROP][0])) + work.addClaim(claim, bot=wdbotflag, summary=transcmt) + pywikibot.warning( + f'Add is a:written work instance ({INSTANCEPROP}:' + f'{propreqinst[WRITTENWORKPROP][0]}) ' + f'to written work {work.getID()}' + ) - # Book cover images - pywikibot.info(pformat(isbn_cover)) + # Check if inverse relationship to "edition of" exists + if (EDITIONPROP not in work.claims + or not item_is_in_list(work.claims[EDITIONPROP], + [qnumber])): + claim = pywikibot.Claim(repo, EDITIONPROP) + claim.setTarget(item) + work.addClaim(claim, bot=wdbotflag, summary=transcmt) + pywikibot.warning( + f'Add edition statement ({EDITIONPROP}:{qnumber}) to ' + f'written work {work.getID()}' + ) - # Handle ISBN classification - isbn_classify = isbnlib.classify(isbn_number) - pywikibot.debug(pformat(isbn_classify)) - - # ./create_isbn_edition.py '978-3-8376-5645-9' - de P407 Q188 - # Q113460204 - # {'owi': '11103651812', 'oclc': '1260160983', 'lcc': 'TK5105.8882', - # 'ddc': '300', 'fast': {'1175035': 'Wikis (Computer science)', - # '1795979': 'Wikipedia', - # '1122877': 'Social sciences'}} - - # Set the OCLC ID - if 'oclc' in isbn_classify and 'P243' not in item.claims: - pywikibot.warning(f"Add OCLC ID (P243): {isbn_classify['oclc']}") - claim = pywikibot.Claim(repo, 'P243') + # We need to first set the OCLC ID + # Because OCLC Work ID can be in conflict for edition + if 'oclc' in isbn_classify and OCLDIDPROP not in item.claims: + claim = pywikibot.Claim(repo, OCLDIDPROP) claim.setTarget(isbn_classify['oclc']) - item.addClaim(claim, bot=True, summary=transcmt) - - # OCLC ID and OCLC work ID should not be both assigned - if 'P243' in item.claims and 'P5331' in item.claims: - if 'P629' in item.claims: - oclcwork = item.claims['P5331'][0] # OCLC Work should be unique - # Get the OCLC Work ID from the edition - oclcworkid = oclcwork.getTarget() + item.addClaim(claim, bot=wdbotflag, summary=transcmt) + pywikibot.warning( + f'Add OCLC ID ({OCLDIDPROP}) {isbn_classify["oclc"]}') + + # OCLC ID and OCLC Work ID should not be both assigned + # Move OCLC Work ID to work if possible + if OCLDIDPROP in item.claims and OCLCWORKIDPROP in item.claims: + # Check if OCLC Work is available + oclcwork = item.claims[OCLCWORKIDPROP][0] # OCLC Work should be unique + # Get the OCLC Work ID from the edition + oclcworkid = oclcwork.getTarget() + + # Keep OCLC Work ID in edition if ambiguous + if len(item.claims[OCLCWORKIDPROP]) > 1: + pywikibot.error( + 'OCLC Work ID {work.getID()} is not unique; not moving') + elif WRITTENWORKPROP in item.claims: # Edition should belong to only one single work - work = item.claims['P629'][0].getTarget() - # There doesn't exist a moveClaim method? 
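# === Illustrative sketch (editor's addition, not part of the patch) ===
# As the removed comment notes, Pywikibot has no moveClaim() helper, so the
# patch "moves" the OCLC Work ID (P5331) by re-adding its value on the
# written work (the P629 target) and then deleting the claim from the
# edition. The bare pattern, with a placeholder edition item:
import pywikibot

repo = pywikibot.Site('wikidata')
edition = pywikibot.ItemPage(repo, 'Q113460204')  # placeholder edition item
edition.get()

if 'P5331' in edition.claims and 'P629' in edition.claims:
    oclc_work_claim = edition.claims['P5331'][0]
    work = edition.claims['P629'][0].getTarget()
    work.get()
    if 'P5331' not in work.claims:
        moved = pywikibot.Claim(repo, 'P5331')
        moved.setTarget(oclc_work_claim.getTarget())
        work.addClaim(moved, summary='#pwb Move OCLC Work ID')
    edition.removeClaims(oclc_work_claim, summary='#pwb Move OCLC Work ID')
# === end sketch ===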
+ work = item.claims[WRITTENWORKPROP][0].getTarget() pywikibot.warning( f'Move OCLC Work ID {oclcworkid} to work {work.getID()}') - # Keep current OCLC Work ID if present - if 'P5331' not in work.claims: - claim = pywikibot.Claim(repo, 'P5331') + + # Keep OCLC Work ID in edition if mismatch or ambiguity + if len(item.claims[WRITTENWORKPROP]) > 1: + pywikibot.error( + f'Written Work {work.getID()} is not unique; not moving') + elif OCLCWORKIDPROP not in work.claims: + claim = pywikibot.Claim(repo, OCLCWORKIDPROP) claim.setTarget(oclcworkid) - work.addClaim(claim, bot=True, summary=transcmt) - # OCLC Work ID does not belong to edition - item.removeClaims(oclcwork, bot=True, summary=transcmt) + work.addClaim(claim, bot=wdbotflag, + summary='#pwb Move OCLC Work ID') + pywikibot.warning( + f'Move OCLC Work ID ({OCLCWORKIDPROP}) {oclcworkid} to ' + f'written work {work.getID()}' + ) + + # OCLC Work ID does not belong to edition + item.removeClaims(oclcwork, bot=wdbotflag, + summary='#pwb Move OCLC Work ID') + elif is_in_value_list(work.claims[OCLCWORKIDPROP], oclcworkid): + # OCLC Work ID does not belong to edition + item.removeClaims(oclcwork, bot=wdbotflag, + summary='#pwb Remove redundant OCLC Work ID') + else: + pywikibot.error( + f'OCLC Work ID mismatch {oclcworkid} - ' + f'{work.claims[OCLCWORKIDPROP][0].getTarget()}; not moving' + ) else: - pywikibot.error('OCLC Work ID {} conflicts with OCLC ID {} and no ' - 'work available' - .format(item.claims['P5331'][0].getTarget(), - item.claims['P243'][0].getTarget())) + pywikibot.error(f'OCLC Work ID {oclcworkid} conflicts with OCLC ' + f'ID {item.claims[OCLDIDPROP][0].getTarget()} and' + ' no work available') # OCLC work ID should not be registered for editions, only for works if 'owi' not in isbn_classify: pass - elif 'P629' in item.claims: # Get the work related to the edition + elif WRITTENWORKPROP in item.claims: + # Get the work related to the edition # Edition should only have one single work - work = item.claims['P629'][0].getTarget() - if 'P5331' not in work.claims: # Assign the OCLC work ID if missing - pywikibot.warning( - f"Add OCLC work ID (P5331): {isbn_classify['owi']} to work " - f'{work.getID()}') - claim = pywikibot.Claim(repo, 'P5331') + # Assign the OCLC work ID if missing in work + work = item.claims[WRITTENWORKPROP][0].getTarget() + if (OCLCWORKIDPROP not in work.claims + or not is_in_value_list(work.claims[OCLCWORKIDPROP], + isbn_classify['owi'])): + claim = pywikibot.Claim(repo, OCLCWORKIDPROP) claim.setTarget(isbn_classify['owi']) - work.addClaim(claim, bot=True, summary=transcmt) - elif 'P243' in item.claims: - pywikibot.warning('OCLC Work ID {} ignored because of OCLC ID {}' - .format(isbn_classify['owi'], - item.claims['P243'][0].getTarget())) - # Assign the OCLC work ID only if there is no work, and no OCLC ID - # for edition - elif 'P5331' not in item.claims: + work.addClaim(claim, bot=wdbotflag, summary=transcmt) + pywikibot.warning( + f'Add OCLC work ID ({OCLCWORKIDPROP}) {isbn_classify["owi"]} ' + f'to work {work.getID()}' + ) + elif OCLDIDPROP in item.claims: pywikibot.warning( - f"Add OCLC work ID (P5331): {isbn_classify['owi']} to edition") - claim = pywikibot.Claim(repo, 'P5331') + f'OCLC Work ID {isbn_classify["owi"]} ignored because of OCLC ID' + f'{item.claims[OCLDIDPROP][0].getTarget()}' + ) + elif (OCLCWORKIDPROP not in item.claims + or not is_in_value_list(item.claims[OCLCWORKIDPROP], + isbn_classify['owi'])): + # Assign the OCLC work ID only if there is no work, and no OCLC ID for + # edition + claim = 
pywikibot.Claim(repo, OCLCWORKIDPROP)
         claim.setTarget(isbn_classify['owi'])
-        item.addClaim(claim, bot=True, summary=transcmt)
+        item.addClaim(claim, bot=wdbotflag, summary=transcmt)
+        pywikibot.warning(f'Add OCLC work ID ({OCLCWORKIDPROP}) '
+                          f'{isbn_classify["owi"]} to edition')
     # Reverse logic for moving OCLC ID and P212 (ISBN) from work to
     # edition is more difficult because of 1:M relationship...
@@ -765,22 +1398,22 @@ def add_claims(isbn_data: dict[str, Any]) -> None:  # noqa: C901
     # registered for editions; should rather use P2969
     # Library of Congress Classification (works and editions)
-    if 'lcc' in isbn_classify and 'P8360' not in item.claims:
-        pywikibot.warning(
-            'Add Library of Congress Classification for edition (P8360): '
-            f"{isbn_classify['lcc']}")
-        claim = pywikibot.Claim(repo, 'P8360')
+    if 'lcc' in isbn_classify and LIBCONGEDPROP not in item.claims:
+        claim = pywikibot.Claim(repo, LIBCONGEDPROP)
         claim.setTarget(isbn_classify['lcc'])
-        item.addClaim(claim, bot=True, summary=transcmt)
-
-    # Dewey Decimale Classificatie
-    if 'ddc' in isbn_classify and 'P1036' not in item.claims:
+        item.addClaim(claim, bot=wdbotflag, summary=transcmt)
         pywikibot.warning(
-            f"Add Dewey Decimale Classificatie (P1036): {isbn_classify['ddc']}"
+            'Add Library of Congress Classification for edition '
+            f'({LIBCONGEDPROP}) {isbn_classify["lcc"]}'
         )
-        claim = pywikibot.Claim(repo, 'P1036')
+
+    # Dewey Decimal Classification
+    if 'ddc' in isbn_classify and DEWCLASIDPROP not in item.claims:
+        claim = pywikibot.Claim(repo, DEWCLASIDPROP)
         claim.setTarget(isbn_classify['ddc'])
-        item.addClaim(claim, bot=True, summary=transcmt)
+        item.addClaim(claim, bot=wdbotflag, summary=transcmt)
+        pywikibot.warning(f'Add Dewey Decimal Classification ({DEWCLASIDPROP})'
+                          f' {isbn_classify["ddc"]}')
     # Register Fast ID using P921 (main subject) through P2163 (Fast ID)
     # https://www.wikidata.org/wiki/Q3294867
@@ -793,76 +1426,75 @@ def add_claims(isbn_data: dict[str, Any]) -> None:  # noqa: C901
     # Corresponding to P921 (Wikidata main subject)
     if 'fast' in isbn_classify:
         for fast_id in isbn_classify['fast']:
-
-            # Get the main subject
-            main_subject_query = f"""# Search the main subject
-SELECT ?item WHERE {{
-    ?item wdt:P2163 "{fast_id}".
-}} -""" - - pywikibot.info(main_subject_query) - generator = pg.WikidataSPARQLPageGenerator(main_subject_query, - site=repo) - - # Main loop for all DISTINCT items - rescnt = 0 - for rescnt, main_subject in enumerate(generator, start=1): - qmain_subject = main_subject.getID() - try: - main_subject_label = main_subject.labels[booklang] - pywikibot.info(f'Found main subject {main_subject_label} ' - f'({qmain_subject}) for Fast ID {fast_id}') - except: # noqa: B001, E722, H201 - main_subject_label = '' - pywikibot.info(f'Found main subject ({qmain_subject}) for ' - f'Fast ID {fast_id}') - pywikibot.error(f'Missing label for item {qmain_subject}') - - # Create or amend P921 statement - if not rescnt: - pywikibot.error( - f'Main subject not found for Fast ID {fast_id}') - elif rescnt == 1: - add_main_subject = True - if 'P921' in item.claims: # Check for duplicates - for seq in item.claims['P921']: - if seq.getTarget().getID() == qmain_subject: - add_main_subject = False - break - - if add_main_subject: - pywikibot.warning( - f'Add main subject (P921) {main_subject_label} ' - f'({qmain_subject})') - claim = pywikibot.Claim(repo, 'P921') - claim.setTarget(main_subject) - item.addClaim(claim, bot=True, summary=transcmt) + # Get the main subject item number + qmain_subject = get_item_with_prop_value(FASTIDPROP, fast_id) + main_subject_label = isbn_classify['fast'][fast_id].lower() + + if len(qmain_subject) == 1: + # Get main subject and label + main_subject = get_item_page(qmain_subject[0]) + main_subject_label = get_item_header(main_subject.labels) + + if (MAINSUBPROP in item.claims + and item_is_in_list(item.claims[MAINSUBPROP], + qmain_subject)): + pywikibot.log( + f'Skipping main subject ({MAINSUBPROP}): ' + f'{main_subject_label} ({qmain_subject[0]})' + ) else: - pywikibot.info(f'Skipping main subject ' - f'{main_subject_label} ({qmain_subject})') + claim = pywikibot.Claim(repo, MAINSUBPROP) + claim.setTarget(main_subject) + # Add main subject + item.addClaim(claim, bot=wdbotflag, summary=transcmt) + pywikibot.warning( + f'Add main subject:{main_subject_label} ' + f'({MAINSUBPROP}:{qmain_subject[0]})' + ) + elif qmain_subject: + pywikibot.error(f'Ambiguous main subject for Fast ID {fast_id}' + f' - {main_subject_label}') else: - pywikibot.error( - f'Ambiguous main subject for Fast ID {fast_id}') - show_final_information(isbn_number, isbn_doi) + pywikibot.error(f'Main subject not found for Fast ID {fast_id}' + f' - {main_subject_label}') + + show_final_information(isbn_number) + return 0 -def show_final_information(number, doi): - """Print additional information.""" +def show_final_information(isbn_number: str) -> None: + """Print additional information. + + Get optional information.Could generate too many transactions errors; + so the process might stop at the first error. 
+ """ # Book description - description = isbnlib.desc(number) - if description: + isbn_description = isbnlib.desc(isbn_number) + if isbn_description: pywikibot.info() - pywikibot.info(description) + pywikibot.info(isbn_description) - try: - bibtex_metadata = isbnlib.doi2tex(doi) - except Exception as error: - # Currently does not work (service not available) - pywikibot.error(error) # Data not available - pywikibot.warning('BibTex unavailable') - else: - pywikibot.info(bibtex_metadata) + # ISBN info + isbn_info = isbnlib.info(isbn_number) + if isbn_info: + pywikibot.info(isbn_info) + + # DOI number + isbn_doi = isbnlib.doi(isbn_number) + if isbn_doi: + pywikibot.info(isbn_doi) + + # ISBN editions + isbn_editions = isbnlib.editions(isbn_number, service='merge') + if isbn_editions: + pywikibot.info(isbn_editions) + + # Book cover images + isbn_cover = isbnlib.cover(isbn_number) + for seq in isbn_cover: + pywikibot.info(f'{seq}: {isbn_cover[seq]}') + + # BibTex currently does not work (service not available); code was removed. def main(*args: str) -> None: @@ -899,67 +1531,166 @@ def main(*args: str) -> None: :param args: command line arguments """ + global bib_sourcex global booklib + global booklib_ref + global exitstat global mainlang - global repo + global main_languages global proptyx + global repo + global target_author global targetx + global wdbotflag + + # check dependencies + for module in (isbnlib, unidecode): + if isinstance(module, ImportError): + raise module # Get optional parameters local_args = pywikibot.handle_args(*args) + # Connect to databases # Login to Wikibase instance # Required for wikidata object access (item, property, statement) repo = pywikibot.Site('wikidata') + repo.login() + + # Get language list + main_languages = get_language_preferences() + + # Get all program parameters + pywikibot.info(f'{pywikibot.calledModuleName()}, ' + f'{pywikibot.__version__}, {pgmlic}, {creator}') + + # This script requires a bot flag + wdbotflag = 'bot' in pywikibot.User(repo, repo.user()).groups() + + # Prebuilt targets + target_author = pywikibot.ItemPage(repo, AUTHORINSTANCE) + + # Get today's date + today = date.today() + date_ref = pywikibot.WbTime(year=int(today.strftime('%Y')), + month=int(today.strftime('%m')), + day=int(today.strftime('%d')), + precision='day') # Get the digital library + booklib = 'wiki' if local_args: booklib = local_args.pop(0) - if booklib == '-': - booklib = 'goob' + booklib = bookliblist.get(booklib, booklib) + + # Get ItemPage for digital library sources + bib_sourcex = {seq: get_item_page(bib_source[seq][0]) + for seq in bib_source} + + if booklib in bib_sourcex: + references = pywikibot.Claim(repo, REFPROP) + references.setTarget(bib_sourcex[booklib]) + + # Set retrieval date + retrieved = pywikibot.Claim(repo, REFDATEPROP) + retrieved.setTarget(date_ref) + booklib_ref = [references, retrieved] + + # Register source and retrieval date + mainlang = bib_source[booklib][2] + else: + # Unknown bib reference - show implemented codes + for seq in bib_source: + pywikibot.info(f'{seq.ljust(10)}{bib_source[seq][2].ljust(4)}' + f'{bib_source[seq][1]}') + fatal_error(3, f'Unknown Digital library ({REFPROP}) {booklib}') + + # Get optional parameters (all are optional) # Get the native language # The language code is only required when P/Q parameters are added, - # or different from the LANG code + # or different from the environment LANG code if local_args: mainlang = local_args.pop(0) + if mainlang not in main_languages: + main_languages.insert(0, mainlang) 
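# === Illustrative sketch (editor's addition, not part of the patch) ===
# The optional P/Q arguments handled further below are picked apart with
# the PROPRE and QSUFFRE regular expressions defined at module level;
# e.g. for the documented call "pwb create_isbn_edition - de P407 Q188 ...":
import re

PROPRE = re.compile(r'P[0-9]+')   # Wikidata P-number
QSUFFRE = re.compile(r'Q[0-9]+')  # Wikidata Q-number

args = ['P407', 'Q188', '978-3-8376-5645-9']
prop = PROPRE.findall(args[0].upper())[0]   # 'P407' (edition language)
qnum = QSUFFRE.findall(args[1].upper())[0]  # 'Q188' (German)
print({prop: qnum})                         # one extra target statement
# === end sketch ===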
+ + pywikibot.info( + f'Refers to Digital library: {bib_source[booklib][1]} ' + f'({REFPROP}:{bib_source[booklib][0]}), language {mainlang}' + ) + # Get additional P/Q parameters while local_args: - inpar = PROPRE.findall(local_args.pop(0).upper())[0] - target[inpar] = QSUFFRE.findall(local_args.pop(0).upper())[0] + inpar = local_args.pop(0).upper() + inprop = PROPRE.findall(inpar)[0] + + if ':-' in inpar: + target[inprop] = '-' + else: + if ':Q' not in inpar: + inpar = local_args.pop(0).upper() + try: + target[inprop] = QSUFFRE.findall(inpar)[0] + except IndexError: + target[inprop] = '-' + break # Validate P/Q list proptyx = {} targetx = {} - # Validate the propery/instance pair + # Validate and encode the propery/instance pair for propty in target: if propty not in proptyx: proptyx[propty] = pywikibot.PropertyPage(repo, propty) - targetx[propty] = pywikibot.ItemPage(repo, target[propty]) - targetx[propty].get(get_redirect=True) - if propty in propreqinst and ( - 'P31' not in targetx[propty].claims or not is_in_list( - targetx[propty].claims['P31'], propreqinst[propty])): - pywikibot.critical(f'{targetx[propty].labels[mainlang]} ' - f'({target[propty]}) is not a language') - return - - # check dependencies - for module in (isbnlib, unidecode): - if isinstance(module, ImportError): - raise module + if target[propty] != '-': + targetx[propty] = get_item_page(target[propty]) + pywikibot.info(f'Add {get_item_header(proptyx[propty].labels)}:' + f'{get_item_header(targetx[propty].labels)} ' + f'({propty}:{target[propty]})') + + # Check the instance type for P/Q pairs (critical) + if (propty in propreqinst + and (INSTANCEPROP not in targetx[propty].claims + or not item_is_in_list(targetx[propty].claims[INSTANCEPROP], + propreqinst[propty]))): + pywikibot.critical( + f'{get_item_header(targetx[propty].labels)} ({target[propty]})' + f' is not one of instance type {propreqinst[propty]} for ' + f'statement {get_item_header(proptyx[propty].labels)} ' + f'({propty})' + ) + sys.exit(3) + + # Verify that the target of a statement has a certain property + # (warning) + if (propty in propreqobjectprop + and not item_is_in_list(targetx[propty].claims, + propreqobjectprop[propty])): + pywikibot.error( + f'{get_item_header(targetx[propty].labels)} ({target[propty]})' + f' does not have property {propreqobjectprop[propty]} for ' + f'statement {get_item_header(proptyx[propty].labels)} ' + f'({propty})' + ) # Get list of item numbers # Typically the Appendix list of references of e.g. a Wikipedia page # containing ISBN numbers - inputfile = pywikibot.input('Get list of item numbers') - # Extract all ISBN numbers - itemlist = sorted(set(ISBNRE.findall(inputfile))) + # Extract all ISBN numbers from local args + itemlist = sorted(arg for arg in local_args if ISBNRE.fullmatch(arg)) for isbn_number in itemlist: # Process the next edition - amend_isbn_edition(isbn_number) + try: + exitstat = amend_isbn_edition(isbn_number) + except isbnlib.dev._exceptions.ISBNLibHTTPError: + pywikibot.exception() + except pywikibot.exceptions.Error as e: + pywikibot.error(e) + + sys.exit(exitstat) if __name__ == '__main__':
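# === Illustrative sketch (editor's addition, not part of the patch) ===
# End to end, the script gathers its input from two isbnlib lookups before
# touching Wikidata: meta() for the bibliographic record and classify() for
# the OCLC/LCC/DDC/FAST identifiers. A standalone dry run, assuming the
# Google Books backend ('goob') is reachable and knows the number:
import isbnlib

isbn = '978-94-028-1317-3'
meta = isbnlib.meta(isbn, service='goob')
print(meta.get('Title'), meta.get('Authors'), meta.get('Language'))

classify = isbnlib.classify(isbnlib.canonical(isbn))
print(classify.get('oclc'), classify.get('owi'), classify.get('fast'))
# === end sketch ===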