Skip to content

Commit

Permalink
Merge pull request #82 from NatLibFi/EKIR-232-demarque-audience-age-m…
Browse files Browse the repository at this point in the history
…apping

Ekir 232 demarque audience age mapping
  • Loading branch information
natlibfi-kaisa authored Jun 20, 2024
2 parents 3f69a51 + 2e00825 commit 2d6e7c9
Show file tree
Hide file tree
Showing 5 changed files with 151 additions and 250 deletions.
41 changes: 36 additions & 5 deletions core/classifier/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
"""
A classifier module that classifies books and subjects into various categories. This module is called when importing
collections to a library. It is called from core/model/classification.py.
"""

# If the genre classification does not match the fiction classification, throw
# away the genre classifications.
#
Expand Down Expand Up @@ -37,13 +42,15 @@ class ClassifierConstants:
BISAC = "BISAC"
BIC = "BIC"
TAG = "tag" # Folksonomic tags.
DEMARQUE = "De Marque"

# Appeal controlled vocabulary developed by NYPL
NYPL_APPEAL = "NYPL Appeal"

GRADE_LEVEL = "Grade level" # "1-2", "Grade 4", "Kindergarten", etc.
AGE_RANGE = "schema:typicalAgeRange" # "0-2", etc.
AXIS_360_AUDIENCE = "Axis 360 Audience"
DEMARQUE_AUDIENCE = "schema:Audience"

# We know this says something about the audience but we're not sure what.
# Could be any of the values from GRADE_LEVEL or AGE_RANGE, plus
Expand Down Expand Up @@ -1104,7 +1111,6 @@ def add(self, classification):
self.seen_classifications.add(key)
if self.debug:
self.classifications.append(classification)

# Make sure the Subject is ready to be used in calculations.
if not classification.subject.checked: # or self.debug
classification.subject.assign_to_genre()
Expand Down Expand Up @@ -1223,6 +1229,32 @@ def add(self, classification):
# "Juvenile Fiction".
self.overdrive_juvenile_generic = classification

# E-kirjasto: Since De Marque classifications have target ages for children's and YA books, we want to weigh
# them more heavily by setting their weights to 1.0. This ensures that those books are classified accordingly.
if subject.type == "De Marque" and (
subject.audience == Classifier.AUDIENCE_CHILDREN
or subject.audience == Classifier.AUDIENCE_YOUNG_ADULT
):
if subject.target_age:
# Set the weight to 1.0 for any target age.
self.audience_weights = Counter()
self.audience_weights[subject.audience] += weight * 1.0
scaled_weight = classification.weight_as_indicator_of_target_age
target_min = subject.target_age.lower
target_max = subject.target_age.upper
if target_min is not None:
self.target_age_lower_weights[target_min] = 1.0
if target_max is not None:
self.target_age_upper_weights[target_max] = 1.0
# E-kirjasto: Some De Marque adult books were incorrectly classified as children's books. Let's set the
# weight to 1.0 for any adult audience books.
if (
subject.type == "De Marque"
and subject.audience == Classifier.AUDIENCE_ADULT
):
self.audience_weights = Counter()
self.audience_weights[subject.audience] += weight * 1.0

def weigh_metadata(self):
"""Modify the weights according to the given Work's metadata.
Expand Down Expand Up @@ -1497,12 +1529,10 @@ def target_age(self, audience):
if target_age_min is None:
target_age_min = target_age_max

if target_age_max is None:
# Err on the side of setting the minimum age too high but first ensure we have values to compare.
if target_age_min and target_age_max and target_age_min > target_age_max:
target_age_max = target_age_min

# Err on the side of setting the minimum age too high.
if target_age_min > target_age_max:
target_age_max = target_age_min
return Classifier.range_tuple(target_age_min, target_age_max)

def genres(self, fiction, cutoff=0.15):
Expand Down Expand Up @@ -1624,6 +1654,7 @@ def consolidate_genre_weights(cls, weights, subgenre_swallows_parent_at=0.03):
from core.classifier.bic import BICClassifier
from core.classifier.bisac import BISACClassifier
from core.classifier.ddc import DeweyDecimalClassifier
from core.classifier.demarque import DeMarqueClassifier
from core.classifier.gutenberg import GutenbergBookshelfClassifier
from core.classifier.keyword import (
Eg,
Expand Down
62 changes: 62 additions & 0 deletions core/classifier/demarque.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""Classifier to extract classifications from De Marque data.
"""
from core.classifier import *


class DeMarqueClassifier(Classifier):
    """Classify subjects identified by De Marque "READ" codes.

    Each recognized code maps to an audience and, for the children's and
    young adult codes, to a target age range.
    """

    # Prefix that every identifier accepted by scrub_identifier starts with.
    _CODE_PREFIX = "READ"

    # Codes grouped by the audience they indicate. Any other identifier is
    # treated as adult by audience().
    _CHILDREN_CODES = ("READ0001", "READ0002", "READ0003")
    _YOUNG_ADULT_CODES = ("READ0004", "READ0005")

    # Target age range (lower, upper) for each code; None means "no bound".
    _TARGET_AGES = {
        "READ0001": (0, 3),
        "READ0002": (4, 7),
        "READ0003": (8, 12),
        "READ0004": (13, 18),
        "READ0005": (17, None),
    }

    @classmethod
    def scrub_identifier(cls, identifier):
        """
        Make sure that the identifier matches with De Marque codes.
        :param identifier: The identifier to be scrubbed. May be None or empty.
        :return: The identifier unchanged if it starts with "READ", otherwise None.
        """
        # Guard against a missing identifier: calling .startswith() on None
        # would raise AttributeError instead of simply rejecting the value.
        if not identifier:
            return None
        if identifier.startswith(cls._CODE_PREFIX):
            return identifier
        return None

    @classmethod
    def scrub_name(cls, name):
        """
        Read in the De Marque name of the subject code.
        :param name: The name of the subject.
        :return: The name unchanged, or None if the name is empty or missing.
        """
        return name if name else None

    @classmethod
    def audience(cls, identifier, name):
        """
        Function to determine the audience based on the given identifier.
        :param identifier: The identifier to check for audience classification.
        :param name: The name associated with the identifier (not used).
        :return: The audience classification based on the identifier. Identifiers
            that are not children's or young adult codes are classified as adult.
        """
        if identifier in cls._CHILDREN_CODES:
            return cls.AUDIENCE_CHILDREN
        if identifier in cls._YOUNG_ADULT_CODES:
            return cls.AUDIENCE_YOUNG_ADULT
        return cls.AUDIENCE_ADULT

    @classmethod
    def target_age(cls, identifier, name):
        """
        Function that determines the target age range based on the given identifier.
        :param identifier: The identifier to check for target age classification.
        :param name: The name associated with the identifier (not used).
        :return: A (lower, upper) tuple representing the target age range, or None
            if the identifier has no target age defined.
        """
        return cls._TARGET_AGES.get(identifier)


# Store DeMarqueClassifier in the Classifier.classifiers registry under the
# Classifier.DEMARQUE key.
Classifier.classifiers[Classifier.DEMARQUE] = DeMarqueClassifier
2 changes: 2 additions & 0 deletions core/model/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ class Subject(Base):
TAG: str = Classifier.TAG # Folksonomic tags.
FREEFORM_AUDIENCE: str = Classifier.FREEFORM_AUDIENCE
NYPL_APPEAL = Classifier.NYPL_APPEAL
DEMARQUE = Classifier.DEMARQUE

# Types with terms that are suitable for search.
TYPES_FOR_SEARCH = [FAST, OVERDRIVE, BISAC, TAG]
Expand Down Expand Up @@ -92,6 +93,7 @@ class Subject(Base):
"http://www.bisg.org/standards/bisac_subject/": BISAC,
# Feedbooks uses a modified BISAC which we know how to handle.
"http://www.feedbooks.com/categories": BISAC,
"http://schema.org/Audience": DEMARQUE,
}

uri_lookup = dict()
Expand Down
Loading

0 comments on commit 2d6e7c9

Please sign in to comment.