diff --git a/.github/PR_TEMPLATE/pull_request_template.md b/.github/PR_TEMPLATE/pull_request_template.md new file mode 100644 index 0000000..4edb0b2 --- /dev/null +++ b/.github/PR_TEMPLATE/pull_request_template.md @@ -0,0 +1,23 @@ +--- +name: Pull Request +about: Create a pull request to make a change to the code +title: '' +labels: bug +assignees: '' + +--- + +**Describe the change** +Please provide a clear and concise description and motivation of the proposed change. + +**Linked issue** +If this pull request is related to an issue, please provide a link to the issue here. + +**I have checked my changes are in line with the [Coding Standards](https://clinlp.readthedocs.io/en/latest/contributing.html#coding-standards)** +Yes/no + +**I have added my changes to the `CHANGELOG.md` file** +Yes/no + +**Any other relevant information** +Add any other context about the pull request here. diff --git a/CHANGELOG.md b/CHANGELOG.md index c800ecc..a0d3614 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,7 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * :exclamation: `clinlp` now stores entities in `doc.spans['ents']` rather than `doc.ents`, allowing for overlap * :exclamation: Overlap in entities found by the entity matcher is no longer resolved by default (replacing old behavior). To remove overlap, pass `resolve_overlap=True`. * Refactored tests to use `pytest` best practices -* Changed `clinlp_autocomponent` to `clinlp_component`, which automatically registers your component with spaCy +* Changed `clinlp_autocomponent` to `clinlp_component`, which automatically registers your component with `spaCy` * Codebase and linting improvements * Renamed the `other_threshold` config to `family_threshold` in the `clinlp_experiencer_transformer` component @@ -143,7 +143,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -* Remove a default spacy abbreviation (`ts.`) +* Remove a default `spaCy` abbreviation (`ts.`) * Option for max scope on qualifier rules, limiting the number of tokens it applies to * A transformer based pipeline for negation detection (`clinlp_negation_transformer`) * A base class `QualifierDetector` for qualifier detection diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0b1b476..4781228 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -12,6 +12,7 @@ - [Repository structure](#repository-structure) - [Coding standards](#coding-standards) - [General principles](#general-principles) + - [Creating a component](#creating-a-component) - [Formatting and linting](#formatting-and-linting) - [Dependencies](#dependencies) - [Tests](#tests) @@ -36,7 +37,7 @@ Please keep in mind that this page describes the ideal process and criteria for Our preferred way of communication is through [issues](https://github.com/umcu/clinlp/issues), GitHub's built-in issue tracker. We use it for most communication, including questions, bug reports, feature requests, help getting started, etc. This way, the entire community can benefit from the discussion. If this is not an option, you can also reach out to us by e-mail: [analytics@umcutrecht.nl](mailto:analytics@umcutrecht.nl). -To create an issue right now, you can use the following link: [Create an issue](https://github.com/umcu/clinlp/issues/new). +To create an issue right now, you can use the following link: [Create an issue](https://github.com/umcu/clinlp/issues/new/choose). We will try to respond to you as soon as possible. 
Please keep in mind that we are a small group of maintainers, so we might not always be able to get back to you within a few days. @@ -75,7 +76,7 @@ If you have a feature request that you would like someone to pick up, please inc Keep in mind that a feature request might not be picked up immediately, or at all. We will try to keep the roadmap up to date, so you can see what is being worked on, and what is planned for the future. Furthermore, remember that `clinlp` is a collection of generic components that process clinical text written in Dutch. If the proposed addition does not meet those criteria, a separate release might be a better option. We typically also don't include preprocessing components (e.g. fixing encodings, de-identification, etc.), as those should preferably be handled at the source. -If you would like to contribute to the project yourself directly, it's recommended to [create an issue](https://github.com/umcu/clinlp/issues/new) to discuss your idea beforehand. This way, we can make sure that your contribution is in line with the project's goals and that it is not already being worked on by someone else. Of course, for small changes that only touch a couple of lines of code, you can also directly create a pull request. When you are ready to start working on your contribution, please follow the steps outlined in the [Pull requests](#pull-requests) section. +If you would like to contribute to the project yourself directly, it's recommended to [create an issue](https://github.com/umcu/clinlp/issues/new/choose) to discuss your idea beforehand. This way, we can make sure that your contribution is in line with the project's goals and that it is not already being worked on by someone else. Of course, for small changes that only touch a couple of lines of code, you can also directly create a pull request. When you are ready to start working on your contribution, please follow the steps outlined in the [Pull requests](#pull-requests) section. ## Pull requests @@ -146,6 +147,47 @@ Please keep the following principles in mind when writing code: We fully acknowledge that writing production-ready code is a skill that takes time to develop. We are happy to work together, so please don't hesitate to reach out to us. This is especially true for scientific researchers who are working on something cool, but are new to software development. +### Creating a component + +When creating a new component for `clinlp`, try to: + +- Use a class to define the component, and use `__init__` to set the arguments. +- Inherit from `Pipe` to make it compatible with `spaCy`. +- Use the `clinlp_component` decorator to automatically register it in the component library. +- Use a dictionary to define any defaults, and pass this to `default_config` of `clinlp_component`. +- Use type hints for all arguments and return values. +- Use the `requires` and `assigns` arguments to specify which fields the component needs, and which it sets. 
+- Implement the actual behavior of the component in the `__call__` method. + +The following code snippet shows an example of a new component: + +```python +from clinlp.utils import clinlp_component +from spacy.pipeline import Pipe +from spacy.tokens import Doc + +_defaults = { +    "arg_1": 1, +    "arg_2": True +} + +@clinlp_component( +    name="my_new_component", +    requires=["input_spacy_field"], +    assigns=["output_spacy_field"], +    default_config=_defaults +) +class MyNewComponent(Pipe): + +    def __init__(self, arg_1: int = _defaults["arg_1"], arg_2: bool = _defaults["arg_2"]): +        ... + +    def __call__(self, doc: Doc) -> Doc: +        ... +        return doc +``` + ### Formatting and linting We use `ruff` for both formatting and linting. It is configured in `pyproject.toml`. @@ -260,7 +302,7 @@ We use type hints throughout the codebase, for both functions and classes. This ### Documentation -We like our code to be well documented. The documentation can be found in the `docs` directory. If you are making changes to the codebase, please make sure to update the documentation accordingly. +We like our code to be well documented. The documentation can be found in the `docs` directory. If you are making changes to the codebase, please make sure to update the documentation accordingly. If you are adding new components, please add them to the [component library](https://clinlp.readthedocs.io/en/latest/components.html), following the existing structure. #### Docstrings diff --git a/README.md b/README.md index df0038d..6048a2b 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +# clinlp + [![test](https://github.com/umcu/clinlp/actions/workflows/test.yml/badge.svg)](https://github.com/umcu/clinlp/actions/workflows/test.yml) [![docs](https://readthedocs.org/projects/clinlp/badge/?version=latest)](https://clinlp.readthedocs.io/en/latest/?badge=latest) [![pypi version](https://img.shields.io/pypi/v/clinlp?color=blue)](https://pypi.org/project/clinlp/) @@ -6,27 +8,34 @@ [![made with spaCy](https://img.shields.io/badge/made_with-spaCy-blue)](https://spacy.io/) [![ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) -# clinlp - ![clinlp](media/clinlp.png) + * :hospital: `clinical` + :netherlands: `nl` + :clipboard: `NLP` = :sparkles: `clinlp` -* :star: Performant and production-ready NLP pipelines for clinical text written in Dutch -* :rocket: Open source, created and maintained by the Dutch Clinical NLP community -* :triangular_ruler: Useful out of the box, but customization highly recommended + +* :star: NLP tools and algorithms for clinical text written in Dutch + +* :triangular_ruler: Organized in a standardized but flexible framework using `spaCy` + +* :rocket: Production-ready, performant, well-tested and easy to use + +* :bulb: Free, open source, created and maintained by the Dutch Clinical NLP community + -If you are enthusiastic about using or contributing to `clinlp`, please don't hesitate to get in touch (via [e-mail](mailto:analytics@umcutrecht.nl) or by creating an [issue](https://github.com/umcu/clinlp/issues/new)). `clinlp` is intended as a community project, and we would love to hear from you. +## Contact -This readme contains information on [getting started](#getting-started), how to [cite](#citing) this work, some basic [documentation](#documentation), the [roadmap](#roadmap), the overarching [principles and goals](#principles-and-goals) and how to [contribute](#contributing) :arrow_down:. 
+If you have questions, need help getting started, found a bug, or have a feature request, please don't hesitate to [contact us](https://clinlp.readthedocs.io/en/latest/contributing.html#contact)! ## Getting started ### Installation + ```bash pip install clinlp ``` ### Example + ```python import spacy from clinlp.ie import Term @@ -59,7 +68,7 @@ entity_matcher.load_concepts(concepts) nlp.add_pipe("clinlp_context_algorithm", config={"phrase_matcher_attr": "NORM"}) text = ( - "Preterme neonaat ( :bulb: `clinlp` stores entities in `doc.spans`, specifically in `doc.spans["ents"]`. The reason for this is that spans can overlap, while the entities in `doc.ents` cannot. If you use other/custom components, make sure they read/write entities from/to the same span key if interoperability is needed. - -> :bulb: The `clinlp_rule_based_entity_matcher` component wraps the spaCy `Matcher` and `PhraseMatcher` components, adding some convenience and configurability. However, the `Matcher`, `PhraseMatcher` or `SpanRuler` can also be used directly with `clinlp` for those who prefer it. You can configure the `SpanRuler` to write to the same `SpanGroup` as follows: -> ```python -> from clinlp.ie import SPAN_KEY -> ruler = nlp.add_pipe('span_ruler', config={'span_key': SPAN_KEY}) -> ``` - -#### Attribute - -Specify the token attribute the entity matcher should use as follows (by default `TEXT`): - -```python -entity_matcher = nlp.add_pipe("clinlp_rule_based_entity_matcher", config={"attr": "NORM"}) -``` - -Any [Token attribute](https://spacy.io/api/token#attributes) can be used, but in the above example the `clinlp_normalizer` should be added before the entity matcher, or the `NORM` attribute is simply the literal text. `clinlp` does not include Part of Speech tags and dependency trees, at least not until a reliable model for Dutch clinical text is created, though it's always possible to add a relevant component from a trained (general) Dutch model if needed. - -#### Proximity matching - -The proxmity setting defines how many tokens can optionally be skipped between the tokens of a pattern. With `proxmity` set to `1`, the pattern `slaapt slecht` will also match `slaapt vaak slecht`, but not `slaapt al weken slecht`. - -```python -entity_matcher = nlp.add_pipe("clinlp_rule_based_entity_matcher", config={"proximity": 1}) -``` - -#### Fuzzy matching - -Fuzzy matching enables finding misspelled variants of terms. For instance, with `fuzzy` set to `1`, the pattern `diabetes` will also match `diabets`, `ddiabetes`, or `diabetis`, but not `diabetse` or `ddiabetess`. The threshold is based on Levenshtein distance with insertions, deletions and replacements (but not swaps). - -```python -entity_matcher = nlp.add_pipe("clinlp_rule_based_entity_matcher", config={"fuzzy": 1}) -``` - -Additionally, the `fuzzy_min_len` argument can be used to specify the minimum length of a phrase for fuzzy matching. This also works for multi-token phrases. For example, with `fuzzy` set to `1` and `fuzzy_min_len` set to `5`, the pattern `bloeding graad ii` would also match `bloedin graad ii`, but not `bloeding graad iii`. 
- -```python -entity_matcher = nlp.add_pipe("clinlp_rule_based_entity_matcher", config={"fuzzy": 1, "fuzzy_min_len": 5}) -``` - -#### Terms -The settings above are described at the matcher level, but can all be overridden at the term level by adding a `Term` to a concept, rather than a literal phrase: - -```python -from clinlp.ie import Term - -concepts = { - "sepsis": [ - "sepsis", - "lijnsepsis", - Term("early onset", proximity=1), - Term("late onset", proximity=1), - Term("EOS", attr="TEXT", fuzzy=0), - Term("LOS", attr="TEXT", fuzzy=0) - ] -} - -entity_matcher = nlp.add_pipe("clinlp_rule_based_entity_matcher", config={"attr": "NORM", "fuzzy": 1}) -entity_matcher.load_concepts(concepts) -``` - -In the above example, by default the `NORM` attribute is used, and `fuzzy` is set to `1`. In addition, for the terms `early onset` and `late onset` proximity matching is set to `1`, in addition to matcher-level config of matching the `NORM` attribute and fuzzy matching. For the `EOS` and `LOS` abbreviations the `TEXT` attribute is used (so the matching is case sensitive), and fuzzy matching is disabled. - -#### Pseudo/negative phrases - -On the term level, it is possible to add pseudo or negative patterns, for those phrases that need to be excluded. For example: - -```python -concepts = { - "prematuriteit": [ - "prematuur", - Term("prematuur ademhalingspatroon", pseudo=True), - ] -} -``` - -In this case `prematuur` will be matched, but not in the context of `prematuur ademhalingspatroon` (which may indicate prematurity, but is not a definitive diagnosis). - -#### spaCy patterns - -Finally, if you need more control than literal phrases and terms as explained above, the entity matcher also accepts [spaCy patterns](https://spacy.io/usage/rule-based-matching#adding-patterns). These patterns do not respect any other configurations (like attribute, fuzzy, proximity, etc.): - -```python -concepts = { - "delier": [ - Term("delier", attr="NORM"), - Term("DOS", attr="TEXT"), - [ - {"NORM": {"IN": ["zag", "ziet", "hoort", "hoorde", "ruikt", "rook"]}}, - {"OP": "?"}, - {"OP": "?"}, - {"OP": "?"}, - {"NORM": {"FUZZY1": "dingen"}}, - {"OP": "?"}, - {"NORM": "die"}, - {"NORM": "er"}, - {"OP": "?"}, - {"NORM": "niet"}, - {"OP": "?"}, - {"NORM": {"IN": ["zijn", "waren"]}} - ], - ] -} -``` - -#### Concept dictionary from external source - -When matching entities, it is possible to load external lists of concepts (e.g. from a medical thesaurus such as UMLS) from `csv` through the `create_concept_dict` function. Your `csv` should contain a combination of concept and phrase on each line, with optional columns to configure the `Term`-options described above (e.g. `attribute`, `proximity`, `fuzzy`). You may present the columns in any order, but make sure the names match the `Term` attributes. Any other columns are ignored. For example: - -| **concept** | **phrase** | **attr** | **proximity** | **fuzzy** | **fuzzy_min_len** | **pseudo** | **comment** | -|--|--|--|--|--|--|--|--| -| prematuriteit | prematuriteit | | | | | | some comment | -| prematuriteit | 3.0.0`) - * Therefore non-destructive -* Work towards some level of standardization of components (abstraction, protocols) -* Follows industry best practices (system design, code, documentation, testing, CI/CD) +The full documentation can be found at [https://clinlp.readthedocs.io](https://clinlp.readthedocs.io). 
-Overarching goals: +## Links -* Improve the quality of Dutch Clinical NLP pipelines -* Enable easier (re)use/valorization of efforts -* Help mature the field of Dutch Clinical NLP -* Help develop the Dutch Clinical NLP community +* [Documentation](https://clinlp.readthedocs.io) +* [Contributing guidelines](https://clinlp.readthedocs.io/en/latest/contributing.html) +* [API](https://clinlp.readthedocs.io/en/latest/api/api.html) +* [`clinlp` development roadmap](https://github.com/orgs/umcu/projects/3) +* [Create an issue](https://github.com/umcu/clinlp/issues/new/choose) +* [Cite `clinlp`](https://clinlp.readthedocs.io/en/latest/citing.html) +* [Changelog](https://clinlp.readthedocs.io/en/latest/changelog.html) diff --git a/docs/conf.py b/docs/conf.py index 05056f0..3351ded 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -24,7 +24,12 @@ # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration -extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon", "myst_parser"] +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "myst_parser", + "sphinx_new_tab_link", +] source_suffix = { ".rst": "restructuredtext", diff --git a/docs/qualifier_definitions.md b/docs/qualifier_definitions.md deleted file mode 100644 index 396b3a8..0000000 --- a/docs/qualifier_definitions.md +++ /dev/null @@ -1,3 +0,0 @@ -# Qualifier operational definitions - -It's useful to have some operational definitions of a qualifier/context, i.e. what we mean exactly when we talk about negations, hypothetical situations, etc. For now the framework we use can be found here: [qualifiers.docx](qualifiers.docx). This information will be incorporated in a separate documentation page in the near future. diff --git a/docs/qualifiers.docx b/docs/qualifiers.docx deleted file mode 100644 index 78d4b09..0000000 Binary files a/docs/qualifiers.docx and /dev/null differ diff --git a/docs/source/citing.md b/docs/source/citing.md new file mode 100644 index 0000000..eb42415 --- /dev/null +++ b/docs/source/citing.md @@ -0,0 +1,7 @@ +# Citing + +If you use `clinlp` in your research, please cite our work. This helps make `clinlp` findable and accessible to others. You can find the appropriate citation (APA, BibTeX, etc.) by clicking the Zenodo button below. This should always point you to the latest release: + +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10528055.svg)](https://doi.org/10.5281/zenodo.10528055) + +You can also find citations for other specific versions on the page above. diff --git a/docs/source/components.md b/docs/source/components.md new file mode 100644 index 0000000..7ca43b5 --- /dev/null +++ b/docs/source/components.md @@ -0,0 +1,319 @@ +# Components + +This page describes the various pipeline components that `clinlp` offers, along with how to configure and use them effectively. This page assumes you have made yourself familiar with the foundations of the `clinlp` and `spaCy` frameworks. If this is not the case, it might be a good idea to read the [Getting Started](getting_started.md) page first. 
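+For quick reference, the sketch below assembles a small pipeline from several of the components described on this page; it only uses names and options documented in the sections that follow, where each component is covered in detail:
+
+```python
+import spacy
+
+import clinlp  # noqa: F401, registers the clinlp language and components
+
+# blank model with the clinlp defaults for Dutch clinical text
+nlp = spacy.blank("clinlp")
+
+# normalization and sentence boundaries
+nlp.add_pipe("clinlp_normalizer")
+nlp.add_pipe("clinlp_sentencizer")
+
+# rule-based entity matching on the normalized text
+entity_matcher = nlp.add_pipe("clinlp_rule_based_entity_matcher", config={"attr": "NORM"})
+entity_matcher.load_concepts({"sepsis": ["sepsis", "lijnsepsis"]})
+
+# qualifier detection (e.g. negation) with the Context Algorithm
+nlp.add_pipe("clinlp_context_algorithm", config={"phrase_matcher_attr": "NORM"})
+
+doc = nlp("Er is sprake van een lijnsepsis.")
+print(doc.spans["ents"])
+```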
+ +## Basic components + +### `clinlp` (language) + +| property | value | | --- | --- | | name | `clinlp` | | class | [clinlp.language.Clinlp](clinlp.language.Clinlp) | | example | `nlp = spacy.blank("clinlp")` | | requires | `-` | | assigns | `-` | | config options | `-` | + +The `clinlp` language class is an instantiation of the `spaCy` `Language` class, with some customizations for clinical text. It contains the default settings for Dutch clinical text, such as rules for tokenizing, abbreviations and units. Creating an instance of the `clinlp` language class is usually the first step in setting up a pipeline for clinical text processing. + +```{admonition} Note +:class: tip +Note that `clinlp` does not start from a pre-trained `spaCy` model, but from a blank model. This is because `spaCy` only provides models and components pre-trained on general Dutch text, which typically perform poorly on the domain-specific language of clinical text. You are, however, always free to add pre-trained components from a general Dutch model to the pipeline if needed. +``` + +The included tokenizer employs some custom rule-based logic, including: + +- Clinical text-specific logic for splitting punctuation, units, dosages (e.g. `20mg/dag` :arrow_right: `20` `mg` `/` `dag`) +- Custom lists of abbreviations, units (e.g. `pt.`, `zn.`, `mmHg`) +- Custom tokenizing rules (e.g. `xdd` :arrow_right: `x` `dd`) +- Regarding [DEDUCE](https://github.com/vmenger/deduce) tags as a single token (e.g. `[DATUM-1]`). + - De-identification is not built into `clinlp` and should be done as a preprocessing step. + +### `clinlp_normalizer` + +| property | value | | --- | --- | | name | `clinlp_normalizer` | | class | [clinlp.normalizer.Normalizer](clinlp.normalizer.Normalizer) | | example | `nlp.add_pipe("clinlp_normalizer")` | | requires | `-` | | assigns | `token.norm` | | config options | `lowercase = True`<br>`map_non_ascii = True` | + +The normalizer sets the `Token.norm` attribute, which can be used by further components (entity matching, qualification). It currently has two options (enabled by default): + +- Lowercasing +- Mapping non-ASCII characters to ASCII characters where possible, for instance removing diacritics. It will map `ë` :arrow_right: `e`, but keeps most other non-ASCII characters intact (e.g. `µ`, `²`). + +Note that this component only has an effect when successor components are explicitly configured to match on the `Token.norm` attribute. + +### `clinlp_sentencizer` + +| property | value | | --- | --- | | name | `clinlp_sentencizer` | | class | [clinlp.sentencizer.Sentencizer](clinlp.sentencizer.Sentencizer) | | example | `nlp.add_pipe("clinlp_sentencizer")` | | requires | `-` | | assigns | `token.is_sent_start`, `doc.sents` | | config options | `sent_end_chars = [".", "!", "?", "\n", "\r"]`<br>`sent_start_punct = ["-", "*", "[", "("]` | + +The sentencizer is a rule-based sentence boundary detector. It is designed to detect sentence boundaries in clinical text whenever a character that marks a sentence ending is matched (e.g. newline, period, question mark). The next sentence is started whenever an alpha character or a character in `sent_start_punct` is encountered. This prevents e.g. sentences ending in `...` from being classified as three separate sentences. The sentencizer correctly detects items in enumerations (e.g. starting with `-` or `*`). + +## Entity Matching + +### `clinlp_rule_based_entity_matcher` + +| property | value | | --- | --- | | name | `clinlp_rule_based_entity_matcher` | | class | [clinlp.ie.entity.RuleBasedEntityMatcher](clinlp.ie.entity.RuleBasedEntityMatcher) | | example | `nlp.add_pipe("clinlp_rule_based_entity_matcher")` | | requires | `-` | | assigns | `doc.spans['ents']` | | config options | `attr = "TEXT"`<br>`proximity = 0`<br>`fuzzy = 0`<br>`fuzzy_min_len = 0`<br>`pseudo = False` | + +The `clinlp_rule_based_entity_matcher` component can be used for matching entities in text, based on a dictionary of known concepts and their terms/synonyms. It includes options for matching on different token attributes, proximity matching, fuzzy matching and non-matching pseudo/negative terms. + +The most basic example would be the following, with further options described below: + +```python +concepts = { +    "sepsis": [ +        "sepsis", +        "lijnsepsis", +        "systemische infectie", +        "bacteriemie", +    ], +    "veneus_infarct": [ +        "veneus infarct", +        "VI", +    ] +} + +entity_matcher = nlp.add_pipe("clinlp_rule_based_entity_matcher") +entity_matcher.load_concepts(concepts) +``` + +```{admonition} Spans vs ents +:class: tip +`clinlp` stores entities in `doc.spans`, specifically in `doc.spans["ents"]`. The reason for this is that spans can overlap, while the entities in `doc.ents` cannot. If you use other/custom components, make sure they read/write entities from/to the same span key if interoperability is needed. +``` + +```{admonition} Using spaCy components directly +:class: tip +The `clinlp_rule_based_entity_matcher` component wraps the `spaCy` `Matcher` and `PhraseMatcher` components, adding some convenience and configurability. However, the `Matcher`, `PhraseMatcher` or `SpanRuler` can also be used directly with `clinlp` for those who prefer it. You can configure the `SpanRuler` to write to the same `SpanGroup` as follows: + +    from clinlp.ie import SPAN_KEY +    ruler = nlp.add_pipe('span_ruler', config={'span_key': SPAN_KEY}) + +``` + +#### Attribute + +Specify the token attribute the entity matcher should use as follows (by default `TEXT`): + +```python +entity_matcher = nlp.add_pipe("clinlp_rule_based_entity_matcher", config={"attr": "NORM"}) +``` + +Any [Token attribute](https://spacy.io/api/token#attributes) can be used, but in the above example the `clinlp_normalizer` should be added before the entity matcher, otherwise the `NORM` attribute is simply the literal text. `clinlp` does not include Part of Speech tags and dependency trees, at least not until a reliable model for Dutch clinical text is created, though it's always possible to add a relevant component from a trained (general) Dutch model if needed. + +#### Proximity matching + +The proximity setting defines how many tokens can optionally be skipped between the tokens of a pattern. With `proximity` set to `1`, the pattern `slaapt slecht` will also match `slaapt vaak slecht`, but not `slaapt al weken slecht`. + +```python +entity_matcher = nlp.add_pipe("clinlp_rule_based_entity_matcher", config={"proximity": 1}) +``` + +#### Fuzzy matching + +Fuzzy matching enables finding misspelled variants of terms. For instance, with `fuzzy` set to `1`, the pattern `diabetes` will also match `diabets`, `ddiabetes`, or `diabetis`, but not `diabetse` or `ddiabetess`. The threshold is based on Levenshtein distance with insertions, deletions and replacements (but not swaps). + +```python +entity_matcher = nlp.add_pipe("clinlp_rule_based_entity_matcher", config={"fuzzy": 1}) +``` + +Additionally, the `fuzzy_min_len` argument can be used to specify the minimum length of a phrase for fuzzy matching. This also works for multi-token phrases. For example, with `fuzzy` set to `1` and `fuzzy_min_len` set to `5`, the pattern `bloeding graad ii` would also match `bloedin graad ii`, but not `bloeding graad iii`. 
+ +```python +entity_matcher = nlp.add_pipe("clinlp_rule_based_entity_matcher", config={"fuzzy": 1, "fuzzy_min_len": 5}) +``` + +#### Terms + +The settings above are described at the matcher level, but can all be overridden at the term level by adding a `Term` to a concept, rather than a literal phrase: + +```python +from clinlp.ie import Term + +concepts = { +    "sepsis": [ +        "sepsis", +        "lijnsepsis", +        Term("early onset", proximity=1), +        Term("late onset", proximity=1), +        Term("EOS", attr="TEXT", fuzzy=0), +        Term("LOS", attr="TEXT", fuzzy=0) +    ] +} + +entity_matcher = nlp.add_pipe("clinlp_rule_based_entity_matcher", config={"attr": "NORM", "fuzzy": 1}) +entity_matcher.load_concepts(concepts) +``` + +In the above example, by default the `NORM` attribute is used, and `fuzzy` is set to `1`. For the terms `early onset` and `late onset`, proximity matching is additionally set to `1`, on top of the matcher-level config of matching on the `NORM` attribute with fuzzy matching. For the `EOS` and `LOS` abbreviations the `TEXT` attribute is used (so the matching is case sensitive), and fuzzy matching is disabled. + +#### Pseudo/negative phrases + +On the term level, it is possible to add pseudo or negative patterns, for those phrases that need to be excluded. For example: + +```python +concepts = { +    "prematuriteit": [ +        "prematuur", +        Term("prematuur ademhalingspatroon", pseudo=True), +    ] +} +``` + +In this case `prematuur` will be matched, but not in the context of `prematuur ademhalingspatroon` (which may indicate prematurity, but is not a definitive diagnosis). + +#### `spaCy` patterns + +Finally, if you need more control than literal phrases and terms as explained above, the entity matcher also accepts [`spaCy` patterns](https://spacy.io/usage/rule-based-matching#adding-patterns). These patterns do not respect any other configurations (like attribute, fuzzy, proximity, etc.): + +```python +concepts = { +    "delier": [ +        Term("delier", attr="NORM"), +        Term("DOS", attr="TEXT"), +        [ +            {"NORM": {"IN": ["zag", "ziet", "hoort", "hoorde", "ruikt", "rook"]}}, +            {"OP": "?"}, +            {"OP": "?"}, +            {"OP": "?"}, +            {"NORM": {"FUZZY1": "dingen"}}, +            {"OP": "?"}, +            {"NORM": "die"}, +            {"NORM": "er"}, +            {"OP": "?"}, +            {"NORM": "niet"}, +            {"OP": "?"}, +            {"NORM": {"IN": ["zijn", "waren"]}} +        ], +    ] +} +``` + +#### Concept dictionary from external source + +External lists of concepts (e.g. from a medical thesaurus such as `UMLS`) can also be loaded directly from `csv` through the `create_concept_dict` function. Your `csv` should contain a combination of concept and phrase on each line, with optional columns to configure the `Term` options described above (e.g. `attr`, `proximity`, `fuzzy`). You may present the columns in any order, but make sure the names match the `Term` attributes. Any other columns are ignored. For example: + +| **concept** | **phrase** | **attr** | **proximity** | **fuzzy** | **fuzzy_min_len** | **pseudo** | **comment** | |--|--|--|--|--|--|--|--| | prematuriteit | prematuriteit | | | | | | some comment | | prematuriteit | `load_rules = True`<br>`rules = "src/clinlp/resources/context_rules.json"` | + +The rule-based [Context Algorithm](https://doi.org/10.1016%2Fj.jbi.2009.05.002) is fairly accurate, and quite transparent and fast. A set of rules that checks for `Presence`, `Temporality`, and `Experiencer` is loaded by default: + +```python +nlp.add_pipe("clinlp_context_algorithm", config={"phrase_matcher_attr": "NORM"}) +``` + +A custom set of rules, including different types of qualifiers, can easily be defined. See [`src/clinlp/resources/context_rules.json`](../../src/clinlp/resources/context_rules.json) for an example, and load it as follows: + +```python +cm = nlp.add_pipe("clinlp_context_algorithm", config={"rules": "/path/to/my_own_ruleset.json"}) +``` + +```{admonition} Definitions of qualifiers +:class: tip +For more extensive documentation on the definitions of the qualifiers we use in `clinlp`, see the [Qualifiers](qualifiers.md) page. +``` + +### `clinlp_negation_transformer` + +| property | value | | --- | --- | | name | `clinlp_negation_transformer` | | class | [clinlp.ie.qualifier.transformer.NegationTransformer](clinlp.ie.qualifier.transformer.NegationTransformer) | | example | `nlp.add_pipe('clinlp_negation_transformer')` | | requires | `doc.spans['ents']` | | assigns | `span._.qualifiers` | | config options | `token_window = 32`<br>`strip_entities = True`<br>`placeholder = None`<br>`prob_aggregator = statistics.mean`<br>`absence_threshold = 0.1`<br>`presence_threshold = 0.9` | + +The `clinlp_negation_transformer` wraps the negation detector described in [van Es et al, 2022](https://doi.org/10.48550/arxiv.2209.00470). The underlying transformer can be found on [HuggingFace](https://huggingface.co/UMCU/). The negation detector is reported as more accurate than the rule-based version (see paper for details), at the cost of less transparency and additional computation. + +This component requires the following optional dependencies: + +```bash +pip install "clinlp[transformers]" +``` + +The component can be configured to consider a maximum number of tokens as context when determining whether a term is negated. There is an option to strip the entity, removing any potential whitespace or punctuation before passing it to the transformer. The `placeholder` option can be used to replace the entity with a placeholder token, which has a small impact on the output probability. The `prob_aggregator` option can be used to aggregate the probabilities of the transformer, which is only used for multi-token entities. + +The thresholds define the cutoffs for absence and presence. If the predicted probability of presence < `absence_threshold`, entities will be qualified as `Presence.Absent`. If the predicted probability of presence > `presence_threshold`, entities will be qualified as `Presence.Present`. If the predicted probability is between these thresholds, the entity will be qualified as `Presence.Uncertain`. + +```{admonition} Definitions of qualifiers +:class: tip +For more extensive documentation on the definitions of the qualifiers we use in `clinlp`, see the [Qualifiers](qualifiers.md) page. +``` + +### `clinlp_experiencer_transformer` + +| property | value | | --- | --- | | name | `clinlp_experiencer_transformer` | | class | [clinlp.ie.qualifier.transformer.ExperiencerTransformer](clinlp.ie.qualifier.transformer.ExperiencerTransformer) | | example | `nlp.add_pipe('clinlp_experiencer_transformer')` | | requires | `doc.spans['ents']` | | assigns | `span._.qualifiers` | | config options | `token_window = 32`<br>`strip_entities = True`<br>`placeholder = None`<br>`prob_aggregator = statistics.mean`<br>`family_threshold = 0.5` | + +The `clinlp_experiencer_transformer` wraps a model very similar to the [`clinlp_negation_transformer`](#clinlp_negation_transformer) component, with which it shares most of its configuration. + +Additionally, it has a threshold for determining whether an entity is experienced by the patient or by a family member. If the predicted probability < `family_threshold`, the entity will be qualified as `Experiencer.Patient`. If the predicted probability > `family_threshold`, the entity will be qualified as `Experiencer.Family`. The `Experiencer.Other` qualifier is currently not implemented in this component. + +```{admonition} Definitions of qualifiers +:class: tip +For more extensive documentation on the definitions of the qualifiers we use in `clinlp`, see the [Qualifiers](qualifiers.md) page. +``` diff --git a/docs/source/getting_started.md b/docs/source/getting_started.md new file mode 100644 index 0000000..5069899 --- /dev/null +++ b/docs/source/getting_started.md @@ -0,0 +1,186 @@ +# Getting started + +This guide contains some code examples to get you started with `clinlp`. Since `clinlp` is built on top of the `spaCy` framework, it's highly recommended to read [`spaCy` 101: Everything you need to know (~15 minutes)](https://spacy.io/usage/spacy-101) before getting started with `clinlp`. Understanding the basic `spaCy` framework will make working with `clinlp` much easier. + +## Creating a blank model + +You can create a blank `clinlp` model using the following code: + +```python +import spacy +import clinlp + +nlp = spacy.blank('clinlp') +``` + +This instantiates a `Language` object, which is the central object in `spaCy`. It contains all default settings for a language, in our case Dutch clinical text, such as the tokenizer, abbreviations, stop words, and so on. Calling it on a piece of text creates a `Doc` object: + +```python +text = "De patient krijgt 2x daags 500 mg paracetamol." +doc = nlp(text) +``` + +In the `Doc` object, you can find the tokenized text: + +```python +print(list(token.text for token in doc)) + +> ['De', 'patient', 'krijgt', '2', 'x', 'daags', '500', 'mg', 'paracetamol', '.'] +``` + +Each token in the document is a `Token` object, which contains the text and some additional information. You can also access the tokens directly from the `Doc` object: + +```python +print(doc[8]) + +> 'paracetamol' +``` + +A span of multiple tokens, essentially a slice of the document, is called a `Span` object. This can be a sentence, a named entity, or any other contiguous part of the text. You can create a `Span` by slicing the `Doc`: + +```python +print(doc[6:8]) + +> '500 mg' +``` + +Even when using a blank model, the `Doc`, `Token` and `Span` objects already contain some information about the text and tokens, such as the token's text and its position in the document. In the next section, we will add more components to the model, which will add more interesting information, so we can start using the model for more interesting tasks. + +## Adding components + +The above model is a blank model, which means it does not contain any additional components yet. It's essentially an almost empty pipeline. Adding a component is done using: + +```python +nlp.add_pipe('component_name') +``` + +For example, let's add the `clinlp` normalizer and `clinlp` sentencizer to the model. 
They respectively normalize the text and detect sentence boundaries: + +```python +nlp.add_pipe('clinlp_normalizer') +nlp.add_pipe('clinlp_sentencizer') +``` + +If we now process a piece of sample text again, we can see that `clinlp` has added some additional information to the `Doc` and `Span` objects: + +```python +doc = nlp( +    "Patiënt krijgt 2x daags 500 mg " +    "paracetamol. De patiënt is allergisch " +    "voor penicilline." +) + +print([token.norm_ for token in doc]) +> ['patient', 'krijgt', '2', 'x', 'daags', '500', 'mg', 'paracetamol', '.', 'de', 'patient', 'is', 'allergisch', 'voor', 'penicilline', '.'] + +print([str(sent) for sent in doc.sents]) +> ['Patiënt krijgt 2x daags 500 mg paracetamol.', 'De patiënt is allergisch voor penicilline.'] +``` +Other components can use these newly set properties `Token.norm_` and `Doc.sents`. For example, an entity recognizer can use the normalized text to recognize entities, and a negation detector can use the sentence boundaries to determine the range of a negation. + +You can always inspect the current model's pipeline using: + +```python +print(nlp.pipe_names) + +> ['clinlp_normalizer', 'clinlp_sentencizer'] +``` + +This shows the current components in the pipeline, in the order they are executed. The order of the components is important, as the output of one component is the input of the next. The order can be changed by using the `nlp.add_pipe` method with the `before` or `after` parameter. For example, to add a component before the `clinlp_sentencizer`: + +```python +nlp.add_pipe('component_name', before='clinlp_sentencizer') +``` + +This will add the component before the `clinlp_sentencizer` in the pipeline. + +## Information extraction example + +Now that we understand the basics of a blank model and adding components, let's add two more components to create a basic information extraction pipeline. + +First, we will add the `clinlp_rule_based_entity_matcher`, along with some sample concepts to match: + +```python +from clinlp.ie import Term + +concepts = { +    "prematuriteit": [ +        "preterm", " 'Preterme' 'prematuriteit' +> ' 'bd enigszins verlaagd' 'hypotensie' +> 'hypotensie' 'hypotensie' +> 'veneus infarkt' 'veneus_infarct' +> 'partus prematurus' 'prematuriteit' +> 'VI' 'veneus_infarct' + +``` + +As you can see, the `doc.spans['ents']` property now contains seven `Span` objects, each with the matched text, along with the concept label. + +Now, as a final step, let's add the `clinlp_context_algorithm` component to the pipeline, which implements the Context Algorithm. For each matched entity, it can detect qualifiers, such as `Presence`, `Temporality` and `Experiencer`, based on triggers like 'geen', 'uitgesloten', etc. + +```python +nlp.add_pipe("clinlp_context_algorithm", config={"phrase_matcher_attr": "NORM"}) +``` + +We again configure it to match on the `NORM` attribute, set by the `clinlp_normalizer`. + +If we now process the same text, we can see that the Context Algorithm has added some additional information to the entities: + +```python +doc = nlp(text) + +for ent in doc.spans['ents']: +    print(ent.text, ent._.qualifiers) + + +> 'Preterme' set() +> ' 'bd enigszins verlaagd' set() +> 'hypotensie' {'Experiencer.Family'} +> 'veneus infarkt' {'Presence.Absent'} +> 'partus prematurus' set() +> 'VI' {'Temporality.Future'} +``` + +In the above example, for readability all default qualifier values (`Presence.Present`, `Temporality.Current`, `Experiencer.Patient`) have been omitted. 
You can see that three out of seven entities have correctly been qualified, either as `Absent`, related to `Family`, or potentially occurring in the `Future`. Of course, your specific use case determines how the output of this pipeline will further be handled. + +## Conclusion + +In this guide, we have shown how to create a blank model, add components to it, and process a piece of text. We have also shown how to configure individual components and organize them into a specific information extraction pipeline. Note that there are more components available than shown in this example; you can find them on the [Components](components) page. By now you understand the basics, and are ready to further explore everything `clinlp` can offer! diff --git a/docs/source/index.md b/docs/source/index.md index cac1092..71fdbf5 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -1,6 +1,33 @@ -# clinlp documentation +![clinlp logo](../../media/clinlp.png) -Welcome to the documentation pages for `clinlp`. +Welcome to the documentation pages for `clinlp`, a Python library for performing NLP on clinical text written in Dutch. In the menu to the left, you should be able to find the information you are looking for. If you have any questions, need help getting started, found a bug, or have a feature request, please don't hesitate to [contact us](contributing.md#contact)! + +## Links + +- [GitHub repository](https://github.com/umcu/clinlp) + +```{toctree} +:caption: clinlp +:hidden: +Introduction +Installation +Getting started +Roadmap +Citing +``` + +```{toctree} +:caption: Usage +:hidden: +Components +Metrics +``` + +```{toctree} +:caption: Standards +:hidden: +Qualifiers +``` ```{toctree} :caption: Search & index diff --git a/docs/source/installation.md b/docs/source/installation.md new file mode 100644 index 0000000..5221e1b --- /dev/null +++ b/docs/source/installation.md @@ -0,0 +1,23 @@ +# Installation + +The easiest way to install `clinlp` is by using `pip`: + +```bash +pip install clinlp +``` + +As a good practice, we recommend installing `clinlp` in a virtual environment. If you are not familiar with virtual environments, you can find more information [here](https://docs.python.org/3/library/venv.html). + +## Optional dependencies + +To keep the base package lightweight, we use optional dependencies for some components. In the component library, each component will list the required optional dependencies, if any. They can be installed using: + +```bash +pip install "clinlp[extra_name]" +``` + +Or, if you want to install multiple extras at once: + +```bash +pip install "clinlp[extra_name1,extra_name2]" +``` diff --git a/docs/source/introduction.md b/docs/source/introduction.md new file mode 100644 index 0000000..ffc35d6 --- /dev/null +++ b/docs/source/introduction.md @@ -0,0 +1,114 @@ +# Introduction + +```{include} ../../README.md +:start-after: +:end-before: +``` + +`clinlp` is a Python library for performing NLP on clinical text written in Dutch. It is designed to be a standardized framework for building, maintaining and sharing solutions for NLP tasks in the clinical domain. The library is built on top of the [`spaCy`](https://spacy.io/) library, and extends it with components that are specifically tailored to Dutch clinical text. The library is open source and welcomes contributions from the community. + +## Motivation + +`clinlp` was motivated by the lack of standardized tools for processing clinical text in Dutch. 
This makes it difficult for researchers, data scientists and developers working with Dutch clinical text to build, validate and maintain NLP solutions. With `clinlp`, we aim to fill this gap. + +## Principles + +We organized `clinlp` around four basic principles: useful NLP tools, a standardized framework, production-ready quality, and open source collaboration. + +### 1. Useful NLP tools + +```{include} ../../README.md +:start-after: +:end-before: +``` + +There are many interesting NLP tasks in the clinical domain, like normalization, entity recognition, qualifier detection, entity linking, summarization, reasoning, and many more. In addition to that, each task can often be solved using rule-based methods, classical machine learning, deep learning, transformers, or a combination of these, with trade-offs between them. + +The main idea behind `clinlp` is to build, maintain and share solutions for these NLP tasks, specifically for clinical text written in Dutch. In `clinlp`, we typically call a specific implementation for a task a "component". For instance: a rule-based sentence boundary detector, or a transformer-based negation detector. + +Currently, `clinlp` mainly includes components used for information extraction, such as tokenizing, detecting sentence boundaries, normalizing text, detecting entities, and detecting qualifiers (e.g. negation, uncertainty). The library is regularly being updated with new or improved components, both components for different tasks (e.g. entity linking, summarization) and components that use a different method for solving a task (e.g. a transformer-based entity recognizer). + +```{admonition} Contributing +:class: note + +Components can be built by anyone from the Dutch clinical NLP field, typically a researcher, data scientist, engineer or clinician who works with Dutch clinical text in daily practice. If you have a contribution in mind, please check out the [Contributing](contributing) page. +``` + +When building new solutions, we preferably start with a component that implements a simple, rule-based solution, which can function as a baseline. Subsequently, more sophisticated components can be built. If possible, we try to (re)use existing implementations, but if needed, building from scratch is also an option. + +We prefer components to work out of the box, while remaining highly customizable. For instance, our implementation of the [Context Algorithm](components.md#clinlp_context_algorithm) has a set of built-in rules for qualifying entities with Presence, Temporality and Experiencer properties. However, both the types of qualifiers and the rules can easily be modified or replaced by the user. This way, the components can be used in a wide variety of use cases, and no user is forced to use a one-size-fits-all solution. + +```{admonition} Validating components +:class: important + +Remember, there is no guarantee that components based on existing rules or pre-trained models also extend to your particular dataset and use case. It is always recommended to evaluate the performance of the components on your own data. +``` + +In addition to functional components, `clinlp` also implements some functionality for computing metrics. This is useful for evaluating the performance of the components, and for comparing different methods for solving the same task. + +An overview of all components included in `clinlp` can be found on the [Components](components) page. + +### 2. Standardized framework + +```{include} ../../README.md +:start-after: +:end-before: +``` + +Some of the real power of `clinlp` comes from the fact that the different components it implements are organized in a standardized framework. This framework ensures that the components can be easily combined and that they can be used in a consistent way. This makes it easy to build complex pipelines that can effectively process clinical text. + +We use the [`spaCy`](https://spacy.io/) library as the backbone of our framework. This allows us to leverage the power of `spaCy`'s NLP capabilities and to build on top of it. We have extended `spaCy` with our own domain-specific language defaults to make it easier to work with clinical text. In a pipeline, you can mix and match different `clinlp` components with existing `spaCy` components, and add your own custom components to that mix as well. For example, you could use the `clinlp` normalizer, the `spaCy` entity recognizer, and a custom-built entity linker in the same pipeline without any issues. + +```{admonition} Getting familiar with spaCy +:class: note + +It's highly recommended to read [`spaCy` 101: Everything you need to know (~15 minutes)](https://spacy.io/usage/spacy-101) before getting started with `clinlp`. Understanding the basic `spaCy` framework will make working with `clinlp` much easier. +``` + +In addition to the `spaCy` framework, we have added some additional abstractions and interfaces that make building components easier. For instance, if you want to add a new component that detects qualifiers, it can make use of the `QualifierDetector` abstraction, and the `Qualifier` and `QualifierClass` classes. This way, the new component can easily be integrated in the framework, while the developer can focus on building a new solution. + +Finally, by adopting a framework, we can easily build components that wrap a specific pre-trained model. The transformer-based qualifier detectors included in `clinlp` are good examples of this. These components wrap around pre-trained transformer models, but fit seamlessly into the `clinlp` framework. This way, we can easily add new components that use the latest and greatest in NLP research. + +### 3. Production-ready quality + +```{include} ../../README.md +:start-after: +:end-before: +``` + +`clinlp` can potentially serve many types of users, including researchers, data scientists, engineers and clinicians. One thing they all have in common is that they would like to rely on the library to work as expected. Our goal is to build a library with the robustness and reliability required in production environments, i.e. real-world environments. To ensure this, we employ various software development best practices, including: + +* Proper system design by using abstractions, interfaces and design patterns (where appropriate) +* Formatting, linting and type hints for a clean, consistent and readable codebase +* Versioning and a changelog to track changes over time +* Optimizations for speed and scalability +* Structural management of dependencies and packaging +* Extensive testing to ensure that the library works (and keeps working) as expected +* Documentation to explain the library's principles, functionality and how to use it +* Continuous deployment and frequent new releases + +We actively maintain the library, and are always looking for ways to improve it. If you have suggestions on how to further increase the quality of the library, please let us know. 
+ +More detail on the `clinlp` development practices can be found in the [Coding Standards](contributing.md#coding-standards) section of the contributing page. + +### 4. Open source collaboration + +```{include} ../../README.md +:start-after: +:end-before: +``` + +`clinlp` is being built as a free and open source library, but we cannot do it alone. As an open source project, we highly welcome contributions from the community. We believe that open source collaboration is the best way to build high-quality software that can be used by everyone. We encourage you to contribute to the project by reporting issues, suggesting improvements, or even submitting your own code. + +In order to be transparent, we prefer to communicate through means that are open to everyone. This includes using GitHub for issue tracking, pull requests and discussions, and using the `clinlp` documentation for explaining the library's principles and functionality. We keep our [Roadmap](roadmap) and [Changelog](changelog) up to date, so you can see what we are working on and what has changed in the library. + +Finally, by working together in `clinlp`, we hope to strengthen the connections in our specific field of Dutch clinical NLP across organizations and institutions. By committing to making algorithms and implementations available in this package, and to collaboratively further standardize algorithms and protocols, we can ensure that the research is reproducible and that the algorithms can be used by others. This way, we can build on each other's work, and make the field of Dutch clinical NLP stronger. + +## About + +`clinlp` was initiated by a group of data scientists and engineers from the UMCU, who ran into practical issues working with clinical text and decided to build a library to solve them. + +The library is currently actively maintained by: + +* [Vincent Menger, ML engineer, UMCU](https://github.com/vmenger) +* [Bram van Es, Assistant Professor, UMCU](https://github.com/bramiozo) diff --git a/docs/source/metrics.md b/docs/source/metrics.md new file mode 100644 index 0000000..2af740a --- /dev/null +++ b/docs/source/metrics.md @@ -0,0 +1,317 @@ +# Metrics and statistics + +`clinlp` contains calculators for some specific metrics and statistics for evaluating NLP tools. You can find some basic information on using them below. + +## Information extraction + +Information extraction-related metrics and statistics for annotated datasets can be computed by using the `InfoExtractionDataset` and `InfoExtractionMetrics` classes. They require the following optional dependencies: + +```bash +pip install "clinlp[metrics]" +``` + +### Creating an `InfoExtractionDataset` + +An `InfoExtractionDataset` contains a collection of annotated documents, regardless of whether the annotations were collected manually or from an NLP tool. + +#### From `MedCATTrainer` + +The `MedCATTrainer` interface allows exporting annotated data in a `JSON` format. 
It can be converted to an `InfoExtractionDataset` as follows: + +```python +from clinlp.metrics import InfoExtractionDataset +import json +from pathlib import Path + +with Path('medcattrainer_export.json').open('rb') as f: +    mctrainer_data = json.load(f) + +mct_dataset = InfoExtractionDataset.from_medcattrainer(mctrainer_data) +``` + +#### From `clinlp` + +```python +from clinlp.metrics import InfoExtractionDataset +import clinlp +import spacy + +# assumes a model (nlp) and iterable of texts (texts) exist +nlp_docs = nlp.pipe(texts) + +clinlp_dataset = InfoExtractionDataset.from_clinlp_docs(nlp_docs) + +``` + +#### From other + +If your data is in a different format, you can manually convert it by creating `Annotation` and `Document` objects, and adding those to an `InfoExtractionDataset`. Below are some pointers on how to create the appropriate objects: + +```python +from clinlp.metrics import Annotation, Document, InfoExtractionDataset + +annotation = Annotation( +    text='prematuriteit', +    start=0, +    end=12, +    label='C0151526_prematuriteit', +    qualifiers={ +        "Presence": "Present", +        "Temporality": "Current", +        "Experiencer": "Patient" +    } +) + +document = Document( +    identifier='doc_0001', +    text='De patiënt heeft prematuriteit.', +    annotations=[annotation1, annotation2, ...] +) + +dataset = InfoExtractionDataset( +    documents=[document1, document2, ...] +) + +``` + +If you are writing code to convert data from a specific existing format, please consider contributing to `clinlp` by adding an `InfoExtractionDataset` method like `from_medcattrainer` and `from_clinlp_docs` that does the conversion. + +#### Displaying descriptive statistics + +Get descriptive statistics for an `InfoExtractionDataset` as follows: + +```python +dataset.stats() + +> { +    "num_docs": 50, +    "num_annotations": 513, +    "span_counts": { +        "prematuriteit": 43, +        "infectie": 31, +        "fototherapie": 25, +        "dysmaturiteit": 24, +        "IRDS": 20, +        "prematuur": 15, +        "sepsis": 15, +        "hyperbilirubinemie": 14, +        "Prematuriteit": 14, +        "ROP": 13, +        "necrotiserende enterocolitis": 12, +        "Prematuur": 11, +        "infektie": 11, +        "ductus": 11, +        "bloeding": 8, +        "dysmatuur": 7, +        "IUGR": 7, +        "Hyperbilirubinemie": 7, +        "transfusie": 6, +        "hyperbilirubinaemie": 6, +        "Dopamine": 6, +        "wisseltransfusie": 5, +        "premature partus": 5, +        "retinopathy of prematurity": 5, +        "bloedtransfusie": 5, +    }, +    "label_counts": { +        "C0151526_prematuriteit": 94, +        "C0020433_hyperbilirubinemie": 68, +        "C0243026_sepsis": 63, +        "C0015934_intrauterine_groeivertraging": 57, +        "C0002871_anemie": 37, +        "C0035220_infant_respiratory_distress_syndrome": 25, +        "C0035344_retinopathie_van_de_prematuriteit": 21, +        "C0520459_necrotiserende_enterocolitis": 18, +        "C0013274_patent_ductus_arteriosus": 18, +        "C0020649_hypotensie": 18, +        "C0559477_perinatale_asfyxie": 18, +        "C0270191_intraventriculaire_bloeding": 17, +        "C0877064_post_hemorrhagische_ventrikeldilatatie": 13, +        "C0014850_oesophagus_atresie": 12, +        "C0006287_bronchopulmonale_dysplasie": 9, +        "C0031190_persisterende_pulmonale_hypertensie": 7, +        "C0015938_macrosomie": 6, +        "C0751954_veneus_infarct": 5, +        "C0025289_meningitis": 5, +        "C0023529_periventriculaire_leucomalacie": 2, +    }, +    "qualifier_counts": { +        "Presence": {"Present": 436, "Uncertain": 34, "Absent": 30}, +        "Temporality": {"Current": 473, "Historical": 18, "Future": 9}, +        "Experiencer": {"Patient": 489, "Family": 9, "Other": 2}, +    } +} +``` + +You can also get the individual statistics, rather than all combined in a dictionary, i.e.: + +```python +dataset.num_docs() 
+
+#### Displaying descriptive statistics
+
+Get descriptive statistics for an `InfoExtractionDataset` as follows:
+
+```python
+dataset.stats()
+
+> {
+    "num_docs": 50,
+    "num_annotations": 513,
+    "span_counts": {
+        "prematuriteit": 43,
+        "infectie": 31,
+        "fototherapie": 25,
+        "dysmaturiteit": 24,
+        "IRDS": 20,
+        "prematuur": 15,
+        "sepsis": 15,
+        "hyperbilirubinemie": 14,
+        "Prematuriteit": 14,
+        "ROP": 13,
+        "necrotiserende enterocolitis": 12,
+        "Prematuur": 11,
+        "infektie": 11,
+        "ductus": 11,
+        "bloeding": 8,
+        "dysmatuur": 7,
+        "IUGR": 7,
+        "Hyperbilirubinemie": 7,
+        "transfusie": 6,
+        "hyperbilirubinaemie": 6,
+        "Dopamine": 6,
+        "wisseltransfusie": 5,
+        "premature partus": 5,
+        "retinopathy of prematurity": 5,
+        "bloedtransfusie": 5,
+    },
+    "label_counts": {
+        "C0151526_prematuriteit": 94,
+        "C0020433_hyperbilirubinemie": 68,
+        "C0243026_sepsis": 63,
+        "C0015934_intrauterine_groeivertraging": 57,
+        "C0002871_anemie": 37,
+        "C0035220_infant_respiratory_distress_syndrome": 25,
+        "C0035344_retinopathie_van_de_prematuriteit": 21,
+        "C0520459_necrotiserende_enterocolitis": 18,
+        "C0013274_patent_ductus_arteriosus": 18,
+        "C0020649_hypotensie": 18,
+        "C0559477_perinatale_asfyxie": 18,
+        "C0270191_intraventriculaire_bloeding": 17,
+        "C0877064_post_hemorrhagische_ventrikeldilatatie": 13,
+        "C0014850_oesophagus_atresie": 12,
+        "C0006287_bronchopulmonale_dysplasie": 9,
+        "C0031190_persisterende_pulmonale_hypertensie": 7,
+        "C0015938_macrosomie": 6,
+        "C0751954_veneus_infarct": 5,
+        "C0025289_meningitis": 5,
+        "C0023529_periventriculaire_leucomalacie": 2,
+    },
+    "qualifier_counts": {
+        "Presence": {"Present": 436, "Uncertain": 34, "Absent": 30},
+        "Temporality": {"Current": 473, "Historical": 18, "Future": 9},
+        "Experiencer": {"Patient": 489, "Family": 9, "Other": 2},
+    }
+}
+```
+
+You can also get the individual statistics, rather than all combined in a dictionary, e.g.:
+
+```python
+dataset.num_docs()
+
+> 50
+```
+
+### Comparison statistics
+
+To compare two `InfoExtractionDataset` objects, create an `InfoExtractionMetrics` object from them. The `InfoExtractionMetrics` object will then calculate the relevant metrics for the annotations in the two datasets.
+
+```python
+from clinlp.metrics import InfoExtractionMetrics
+
+nlp_metrics = InfoExtractionMetrics(dataset1, dataset2)
+```
+
+#### Entity metrics
+
+For comparison metrics on entities, use:
+
+```python
+nlp_metrics.entity_metrics()
+
+> {
+    'ent_type': {
+        'correct': 480,
+        'incorrect': 1,
+        'partial': 0,
+        'missed': 32,
+        'spurious': 21,
+        'possible': 513,
+        'actual': 502,
+        'precision': 0.9561752988047809,
+        'recall': 0.935672514619883,
+        'f1': 0.9458128078817734
+    },
+    'partial': {
+        'correct': 473,
+        'incorrect': 0,
+        'partial': 8,
+        'missed': 32,
+        'spurious': 21,
+        'possible': 513,
+        'actual': 502,
+        'precision': 0.950199203187251,
+        'recall': 0.9298245614035088,
+        'f1': 0.9399014778325123
+    },
+    'strict': {
+        'correct': 473,
+        'incorrect': 8,
+        'partial': 0,
+        'missed': 32,
+        'spurious': 21,
+        'possible': 513,
+        'actual': 502,
+        'precision': 0.9422310756972112,
+        'recall': 0.9220272904483431,
+        'f1': 0.9320197044334976
+    },
+    'exact': {
+        'correct': 473,
+        'incorrect': 8,
+        'partial': 0,
+        'missed': 32,
+        'spurious': 21,
+        'possible': 513,
+        'actual': 502,
+        'precision': 0.9422310756972112,
+        'recall': 0.9220272904483431,
+        'f1': 0.9320197044334976
+    }
+}
+```
+
+The different metrics (`partial`, `exact`, `strict` and `ent_type`) are calculated using `Nervaluate`, based on the SemEval 2013 task 9.1. Check the [Nervaluate documentation](https://github.com/MantisAI/nervaluate) for more information.
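+
+Since the result is a plain nested dictionary (as shown above), individual values can be picked out directly, for example:
+
+```python
+entity_metrics = nlp_metrics.entity_metrics()
+
+# e.g., the F1 score under the strict evaluation scheme
+strict_f1 = entity_metrics["strict"]["f1"]
+```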
+
+#### Qualifier metrics
+
+For comparison metrics on qualifiers, use:
+
+```python
+nlp_metrics.qualifier_info()
+
+> {
+    "Experiencer": {
+        "metrics": {
+            "n": 460,
+            "precision": 0.3333333333333333,
+            "recall": 0.09090909090909091,
+            "f1": 0.14285714285714288,
+        },
+        "misses": [
+            {
+                "doc.identifier": "doc_0001",
+                "annotation": {
+                    "text": "anemie",
+                    "start": 1849,
+                    "end": 1855,
+                    "label": "C0002871_anemie",
+                },
+                "true_qualifier": "Family",
+                "pred_qualifier": "Patient",
+            },
+            ...,
+        ],
+    },
+    "Temporality": {
+        "metrics": {"n": 460, "precision": 0.0, "recall": 0.0, "f1": 0.0},
+        "misses": [
+            {
+                "doc.identifier": "doc_0001",
+                "annotation": {
+                    "text": "premature partus",
+                    "start": 1611,
+                    "end": 1627,
+                    "label": "C0151526_prematuriteit",
+                },
+                "true_qualifier": "Current",
+                "pred_qualifier": "Historical",
+            },
+            ...,
+        ],
+    },
+    "Plausibility": {
+        "metrics": {
+            "n": 460,
+            "precision": 0.6486486486486487,
+            "recall": 0.5217391304347826,
+            "f1": 0.5783132530120482,
+        },
+        "misses": [
+            {
+                "doc.identifier": "doc_0001",
+                "annotation": {
+                    "text": "Groeivertraging",
+                    "start": 1668,
+                    "end": 1683,
+                    "label": "C0015934_intrauterine_groeivertraging",
+                },
+                "true_qualifier": "Current",
+                "pred_qualifier": "Future",
+            },
+            ...,
+        ],
+    },
+    "Negation": {
+        "metrics": {
+            "n": 460,
+            "precision": 0.7692307692307693,
+            "recall": 0.6122448979591837,
+            "f1": 0.6818181818181818,
+        },
+        "misses": [
+            {
+                "doc.identifier": "doc_0001",
+                "annotation": {
+                    "text": "wisseltransfusie",
+                    "start": 4095,
+                    "end": 4111,
+                    "label": "C0020433_hyperbilirubinemie",
+                },
+                "true_qualifier": "Present",
+                "pred_qualifier": "Absent",
+            },
+            ...,
+        ]
+    }
+}
+```
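+
+The result is again a plain dictionary, so it is easy to post-process. For example, a small sketch that prints a one-line summary per qualifier class, based on the structure shown above:
+
+```python
+for qualifier_class, info in nlp_metrics.qualifier_info().items():
+    f1 = info["metrics"]["f1"]
+    n_misses = len(info["misses"])
+    print(f"{qualifier_class}: f1={f1:.2f}, misses={n_misses}")
+```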
diff --git a/docs/source/qualifiers.md b/docs/source/qualifiers.md
new file mode 100644
index 0000000..d8d9dc5
--- /dev/null
+++ b/docs/source/qualifiers.md
@@ -0,0 +1,96 @@
+# Definition of qualifier classes used in `clinlp`
+
+This page describes the definitions of the qualifier classes we use in `clinlp` for Dutch clinical text.
+
+## Introduction
+
+When processing clinical documents (e.g., doctor’s notes, discharge letters), detecting qualifiers (e.g., `absent`, `historical`, `non-patient`, `increasing`, `decreasing`, `severe`, `light`, etc.) follows the matching of concepts (e.g., diagnoses, symptoms, procedures). In `clinlp` we primarily use the term “qualifier”, although the terms “context”, “meta-annotation”, and “modifier” have also been used to denote the same concept.
+
+A consensus on potential qualifier classes, along with **clear** definitions of the qualifiers they encompass, is needed to develop accurate algorithms for detecting them. Despite some shared intuitive understanding of recognizing, for instance, a _negation_ in a sentence, there are numerous cases where intuition simply falls short. In practice, this has impeded manual annotation of gold standards, used for training and evaluating algorithms. In turn, the resulting annotations (with Cohen's kappa as low as 0.5) make a difficult target for supervised machine learning models. A standardized classification of qualifiers, as proposed here, will hopefully advance both research and clinical implementation of NLP (Natural Language Processing) algorithms. This page is the result of some deliberation among various researchers and developers working with clinical text, but it is not necessarily definitive. We welcome feedback and suggestions for improvement.
+
+For these definitions we took the ConText algorithm (Harkema et al., 2009) as a starting point, both because this is an influential paper, and because a corresponding Dutch corpus is available (Afzal et al., 2014). There are already some trained models available that can largely be re-used. We describe three qualifier classes here: **Presence**, **Temporality**, and **Experiencer**, including definitions, issues to resolve, and illustrative examples. These classes can be split up further at a later stage, and other classes may follow later as well. Note that choosing qualifiers is a trade-off between granularity and practicality. We aim for a balance that is useful for most clinical NLP tasks.
+
+**Qualifier classes** are denoted by boldface, with the `Qualifier` (a mutually exclusive value a **qualifier class** can assume) formatted as inline code.
+
+## Presence
+
+| `Absent` / `Negated` | `Uncertain` | `Present` / `Affirmed` (default) |
+|--------------------------|---------------|--------------------------------------|
+| Concepts that are explicitly or implicitly described as absent in a patient | Whether the concept was absent or present is not asserted with very high certainty | Concepts that are explicitly or implicitly described as present in a patient |
+
+Assessing whether some concept was present or absent is one of the most important parts of a clinician's job. Whether something is present or occurred in the real world is knowable in principle, but in the clinical world, such assertions are rarely made with complete certainty. This is already implied by the uncertainty at the core of the clinical reasoning process, but in clinical text the uncertainty is often made explicit by means of hedging. It’s therefore important to note that when we are extracting concepts from medical text, it’s very hard to make direct assertions about the real world; instead, we are limited to recognizing the probability statements made by clinicians.
+
+The **presence** class therefore captures whether a concept is present using three qualifiers. The `Present` and `Absent` qualifiers are used when the clinician assesses a concept as being present (or absent) with very high probability, extending beyond reasonable doubt. When neither presence nor absence is definitively asserted, the `Uncertain` qualifier applies. This qualifier therefore ranges from very unlikely to very likely.
+
+The default qualifier for **presence** is `Present`. When the text does not indicate absence or uncertainty of a concept, we assume the writer intended to convey its presence.
+
+### To resolve
+
+- In future work, the `Uncertain` qualifier may be further split up, for instance, into a negative uncertain (i.e., unlikely) qualifier and a positive uncertain (i.e., likely) qualifier. Or perhaps an ‘uncertain uncertain’ qualifier in addition to those, for 50/50 cases.
+- The exact threshold for absent and present should be further defined. What probability cutoff should be regarded as ‘beyond a reasonable doubt’? We could set this threshold at two standard deviations (`<0.025`, `>0.975`), but it would be even better to do a small empirical study with clinicians, to find out where each trigger term should go. Consider for example edge cases such as: very insignificant, very likely, subclinical, etc.
+
+### Examples
+
+| Example | Qualifier |
+| ------- | --------- |
+| Rechtszijdig fraai de middelste neusgang te visualiseren, vrij van poliepen. | `Absent` |
+| Tractus circulatorius: geen pijn op de borst. | `Absent` |
+| Een acuut coronair syndroom werd uitgesloten. | `Absent` |
+| Werkdiagnose maagklachten bij diclofenac gebruik en weinig intake. | `Uncertain` |
+| Waarschijnlijk hematurie bij reeds gepasseerde niersteen. | `Uncertain` |
+| Dat er toen bradypacing is geweest valt niet uit te sluiten. | `Uncertain` |
+| In juni 2023 longembolie waarvoor rivaroxaban met nu asymptomatische progressie. | `Present` |
+| PTSS en recidiverende depressie in VG. | `Present` |
+| Status na mild delier, heden wel slaperig. | `Present` |
+
+## Temporality
+
+| `Historical` | `Current` (default) | `Future` |
+|----------------|-----------------------|------------|
+| Concepts that were applicable at some point in history, but not in the last two weeks. | Concepts that were applicable in the last two weeks (potentially starting before that) up to and including the present moment. | Concepts that are potentially applicable in a future scenario. |
+
+The **temporality** class places concepts in a temporal framework, ranging from past to future, relative to the document date. The `Historical` and `Current` qualifiers distinguish between concepts that were applicable in the past, versus concepts that are applicable in the present. The exact cutoff between `Historical` and `Current` is problem-specific and therefore hard to establish definitively in a general sense. In a discharge summary, everything that happened before the admission period could be considered `Historical`, which can easily range up to months, while during a GP (General Practitioner) visit, events from a few days prior might be considered `Historical`. For the general case, we see no reason to deviate from the threshold of two weeks in the original ConText paper (Harkema et al., 2009). Note that the `Current` qualifier also applies when the concept is applicable in the last two weeks, but already started before that.
+
+The `Future` qualifier is applicable when a concept is described in a future scenario, for instance when describing the risk of developing a condition at a later stage, or when describing a procedure that will take place later.
+
+### To resolve
+
+- A way to dynamically define the threshold between `Historical` and `Current`, so that a cutoff can be established for each problem. In future work, we might map each concept to a timedelta (e.g., -1 year, -14 days, +5 days), but that does not fit the current qualifier framework very well. Also, it seems quite a hard problem.
+
+### Examples
+
+| Example | Qualifier |
+| ------- | --------- |
+| Zwanger, meerdere miskramen in de voorgeschiedenis. | `Historical` |
+| Progressieve autonome functiestoornissen bij eerdere dermoidcyste. | `Historical` |
+| Als tiener een osteotomiecorrectie beiderzijds gehad. | `Historical` |
+| Echocardiografisch zagen wij geen aanwijzingen voor een hypertrofe cardiomyopathie. | `Current` |
+| Al langer bestaande bloeddrukproblematiek. | `Current` |
+| CT thorax: laesie rechter onderkwab bevestigd. | `Current` |
+| Conservatieve maatregelen ter preventie van pulmonale infectie zijn herbesproken. | `Future` |
+| Mocht hij koorts en/of tachycardie ontwikkelen, dan contact opnemen met dienstdoende arts. | `Future` |
+| Wordt nu opgenomen middels IBS ter afwending van suïcide. | `Future` |
+
+## Experiencer
+
+| `Patient` (default) | `Family` | `Other` |
+|-----------------------|------------|-----------|
+| Concepts applicable to the patient to whom the current document relates. | Concepts not applicable to the patient, but to someone with a genetic relationship to the patient. | Concepts not applicable to the patient, but to someone without a genetic relationship to the patient. |
+
+The **experiencer** class distinguishes between concepts that apply to the `Patient`, concepts that apply to `Family` members with a genetic relationship to the patient, and concepts that apply to `Other` individuals without a genetic relationship to the patient (e.g., acquaintances). Clinical documents are typically obtained from electronic health records, where the relation between a document and a patient is explicit. Since a patient is a well-separated entity, there is usually little ambiguity about which qualifier applies. If a concept applies to both the patient and another person, the `Patient` qualifier should be selected.
+
+### Examples
+
+| Example | Qualifier |
+| ------------------------------------------------------------ | --------- |
+| Behandeling in WKZ ivm diabetes beeindigd. | `Patient` |
+| Pte wil geen medicatie tegen parkinson ivm slechte ervaringen broer | `Patient` |
+| X-enkel rechts: schuine fractuur laterale malleolus | `Patient` |
+| Familieanamnese omvat: ADD/ADHD: broer | `Family` |
+| Moederszijde: voor zover bekend geen kanker | `Family` |
+| 2. Covid positieve huisgenoot | `Other` |
+
+## References
+
+- Afzal, Z., Pons, E., Kang, N. et al. ContextD: an algorithm to identify contextual properties of medical terms in a Dutch clinical corpus. BMC Bioinformatics 15, 373 (2014). [https://doi.org/10.1186/s12859-014-0373-3](https://doi.org/10.1186/s12859-014-0373-3)
+- Harkema H, Dowling JN, Thornblade T, Chapman WW. ConText: an algorithm for determining negation, experiencer, and temporal status from clinical reports. J Biomed Inform. 2009 Oct;42(5):839-51. doi: 10.1016/j.jbi.2009.05.002. Epub 2009 May 10.
PMID: 19435614; PMCID: PMC2757457. diff --git a/docs/source/roadmap.md b/docs/source/roadmap.md new file mode 100644 index 0000000..829219a --- /dev/null +++ b/docs/source/roadmap.md @@ -0,0 +1,3 @@ +# Roadmap + +We keep track of work being done on `clinlp` (now and in the future) using GitHub Projects, on this page: [clinlp development roadmap](https://github.com/orgs/umcu/projects/3/views/1). All issues created automatically appear on the roadmap, where they will be triaged and assigned. diff --git a/media/example_doc_render.png b/media/example_doc_render.png index 789014d..ec9fd06 100644 Binary files a/media/example_doc_render.png and b/media/example_doc_render.png differ diff --git a/poetry.lock b/poetry.lock index ed7dc07..857a10c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1425,18 +1425,18 @@ test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] [[package]] name = "pydantic" -version = "2.7.2" +version = "2.7.4" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic-2.7.2-py3-none-any.whl", hash = "sha256:834ab954175f94e6e68258537dc49402c4a5e9d0409b9f1b86b7e934a8372de7"}, - {file = "pydantic-2.7.2.tar.gz", hash = "sha256:71b2945998f9c9b7919a45bde9a50397b289937d215ae141c1d0903ba7149fd7"}, + {file = "pydantic-2.7.4-py3-none-any.whl", hash = "sha256:ee8538d41ccb9c0a9ad3e0e5f07bf15ed8015b481ced539a1759d8cc89ae90d0"}, + {file = "pydantic-2.7.4.tar.gz", hash = "sha256:0c84efd9548d545f63ac0060c1e4d39bb9b14db8b3c0652338aecc07b5adec52"}, ] [package.dependencies] annotated-types = ">=0.4.0" -pydantic-core = "2.18.3" +pydantic-core = "2.18.4" typing-extensions = ">=4.6.1" [package.extras] @@ -1444,90 +1444,90 @@ email = ["email-validator (>=2.0.0)"] [[package]] name = "pydantic-core" -version = "2.18.3" +version = "2.18.4" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic_core-2.18.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:744697428fcdec6be5670460b578161d1ffe34743a5c15656be7ea82b008197c"}, - {file = "pydantic_core-2.18.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:37b40c05ced1ba4218b14986fe6f283d22e1ae2ff4c8e28881a70fb81fbfcda7"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:544a9a75622357076efb6b311983ff190fbfb3c12fc3a853122b34d3d358126c"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e2e253af04ceaebde8eb201eb3f3e3e7e390f2d275a88300d6a1959d710539e2"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:855ec66589c68aa367d989da5c4755bb74ee92ccad4fdb6af942c3612c067e34"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d3e42bb54e7e9d72c13ce112e02eb1b3b55681ee948d748842171201a03a98a"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6ac9ffccc9d2e69d9fba841441d4259cb668ac180e51b30d3632cd7abca2b9b"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c56eca1686539fa0c9bda992e7bd6a37583f20083c37590413381acfc5f192d6"}, - {file = "pydantic_core-2.18.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:17954d784bf8abfc0ec2a633108207ebc4fa2df1a0e4c0c3ccbaa9bb01d2c426"}, - {file = "pydantic_core-2.18.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:98ed737567d8f2ecd54f7c8d4f8572ca7c7921ede93a2e52939416170d357812"}, - {file = "pydantic_core-2.18.3-cp310-none-win32.whl", hash = "sha256:9f9e04afebd3ed8c15d67a564ed0a34b54e52136c6d40d14c5547b238390e779"}, - {file = "pydantic_core-2.18.3-cp310-none-win_amd64.whl", hash = "sha256:45e4ffbae34f7ae30d0047697e724e534a7ec0a82ef9994b7913a412c21462a0"}, - {file = "pydantic_core-2.18.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:b9ebe8231726c49518b16b237b9fe0d7d361dd221302af511a83d4ada01183ab"}, - {file = "pydantic_core-2.18.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b8e20e15d18bf7dbb453be78a2d858f946f5cdf06c5072453dace00ab652e2b2"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0d9ff283cd3459fa0bf9b0256a2b6f01ac1ff9ffb034e24457b9035f75587cb"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2f7ef5f0ebb77ba24c9970da18b771711edc5feaf00c10b18461e0f5f5949231"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73038d66614d2e5cde30435b5afdced2b473b4c77d4ca3a8624dd3e41a9c19be"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6afd5c867a74c4d314c557b5ea9520183fadfbd1df4c2d6e09fd0d990ce412cd"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd7df92f28d351bb9f12470f4c533cf03d1b52ec5a6e5c58c65b183055a60106"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:80aea0ffeb1049336043d07799eace1c9602519fb3192916ff525b0287b2b1e4"}, - {file = "pydantic_core-2.18.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:aaee40f25bba38132e655ffa3d1998a6d576ba7cf81deff8bfa189fb43fd2bbe"}, - {file = "pydantic_core-2.18.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9128089da8f4fe73f7a91973895ebf2502539d627891a14034e45fb9e707e26d"}, - {file = "pydantic_core-2.18.3-cp311-none-win32.whl", hash = "sha256:fec02527e1e03257aa25b1a4dcbe697b40a22f1229f5d026503e8b7ff6d2eda7"}, - {file = "pydantic_core-2.18.3-cp311-none-win_amd64.whl", hash = "sha256:58ff8631dbab6c7c982e6425da8347108449321f61fe427c52ddfadd66642af7"}, - {file = "pydantic_core-2.18.3-cp311-none-win_arm64.whl", hash = "sha256:3fc1c7f67f34c6c2ef9c213e0f2a351797cda98249d9ca56a70ce4ebcaba45f4"}, - {file = "pydantic_core-2.18.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f0928cde2ae416a2d1ebe6dee324709c6f73e93494d8c7aea92df99aab1fc40f"}, - {file = "pydantic_core-2.18.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0bee9bb305a562f8b9271855afb6ce00223f545de3d68560b3c1649c7c5295e9"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e862823be114387257dacbfa7d78547165a85d7add33b446ca4f4fae92c7ff5c"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6a36f78674cbddc165abab0df961b5f96b14461d05feec5e1f78da58808b97e7"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba905d184f62e7ddbb7a5a751d8a5c805463511c7b08d1aca4a3e8c11f2e5048"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7fdd362f6a586e681ff86550b2379e532fee63c52def1c666887956748eaa326"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:24b214b7ee3bd3b865e963dbed0f8bc5375f49449d70e8d407b567af3222aae4"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:691018785779766127f531674fa82bb368df5b36b461622b12e176c18e119022"}, - {file = "pydantic_core-2.18.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:60e4c625e6f7155d7d0dcac151edf5858102bc61bf959d04469ca6ee4e8381bd"}, - {file = "pydantic_core-2.18.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a4e651e47d981c1b701dcc74ab8fec5a60a5b004650416b4abbef13db23bc7be"}, - {file = "pydantic_core-2.18.3-cp312-none-win32.whl", hash = "sha256:ffecbb5edb7f5ffae13599aec33b735e9e4c7676ca1633c60f2c606beb17efc5"}, - {file = "pydantic_core-2.18.3-cp312-none-win_amd64.whl", hash = "sha256:2c8333f6e934733483c7eddffdb094c143b9463d2af7e6bd85ebcb2d4a1b82c6"}, - {file = "pydantic_core-2.18.3-cp312-none-win_arm64.whl", hash = "sha256:7a20dded653e516a4655f4c98e97ccafb13753987434fe7cf044aa25f5b7d417"}, - {file = "pydantic_core-2.18.3-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:eecf63195be644b0396f972c82598cd15693550f0ff236dcf7ab92e2eb6d3522"}, - {file = "pydantic_core-2.18.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2c44efdd3b6125419c28821590d7ec891c9cb0dff33a7a78d9d5c8b6f66b9702"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e59fca51ffbdd1638b3856779342ed69bcecb8484c1d4b8bdb237d0eb5a45e2"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:70cf099197d6b98953468461d753563b28e73cf1eade2ffe069675d2657ed1d5"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:63081a49dddc6124754b32a3774331467bfc3d2bd5ff8f10df36a95602560361"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:370059b7883485c9edb9655355ff46d912f4b03b009d929220d9294c7fd9fd60"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a64faeedfd8254f05f5cf6fc755023a7e1606af3959cfc1a9285744cc711044"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:19d2e725de0f90d8671f89e420d36c3dd97639b98145e42fcc0e1f6d492a46dc"}, - {file = "pydantic_core-2.18.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:67bc078025d70ec5aefe6200ef094576c9d86bd36982df1301c758a9fff7d7f4"}, - {file = "pydantic_core-2.18.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:adf952c3f4100e203cbaf8e0c907c835d3e28f9041474e52b651761dc248a3c0"}, - {file = "pydantic_core-2.18.3-cp38-none-win32.whl", hash = "sha256:9a46795b1f3beb167eaee91736d5d17ac3a994bf2215a996aed825a45f897558"}, - {file = "pydantic_core-2.18.3-cp38-none-win_amd64.whl", hash = "sha256:200ad4e3133cb99ed82342a101a5abf3d924722e71cd581cc113fe828f727fbc"}, - {file = "pydantic_core-2.18.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:304378b7bf92206036c8ddd83a2ba7b7d1a5b425acafff637172a3aa72ad7083"}, - {file = "pydantic_core-2.18.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c826870b277143e701c9ccf34ebc33ddb4d072612683a044e7cce2d52f6c3fef"}, - {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e201935d282707394f3668380e41ccf25b5794d1b131cdd96b07f615a33ca4b1"}, - {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5560dda746c44b48bf82b3d191d74fe8efc5686a9ef18e69bdabccbbb9ad9442"}, - {file 
= "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6b32c2a1f8032570842257e4c19288eba9a2bba4712af542327de9a1204faff8"}, - {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:929c24e9dea3990bc8bcd27c5f2d3916c0c86f5511d2caa69e0d5290115344a9"}, - {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1a8376fef60790152564b0eab376b3e23dd6e54f29d84aad46f7b264ecca943"}, - {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dccf3ef1400390ddd1fb55bf0632209d39140552d068ee5ac45553b556780e06"}, - {file = "pydantic_core-2.18.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:41dbdcb0c7252b58fa931fec47937edb422c9cb22528f41cb8963665c372caf6"}, - {file = "pydantic_core-2.18.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:666e45cf071669fde468886654742fa10b0e74cd0fa0430a46ba6056b24fb0af"}, - {file = "pydantic_core-2.18.3-cp39-none-win32.whl", hash = "sha256:f9c08cabff68704a1b4667d33f534d544b8a07b8e5d039c37067fceb18789e78"}, - {file = "pydantic_core-2.18.3-cp39-none-win_amd64.whl", hash = "sha256:4afa5f5973e8572b5c0dcb4e2d4fda7890e7cd63329bd5cc3263a25c92ef0026"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:77319771a026f7c7d29c6ebc623de889e9563b7087911b46fd06c044a12aa5e9"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:df11fa992e9f576473038510d66dd305bcd51d7dd508c163a8c8fe148454e059"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d531076bdfb65af593326ffd567e6ab3da145020dafb9187a1d131064a55f97c"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d33ce258e4e6e6038f2b9e8b8a631d17d017567db43483314993b3ca345dcbbb"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1f9cd7f5635b719939019be9bda47ecb56e165e51dd26c9a217a433e3d0d59a9"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:cd4a032bb65cc132cae1fe3e52877daecc2097965cd3914e44fbd12b00dae7c5"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f2718430098bcdf60402136c845e4126a189959d103900ebabb6774a5d9fdb"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c0037a92cf0c580ed14e10953cdd26528e8796307bb8bb312dc65f71547df04d"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b95a0972fac2b1ff3c94629fc9081b16371dad870959f1408cc33b2f78ad347a"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a62e437d687cc148381bdd5f51e3e81f5b20a735c55f690c5be94e05da2b0d5c"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b367a73a414bbb08507da102dc2cde0fa7afe57d09b3240ce82a16d608a7679c"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ecce4b2360aa3f008da3327d652e74a0e743908eac306198b47e1c58b03dd2b"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bd4435b8d83f0c9561a2a9585b1de78f1abb17cb0cef5f39bf6a4b47d19bafe3"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = 
"sha256:616221a6d473c5b9aa83fa8982745441f6a4a62a66436be9445c65f241b86c94"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:7e6382ce89a92bc1d0c0c5edd51e931432202b9080dc921d8d003e616402efd1"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:ff58f379345603d940e461eae474b6bbb6dab66ed9a851ecd3cb3709bf4dcf6a"}, - {file = "pydantic_core-2.18.3.tar.gz", hash = "sha256:432e999088d85c8f36b9a3f769a8e2b57aabd817bbb729a90d1fe7f18f6f1f39"}, + {file = "pydantic_core-2.18.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:f76d0ad001edd426b92233d45c746fd08f467d56100fd8f30e9ace4b005266e4"}, + {file = "pydantic_core-2.18.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:59ff3e89f4eaf14050c8022011862df275b552caef8082e37b542b066ce1ff26"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a55b5b16c839df1070bc113c1f7f94a0af4433fcfa1b41799ce7606e5c79ce0a"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4d0dcc59664fcb8974b356fe0a18a672d6d7cf9f54746c05f43275fc48636851"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8951eee36c57cd128f779e641e21eb40bc5073eb28b2d23f33eb0ef14ffb3f5d"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4701b19f7e3a06ea655513f7938de6f108123bf7c86bbebb1196eb9bd35cf724"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e00a3f196329e08e43d99b79b286d60ce46bed10f2280d25a1718399457e06be"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:97736815b9cc893b2b7f663628e63f436018b75f44854c8027040e05230eeddb"}, + {file = "pydantic_core-2.18.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6891a2ae0e8692679c07728819b6e2b822fb30ca7445f67bbf6509b25a96332c"}, + {file = "pydantic_core-2.18.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bc4ff9805858bd54d1a20efff925ccd89c9d2e7cf4986144b30802bf78091c3e"}, + {file = "pydantic_core-2.18.4-cp310-none-win32.whl", hash = "sha256:1b4de2e51bbcb61fdebd0ab86ef28062704f62c82bbf4addc4e37fa4b00b7cbc"}, + {file = "pydantic_core-2.18.4-cp310-none-win_amd64.whl", hash = "sha256:6a750aec7bf431517a9fd78cb93c97b9b0c496090fee84a47a0d23668976b4b0"}, + {file = "pydantic_core-2.18.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:942ba11e7dfb66dc70f9ae66b33452f51ac7bb90676da39a7345e99ffb55402d"}, + {file = "pydantic_core-2.18.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b2ebef0e0b4454320274f5e83a41844c63438fdc874ea40a8b5b4ecb7693f1c4"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a642295cd0c8df1b86fc3dced1d067874c353a188dc8e0f744626d49e9aa51c4"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f09baa656c904807e832cf9cce799c6460c450c4ad80803517032da0cd062e2"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:98906207f29bc2c459ff64fa007afd10a8c8ac080f7e4d5beff4c97086a3dabd"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:19894b95aacfa98e7cb093cd7881a0c76f55731efad31073db4521e2b6ff5b7d"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:0fbbdc827fe5e42e4d196c746b890b3d72876bdbf160b0eafe9f0334525119c8"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f85d05aa0918283cf29a30b547b4df2fbb56b45b135f9e35b6807cb28bc47951"}, + {file = "pydantic_core-2.18.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e85637bc8fe81ddb73fda9e56bab24560bdddfa98aa64f87aaa4e4b6730c23d2"}, + {file = "pydantic_core-2.18.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2f5966897e5461f818e136b8451d0551a2e77259eb0f73a837027b47dc95dab9"}, + {file = "pydantic_core-2.18.4-cp311-none-win32.whl", hash = "sha256:44c7486a4228413c317952e9d89598bcdfb06399735e49e0f8df643e1ccd0558"}, + {file = "pydantic_core-2.18.4-cp311-none-win_amd64.whl", hash = "sha256:8a7164fe2005d03c64fd3b85649891cd4953a8de53107940bf272500ba8a788b"}, + {file = "pydantic_core-2.18.4-cp311-none-win_arm64.whl", hash = "sha256:4e99bc050fe65c450344421017f98298a97cefc18c53bb2f7b3531eb39bc7805"}, + {file = "pydantic_core-2.18.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:6f5c4d41b2771c730ea1c34e458e781b18cc668d194958e0112455fff4e402b2"}, + {file = "pydantic_core-2.18.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2fdf2156aa3d017fddf8aea5adfba9f777db1d6022d392b682d2a8329e087cef"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4748321b5078216070b151d5271ef3e7cc905ab170bbfd27d5c83ee3ec436695"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:847a35c4d58721c5dc3dba599878ebbdfd96784f3fb8bb2c356e123bdcd73f34"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c40d4eaad41f78e3bbda31b89edc46a3f3dc6e171bf0ecf097ff7a0ffff7cb1"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:21a5e440dbe315ab9825fcd459b8814bb92b27c974cbc23c3e8baa2b76890077"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01dd777215e2aa86dfd664daed5957704b769e726626393438f9c87690ce78c3"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4b06beb3b3f1479d32befd1f3079cc47b34fa2da62457cdf6c963393340b56e9"}, + {file = "pydantic_core-2.18.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:564d7922e4b13a16b98772441879fcdcbe82ff50daa622d681dd682175ea918c"}, + {file = "pydantic_core-2.18.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:0eb2a4f660fcd8e2b1c90ad566db2b98d7f3f4717c64fe0a83e0adb39766d5b8"}, + {file = "pydantic_core-2.18.4-cp312-none-win32.whl", hash = "sha256:8b8bab4c97248095ae0c4455b5a1cd1cdd96e4e4769306ab19dda135ea4cdb07"}, + {file = "pydantic_core-2.18.4-cp312-none-win_amd64.whl", hash = "sha256:14601cdb733d741b8958224030e2bfe21a4a881fb3dd6fbb21f071cabd48fa0a"}, + {file = "pydantic_core-2.18.4-cp312-none-win_arm64.whl", hash = "sha256:c1322d7dd74713dcc157a2b7898a564ab091ca6c58302d5c7b4c07296e3fd00f"}, + {file = "pydantic_core-2.18.4-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:823be1deb01793da05ecb0484d6c9e20baebb39bd42b5d72636ae9cf8350dbd2"}, + {file = "pydantic_core-2.18.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ebef0dd9bf9b812bf75bda96743f2a6c5734a02092ae7f721c048d156d5fabae"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae1d6df168efb88d7d522664693607b80b4080be6750c913eefb77e34c12c71a"}, + {file = 
"pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f9899c94762343f2cc2fc64c13e7cae4c3cc65cdfc87dd810a31654c9b7358cc"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99457f184ad90235cfe8461c4d70ab7dd2680e28821c29eca00252ba90308c78"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18f469a3d2a2fdafe99296a87e8a4c37748b5080a26b806a707f25a902c040a8"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7cdf28938ac6b8b49ae5e92f2735056a7ba99c9b110a474473fd71185c1af5d"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:938cb21650855054dc54dfd9120a851c974f95450f00683399006aa6e8abb057"}, + {file = "pydantic_core-2.18.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:44cd83ab6a51da80fb5adbd9560e26018e2ac7826f9626bc06ca3dc074cd198b"}, + {file = "pydantic_core-2.18.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:972658f4a72d02b8abfa2581d92d59f59897d2e9f7e708fdabe922f9087773af"}, + {file = "pydantic_core-2.18.4-cp38-none-win32.whl", hash = "sha256:1d886dc848e60cb7666f771e406acae54ab279b9f1e4143babc9c2258213daa2"}, + {file = "pydantic_core-2.18.4-cp38-none-win_amd64.whl", hash = "sha256:bb4462bd43c2460774914b8525f79b00f8f407c945d50881568f294c1d9b4443"}, + {file = "pydantic_core-2.18.4-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:44a688331d4a4e2129140a8118479443bd6f1905231138971372fcde37e43528"}, + {file = "pydantic_core-2.18.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a2fdd81edd64342c85ac7cf2753ccae0b79bf2dfa063785503cb85a7d3593223"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:86110d7e1907ab36691f80b33eb2da87d780f4739ae773e5fc83fb272f88825f"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:46387e38bd641b3ee5ce247563b60c5ca098da9c56c75c157a05eaa0933ed154"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:123c3cec203e3f5ac7b000bd82235f1a3eced8665b63d18be751f115588fea30"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dc1803ac5c32ec324c5261c7209e8f8ce88e83254c4e1aebdc8b0a39f9ddb443"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:53db086f9f6ab2b4061958d9c276d1dbe3690e8dd727d6abf2321d6cce37fa94"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:abc267fa9837245cc28ea6929f19fa335f3dc330a35d2e45509b6566dc18be23"}, + {file = "pydantic_core-2.18.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a0d829524aaefdebccb869eed855e2d04c21d2d7479b6cada7ace5448416597b"}, + {file = "pydantic_core-2.18.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:509daade3b8649f80d4e5ff21aa5673e4ebe58590b25fe42fac5f0f52c6f034a"}, + {file = "pydantic_core-2.18.4-cp39-none-win32.whl", hash = "sha256:ca26a1e73c48cfc54c4a76ff78df3727b9d9f4ccc8dbee4ae3f73306a591676d"}, + {file = "pydantic_core-2.18.4-cp39-none-win_amd64.whl", hash = "sha256:c67598100338d5d985db1b3d21f3619ef392e185e71b8d52bceacc4a7771ea7e"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:574d92eac874f7f4db0ca653514d823a0d22e2354359d0759e3f6a406db5d55d"}, + {file = 
"pydantic_core-2.18.4-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1f4d26ceb5eb9eed4af91bebeae4b06c3fb28966ca3a8fb765208cf6b51102ab"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77450e6d20016ec41f43ca4a6c63e9fdde03f0ae3fe90e7c27bdbeaece8b1ed4"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d323a01da91851a4f17bf592faf46149c9169d68430b3146dcba2bb5e5719abc"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43d447dd2ae072a0065389092a231283f62d960030ecd27565672bd40746c507"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:578e24f761f3b425834f297b9935e1ce2e30f51400964ce4801002435a1b41ef"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:81b5efb2f126454586d0f40c4d834010979cb80785173d1586df845a632e4e6d"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ab86ce7c8f9bea87b9d12c7f0af71102acbf5ecbc66c17796cff45dae54ef9a5"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:90afc12421df2b1b4dcc975f814e21bc1754640d502a2fbcc6d41e77af5ec312"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:51991a89639a912c17bef4b45c87bd83593aee0437d8102556af4885811d59f5"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:293afe532740370aba8c060882f7d26cfd00c94cae32fd2e212a3a6e3b7bc15e"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b48ece5bde2e768197a2d0f6e925f9d7e3e826f0ad2271120f8144a9db18d5c8"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:eae237477a873ab46e8dd748e515c72c0c804fb380fbe6c85533c7de51f23a8f"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:834b5230b5dfc0c1ec37b2fda433b271cbbc0e507560b5d1588e2cc1148cf1ce"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e858ac0a25074ba4bce653f9b5d0a85b7456eaddadc0ce82d3878c22489fa4ee"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2fd41f6eff4c20778d717af1cc50eca52f5afe7805ee530a4fbd0bae284f16e9"}, + {file = "pydantic_core-2.18.4.tar.gz", hash = "sha256:ec3beeada09ff865c344ff3bc2f427f5e6c26401cc6113d77e372c3fdac73864"}, ] [package.dependencies] @@ -2257,6 +2257,26 @@ sphinx = ">=4.0" [package.extras] docs = ["furo", "ipython", "myst-parser", "sphinx-copybutton", "sphinx-inline-tabs"] +[[package]] +name = "sphinx-new-tab-link" +version = "0.4.0" +description = "Open external links in new tabs of the browser in Sphinx HTML documents" +optional = false +python-versions = "*" +files = [ + {file = "sphinx-new-tab-link-0.4.0.tar.gz", hash = "sha256:aaefd94d5aa75c60a6c1e94b80d75c4281c3b6f95669b8e606f212744818b916"}, + {file = "sphinx_new_tab_link-0.4.0-py3-none-any.whl", hash = "sha256:2353bfd3a171fdbd9dcdf33e5f26b4447607293ff2b57e0f6a2dc18d1507598e"}, +] + +[package.dependencies] +sphinxcontrib-extdevhelper-kasane = "*" + +[package.extras] +dev = ["build", "twine", "wheel"] +lint = ["black", "flake8", "isort"] +testing = ["beautifulsoup4", "pytest"] +typecheck = ["mypy", "types-beautifulsoup"] + [[package]] name = "sphinxcontrib-applehelp" version = 
"1.0.8" @@ -2289,6 +2309,23 @@ lint = ["docutils-stubs", "flake8", "mypy"] standalone = ["Sphinx (>=5)"] test = ["pytest"] +[[package]] +name = "sphinxcontrib-extdevhelper-kasane" +version = "0.2.0" +description = "襲 - Provide dynamic inheritance shortcuts to make Sphinx extension development easier" +optional = false +python-versions = ">=3.8" +files = [ + {file = "sphinxcontrib-extdevhelper-kasane-0.2.0.tar.gz", hash = "sha256:4dc7b00327f33c7b421c27122b40278eeaca43f24601b572cee5616d31b206a9"}, + {file = "sphinxcontrib_extdevhelper_kasane-0.2.0-py3-none-any.whl", hash = "sha256:20f94e3b209cddec24596234458ea3887e7a7ad45b54a4d0a5bf169ff45a38f1"}, +] + +[package.dependencies] +Sphinx = "*" + +[package.extras] +dev = ["autoflake", "black", "flake8", "isort", "mypy", "pytest", "pytest-randomly", "pyupgrade", "taskipy"] + [[package]] name = "sphinxcontrib-htmlhelp" version = "2.0.5" @@ -2924,4 +2961,4 @@ transformers = ["transformers"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "3653e00789b2bc9c172da1de3a89ae8c59db874813308fc751c2af49c8a606d8" +content-hash = "e102fd27aaa4be66c4cf9c2d5227976f25443f6e2342a91ba1e16ed188222398" diff --git a/pyproject.toml b/pyproject.toml index fd41a6b..9fe4e45 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ myst-parser = "^3.0.1" emoji = "^2.12.1" toml = "^0.10.2" furo = "^2024.5.6" +sphinx-new-tab-link = "^0.4.0" [tool.pytest.ini_options] testpaths = ["tests"] diff --git a/src/clinlp/util.py b/src/clinlp/util.py index d2d6e81..3cd0cf6 100644 --- a/src/clinlp/util.py +++ b/src/clinlp/util.py @@ -26,7 +26,7 @@ def get_class_init_signature(cls: Type) -> Tuple[list, dict]: ``list`` The arguments of the class's ``__init__`` method. ``dict`` - and keyword arguments of the class's ``__init__`` method. + and keyword arguments of the class's ``__init__`` method. """ args = [] kwargs = {}