Skip to content

Commit

Permalink
feat: bump python-version to 3.9
Browse files Browse the repository at this point in the history
  • Loading branch information
kod-kristoff committed Nov 22, 2024
1 parent fbb59c2 commit 7c5e88f
Show file tree
Hide file tree
Showing 8 changed files with 119 additions and 71 deletions.
2 changes: 1 addition & 1 deletion mypy.ini
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[mypy]
python_version = 3.8
python_version = 3.9
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,8 @@ dependencies = []

[tool.uv.workspace]
members = ["sparv-sbx-sentence-sentiment-kb-sent"]

[dependency-groups]
dev = [
"mypy>=1.13.0",
]
84 changes: 42 additions & 42 deletions ruff.toml
Original file line number Diff line number Diff line change
@@ -1,50 +1,50 @@
line-length = 97
line-length = 120

target-version = "py38"
target-version = "py39"

[lint]
select = [
"A", # flake8-builtins
"ANN", # flake8-annotations
"ARG", # flake8-unused-arguments
"B", # flake8-bugbear
"C4", # flake8-comprehensions
"COM", # flake8-commas
"D", # pydocstyle
"D400", # pydocstyle: ends-in-period
"D401", # pydocstyle: non-imperative-mood
"E", # pycodestyle: errors
"F", # Pyflakes
"FLY", # flynt
"FURB", # refurb
"G", # flake8-logging-format
"I", # isort
"ISC", # flake8-implicit-str-concat
"N", # pep8-naming
"PERF", # Perflint
"PIE", # flake8-pie
"PL", # Pylint
# "PT", # flake8-pytest-style
"PTH", # flake8-use-pathlib
"Q", # flake8-quotes
"RET", # flake8-return
"RSE", # flake8-raise
"RUF", # Ruff-specific rules
"SIM", # flake8-simplify
"T20", # flake8-print
"TID", # flake8-tidy-imports
"UP", # pyupgrade
"W", # pycodestyle: warnings
"A", # flake8-builtins
"ANN", # flake8-annotations
"ARG", # flake8-unused-arguments
"B", # flake8-bugbear
"C4", # flake8-comprehensions
"COM", # flake8-commas
"D", # pydocstyle
"D400", # pydocstyle: ends-in-period
"D401", # pydocstyle: non-imperative-mood
"E", # pycodestyle: errors
"F", # Pyflakes
"FLY", # flynt
"FURB", # refurb
"G", # flake8-logging-format
"I", # isort
"ISC", # flake8-implicit-str-concat
"N", # pep8-naming
"PERF", # Perflint
"PIE", # flake8-pie
"PL", # Pylint
# "PT", # flake8-pytest-style
"PTH", # flake8-use-pathlib
"Q", # flake8-quotes
"RET", # flake8-return
"RSE", # flake8-raise
"RUF", # Ruff-specific rules
"SIM", # flake8-simplify
"T20", # flake8-print
"TID", # flake8-tidy-imports
"UP", # pyupgrade
"W", # pycodestyle: warnings
]
ignore = [
"ANN101", # flake8-annotations: missing-type-self (deprecated)
"ANN102", # flake8-annotations: missing-type-cls (deprecated)
"ANN401", # flake8-annotations: any-type
"B008", # flake8-bugbear: function-call-in-default-argument
"ISC001",
"COM812", # flake8-commas: missing-trailing-comma
"PLR09", # Pylint: too-many-*
"SIM105", # flake8-simplify: suppressible-exception
"ANN101", # flake8-annotations: missing-type-self (deprecated)
"ANN102", # flake8-annotations: missing-type-cls (deprecated)
"ANN401", # flake8-annotations: any-type
"B008", # flake8-bugbear: function-call-in-default-argument
"ISC001",
"COM812", # flake8-commas: missing-trailing-comma
"PLR09", # Pylint: too-many-*
"SIM105", # flake8-simplify: suppressible-exception
]
preview = true

Expand All @@ -58,4 +58,4 @@ convention = "google"

# Ignore `E402` (import violations) in all `__init__.py` files, and in `path/to/file.py`.
[lint.per-file-ignores]
"*/tests/*" = ["D", "ARG002", "E501"]
"**/tests/*" = ["D", "ARG002", "E501", "SIM905"]
9 changes: 9 additions & 0 deletions sparv-sbx-sentence-sentiment-kb-sent/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,15 @@ Type | HuggingFace Model | Revision
Model | [`KBLab/robust-swedish-sentiment-multiclass`](https://huggingface.co/KBLab/robust-swedish-sentiment-multiclass) | b0ec32dca56aa6182a6955c8f12129bbcbc7fdbd
Tokenizer | [`KBLab/megatron-bert-large-swedish-cased-165k`](https://huggingface.co/KBLab/megatron-bert-large-swedish-cased-165k) | 90c57ab49e27b820bd85308a488409dfea25600d

## Minimum Supported Python Version

This library strives to support each Python version until its End-Of-Life.

Versions of this library support the following Python versions:

- v0.3: Python 3.9
- v0.2: Python 3.8

## Changelog

This project keeps a [changelog](./CHANGELOG.md).
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
__description__ = "Annotate sentence with sentiment analysis."
__version__ = "0.2.0"

__config__ = [
__config__: list[sparv_api.Config] = [
sparv_api.Config(
f"{PROJECT_NAME}.num_decimals",
description="The number of decimals to round the score to",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def annotate_sentence_sentiment(
out_sentence_sentiment: sparv_api.Output = sparv_api.Output(
f"<sentence>:{PROJECT_NAME}.sentence-sentiment--kb-sent",
# cls="sbx_sentence_sentiment_kb_sent",
description="Sentiment analysis of sentence with KBLab/robust-swedish-sentiment-multiclass", # noqa: E501
description="Sentiment analysis of sentence with KBLab/robust-swedish-sentiment-multiclass",
),
word: sparv_api.Annotation = sparv_api.Annotation("<token:word>"),
sentence: sparv_api.Annotation = sparv_api.Annotation("<sentence>"),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Sentiment analyzer."""

from collections import defaultdict
from typing import Dict, List, Optional, Union
from typing import Optional, Union

from sparv import api as sparv_api # type: ignore [import-untyped]
from transformers import ( # type: ignore [import-untyped]
Expand Down Expand Up @@ -45,19 +45,15 @@ def __init__(
self.tokenizer = self._default_tokenizer() if tokenizer is None else tokenizer
self.model = self._default_model() if model is None else model
self.num_decimals = num_decimals
self.classifier = pipeline(
"sentiment-analysis", model=self.model, tokenizer=self.tokenizer
)
self.classifier = pipeline("sentiment-analysis", model=self.model, tokenizer=self.tokenizer)

@classmethod
def _default_tokenizer(cls) -> PreTrainedTokenizerFast:
return AutoTokenizer.from_pretrained(TOKENIZER_NAME, revision=TOKENIZER_REVISION)

@classmethod
def _default_model(cls) -> MegatronBertForSequenceClassification:
return AutoModelForSequenceClassification.from_pretrained(
MODEL_NAME, revision=MODEL_REVISION
)
return AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, revision=MODEL_REVISION)

@classmethod
def default(cls) -> "SentimentAnalyzer":
Expand All @@ -70,21 +66,19 @@ def default(cls) -> "SentimentAnalyzer":
model = cls._default_model()
return cls(model=model, tokenizer=tokenizer)

def analyze_sentence(self, text: List[str]) -> Optional[str]:
def analyze_sentence(self, text: list[str]) -> Optional[str]:
"""Analyze a sentence.
Args:
text (Iterable[str]): the text to analyze
Returns:
List[Optional[str]]: the sentence annotations.
list[Optional[str]]: the sentence annotations.
"""
total_length = sum(len(t) for t in text) + len(text) - 1
logger.debug("analyzed text length=%d", total_length)
if total_length > MAX_LENGTH:
logger.warning(
"Long sentence (%d chars), splitting and combining results", total_length
)
logger.warning("Long sentence (%d chars), splitting and combining results", total_length)
classifications = self._analyze_in_chunks(text)
else:
sentence = TOK_SEP.join(text)
Expand All @@ -94,18 +88,12 @@ def analyze_sentence(self, text: List[str]) -> Optional[str]:
collect_label_and_score = ((clss["label"], clss["score"]) for clss in classifications)
score_format, score_pred = SCORE_FORMAT_AND_PREDICATE[self.num_decimals]

format_scores = (
(label, score_format.format(score)) for label, score in collect_label_and_score
)
filter_out_zero_scores = (
(label, score) for label, score in format_scores if not score_pred(score)
)
classification_str = "|".join(
f"{label}:{score}" for label, score in filter_out_zero_scores
)
format_scores = ((label, score_format.format(score)) for label, score in collect_label_and_score)
filter_out_zero_scores = ((label, score) for label, score in format_scores if not score_pred(score))
classification_str = "|".join(f"{label}:{score}" for label, score in filter_out_zero_scores)
return f"|{classification_str}|" if classification_str else "|"

def _analyze_in_chunks(self, text: List[str]) -> List[Dict[str, Union[str, float]]]:
def _analyze_in_chunks(self, text: list[str]) -> list[dict[str, Union[str, float]]]:
classifications_list = []
start_i = 0
curr_length = 0
Expand All @@ -121,10 +109,7 @@ def _analyze_in_chunks(self, text: List[str]) -> List[Dict[str, Union[str, float
for clss in clsss:
classifications_dict[clss["label"]].append(clss["score"])

return [
{"label": label, "score": sum(scores) / len(scores)}
for label, scores in classifications_dict.items()
]
return [{"label": label, "score": sum(scores) / len(scores)} for label, scores in classifications_dict.items()]


SCORE_FORMAT_AND_PREDICATE = {
Expand Down
49 changes: 49 additions & 0 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 7c5e88f

Please sign in to comment.