Skip to content

Commit

Permalink
feat: bump python-version to 3.9
Browse files Browse the repository at this point in the history
  • Loading branch information
kod-kristoff committed Nov 22, 2024
1 parent fbb59c2 commit 7c5e88f
Show file tree
Hide file tree
Showing 8 changed files with 119 additions and 71 deletions.
2 changes: 1 addition & 1 deletion mypy.ini
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[mypy]
python_version = 3.8
python_version = 3.9
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,8 @@ dependencies = []

[tool.uv.workspace]
members = ["sparv-sbx-sentence-sentiment-kb-sent"]

[dependency-groups]
dev = [
"mypy>=1.13.0",
]
84 changes: 42 additions & 42 deletions ruff.toml
Original file line number Diff line number Diff line change
@@ -1,50 +1,50 @@
line-length = 97
line-length = 120

target-version = "py38"
target-version = "py39"

[lint]
select = [
"A", # flake8-builtins
"ANN", # flake8-annotations
"ARG", # flake8-unused-arguments
"B", # flake8-bugbear
"C4", # flake8-comprehensions
"COM", # flake8-commas
"D", # pydocstyle
"D400", # pydocstyle: ends-in-period
"D401", # pydocstyle: non-imperative-mood
"E", # pycodestyle: errors
"F", # Pyflakes
"FLY", # flynt
"FURB", # refurb
"G", # flake8-logging-format
"I", # isort
"ISC", # flake8-implicit-str-concat
"N", # pep8-naming
"PERF", # Perflint
"PIE", # flake8-pie
"PL", # Pylint
# "PT", # flake8-pytest-style
"PTH", # flake8-use-pathlib
"Q", # flake8-quotes
"RET", # flake8-return
"RSE", # flake8-raise
"RUF", # Ruff-specific rules
"SIM", # flake8-simplify
"T20", # flake8-print
"TID", # flake8-tidy-imports
"UP", # pyupgrade
"W", # pycodestyle: warnings
"A", # flake8-builtins
"ANN", # flake8-annotations
"ARG", # flake8-unused-arguments
"B", # flake8-bugbear
"C4", # flake8-comprehensions
"COM", # flake8-commas
"D", # pydocstyle
"D400", # pydocstyle: ends-in-period
"D401", # pydocstyle: non-imperative-mood
"E", # pycodestyle: errors
"F", # Pyflakes
"FLY", # flynt
"FURB", # refurb
"G", # flake8-logging-format
"I", # isort
"ISC", # flake8-implicit-str-concat
"N", # pep8-naming
"PERF", # Perflint
"PIE", # flake8-pie
"PL", # Pylint
# "PT", # flake8-pytest-style
"PTH", # flake8-use-pathlib
"Q", # flake8-quotes
"RET", # flake8-return
"RSE", # flake8-raise
"RUF", # Ruff-specific rules
"SIM", # flake8-simplify
"T20", # flake8-print
"TID", # flake8-tidy-imports
"UP", # pyupgrade
"W", # pycodestyle: warnings
]
ignore = [
"ANN101", # flake8-annotations: missing-type-self (deprecated)
"ANN102", # flake8-annotations: missing-type-cls (deprecated)
"ANN401", # flake8-annotations: any-type
"B008", # flake8-bugbear: function-call-in-default-argument
"ISC001",
"COM812", # flake8-commas: missing-trailing-comma
"PLR09", # Pylint: too-many-*
"SIM105", # flake8-simplify: suppressible-exception
"ANN101", # flake8-annotations: missing-type-self (deprecated)
"ANN102", # flake8-annotations: missing-type-cls (deprecated)
"ANN401", # flake8-annotations: any-type
"B008", # flake8-bugbear: function-call-in-default-argument
"ISC001",
"COM812", # flake8-commas: missing-trailing-comma
"PLR09", # Pylint: too-many-*
"SIM105", # flake8-simplify: suppressible-exception
]
preview = true

Expand All @@ -58,4 +58,4 @@ convention = "google"

# Ignore `E402` (import violations) in all `__init__.py` files, and in `path/to/file.py`.
[lint.per-file-ignores]
"*/tests/*" = ["D", "ARG002", "E501"]
"**/tests/*" = ["D", "ARG002", "E501", "SIM905"]
9 changes: 9 additions & 0 deletions sparv-sbx-sentence-sentiment-kb-sent/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,15 @@ Type | HuggingFace Model | Revision
Model | [`KBLab/robust-swedish-sentiment-multiclass`](https://huggingface.co/KBLab/robust-swedish-sentiment-multiclass) | b0ec32dca56aa6182a6955c8f12129bbcbc7fdbd
Tokenizer | [`KBLab/megatron-bert-large-swedish-cased-165k`](https://huggingface.co/KBLab/megatron-bert-large-swedish-cased-165k) | 90c57ab49e27b820bd85308a488409dfea25600d

## Minimum Supported Python Version

This library strives to support each Python version until its End-Of-Life.

Versions of this library support the following Python versions:

- v0.3: Python 3.9
- v0.2: Python 3.8

## Changelog

This project keeps a [changelog](./CHANGELOG.md).
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
__description__ = "Annotate sentence with sentiment analysis."
__version__ = "0.2.0"

__config__ = [
__config__: list[sparv_api.Config] = [
sparv_api.Config(
f"{PROJECT_NAME}.num_decimals",
description="The number of decimals to round the score to",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def annotate_sentence_sentiment(
out_sentence_sentiment: sparv_api.Output = sparv_api.Output(
f"<sentence>:{PROJECT_NAME}.sentence-sentiment--kb-sent",
# cls="sbx_sentence_sentiment_kb_sent",
description="Sentiment analysis of sentence with KBLab/robust-swedish-sentiment-multiclass", # noqa: E501
description="Sentiment analysis of sentence with KBLab/robust-swedish-sentiment-multiclass",
),
word: sparv_api.Annotation = sparv_api.Annotation("<token:word>"),
sentence: sparv_api.Annotation = sparv_api.Annotation("<sentence>"),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Sentiment analyzer."""

from collections import defaultdict
from typing import Dict, List, Optional, Union
from typing import Optional, Union

from sparv import api as sparv_api # type: ignore [import-untyped]
from transformers import ( # type: ignore [import-untyped]
Expand Down Expand Up @@ -45,19 +45,15 @@ def __init__(
self.tokenizer = self._default_tokenizer() if tokenizer is None else tokenizer
self.model = self._default_model() if model is None else model
self.num_decimals = num_decimals
self.classifier = pipeline(
"sentiment-analysis", model=self.model, tokenizer=self.tokenizer
)
self.classifier = pipeline("sentiment-analysis", model=self.model, tokenizer=self.tokenizer)

@classmethod
def _default_tokenizer(cls) -> PreTrainedTokenizerFast:
return AutoTokenizer.from_pretrained(TOKENIZER_NAME, revision=TOKENIZER_REVISION)

@classmethod
def _default_model(cls) -> MegatronBertForSequenceClassification:
return AutoModelForSequenceClassification.from_pretrained(
MODEL_NAME, revision=MODEL_REVISION
)
return AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, revision=MODEL_REVISION)

@classmethod
def default(cls) -> "SentimentAnalyzer":
Expand All @@ -70,21 +66,19 @@ def default(cls) -> "SentimentAnalyzer":
model = cls._default_model()
return cls(model=model, tokenizer=tokenizer)

def analyze_sentence(self, text: List[str]) -> Optional[str]:
def analyze_sentence(self, text: list[str]) -> Optional[str]:
"""Analyze a sentence.
Args:
text (Iterable[str]): the text to analyze
Returns:
List[Optional[str]]: the sentence annotations.
list[Optional[str]]: the sentence annotations.
"""
total_length = sum(len(t) for t in text) + len(text) - 1
logger.debug("analyzed text length=%d", total_length)
if total_length > MAX_LENGTH:
logger.warning(
"Long sentence (%d chars), splitting and combining results", total_length
)
logger.warning("Long sentence (%d chars), splitting and combining results", total_length)
classifications = self._analyze_in_chunks(text)
else:
sentence = TOK_SEP.join(text)
Expand All @@ -94,18 +88,12 @@ def analyze_sentence(self, text: List[str]) -> Optional[str]:
collect_label_and_score = ((clss["label"], clss["score"]) for clss in classifications)
score_format, score_pred = SCORE_FORMAT_AND_PREDICATE[self.num_decimals]

format_scores = (
(label, score_format.format(score)) for label, score in collect_label_and_score
)
filter_out_zero_scores = (
(label, score) for label, score in format_scores if not score_pred(score)
)
classification_str = "|".join(
f"{label}:{score}" for label, score in filter_out_zero_scores
)
format_scores = ((label, score_format.format(score)) for label, score in collect_label_and_score)
filter_out_zero_scores = ((label, score) for label, score in format_scores if not score_pred(score))
classification_str = "|".join(f"{label}:{score}" for label, score in filter_out_zero_scores)
return f"|{classification_str}|" if classification_str else "|"

def _analyze_in_chunks(self, text: List[str]) -> List[Dict[str, Union[str, float]]]:
def _analyze_in_chunks(self, text: list[str]) -> list[dict[str, Union[str, float]]]:
classifications_list = []
start_i = 0
curr_length = 0
Expand All @@ -121,10 +109,7 @@ def _analyze_in_chunks(self, text: List[str]) -> List[Dict[str, Union[str, float
for clss in clsss:
classifications_dict[clss["label"]].append(clss["score"])

return [
{"label": label, "score": sum(scores) / len(scores)}
for label, scores in classifications_dict.items()
]
return [{"label": label, "score": sum(scores) / len(scores)} for label, scores in classifications_dict.items()]


SCORE_FORMAT_AND_PREDICATE = {
Expand Down
49 changes: 49 additions & 0 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 7c5e88f

Please sign in to comment.