From 15651e215298d8028a4627315a339d8d7e10f2c7 Mon Sep 17 00:00:00 2001 From: markus583 Date: Sun, 16 Jun 2024 13:54:01 +0000 Subject: [PATCH] regular sigmoid --- setup.py | 2 +- wtpsplit/utils.py | 25 +------------------------ 2 files changed, 2 insertions(+), 25 deletions(-) diff --git a/setup.py b/setup.py index a9908930..402e5cc5 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name="segment-any-text", version="1.0.0", - packages=["wtpsplit"], + packages=["segment-any-text"], description="Universal Robust, Efficient and Adaptable Sentence Segmentation", author="Markus Frohmann, Igor Sterner, Benjamin Minixhofer", author_email="markus.frohmann@gmail.com", diff --git a/wtpsplit/utils.py b/wtpsplit/utils.py index 015730a7..6a25ae8c 100644 --- a/wtpsplit/utils.py +++ b/wtpsplit/utils.py @@ -132,31 +132,8 @@ def get_subword_label_dict(label_args, tokenizer): return label_dict -# numerically more stable sigmoid taken from -# https://stackoverflow.com/questions/51976461/optimal-way-of-defining-a-numerically-stable-sigmoid-function-for-a-list-in-pyth -def _positive_sigmoid(x): - return 1 / (1 + np.exp(-x)) - - -def _negative_sigmoid(x): - # Cache exp so you won't have to calculate it twice - exp = np.exp(x) - return exp / (exp + 1) - - def sigmoid(x): - positive = x >= 0 - # Boolean array inversion is faster than another comparison - negative = ~positive - - # empty contains junk hence will be faster to allocate - # Zeros has to zero-out the array after allocation, no need for that - # See comment to the answer when it comes to dtype - result = np.empty_like(x, dtype=np.float) - result[positive] = _positive_sigmoid(x[positive]) - result[negative] = _negative_sigmoid(x[negative]) - - return result + return 1 / (1 + np.exp(-x.astype(np.float32))) # fp32 for better precision def encode(text):