Skip to content

Commit

Permalink
Merge pull request #6532 from noahnovsak/dask-fix-removenancolumns
Browse files (browse the repository at this point in the history)
RemoveNaNColumns - compute nans in advance
  • Loading branch information
markotoplak authored Aug 16, 2023
2 parents 32e0a15 + e0a3988 commit 5ae6389
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion Orange/preprocess/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,8 @@ def __call__(self, data, threshold=None):
self.threshold
if isinstance(threshold, float):
threshold = threshold * data.X.shape[0]
- nans = np.sum(np.isnan(data.X), axis=0)
+ # compute nans in advance, otherwise dask will do it for every attribute
+ nans = np.asarray(np.sum(np.isnan(data.X), axis=0))
att = [a for a, n in zip(data.domain.attributes, nans) if n < threshold]
domain = Orange.data.Domain(att, data.domain.class_vars,
data.domain.metas)
Expand Down

0 comments on commit 5ae6389

Please sign in to comment.