Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1417 - implicitly modify to str dtype on check #1418

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from

1417 - implicitly modify to str dtype on check

dd9468c
Select commit
Loading
Failed to load commit list.
Draft

1417 - implicitly modify to str dtype on check #1418

1417 - implicitly modify to str dtype on check
dd9468c
Select commit
Loading
Failed to load commit list.
Azure Pipelines / scverse.anndata failed Mar 12, 2024 in 14m 14s

Build #20240312.8 had test failures

Details

Tests

  • Failed: 8 (0.05%)
  • Passed: 12,948 (83.30%)
  • Other: 2,588 (16.65%)
  • Total: 15,544

Annotations

Check failure on line 515 in Build log

See this annotation in the file changed.

@azure-pipelines azure-pipelines / scverse.anndata

Build log #L515

Bash exited with code '1'.

Check failure on line 491 in Build log

See this annotation in the file changed.

@azure-pipelines azure-pipelines / scverse.anndata

Build log #L491

Bash exited with code '1'.

Check failure on line 489 in Build log

See this annotation in the file changed.

@azure-pipelines azure-pipelines / scverse.anndata

Build log #L489

Bash exited with code '1'.

Check failure on line 532 in Build log

See this annotation in the file changed.

@azure-pipelines azure-pipelines / scverse.anndata

Build log #L532

Bash exited with code '1'.

Check failure on line 1 in test_dataframe_column_uniqueness[h5]

See this annotation in the file changed.

@azure-pipelines azure-pipelines / scverse.anndata

test_dataframe_column_uniqueness[h5]

ValueError: DataFrame.index.name ('col_name') is also used by a column whose values are different. This is not supported. Please make sure the values are the same, or use a different name.
Error raised while writing key 'index_shared_okay' of <class 'h5py._hl.group.Group'> to /
Raw output
store = <HDF5 group "/" (3 members)>

    def test_dataframe_column_uniqueness(store):
        repeated_cols = pd.DataFrame(np.ones((3, 2)), columns=["a", "a"])
    
        with pytest_8_raises(
            ValueError,
            match=r"Found repeated column names: \['a'\]\. Column names must be unique\.",
        ):
            write_elem(store, "repeated_cols", repeated_cols)
    
        index_shares_col_name = pd.DataFrame(
            {"col_name": [1, 2, 3]}, index=pd.Index([1, 3, 2], name="col_name")
        )
    
        with pytest_8_raises(
            ValueError,
            match=r"DataFrame\.index\.name \('col_name'\) is also used by a column whose values are different\.",
        ):
            write_elem(store, "index_shares_col_name", index_shares_col_name)
    
        index_shared_okay = pd.DataFrame(
            {"col_name": [1, 2, 3]}, index=pd.Index([1, 2, 3], name="col_name")
        )
    
>       write_elem(store, "index_shared_okay", index_shared_okay)

/home/vsts/work/1/s/anndata/tests/test_io_elementwise.py:331: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/home/vsts/work/1/s/anndata/_io/specs/registry.py:359: in write_elem
    Writer(_REGISTRY).write_elem(store, k, elem, dataset_kwargs=dataset_kwargs)
/home/vsts/work/1/s/anndata/_io/utils.py:243: in func_wrapper
    return func(*args, **kwargs)
/home/vsts/work/1/s/anndata/_io/specs/registry.py:309: in write_elem
    return write_func(store, k, elem, dataset_kwargs=dataset_kwargs)
/home/vsts/work/1/s/anndata/_io/specs/registry.py:57: in wrapper
    result = func(g, k, *args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

f = <HDF5 group "/" (3 members)>, key = '/index_shared_okay'
df =           col_name
col_name          
1                1
2                2
3                3
_writer = <anndata._io.specs.registry.Writer object at 0x7cab71054100>
dataset_kwargs = mappingproxy({})

    @_REGISTRY.register_write(H5Group, views.DataFrameView, IOSpec("dataframe", "0.2.0"))
    @_REGISTRY.register_write(H5Group, pd.DataFrame, IOSpec("dataframe", "0.2.0"))
    @_REGISTRY.register_write(ZarrGroup, views.DataFrameView, IOSpec("dataframe", "0.2.0"))
    @_REGISTRY.register_write(ZarrGroup, pd.DataFrame, IOSpec("dataframe", "0.2.0"))
    def write_dataframe(f, key, df, _writer, dataset_kwargs=MappingProxyType({})):
        # Check arguments
        for reserved in ("_index",):
            if reserved in df.columns:
                raise ValueError(f"{reserved!r} is a reserved name for dataframe columns.")
        group = f.require_group(key)
        if not df.columns.is_unique:
            duplicates = list(df.columns[df.columns.duplicated()])
            raise ValueError(
                f"Found repeated column names: {duplicates}. Column names must be unique."
            )
        col_names = [check_key(c) for c in df.columns]
        group.attrs["column-order"] = col_names
    
        if df.index.name is not None:
    
            if df.index.name in col_names:
    
                index_values = pd.Series(df.index, index=df.index)
    
                df_values = df[df.index.name]
    
                # This logic is required to mirror anndata/_core/aligned_df.py:_gen_dataframe_df
                if not is_string_dtype(df_values):
                    warn("Transforming to str index.", ImplicitModificationWarning)
                    df_values = df_values.astype(str)
    
                if not index_values.equals(df_values):
>                   raise ValueError(
                        f"DataFrame.index.name ({df.index.name!r}) is also used by a column "
                        "whose values are different. This is not supported. Please make sure "
                        "the values are the same, or use a different name."
                    )
E                   ValueError: DataFrame.index.name ('col_name') is also used by a column whose values are different. This is not supported. Please make sure the values are the same, or use a different name.

Check failure on line 1 in test_dataframe_column_uniqueness[zarr]

See this annotation in the file changed.

@azure-pipelines azure-pipelines / scverse.anndata

test_dataframe_column_uniqueness[zarr]

ValueError: DataFrame.index.name ('col_name') is also used by a column whose values are different. This is not supported. Please make sure the values are the same, or use a different name.
Error raised while writing key 'index_shared_okay' of <class 'zarr.hierarchy.Group'> to /
Raw output
store = <zarr.hierarchy.Group '/'>

    def test_dataframe_column_uniqueness(store):
        repeated_cols = pd.DataFrame(np.ones((3, 2)), columns=["a", "a"])
    
        with pytest_8_raises(
            ValueError,
            match=r"Found repeated column names: \['a'\]\. Column names must be unique\.",
        ):
            write_elem(store, "repeated_cols", repeated_cols)
    
        index_shares_col_name = pd.DataFrame(
            {"col_name": [1, 2, 3]}, index=pd.Index([1, 3, 2], name="col_name")
        )
    
        with pytest_8_raises(
            ValueError,
            match=r"DataFrame\.index\.name \('col_name'\) is also used by a column whose values are different\.",
        ):
            write_elem(store, "index_shares_col_name", index_shares_col_name)
    
        index_shared_okay = pd.DataFrame(
            {"col_name": [1, 2, 3]}, index=pd.Index([1, 2, 3], name="col_name")
        )
    
>       write_elem(store, "index_shared_okay", index_shared_okay)

/home/vsts/work/1/s/anndata/tests/test_io_elementwise.py:331: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/home/vsts/work/1/s/anndata/_io/specs/registry.py:359: in write_elem
    Writer(_REGISTRY).write_elem(store, k, elem, dataset_kwargs=dataset_kwargs)
/home/vsts/work/1/s/anndata/_io/utils.py:243: in func_wrapper
    return func(*args, **kwargs)
/home/vsts/work/1/s/anndata/_io/specs/registry.py:309: in write_elem
    return write_func(store, k, elem, dataset_kwargs=dataset_kwargs)
/home/vsts/work/1/s/anndata/_io/specs/registry.py:57: in wrapper
    result = func(g, k, *args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

f = <zarr.hierarchy.Group '/'>, key = '/index_shared_okay'
df =           col_name
col_name          
1                1
2                2
3                3
_writer = <anndata._io.specs.registry.Writer object at 0x7cab7093f4c0>
dataset_kwargs = mappingproxy({})

    @_REGISTRY.register_write(H5Group, views.DataFrameView, IOSpec("dataframe", "0.2.0"))
    @_REGISTRY.register_write(H5Group, pd.DataFrame, IOSpec("dataframe", "0.2.0"))
    @_REGISTRY.register_write(ZarrGroup, views.DataFrameView, IOSpec("dataframe", "0.2.0"))
    @_REGISTRY.register_write(ZarrGroup, pd.DataFrame, IOSpec("dataframe", "0.2.0"))
    def write_dataframe(f, key, df, _writer, dataset_kwargs=MappingProxyType({})):
        # Check arguments
        for reserved in ("_index",):
            if reserved in df.columns:
                raise ValueError(f"{reserved!r} is a reserved name for dataframe columns.")
        group = f.require_group(key)
        if not df.columns.is_unique:
            duplicates = list(df.columns[df.columns.duplicated()])
            raise ValueError(
                f"Found repeated column names: {duplicates}. Column names must be unique."
            )
        col_names = [check_key(c) for c in df.columns]
        group.attrs["column-order"] = col_names
    
        if df.index.name is not None:
    
            if df.index.name in col_names:
    
                index_values = pd.Series(df.index, index=df.index)
    
                df_values = df[df.index.name]
    
                # This logic is required to mirror anndata/_core/aligned_df.py:_gen_dataframe_df
                if not is_string_dtype(df_values):
                    warn("Transforming to str index.", ImplicitModificationWarning)
                    df_values = df_values.astype(str)
    
                if not index_values.equals(df_values):
>                   raise ValueError(
                        f"DataFrame.index.name ({df.index.name!r}) is also used by a column "
                        "whose values are different. This is not supported. Please make sure "
                        "the values are the same, or use a different name."
                    )
E                   ValueError: DataFrame.index.name ('col_name') is also used by a column whose values are different. This is not supported. Please make sure the values are the same, or use a different name.

Check failure on line 1 in test_dataframe_column_uniqueness[h5]

See this annotation in the file changed.

@azure-pipelines azure-pipelines / scverse.anndata

test_dataframe_column_uniqueness[h5]

ValueError: DataFrame.index.name ('col_name') is also used by a column whose values are different. This is not supported. Please make sure the values are the same, or use a different name.
Error raised while writing key 'index_shared_okay' of <class 'h5py._hl.group.Group'> to /
Raw output
store = <HDF5 group "/" (3 members)>

    def test_dataframe_column_uniqueness(store):
        repeated_cols = pd.DataFrame(np.ones((3, 2)), columns=["a", "a"])
    
        with pytest_8_raises(
            ValueError,
            match=r"Found repeated column names: \['a'\]\. Column names must be unique\.",
        ):
            write_elem(store, "repeated_cols", repeated_cols)
    
        index_shares_col_name = pd.DataFrame(
            {"col_name": [1, 2, 3]}, index=pd.Index([1, 3, 2], name="col_name")
        )
    
        with pytest_8_raises(
            ValueError,
            match=r"DataFrame\.index\.name \('col_name'\) is also used by a column whose values are different\.",
        ):
            write_elem(store, "index_shares_col_name", index_shares_col_name)
    
        index_shared_okay = pd.DataFrame(
            {"col_name": [1, 2, 3]}, index=pd.Index([1, 2, 3], name="col_name")
        )
    
>       write_elem(store, "index_shared_okay", index_shared_okay)

anndata/tests/test_io_elementwise.py:331: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
anndata/_io/specs/registry.py:359: in write_elem
    Writer(_REGISTRY).write_elem(store, k, elem, dataset_kwargs=dataset_kwargs)
anndata/_io/utils.py:243: in func_wrapper
    return func(*args, **kwargs)
anndata/_io/specs/registry.py:309: in write_elem
    return write_func(store, k, elem, dataset_kwargs=dataset_kwargs)
anndata/_io/specs/registry.py:57: in wrapper
    result = func(g, k, *args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

f = <HDF5 group "/" (3 members)>, key = '/index_shared_okay'
df =           col_name
col_name          
1                1
2                2
3                3
_writer = <anndata._io.specs.registry.Writer object at 0x753d40de4bf0>
dataset_kwargs = mappingproxy({})

    @_REGISTRY.register_write(H5Group, views.DataFrameView, IOSpec("dataframe", "0.2.0"))
    @_REGISTRY.register_write(H5Group, pd.DataFrame, IOSpec("dataframe", "0.2.0"))
    @_REGISTRY.register_write(ZarrGroup, views.DataFrameView, IOSpec("dataframe", "0.2.0"))
    @_REGISTRY.register_write(ZarrGroup, pd.DataFrame, IOSpec("dataframe", "0.2.0"))
    def write_dataframe(f, key, df, _writer, dataset_kwargs=MappingProxyType({})):
        # Check arguments
        for reserved in ("_index",):
            if reserved in df.columns:
                raise ValueError(f"{reserved!r} is a reserved name for dataframe columns.")
        group = f.require_group(key)
        if not df.columns.is_unique:
            duplicates = list(df.columns[df.columns.duplicated()])
            raise ValueError(
                f"Found repeated column names: {duplicates}. Column names must be unique."
            )
        col_names = [check_key(c) for c in df.columns]
        group.attrs["column-order"] = col_names
    
        if df.index.name is not None:
    
            if df.index.name in col_names:
    
                index_values = pd.Series(df.index, index=df.index)
    
                df_values = df[df.index.name]
    
                # This logic is required to mirror anndata/_core/aligned_df.py:_gen_dataframe_df
                if not is_string_dtype(df_values):
                    warn("Transforming to str index.", ImplicitModificationWarning)
                    df_values = df_values.astype(str)
    
                if not index_values.equals(df_values):
>                   raise ValueError(
                        f"DataFrame.index.name ({df.index.name!r}) is also used by a column "
                        "whose values are different. This is not supported. Please make sure "
                        "the values are the same, or use a different name."
                    )
E                   ValueError: DataFrame.index.name ('col_name') is also used by a column whose values are different. This is not supported. Please make sure the values are the same, or use a different name.
E                 

Check failure on line 1 in test_dataframe_column_uniqueness[zarr]

See this annotation in the file changed.

@azure-pipelines azure-pipelines / scverse.anndata

test_dataframe_column_uniqueness[zarr]

ValueError: DataFrame.index.name ('col_name') is also used by a column whose values are different. This is not supported. Please make sure the values are the same, or use a different name.
Error raised while writing key 'index_shared_okay' of <class 'zarr.hierarchy.Group'> to /
Raw output
store = <zarr.hierarchy.Group '/'>

    def test_dataframe_column_uniqueness(store):
        repeated_cols = pd.DataFrame(np.ones((3, 2)), columns=["a", "a"])
    
        with pytest_8_raises(
            ValueError,
            match=r"Found repeated column names: \['a'\]\. Column names must be unique\.",
        ):
            write_elem(store, "repeated_cols", repeated_cols)
    
        index_shares_col_name = pd.DataFrame(
            {"col_name": [1, 2, 3]}, index=pd.Index([1, 3, 2], name="col_name")
        )
    
        with pytest_8_raises(
            ValueError,
            match=r"DataFrame\.index\.name \('col_name'\) is also used by a column whose values are different\.",
        ):
            write_elem(store, "index_shares_col_name", index_shares_col_name)
    
        index_shared_okay = pd.DataFrame(
            {"col_name": [1, 2, 3]}, index=pd.Index([1, 2, 3], name="col_name")
        )
    
>       write_elem(store, "index_shared_okay", index_shared_okay)

anndata/tests/test_io_elementwise.py:331: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
anndata/_io/specs/registry.py:359: in write_elem
    Writer(_REGISTRY).write_elem(store, k, elem, dataset_kwargs=dataset_kwargs)
anndata/_io/utils.py:243: in func_wrapper
    return func(*args, **kwargs)
anndata/_io/specs/registry.py:309: in write_elem
    return write_func(store, k, elem, dataset_kwargs=dataset_kwargs)
anndata/_io/specs/registry.py:57: in wrapper
    result = func(g, k, *args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

f = <zarr.hierarchy.Group '/'>, key = '/index_shared_okay'
df =           col_name
col_name          
1                1
2                2
3                3
_writer = <anndata._io.specs.registry.Writer object at 0x753d40dbbfb0>
dataset_kwargs = mappingproxy({})

    @_REGISTRY.register_write(H5Group, views.DataFrameView, IOSpec("dataframe", "0.2.0"))
    @_REGISTRY.register_write(H5Group, pd.DataFrame, IOSpec("dataframe", "0.2.0"))
    @_REGISTRY.register_write(ZarrGroup, views.DataFrameView, IOSpec("dataframe", "0.2.0"))
    @_REGISTRY.register_write(ZarrGroup, pd.DataFrame, IOSpec("dataframe", "0.2.0"))
    def write_dataframe(f, key, df, _writer, dataset_kwargs=MappingProxyType({})):
        # Check arguments
        for reserved in ("_index",):
            if reserved in df.columns:
                raise ValueError(f"{reserved!r} is a reserved name for dataframe columns.")
        group = f.require_group(key)
        if not df.columns.is_unique:
            duplicates = list(df.columns[df.columns.duplicated()])
            raise ValueError(
                f"Found repeated column names: {duplicates}. Column names must be unique."
            )
        col_names = [check_key(c) for c in df.columns]
        group.attrs["column-order"] = col_names
    
        if df.index.name is not None:
    
            if df.index.name in col_names:
    
                index_values = pd.Series(df.index, index=df.index)
    
                df_values = df[df.index.name]
    
                # This logic is required to mirror anndata/_core/aligned_df.py:_gen_dataframe_df
                if not is_string_dtype(df_values):
                    warn("Transforming to str index.", ImplicitModificationWarning)
                    df_values = df_values.astype(str)
    
                if not index_values.equals(df_values):
>                   raise ValueError(
                        f"DataFrame.index.name ({df.index.name!r}) is also used by a column "
                        "whose values are different. This is not supported. Please make sure "
                        "the values are the same, or use a different name."
                    )
E                   ValueError: DataFrame.index.name ('col_name') is also used by a column whose values are different. This is not supported. Please make sure the values are the same, or use a different name.
E                   Error raised while writing key 'index_shared_okay' of <class 'zarr.hierarchy.Group'> to /