diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 299ed867..18493bca 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -9,9 +9,9 @@ jobs:
     runs-on: ubuntu-latest
     if: startsWith(github.ref, 'refs/tags/v')
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v6
       - name: Set up Python 3.12
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v6
         with:
           python-version: "3.12"
           cache: pip
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 12798628..196c6d8c 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -13,7 +13,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     defaults:
       run:
-        shell: bash -e {0}
+        shell: bash # bash also on windows

     strategy:
       fail-fast: false
@@ -32,8 +32,8 @@
       PRERELEASE: ${{ matrix.prerelease }}

     steps:
-      - uses: actions/checkout@v2
-      - uses: astral-sh/setup-uv@v5
+      - uses: actions/checkout@v6
+      - uses: astral-sh/setup-uv@v7
        id: setup-uv
        with:
          version: "latest"
@@ -41,20 +41,18 @@
       - name: Install dependencies
         run: |
           if [[ "${PRERELEASE}" == "allow" ]]; then
-            uv sync --extra test
-            : # uv sync --extra test --prerelease ${PRERELEASE}
-            uv pip install git+https://github.com/scverse/anndata.git
-            uv pip install --prerelease allow pandas
-          else
-            uv sync --extra test
+            sed -i.bak 's/requires-python.*//' pyproject.toml # otherwise uv complains that anndata requires python>=3.12 and we only do >=3.11 😱
+            uv add 'git+https://github.com/scverse/anndata.git'
+            uv add --prerelease=allow 'pandas>=3'
           fi
           if [[ -n "${DASK_VERSION}" ]]; then
             if [[ "${DASK_VERSION}" == "latest" ]]; then
-              uv pip install --upgrade dask
+              uv add dask
             else
-              uv pip install dask==${DASK_VERSION}
+              uv add dask==${DASK_VERSION}
             fi
           fi
+          uv sync --group=test
       - name: Test
         env:
           MPLBACKEND: agg
@@ -63,7 +61,7 @@
         run: |
           uv run pytest --cov --color=yes --cov-report=xml
       - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v4
+        uses: codecov/codecov-action@v5
        with:
          name: coverage
          verbose: true
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index acecf90e..4bbc5937 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -1,19 +1,22 @@
 # https://docs.readthedocs.io/en/stable/config-file/v2.html
 version: 2
 build:
-  os: ubuntu-20.04
+  os: ubuntu-24.04
   tools:
-    python: "3.11"
-sphinx:
-  configuration: docs/conf.py
-  fail_on_warning: true
-python:
-  install:
-    - method: pip
-      path: .
-      extra_requirements:
-        - docs
-        - torch
+    python: "3.13"
+  jobs:
+    post_checkout:
+      # unshallow so version can be derived from tag
+      - git fetch --unshallow || true
+    create_environment:
+      - asdf plugin add uv
+      - asdf install uv latest
+      - asdf global uv latest
+    build:
+      html:
+        - uv sync --group=docs --extra=torch
+        - make --directory=docs build
+        - mv docs/_build $READTHEDOCS_OUTPUT
 submodules:
   include:
     - "docs/tutorials/notebooks"
diff --git a/pyproject.toml b/pyproject.toml
index 89e9e023..fb06b861 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,8 +51,17 @@ dependencies = [
     "xarray-spatial>=0.3.5",
     "zarr>=3.0.0",
 ]
-
 [project.optional-dependencies]
+torch = [
+    "torch"
+]
+extra = [
+    "napari-spatialdata[all]",
+    "spatialdata-plot",
+    "spatialdata-io",
+]
+
+[dependency-groups]
 dev = [
     "bump2version",
     "sentry-prevent-cli",
@@ -80,14 +89,6 @@ benchmark = [
     "asv",
     "memray",
 ]
-torch = [
-    "torch"
-]
-extra = [
-    "napari-spatialdata[all]",
-    "spatialdata-plot",
-    "spatialdata-io",
-]

 [tool.coverage.run]
 source = ["spatialdata"]
@@ -95,11 +96,10 @@
 omit = [
     "**/test_*.py",
 ]
-[tool.pytest.ini_options]
+[tool.pytest]
 testpaths = ["tests"]
-xfail_strict = true
+strict = true
 addopts = [
-#    "-Werror",  # if 3rd party libs raise DeprecationWarnings, just use filterwarnings below
     "--import-mode=importlib",  # allow using test files with same name
     "-s",  # print output from tests
 ]
@@ -107,11 +107,13 @@
 markers = [
     "slow: marks tests as slow (deselect with '-m \"not slow\"')",
     "gpu: run test on GPU using CuPY.",
+    "array_api: used by anndata.tests.helpers, not us",
     "skip_with_pyarrow_strings: skip when pyarrow string conversion is turned on",
 ]
 # info on how to use this https://stackoverflow.com/questions/57925071/how-do-i-avoid-getting-deprecationwarning-from-inside-dependencies-with-pytest
 filterwarnings = [
-    # "ignore:.*U.*mode is deprecated:DeprecationWarning",
+    # "error",  # if 3rd party libs raise DeprecationWarnings, TODO: filter them individually below
+    # "ignore:.*U.*mode is deprecated:DeprecationWarning",
 ]

 [tool.jupytext]
diff --git a/tests/core/test_centroids.py b/tests/core/test_centroids.py
index aa332f9d..9679c3ff 100644
--- a/tests/core/test_centroids.py
+++ b/tests/core/test_centroids.py
@@ -183,7 +183,7 @@ def test_get_centroids_invalid_element(images):
         region_key="region",
         instance_key="instance_id",
     )
-    with pytest.raises(ValueError, match="The object type is not supported."):
+    with pytest.raises(ValueError, match=r"The object type is not supported"):
         get_centroids(adata)


diff --git a/tests/io/test_readwrite.py b/tests/io/test_readwrite.py
index af028d29..e6d23eee 100644
--- a/tests/io/test_readwrite.py
+++ b/tests/io/test_readwrite.py
@@ -13,6 +13,7 @@
 import zarr
 from anndata import AnnData
 from numpy.random import default_rng
+from packaging.version import Version
 from shapely import MultiPolygon, Polygon
 from upath import UPath
 from zarr.errors import GroupNotFoundError
@@ -1067,7 +1068,7 @@ def test_read_sdata(tmp_path: Path, points: SpatialData) -> None:
     assert_spatial_data_objects_are_identical(sdata_from_path, sdata_from_zarr_group)


-def test_sdata_with_nan_in_obs() -> None:
+def test_sdata_with_nan_in_obs(tmp_path: Path) -> None:
     """Test writing SpatialData with mixed string/NaN values in obs works correctly.

     Regression test for https://github.com/scverse/spatialdata/issues/399
@@ -1096,14 +1097,18 @@
     assert sdata["table"].obs["column_only_region1"].iloc[1] is np.nan
     assert np.isnan(sdata["table"].obs["column_only_region2"].iloc[0])

-    with tempfile.TemporaryDirectory() as tmpdir:
-        path = os.path.join(tmpdir, "data.zarr")
-        sdata.write(path)
-
-        sdata2 = SpatialData.read(path)
-        assert "column_only_region1" in sdata2["table"].obs.columns
-        assert sdata2["table"].obs["column_only_region1"].iloc[0] == "string"
-        assert sdata2["table"].obs["column_only_region2"].iloc[1] == 3
-        # After round-trip, NaN in object-dtype column becomes string "nan"
-        assert sdata2["table"].obs["column_only_region1"].iloc[1] == "nan"
-        assert np.isnan(sdata2["table"].obs["column_only_region2"].iloc[0])
+    path = tmp_path / "data.zarr"
+    sdata.write(path)
+
+    sdata2 = SpatialData.read(path)
+    assert "column_only_region1" in sdata2["table"].obs.columns
+    r1 = sdata2["table"].obs["column_only_region1"]
+    r2 = sdata2["table"].obs["column_only_region2"]
+
+    assert r1.iloc[0] == "string"
+    assert r2.iloc[1] == 3
+    if Version(pd.__version__) >= Version("3"):
+        assert pd.isna(r1.iloc[1])
+    else:  # After round-trip, NaN in object-dtype column becomes string "nan" on pandas 2
+        assert r1.iloc[1] == "nan"
+    assert np.isnan(r2.iloc[0])
diff --git a/tests/models/test_models.py b/tests/models/test_models.py
index e2087ace..1e61c33a 100644
--- a/tests/models/test_models.py
+++ b/tests/models/test_models.py
@@ -18,6 +18,7 @@
 from dask.dataframe import DataFrame as DaskDataFrame
 from geopandas import GeoDataFrame
 from numpy.random import default_rng
+from packaging.version import Version
 from shapely.geometry import MultiPolygon, Point, Polygon
 from shapely.io import to_ragged_array
 from spatial_image import to_spatial_image
@@ -311,7 +312,7 @@ def test_shapes_model(self, model: ShapesModel, path: Path) -> None:
     @pytest.mark.parametrize("model", [PointsModel])
     @pytest.mark.parametrize("instance_key", [None, "cell_id"])
     @pytest.mark.parametrize("feature_key", [None, "target"])
-    @pytest.mark.parametrize("typ", [np.ndarray, pd.DataFrame, dd.DataFrame])
+    @pytest.mark.parametrize("typ", [np.ndarray, pd.DataFrame, dd.DataFrame], ids=["numpy", "pandas", "dask"])
     @pytest.mark.parametrize("is_annotation", [True, False])
     @pytest.mark.parametrize("is_3d", [True, False])
     @pytest.mark.parametrize("coordinates", [None, {"x": "A", "y": "B", "z": "C"}])
@@ -880,12 +881,12 @@ def test_categories_on_partitioned_dataframe(sdata_blobs: SpatialData):
     assert np.array_equal(df["genes"].to_numpy(), ddf_parsed["genes"].compute().to_numpy())
     assert set(df["genes"].cat.categories.tolist()) == set(ddf_parsed["genes"].compute().cat.categories.tolist())

-    # two behaviors to investigate later/report to dask (they originate in dask)
-    # TODO: df['genes'].cat.categories has dtype 'object', while ddf_parsed['genes'].compute().cat.categories has dtype
-    # 'string'
-    # this problem should disappear after pandas 3.0 is released
-    assert df["genes"].cat.categories.dtype == "object"
+    if Version(pd.__version__) >= Version("3"):
+        assert df["genes"].cat.categories.dtype == "string"
+    else:
+        assert df["genes"].cat.categories.dtype == "object"
     assert ddf_parsed["genes"].compute().cat.categories.dtype == "string"
+    # behavior to investigate later/report to dask
     # TODO: the list of categories is not preserving the order
     assert df["genes"].cat.categories.tolist() != ddf_parsed["genes"].compute().cat.categories.tolist()
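
Note on the pandas version gating used in the test changes above: pandas 3 makes a dedicated string dtype the default for string data (PDEP-14), so categorical categories built from Python strings no longer come back as object dtype, and the round-trip test now accepts a real missing value via pd.isna instead of the literal string "nan". A minimal standalone sketch of the dtype difference the gated assertions encode; this is illustrative only and not part of the test suite:

import pandas as pd
from packaging.version import Version

PANDAS_3 = Version(pd.__version__) >= Version("3")

# Categories of a categorical built from Python strings:
# object dtype on pandas 2, a string dtype on pandas 3 (PDEP-14).
cats = pd.Series(["a", "b", "a"], dtype="category").cat.categories
assert (cats.dtype == object) != PANDAS_3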