From 213154ff92f89bdd4b670f4c1cdc94ff985d81d2 Mon Sep 17 00:00:00 2001 From: Tong LI Date: Tue, 12 Nov 2024 15:20:55 +0000 Subject: [PATCH 1/7] add options to filter different elements --- src/spatialdata/_core/query/spatial_query.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index f94dfa3cc..eaafca766 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -510,11 +510,24 @@ def _( max_coordinate: list[Number] | ArrayLike, target_coordinate_system: str, filter_table: bool = True, + shapes: bool = True, + images: bool = True, + labels: bool = True, + points: bool = True, ) -> SpatialData: min_coordinate = _parse_list_into_array(min_coordinate) max_coordinate = _parse_list_into_array(max_coordinate) new_elements = {} - for element_type in ["points", "images", "labels", "shapes"]: + elements_to_filter = [] + if images: + elements_to_filter.append("images") + if shapes: + elements_to_filter.append("shapes") + if points: + elements_to_filter.append("points") + if labels: + elements_to_filter.append("labels") + for element_type in elements_to_filter: elements = getattr(sdata, element_type) queried_elements = _dict_query_dispatcher( elements, From 8c2a96feb61dc9e4bd57fdad4dec2572793eceaa Mon Sep 17 00:00:00 2001 From: Tong LI Date: Wed, 13 Nov 2024 23:09:46 +0000 Subject: [PATCH 2/7] do not change the logic in this query; people should use sdata.subset() to remove points element if needs performance boost --- src/spatialdata/_core/query/spatial_query.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index eaafca766..2234d060a 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -510,24 +510,13 @@ def 
_( max_coordinate: list[Number] | ArrayLike, target_coordinate_system: str, filter_table: bool = True, - shapes: bool = True, - images: bool = True, - labels: bool = True, - points: bool = True, ) -> SpatialData: min_coordinate = _parse_list_into_array(min_coordinate) max_coordinate = _parse_list_into_array(max_coordinate) new_elements = {} - elements_to_filter = [] - if images: - elements_to_filter.append("images") - if shapes: - elements_to_filter.append("shapes") - if points: - elements_to_filter.append("points") - if labels: - elements_to_filter.append("labels") - for element_type in elements_to_filter: + if len(sdata.points) > 0: + warnings.warn(f"Your SpatialData object has points element. Thus maybe suffer from performance issues when querying.", UserWarning) + for element_type in ["points", "images", "labels", "shapes"]: elements = getattr(sdata, element_type) queried_elements = _dict_query_dispatcher( elements, From 9493855b9ecb52d64be422088a9694252a63efbb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 13 Nov 2024 23:10:07 +0000 Subject: [PATCH 3/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spatialdata/_core/query/spatial_query.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index 2234d060a..41914b6e4 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -515,7 +515,10 @@ def _( max_coordinate = _parse_list_into_array(max_coordinate) new_elements = {} if len(sdata.points) > 0: - warnings.warn(f"Your SpatialData object has points element. Thus maybe suffer from performance issues when querying.", UserWarning) + warnings.warn( + "Your SpatialData object has points element. 
Thus maybe suffer from performance issues when querying.", + UserWarning, + ) for element_type in ["points", "images", "labels", "shapes"]: elements = getattr(sdata, element_type) queried_elements = _dict_query_dispatcher( From e649ca1e7361a03a85dd6ff2f180f1ceea782b96 Mon Sep 17 00:00:00 2001 From: Tong LI Date: Thu, 14 Nov 2024 08:27:31 +0000 Subject: [PATCH 4/7] Add the "warning" in the function description as well --- src/spatialdata/_core/query/spatial_query.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index 41914b6e4..937a4bac1 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -476,6 +476,7 @@ def bounding_box_query( ) -> SpatialElement | SpatialData | None: """ Query a SpatialData object or SpatialElement within a bounding box. + If the object has `points` element, depending on the number of points, it MAY suffer from performance issues. Parameters ---------- @@ -514,11 +515,8 @@ def _( min_coordinate = _parse_list_into_array(min_coordinate) max_coordinate = _parse_list_into_array(max_coordinate) new_elements = {} - if len(sdata.points) > 0: - warnings.warn( - "Your SpatialData object has points element. Thus maybe suffer from performance issues when querying.", - UserWarning, - ) + if sdata.points: + warnings.warn(f"Your SpatialData object has points element. 
Thus maybe suffer from performance issues when querying.", UserWarning) for element_type in ["points", "images", "labels", "shapes"]: elements = getattr(sdata, element_type) queried_elements = _dict_query_dispatcher( From 3afffc6328db240ef118e1440d5c5db4301ad992 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 14 Nov 2024 08:31:35 +0000 Subject: [PATCH 5/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spatialdata/_core/query/spatial_query.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index 937a4bac1..93cbde684 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -516,7 +516,10 @@ def _( max_coordinate = _parse_list_into_array(max_coordinate) new_elements = {} if sdata.points: - warnings.warn(f"Your SpatialData object has points element. Thus maybe suffer from performance issues when querying.", UserWarning) + warnings.warn( + "Your SpatialData object has points element. 
Thus maybe suffer from performance issues when querying.", + UserWarning, + ) for element_type in ["points", "images", "labels", "shapes"]: elements = getattr(sdata, element_type) queried_elements = _dict_query_dispatcher( From ad4702751990eaaade10b238e932407d67561e73 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Sun, 5 Jan 2025 15:47:34 +0100 Subject: [PATCH 6/7] fix pre-commit --- CHANGELOG.md | 252 ++++++++--------- README.md | 28 +- docs/contributing.md | 16 +- docs/design_doc.md | 270 +++++++++---------- docs/index.md | 6 +- docs/installation.md | 6 +- src/spatialdata/_core/query/spatial_query.py | 13 +- 7 files changed, 300 insertions(+), 291 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ecde18b30..b64e522b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,140 +12,140 @@ and this project adheres to [Semantic Versioning][]. ### Major -- Added attributes at the SpatialData object level (`.attrs`) -- `rasterize_bins()` can now produce a labels element #811 @ArneDefauw +- Added attributes at the SpatialData object level (`.attrs`) +- `rasterize_bins()` can now produce a labels element #811 @ArneDefauw ### Minor -- Added `asv` benchmark code #784 @berombau -- Validate tables while parsing #808 -- fix join non matching tables #813 +- Added `asv` benchmark code #784 @berombau +- Validate tables while parsing #808 +- fix join non matching tables #813 ### Fixed -- Relaxed `fsspec` requirement #798 -- Fix for `to_polygons` when using processed instead of threads in Dask #756 @ArneDefauw -- Fix `transform_to_data_extent` converting labels to images #791 @aeisenbarth +- Relaxed `fsspec` requirement #798 +- Fix for `to_polygons` when using processed instead of threads in Dask #756 @ArneDefauw +- Fix `transform_to_data_extent` converting labels to images #791 @aeisenbarth ## [0.2.6] - 2024-11-26 ### Added -- Added `set_channel_names` method to `SpatialData` to change the channel names of an - image element in `SpatialData` #786 -- Added 
`write_channel_names` method to `SpatialData` to overwrite channel metadata on disk - without overwriting the image array itself. #786 +- Added `set_channel_names` method to `SpatialData` to change the channel names of an + image element in `SpatialData` #786 +- Added `write_channel_names` method to `SpatialData` to overwrite channel metadata on disk + without overwriting the image array itself. #786 ### Changed -- Argument `c_coords` is moved out of kwargs for the `ImageModel`s. #779 -- `get_channels` is marked for deprecation in `SpatialData` v0.3.0. Function is replaced - by `get_channel_names` #786 -- Updated dependency of `multiscale-spatial-image` #792 -- Adjust to new version of `xarray` with `DataTree` # 752 +- Argument `c_coords` is moved out of kwargs for the `ImageModel`s. #779 +- `get_channels` is marked for deprecation in `SpatialData` v0.3.0. Function is replaced + by `get_channel_names` #786 +- Updated dependency of `multiscale-spatial-image` #792 +- Adjust to new version of `xarray` with `DataTree` # 752 ### Fixed -- Updated deprecated default stages of `pre-commit` #771 -- Preserve points `feature_key` during queries #794 +- Updated deprecated default stages of `pre-commit` #771 +- Preserve points `feature_key` during queries #794 ## [0.2.5] - 2024-11-06 ### Fixed -- Incompatibility issues due to newest release of `multiscale-spatial-image` #760 +- Incompatibility issues due to newest release of `multiscale-spatial-image` #760 ## [0.2.4] - 2024-11-06 ### Major -- Enable vectorization of `bounding_box_query` for all `SpatialData` elements. #699 +- Enable vectorization of `bounding_box_query` for all `SpatialData` elements. 
#699 ### Minor -- Added `shortest_path` parameter to `get_transformation_between_coordinate_systems` #714 -- Added `get_pyramid_levels()` utils API #719 -- Improved ergonomics of `concatenate()` when element names are non-unique #720 -- Improved performance of writing images with multiscales #577 +- Added `shortest_path` parameter to `get_transformation_between_coordinate_systems` #714 +- Added `get_pyramid_levels()` utils API #719 +- Improved ergonomics of `concatenate()` when element names are non-unique #720 +- Improved performance of writing images with multiscales #577 ## [0.2.3] - 2024-09-25 ### Minor -- Added `clip: bool = False` parameter to `polygon_query()` #670 -- Add `sort` parameter to `PointsModel.parse()` #672 +- Added `clip: bool = False` parameter to `polygon_query()` #670 +- Add `sort` parameter to `PointsModel.parse()` #672 ### Fixed -- Fix interpolation artifact multiscale computation for labels #697 +- Fix interpolation artifact multiscale computation for labels #697 ## [0.2.2] - 2024-08-07 ### Major -- New disk format for shapes using `GeoParquet` (the change is backward compatible) #542 +- New disk format for shapes using `GeoParquet` (the change is backward compatible) #542 ### Minor -- Add `return_background` as argument to `get_centroids` and `get_element_instances` #621 -- Ability to save data using older disk formats #542 +- Add `return_background` as argument to `get_centroids` and `get_element_instances` #621 +- Ability to save data using older disk formats #542 ### Fixed -- Circles validation now checks for inf or nan radii #653 -- Bug with table name in torch dataset #654 @LLehner +- Circles validation now checks for inf or nan radii #653 +- Bug with table name in torch dataset #654 @LLehner ## [0.2.1] - 2024-07-04 ### Minor -- Relaxing `spatial-image` package requirement #616 +- Relaxing `spatial-image` package requirement #616 ## [0.2.0] - 2024-07-03 ### Changed -- Using `DataArray` directly instead of the subclass `SpatialImage` 
(removed install constraint for the `spatial_image` package) #587 -- Using `DataTree` directly instead of the subclass `MultiscaleSpatialImage` (removed install constraint for the `multiscale_spatial_image` package) #587 -- Changed `element`parameter (deprecation in v0.3.0) of `transform_element_to_coordinate_system` to a string `element_name` #611 +- Using `DataArray` directly instead of the subclass `SpatialImage` (removed install constraint for the `spatial_image` package) #587 +- Using `DataTree` directly instead of the subclass `MultiscaleSpatialImage` (removed install constraint for the `multiscale_spatial_image` package) #587 +- Changed `element`parameter (deprecation in v0.3.0) of `transform_element_to_coordinate_system` to a string `element_name` #611 ### Major -- Added operation: `to_polygons()` @quentinblampey #560 -- Extended `rasterize()` to support all the data types @quentinblampey #566 -- Added operation: `rasterize_bins()` @quentinblampey #578 -- Added operation: `map_raster()` to apply functions block-wise to raster data @ArneDefauw #588 +- Added operation: `to_polygons()` @quentinblampey #560 +- Extended `rasterize()` to support all the data types @quentinblampey #566 +- Added operation: `rasterize_bins()` @quentinblampey #578 +- Added operation: `map_raster()` to apply functions block-wise to raster data @ArneDefauw #588 ### Minor -- Removed `pygeos` dependency @omsai #545 -- Channel coordinate annotations on images now persist through `rasterize()` @clwgg #544 -- Added `datasets` module -- Extended `get_values()` to `AnnData` tables #579 -- Added `get_element_instances()` (replaces `_get_unique_label_values_as_index()`) #582 -- Added `get_element_annotators()`, retrieving the tables that annotate a particular SpatialElement #595 +- Removed `pygeos` dependency @omsai #545 +- Channel coordinate annotations on images now persist through `rasterize()` @clwgg #544 +- Added `datasets` module +- Extended `get_values()` to `AnnData` tables #579 +- 
Added `get_element_instances()` (replaces `_get_unique_label_values_as_index()`) #582 +- Added `get_element_annotators()`, retrieving the tables that annotate a particular SpatialElement #595 ### Fixed -- Preserve channel names of multi-scale images in `transform` (#379) -- Fix `filter_by_coordinate_system` with SpatialData object having a table not annotating an element (#619) +- Preserve channel names of multi-scale images in `transform` (#379) +- Fix `filter_by_coordinate_system` with SpatialData object having a table not annotating an element (#619) ## [0.1.2] - 2024-03-30 ### Minor -- Made `get_channels()` public. -- Added utils `force_2d()` to force 3D shapes to 2D (this is a temporary solution until `.force_2d()` is available in `geopandas`). +- Made `get_channels()` public. +- Added utils `force_2d()` to force 3D shapes to 2D (this is a temporary solution until `.force_2d()` is available in `geopandas`). ## [0.1.1] - 2024-03-28 ### Added -- Added method `update_annotated_regions_metadata() which updates the `region`value automatically from the`region_key` columns +- Added method `update_annotated_regions_metadata() which updates the `region`value automatically from the`region_key` columns ### Changed -- Renamed `join_sdata_spatialelement_table` to `join_spatialelement_table`, and made it work also without `SpatialData` objects. +- Renamed `join_sdata_spatialelement_table` to `join_spatialelement_table`, and made it work also without `SpatialData` objects. ## [0.1.0] - 2024-03-24 @@ -153,70 +153,70 @@ and this project adheres to [Semantic Versioning][]. #### Major -- Implemented support in `SpatialData` for storing multiple tables. -- These tables can annotate a `SpatialElement` but now not necessarily so. -- Deprecated `.table` attribute in favor of `.tables` dict-like accessor. +- Implemented support in `SpatialData` for storing multiple tables. +- These tables can annotate a `SpatialElement` but now not necessarily so. 
+- Deprecated `.table` attribute in favor of `.tables` dict-like accessor. -- Added join operations -- Added SQL like joins that can be executed by calling one public function `join_sdata_spatialelement_table`. The following joins are supported: `left`, `left_exclusive`, `right`, `right_exclusive` and `inner`. The function has an option to match rows. For `left` only matching `left` is supported and for `right` join only `right` matching of rows is supported. Not all joins are supported for `Labels` elements. -- Added function `match_element_to_table` which allows the user to perform a right join of `SpatialElement`(s) with a table with rows matching the row order in the table. +- Added join operations +- Added SQL like joins that can be executed by calling one public function `join_sdata_spatialelement_table`. The following joins are supported: `left`, `left_exclusive`, `right`, `right_exclusive` and `inner`. The function has an option to match rows. For `left` only matching `left` is supported and for `right` join only `right` matching of rows is supported. Not all joins are supported for `Labels` elements. +- Added function `match_element_to_table` which allows the user to perform a right join of `SpatialElement`(s) with a table with rows matching the row order in the table. -- Incremental IO of data and metadata: -- Increased in-memory vs on-disk control: changes performed in-memory (e.g. adding a new image) are not automatically performed on-disk. -- Deprecated `add_image()`, `add_labels()`, `add_shapes()`, `add_points()` in favor of `.images`, `.labels`, `.shapes`, `.points` dict-like accessors. 
-- new methods `write_element()`, `write_transformations()`, `write_metadata()`, `remove_element_from_disk()` -- new methods `write_consolidated_metadata()` and `has_consolidated_metadata()` -- deprecated `save_transformations()` -- improved `__repr__()` with information on Zarr storage and Dask-backed files -- new utils `is_self_contained()`, `describe_elements_are_self_contained()` -- new utils `element_paths_in_memory()`, `element_paths_on_disk()` +- Incremental IO of data and metadata: +- Increased in-memory vs on-disk control: changes performed in-memory (e.g. adding a new image) are not automatically performed on-disk. +- Deprecated `add_image()`, `add_labels()`, `add_shapes()`, `add_points()` in favor of `.images`, `.labels`, `.shapes`, `.points` dict-like accessors. +- new methods `write_element()`, `write_transformations()`, `write_metadata()`, `remove_element_from_disk()` +- new methods `write_consolidated_metadata()` and `has_consolidated_metadata()` +- deprecated `save_transformations()` +- improved `__repr__()` with information on Zarr storage and Dask-backed files +- new utils `is_self_contained()`, `describe_elements_are_self_contained()` +- new utils `element_paths_in_memory()`, `element_paths_on_disk()` #### Minor -- Multiple table helper functions -- Added public helper function `get_table_keys()` in `spatialdata.models` to retrieve annotation information of a given table. -- Added public helper function `check_target_region_column_symmetry()` in `spatialdata.models` to check whether annotation - metadata in `table.uns['spatialdata_attrs']` corresponds with respective columns in `table.obs`. -- Added function `validate_table_in_spatialdata()` in SpatialData to validate the annotation target of a table being present in the `SpatialData` object. -- Added method `get_annotated_regions()` in `SpatialData` to get the regions annotated by a given table. -- Added method `get_region_key_column()` in `SpatialData` to get the region_key column in table.obs. 
-- Added method `get_instance_key_column()` in `SpatialData` to get the instance_key column in table.obs. -- Added method `set_table_annotates_spatialelement()` in `SpatialData` to either set or change the annotation metadata of a table in a given `SpatialData` object. - Added `table_name` parameter to the `aggregate()` function to allow users to give a custom table name to table resulting from aggregation. -- Added `table_name` parameter to the `get_values()` function. - -- Utils -- Added `gen_spatial_elements()` generator in SpatialData to generate the `SpatialElements` in a given `SpatialData` object. -- Added `gen_elements` generator in `SpatialData` to generate elements of a `SpatialData` object including tables. -- added `SpatialData.subset()` API -- added `SpatialData.locate_element()` API -- added utils function: `get_centroids()` -- added utils function: `deepcopy()` -- added operation: `to_circles()` -- documented previously-added `get_channels()` to retrieve the channel names of a raster element indepently of it being single or multi-scale - -- Transformations-related - - - added utils function: `transform_to_data_extent()` - - added utils function: `are_extents_equal()` - - added utils function: `postpone_transformation()` - - added utils function: `remove_transformations_to_coordinate_system()` - -- added testing utilities: `assert_spatial_data_objects_are_identical()`, `assert_elements_are_identical()`, `assert_elements_dict_are_identical()` +- Multiple table helper functions +- Added public helper function `get_table_keys()` in `spatialdata.models` to retrieve annotation information of a given table. +- Added public helper function `check_target_region_column_symmetry()` in `spatialdata.models` to check whether annotation + metadata in `table.uns['spatialdata_attrs']` corresponds with respective columns in `table.obs`. 
+- Added function `validate_table_in_spatialdata()` in SpatialData to validate the annotation target of a table being present in the `SpatialData` object. +- Added method `get_annotated_regions()` in `SpatialData` to get the regions annotated by a given table. +- Added method `get_region_key_column()` in `SpatialData` to get the region_key column in table.obs. +- Added method `get_instance_key_column()` in `SpatialData` to get the instance_key column in table.obs. +- Added method `set_table_annotates_spatialelement()` in `SpatialData` to either set or change the annotation metadata of a table in a given `SpatialData` object. - Added `table_name` parameter to the `aggregate()` function to allow users to give a custom table name to table resulting from aggregation. +- Added `table_name` parameter to the `get_values()` function. + +- Utils +- Added `gen_spatial_elements()` generator in SpatialData to generate the `SpatialElements` in a given `SpatialData` object. +- Added `gen_elements` generator in `SpatialData` to generate elements of a `SpatialData` object including tables. 
+- added `SpatialData.subset()` API +- added `SpatialData.locate_element()` API +- added utils function: `get_centroids()` +- added utils function: `deepcopy()` +- added operation: `to_circles()` +- documented previously-added `get_channels()` to retrieve the channel names of a raster element indepently of it being single or multi-scale + +- Transformations-related + + - added utils function: `transform_to_data_extent()` + - added utils function: `are_extents_equal()` + - added utils function: `postpone_transformation()` + - added utils function: `remove_transformations_to_coordinate_system()` + +- added testing utilities: `assert_spatial_data_objects_are_identical()`, `assert_elements_are_identical()`, `assert_elements_dict_are_identical()` ### Changed/fixed #### Major -- refactored data loader for deep learning -- refactored `SpatialData.write()` to be more robust -- generalized spatial queries to any combination of 2D/3D data and 2D/3D query region #409 +- refactored data loader for deep learning +- refactored `SpatialData.write()` to be more robust +- generalized spatial queries to any combination of 2D/3D data and 2D/3D query region #409 #### Minor -- Changed the string representation of `SpatialData` to reflect the changes in regard to multiple tables and incremental IO. -- improved usability and robustness of `sdata.write()` when `overwrite=True` @aeisenbarth -- fixed warnings for categorical dtypes in tables in `TableModel` and `PointsModel` -- fixed wrong order of points after spatial queries +- Changed the string representation of `SpatialData` to reflect the changes in regard to multiple tables and incremental IO. +- improved usability and robustness of `sdata.write()` when `overwrite=True` @aeisenbarth +- fixed warnings for categorical dtypes in tables in `TableModel` and `PointsModel` +- fixed wrong order of points after spatial queries ## [0.0.14] - 2023-10-11 @@ -224,105 +224,105 @@ and this project adheres to [Semantic Versioning][]. 
#### Minor -- new API: sdata.rename_coordinate_systems() +- new API: sdata.rename_coordinate_systems() #### Technical -- decompose affine transformation into simpler transformations -- remove padding for blobs() +- decompose affine transformation into simpler transformations +- remove padding for blobs() #### Major -- get_extent() function to compute bounding box of the data +- get_extent() function to compute bounding box of the data #### Minor -- testing against pre-release packages +- testing against pre-release packages ### Fixed -- Fixed bug with get_values(): ignoring background channel in labels +- Fixed bug with get_values(): ignoring background channel in labels ## [0.0.13] - 2023-10-02 ### Added -- polygon_query() support for images #358 +- polygon_query() support for images #358 ### Fixed -- Fix missing c_coords argument in blobs multiscale #342 -- Replaced hardcoded string with instance_key #346 +- Fix missing c_coords argument in blobs multiscale #342 +- Replaced hardcoded string with instance_key #346 ## [0.0.12] - 2023-06-24 ### Added -- Add multichannel blobs sample data (by @melonora) +- Add multichannel blobs sample data (by @melonora) ## [0.0.11] - 2023-06-21 ### Improved -- Aggregation APIs. +- Aggregation APIs. ## [0.0.10] - 2023-06-06 ### Fixed -- Fix blobs (#282) +- Fix blobs (#282) ## [0.0.9] - 2023-05-23 ### Updated -- Update napari-spatialdata pin (#279) -- pin typing-extensions +- Update napari-spatialdata pin (#279) +- pin typing-extensions ## [0.0.8] - 2023-05-22 ### Merged -- Merge pull request #271 from scverse/fix/aggregation +- Merge pull request #271 from scverse/fix/aggregation ## [0.0.7] - 2023-05-20 ### Updated -- Update readme +- Update readme ## [0.0.6] - 2023-05-10 ### Added -- This release adds polygon spatial query. +- This release adds polygon spatial query. 
## [0.0.5] - 2023-05-05 ### Fixed -- fix tests badge (#242) +- fix tests badge (#242) ## [0.0.4] - 2023-05-04 ### Tested -- This release tests distribution via pypi +- This release tests distribution via pypi ## [0.0.3] - 2023-05-02 ### Added -- This is an alpha release to test the release process. +- This is an alpha release to test the release process. ## [0.0.2] - 2023-05-02 ### Added -- make version dynamic +- make version dynamic ## [0.0.1.dev1] - 2023-03-25 ### Added -- Dev version, not official release yet +- Dev version, not official release yet diff --git a/README.md b/README.md index 38d3a16e0..bd451203f 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,9 @@ SpatialData is a data framework that comprises a FAIR storage format and a collection of python libraries for performant access, alignment, and processing of uni- and multi-modal spatial omics datasets. This repository contains the core spatialdata library. See the links below to learn more about other packages in the SpatialData ecosystem. -- [spatialdata-io](https://github.com/scverse/spatialdata-io): load data from common spatial omics technologies into spatialdata. -- [spatialdata-plot](https://github.com/scverse/spatialdata-plot): Static plotting library for spatialdata. -- [napari-spatialdata](https://github.com/scverse/napari-spatialdata): napari plugin for interactive exploration and annotation of spatial data. +- [spatialdata-io](https://github.com/scverse/spatialdata-io): load data from common spatial omics technologies into spatialdata. +- [spatialdata-plot](https://github.com/scverse/spatialdata-plot): Static plotting library for spatialdata. +- [napari-spatialdata](https://github.com/scverse/napari-spatialdata): napari plugin for interactive exploration and annotation of spatial data. [//]: # "numfocus-fiscal-sponsor-attribution" @@ -33,16 +33,16 @@ The spatialdata project also received support by the Chan Zuckerberg Initiative. 
![SpatialDataOverview](https://github.com/scverse/spatialdata/assets/1120672/cb91071f-12a7-4b8e-9430-2b3a0f65e52f) -- **The library is currently under review.** We expect there to be changes as the community provides feedback. We have an announcement channel for communicating these changes, please see the contact section below. -- The SpatialData storage format is built on top of the [OME-NGFF](https://ngff.openmicroscopy.org/latest/) specification. +- **The library is currently under review.** We expect there to be changes as the community provides feedback. We have an announcement channel for communicating these changes, please see the contact section below. +- The SpatialData storage format is built on top of the [OME-NGFF](https://ngff.openmicroscopy.org/latest/) specification. ## Getting started Please refer to the [documentation][link-docs]. In particular: -- [API documentation][link-api]. -- [Design doc][link-design-doc]. -- [Example notebooks][link-notebooks]. +- [API documentation][link-api]. +- [Design doc][link-design-doc]. +- [Example notebooks][link-notebooks]. Another useful resource to get started is the source code of the [`spatialdata-io`](https://github.com/scverse/spatialdata-io) package, which shows example of how to read data from common technologies. @@ -62,20 +62,20 @@ mamba install -c conda-forge spatialdata napari-spatialdata spatialdata-io spati ## Limitations -- Code only manually tested for Windows machines. Currently the framework is being developed using Linux, macOS and Windows machines, but it is automatically tested only for Linux and macOS machines. +- Code only manually tested for Windows machines. Currently the framework is being developed using Linux, macOS and Windows machines, but it is automatically tested only for Linux and macOS machines. ## Contact To get involved in the discussion, or if you need help to get started, you are welcome to use the following options. 
-- Chat via [`scverse` Zulip](https://scverse.zulipchat.com/#narrow/stream/315824-spatial) (public or 1 to 1). -- Forum post in the [scverse discourse forum](https://discourse.scverse.org/). -- Bug report/feature request via the [GitHub issue tracker][issue-tracker]. -- Zoom call as part of the SpatialData Community Meetings, held every 2 weeks on Thursday, [schedule here](https://hackmd.io/enWU826vRai-JYaL7TZaSw). +- Chat via [`scverse` Zulip](https://scverse.zulipchat.com/#narrow/stream/315824-spatial) (public or 1 to 1). +- Forum post in the [scverse discourse forum](https://discourse.scverse.org/). +- Bug report/feature request via the [GitHub issue tracker][issue-tracker]. +- Zoom call as part of the SpatialData Community Meetings, held every 2 weeks on Thursday, [schedule here](https://hackmd.io/enWU826vRai-JYaL7TZaSw). Finally, especially relevant for for developers that are building a library upon `spatialdata`, please follow this channel for: -- Announcements on new features and important changes [Zulip](https://imagesc.zulipchat.com/#narrow/stream/329057-scverse/topic/spatialdata.20announcements). +- Announcements on new features and important changes [Zulip](https://imagesc.zulipchat.com/#narrow/stream/329057-scverse/topic/spatialdata.20announcements). ## Citation diff --git a/docs/contributing.md b/docs/contributing.md index a2aad91c7..7c88b1637 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -143,10 +143,10 @@ in the cookiecutter-scverse template. Please write documentation for new or changed features and use-cases. This project uses [sphinx][] with the following features: -- the [myst][] extension allows to write documentation in markdown/Markedly Structured Text -- [Numpy-style docstrings][numpydoc] (through the [napoloen][numpydoc-napoleon] extension). 
-- Jupyter notebooks as tutorials through [myst-nb][] (See [Tutorials with myst-nb](#tutorials-with-myst-nb-and-jupyter-notebooks)) -- [Sphinx autodoc typehints][], to automatically reference annotated input and output types +- the [myst][] extension allows to write documentation in markdown/Markedly Structured Text +- [Numpy-style docstrings][numpydoc] (through the [napoleon][numpydoc-napoleon] extension). +- Jupyter notebooks as tutorials through [myst-nb][] (See [Tutorials with myst-nb](#tutorials-with-myst-nb-and-jupyter-notebooks)) +- [Sphinx autodoc typehints][], to automatically reference annotated input and output types See the [scanpy developer docs](https://scanpy.readthedocs.io/en/latest/dev/documentation.html) for more information on how to write documentation. @@ -163,10 +163,10 @@ repository. #### Hints -- If you refer to objects from other packages, please add an entry to `intersphinx_mapping` in `docs/conf.py`. Only - if you do so can sphinx automatically create a link to the external documentation. -- If building the documentation fails because of a missing link that is outside your control, you can add an entry to - the `nitpick_ignore` list in `docs/conf.py` +- If you refer to objects from other packages, please add an entry to `intersphinx_mapping` in `docs/conf.py`. Only + if you do so can sphinx automatically create a link to the external documentation. +- If building the documentation fails because of a missing link that is outside your control, you can add an entry to + the `nitpick_ignore` list in `docs/conf.py` #### Building the docs locally diff --git a/docs/design_doc.md b/docs/design_doc.md index 8dc37b4f7..65fbfc96f 100644 --- a/docs/design_doc.md +++ b/docs/design_doc.md @@ -10,72 +10,72 @@ Recent advances in molecular profiling technologies allow to measure abundance o The goals define _what_ SpatialData will be able to do (as opposed to _how_).
Goals can have the following priority levels: -- P0: highest priority, required for successful implementation (i.e., must have) -- P1: high priority, but not required (i.e., nice to have) -- P2: nice to have, but not a priority +- P0: highest priority, required for successful implementation (i.e., must have) +- P1: high priority, but not required (i.e., nice to have) +- P2: nice to have, but not a priority **1. Load data from modern spatial multiomics experiments** -- P0. Data can be loaded from the OME-NGFF and saved to OME-NGFF. - - [x] multiscale images and labels, 2d and 3d - - [x] point clouds - - [x] polygon-shaped regions of interest - - [x] circle-shaped regions of interest - - [x] tables - - [x] graphs -- P0. Data can be loaded lazily. - - [x] Images - - [x] Points - - [ ] (P1) Shapes https://github.com/scverse/spatialdata/issues/359 -- P1. - - [x] Loaded data can be iterated over to generate tiles for multiprocessing and deep learning. +- P0. Data can be loaded from the OME-NGFF and saved to OME-NGFF. + - [x] multiscale images and labels, 2d and 3d + - [x] point clouds + - [x] polygon-shaped regions of interest + - [x] circle-shaped regions of interest + - [x] tables + - [x] graphs +- P0. Data can be loaded lazily. + - [x] Images + - [x] Points + - [ ] (P1) Shapes https://github.com/scverse/spatialdata/issues/359 +- P1. + - [x] Loaded data can be iterated over to generate tiles for multiprocessing and deep learning. **2. Align different datasets via affine transformations** -- [x] P0. Transformations can be loaded from and written to OME-NGFF. -- [x] P0. Identity transformation -- [x] P0. Affine transformations. - - [x] scale - - [x] translation - - [x] rotation -- [x] P0. Support definition of common coordinate systems across datasets (i.e., extrinsic coordinate systems). -- [x] P0. Sequence of transformation. -- Utils - - [x] P0 permute axis -- [ ] P2. non-linear - - [ ] coordinates and displacements +- [x] P0. 
Transformations can be loaded from and written to OME-NGFF. +- [x] P0. Identity transformation +- [x] P0. Affine transformations. + - [x] scale + - [x] translation + - [x] rotation +- [x] P0. Support definition of common coordinate systems across datasets (i.e., extrinsic coordinate systems). +- [x] P0. Sequence of transformation. +- Utils + - [x] P0 permute axis +- [ ] P2. non-linear + - [ ] coordinates and displacements **3. Performant spatial query of multimodal spatial datasets** -- [x] P0. Support querying a multimodal dataset for all data in a specified region (at the cost of creating spatial index every time). - - [x] Arbitrary bounding boxes - - [x] Polygons or regions of interest (ball, shape) +- [x] P0. Support querying a multimodal dataset for all data in a specified region (at the cost of creating spatial index every time). + - [x] Arbitrary bounding boxes + - [x] Polygons or regions of interest (ball, shape) **4. Aggregate observations by regions of interest** -- [x] P0. Support aggregation functions with standard summary statistics - - [x] mean - - [x] sum - - [x] count -- [x] P1. User-defined aggregation function +- [x] P0. Support aggregation functions with standard summary statistics + - [x] mean + - [x] sum + - [x] count +- [x] P1. User-defined aggregation function ### Non-goals -- _SpatialData_ is not an analysis library. Instead the aim is to provide an infrastructure to analysis libraries for IO and spatial queries. -- _SpatialData_ is not a format converter. We should not support converting to/from too many formats and instead use OME-NGFF as the interchange format. Nevertheless,[spatialdata-io][] offers a place for some common data conversions (external contributions are highly encouraged). -- _SpatialData_ is based on standard on-disk storage formats (Zarr and Parquet) and on existing specifications (NGFF, AnnData) and uses existing solutions when possible. 
The resulting storage objects which brings together these technologies defines the _SpatialData on-disk format_, which is described in this document and finely characterized in [this online resource](https://github.com/scverse/spatialdata-notebooks/tree/main/notebooks/developers_resources/storage_format). +- _SpatialData_ is not an analysis library. Instead the aim is to provide an infrastructure to analysis libraries for IO and spatial queries. +- _SpatialData_ is not a format converter. We should not support converting to/from too many formats and instead use OME-NGFF as the interchange format. Nevertheless,[spatialdata-io][] offers a place for some common data conversions (external contributions are highly encouraged). +- _SpatialData_ is based on standard on-disk storage formats (Zarr and Parquet) and on existing specifications (NGFF, AnnData) and uses existing solutions when possible. The resulting storage objects which brings together these technologies defines the _SpatialData on-disk format_, which is described in this document and finely characterized in [this online resource](https://github.com/scverse/spatialdata-notebooks/tree/main/notebooks/developers_resources/storage_format). ## Satellite projects We strongly encourage collaborations and community supports in all of these projects. -- [x] P0. _Visualization_: we are developing a napari plugin for interactive visualization of _SpatialData_ objects @ [napari-spatialdata][]. -- [x] P0. _Raw data IO_: we are implementing readers for raw data of common spatial omics technologies @ [spatialdata-io][]. -- [x] P1. _Static plotting_: a static plotting library for _SpatialData_ @ [spatialdata-plot][]. -- [ ] P2. _Image analysis_: Library to perform image analysis, wrapping common analysis library in python such as skimage. - Once ready, we will deprecate such functionalities in [squidpy][]. -- [ ] P2. _Spatial and graph analysis_: [squidpy][] will be refactor to accept SpatialData objects as input. -- [ ] P2. 
_Database_: Some form of update on released datasets with updated specs as development progresses. A temporary sandbox where we store downloader and converter scripts for representative datasets is available @ [spatialdata-sandbox][]. +- [x] P0. _Visualization_: we are developing a napari plugin for interactive visualization of _SpatialData_ objects @ [napari-spatialdata][]. +- [x] P0. _Raw data IO_: we are implementing readers for raw data of common spatial omics technologies @ [spatialdata-io][]. +- [x] P1. _Static plotting_: a static plotting library for _SpatialData_ @ [spatialdata-plot][]. +- [ ] P2. _Image analysis_: Library to perform image analysis, wrapping common analysis library in python such as skimage. + Once ready, we will deprecate such functionalities in [squidpy][]. +- [ ] P2. _Spatial and graph analysis_: [squidpy][] will be refactor to accept SpatialData objects as input. +- [ ] P2. _Database_: Some form of update on released datasets with updated specs as development progresses. A temporary sandbox where we store downloader and converter scripts for representative datasets is available @ [spatialdata-sandbox][]. @@ -107,12 +107,12 @@ The `SpatialData` object contains a set of Elements to be used for analysis. Ele We model a spatial dataset as a composition of distinct elements, of any type. 
The elements correspond to: -- Pixel-based _Images_, 2D or 3D -- Regions of interest - - _Shapes_ (circles, polygons, multipolygons), 2D - - Pixel masks (such as segmentation masks), aka _Labels_, 2D, or 3D -- Points (such as transcript locations, point clouds, ...), 2D or 3D -- _Tables_ of annotations +- Pixel-based _Images_, 2D or 3D +- Regions of interest + - _Shapes_ (circles, polygons, multipolygons), 2D + - Pixel masks (such as segmentation masks), aka _Labels_, 2D, or 3D +- Points (such as transcript locations, point clouds, ...), 2D or 3D +- _Tables_ of annotations Each of these elements should be useful by itself, and in combination with other relevant elements. All elements are stored in the Zarr container in hierarchy store that MAY be flat; currently Zarr hierarchies are not supported, [see here](https://github.com/scverse/spatialdata/issues/340)). @@ -124,29 +124,29 @@ By decomposing the data model into building blocks (i.e. Elements) we support th _SpatialData_ follows the OME-NGFF specifications whenever possible and therefore much of its assumptions are inherited from it. Extra assumptions will be discussed with the OME-NGFF community and adapted to the community-agreed design. The key assumptions are the following: -- `Images`, `Labels`, `Points` and `Shapes` MUST have one or more _coordinate systems_ and _coordinate transformations_. -- `Tables` CAN NOT have a _coordinate system_ or _coordinate transforms_. Tables should not contain spatial coordinate: the user can decided to store them there, but they will not be processed by the library and needs to placed in a element and a coordiante system to be recognized by the framework. -- `Labels` and `Shapes` are both instances of `Regions`, `Regions` are `Elements`. -- Any `Element` MAY be annotated by `Tables`; also `Shapes` and `Points` MAY contain annotations within themselves as additional dataframe columns (e.g. intensity of point spread function of a each point, or gene id). 
-- `Tables` CAN NOT be annotated by other `Tables`. +- `Images`, `Labels`, `Points` and `Shapes` MUST have one or more _coordinate systems_ and _coordinate transformations_. +- `Tables` CAN NOT have a _coordinate system_ or _coordinate transforms_. Tables should not contain spatial coordinates: the user can decide to store them there, but they will not be processed by the library and need to be placed in an element and a coordinate system to be recognized by the framework. +- `Labels` and `Shapes` are both instances of `Regions`, `Regions` are `Elements`. +- Any `Element` MAY be annotated by `Tables`; also `Shapes` and `Points` MAY contain annotations within themselves as additional dataframe columns (e.g. intensity of point spread function of each point, or gene id). +- `Tables` CAN NOT be annotated by other `Tables`. #### Images Images of a sample. Should conform to the [OME-NGFF concept of an image](https://ngff.openmicroscopy.org/latest/#image-layout). Images are n-dimensional arrays where each element of an array is a pixel of an image. These arrays have labelled dimensions which correspond to: -- Spatial dimensions (height and width). -- Imaging or feature channels. -- Z-stacks. +- Spatial dimensions (height and width). +- Imaging or feature channels. +- Z-stacks. We require the following axes (in the following order): -- 2D images: cyx -- 3D images: czyx +- 2D images: cyx +- 3D images: czyx Other ordering or axes neames are currently not supported. -- [ ] P2 We will support also time-point axes in the future. Furthermore, thanks to NGFF specs v0.5, such axes will not have name constraints (although they do for first iteration due to NGFF specs v0.4). +- [ ] P2 We will support also time-point axes in the future. Furthermore, thanks to NGFF specs v0.5, such axes will not have name constraints (although they do for first iteration due to NGFF specs v0.4). The image object itself builds on prior art in image analysis, in particular the [xarray library][].
@@ -172,19 +172,19 @@ The xarray coordinates are not saved in the NGFF storage. APIs to take into acco Regions of interest define distinct regions of space that can be used to select and aggregate observations. For instance, regions can correspond to -- Tissues -- Tissue structures -- Clinical annotations -- Multi-cellular communities -- Cells -- Subcellular structures -- Physical structures from the assay (e.g. Visium "spots") -- Synthetic regions created by analysts (e.g. output of algorithms) +- Tissues +- Tissue structures +- Clinical annotations +- Multi-cellular communities +- Cells +- Subcellular structures +- Physical structures from the assay (e.g. Visium "spots") +- Synthetic regions created by analysts (e.g. output of algorithms) As an example, regions can be used for: -- subsetting observations (e.g., get all observations in a given region) -- aggregating observations (e.g., count all observations in an region) +- subsetting observations (e.g., get all observations in a given region) +- aggregating observations (e.g., count all observations in an region) Regions can be defined in multiple ways. @@ -197,8 +197,8 @@ The Python data structures used for Labels are the same one that we discussed fo We require the following axes (in the following order): -- 2D labels: yx -- 3D labels: zyx +- 2D labels: yx +- 3D labels: zyx ##### Shapes @@ -216,24 +216,24 @@ Coordinates of points for single molecule data. Each observation is a point, and Current implementation represent points as a Parquet file and a [`dask.dataframe.DataFrame`](https://docs.dask.org/en/stable/dataframe.html) in memory. The requirements are the following: -- The table MUST contains axis name to represent the axes. - - If it's 2D, the axes should be `["x","y"]`. - - If it's 3D, the axes should be `["x","y","z"]`. -- It MUST also contains coordinates transformations in `dask.dataframe.DataFrame().attrs["transform"]`. +- The table MUST contains axis name to represent the axes. 
+ - If it's 2D, the axes should be `["x","y"]`. + - If it's 3D, the axes should be `["x","y","z"]`. +- It MUST also contains coordinates transformations in `dask.dataframe.DataFrame().attrs["transform"]`. Additional information is stored in `dask.dataframe.DataFrame().attrs["spatialdata_attrs"]` -- It MAY also contains `"feature_key"`, that is, the column name of the table that refers to the features. This `Series` MAY be of type `pandas.Categorical`. -- It MAY contains additional information in `dask.dataframe.DataFrame().attrs["spatialdata_attrs"]`, specifically: - - `"instance_key"`: the column name of the table where unique instance ids that this point refers to are stored, if available. +- It MAY also contains `"feature_key"`, that is, the column name of the table that refers to the features. This `Series` MAY be of type `pandas.Categorical`. +- It MAY contains additional information in `dask.dataframe.DataFrame().attrs["spatialdata_attrs"]`, specifically: + - `"instance_key"`: the column name of the table where unique instance ids that this point refers to are stored, if available. #### Table (table of annotations for regions) Annotations of regions of interest. Each row in this table corresponds to a single region on the coordinate space. This is represented as an `AnnData` object to allow for complex annotations on the data. This includes: -- multivariate feature support, e.g. a matrix of dimensions regions x variables; -- annotations on top of the features or of the observations. E.g. calculated statistic, prior knowledge based annotations, cell types etc. -- graphs of observations or variables. These can be spatial graphs, nearest neighbor networks based on feature similarity, etc. +- multivariate feature support, e.g. a matrix of dimensions regions x variables; +- annotations on top of the features or of the observations. E.g. calculated statistic, prior knowledge based annotations, cell types etc. +- graphs of observations or variables. 
These can be spatial graphs, nearest neighbor networks based on feature similarity, etc. One region table can refer to multiple sets of Regions. But each row can map to only one region in its Regions element. For example, one region table can store annotation for multiple slides, though each slide would have its own label element. @@ -247,21 +247,21 @@ If any of `region`, `region_key` and `instance_key` are defined, they all MUST b In `spatialdata-io` we use a consistent naming scheme for the `region_key` and `instance_key` column, which is suggested (but not required): -- we use the name `'region'` as the default name for the `region_key` column; -- we use the name `'instance_id'` as the default name for the `instance_key` column. +- we use the name `'region'` as the default name for the `region_key` column; +- we use the name `'instance_id'` as the default name for the `instance_key` column. ### Summary -- Image `type: Image` -- Regions `type: Union[Labels, Shapes]` - - Labels `type: Labels` - - Shapes `type: Shapes` -- Points `type: Points` -- Tables `type: Table` +- Image `type: Image` +- Regions `type: Union[Labels, Shapes]` + - Labels `type: Labels` + - Shapes `type: Shapes` +- Points `type: Points` +- Tables `type: Table` #### Open discussions -- Points vs Circles [discussion](https://github.com/scverse/spatialdata/issues/46) +- Points vs Circles [discussion](https://github.com/scverse/spatialdata/issues/46) ### Transforms and coordinate systems @@ -276,14 +276,14 @@ There are two types of coordinate systems: intrinsic (called also implicit) and The NGFF specification only operates with images and labels, so it specifies rules for the coordinate systems only for these two types of elements. 
The main points are the following: -- each image/labels MUST have one and only one intrinsic coordinate system; -- each image/labels MAY have a transformation mapping them to one (at last one MUST be present) or more extrinsic coordinate systems; -- a transformation MAY be defined between any two coordinate systems, including intrinsic and extrinsic coordinate systems. +- each image/labels MUST have one and only one intrinsic coordinate system; +- each image/labels MAY have a transformation mapping them to one (at least one MUST be present) or more extrinsic coordinate systems; +- a transformation MAY be defined between any two coordinate systems, including intrinsic and extrinsic coordinate systems. Furthermore, acoording to NGFF, a coordinate system: -- MUST have a name; -- MUST specify all the axes. +- MUST have a name; +- MUST specify all the axes. #### SpatialData approach @@ -292,18 +292,18 @@ Since elements are allowed to have only (a subset of the) c, x, y, z axes and mu In details: -- we don't need to specify the intrinsic coordinate systems, these are inferred from the element schema -- each element MAY have a transformation mapping them to one or more extrinsic coordinate systems +- we don't need to specify the intrinsic coordinate systems, these are inferred from the element schema +- each element MAY have a transformation mapping them to one or more extrinsic coordinate systems Each coordinate system -- MUST have a name -- MAY specify its axes +- MUST have a name +- MAY specify its axes We also have a constraint (that we will relax in the future, [see here](https://github.com/scverse/spatialdata/issues/308)): -- a transformation MAY be defined only between an intrinsic coordinate system and an extrinsic coordinate system -- each element MUST be mapped at least to an extrinsic coordinate system. When no mapping is specified, we define a mapping to the "global" coordinate system via an "Identity" transformation.
+- a transformation MAY be defined only between an intrinsic coordinate system and an extrinsic coordinate system +- each element MUST be mapped at least to an extrinsic coordinate system. When no mapping is specified, we define a mapping to the "global" coordinate system via an "Identity" transformation. #### In-memory representation @@ -344,19 +344,19 @@ This section describes a more detailed timeline of future developments, includin #### Early 2024 -- [ ] Simplify data models - - [x] Use `xarray.DataArray` instead of the subclass `SpatialImage` and `xarray.DataTree` instad of the subclass `MultiscaleSpatialImage` - - [ ] Use `GeoDataFrame` for points -- [ ] More performant disk storage - - [ ] Use `geoparquet` for shapes and points -- [ ] Support for nested hierarchies in NGFF stores -- [x] Start working on multiple tables -- [x] Start working on the transformations refactoring +- [ ] Simplify data models + - [x] Use `xarray.DataArray` instead of the subclass `SpatialImage` and `xarray.DataTree` instead of the subclass `MultiscaleSpatialImage` + - [ ] Use `GeoDataFrame` for points +- [ ] More performant disk storage + - [ ] Use `geoparquet` for shapes and points +- [ ] Support for nested hierarchies in NGFF stores +- [x] Start working on multiple tables +- [x] Start working on the transformations refactoring #### Late 2024 -- [x] Finalize multiple tables support -- [ ] Finalize transformations refactoring +- [x] Finalize multiple tables support +- [ ] Finalize transformations refactoring --- @@ -386,32 +386,32 @@ The layout of some common datasets.
**Layout of [MERFISH example](https://github.com/giovp/spatialdata-sandbox/tree/main/merfish)** -- points (coordinates of spots); -- each point has features (e.g., gene, size, cell assignment); -- segmented cell locations are saved as labels (missing in this example) or approximated as circles of variable diameter; -- gene expression for cells, obtained by counting the points inside each cell; -- large anatomical regions saved as polygons; -- rasterized version of the single molecule points (to mimic the original hires image, missing in this example). +- points (coordinates of spots); +- each point has features (e.g., gene, size, cell assignment); +- segmented cell locations are saved as labels (missing in this example) or approximated as circles of variable diameter; +- gene expression for cells, obtained by counting the points inside each cell; +- large anatomical regions saved as polygons; +- rasterized version of the single molecule points (to mimic the original hires image, missing in this example). **Layout of [Visium example](https://github.com/giovp/spatialdata-sandbox/tree/main/visium)** -- The datasets include multiple slides from the same individual, or slides from multiple samples; -- "Visium spots" (circular regions) where sequences are captured; -- each spot has RNA expression; -- H&E image (multiscale 2D); -- (optional) large microscopy (e.g. 40x magnification, 50K x 50K pixels) images may be available, which would need to be aligned to the rest of spatial elements; -- (optional) cell segmentation labels can be derived from the H&E images; -- (optional) the cell segmentation can be annotated with image-derived features (image features/statistics). +- The datasets include multiple slides from the same individual, or slides from multiple samples; +- "Visium spots" (circular regions) where sequences are captured; +- each spot has RNA expression; +- H&E image (multiscale 2D); +- (optional) large microscopy (e.g. 
40x magnification, 50K x 50K pixels) images may be available, which would need to be aligned to the rest of spatial elements; +- (optional) cell segmentation labels can be derived from the H&E images; +- (optional) the cell segmentation can be annotated with image-derived features (image features/statistics). #### Code/pseudo-code workflows **Workflows to show** -- [x] loading multiple samples visium data from disk (SpaceRanger), concatenating and saving them to .zarr -- [x] loading a generic NGFF dataset -- [ ] calling the SpatialData constructor with some transformations on it -- [x] accumulation with multiple types of elements -- [x] subsetting/querying by coordinate system, bounding box, spatial region, table rows +- [x] loading multiple samples visium data from disk (SpaceRanger), concatenating and saving them to .zarr +- [x] loading a generic NGFF dataset +- [ ] calling the SpatialData constructor with some transformations on it +- [x] accumulation with multiple types of elements +- [x] subsetting/querying by coordinate system, bounding box, spatial region, table rows #### Loading multiple Visium samples from the SpaceRanger output and saving them to NGFF using the SpatialData APIs @@ -538,5 +538,5 @@ sdata1 = sdata.query.table(...) #### Related notes/issues/PRs -- [Issue discussing SpatialData layout](https://github.com/scverse/spatialdata/issues/12) -- [Notes from Basel Hackathon](https://hackmd.io/MPeMr2mbSRmeIzOCgwKbxw) +- [Issue discussing SpatialData layout](https://github.com/scverse/spatialdata/issues/12) +- [Notes from Basel Hackathon](https://hackmd.io/MPeMr2mbSRmeIzOCgwKbxw) diff --git a/docs/index.md b/docs/index.md index 085a4839a..8a0aff416 100644 --- a/docs/index.md +++ b/docs/index.md @@ -8,9 +8,9 @@ SpatialData is a data framework that comprises a FAIR storage format and a collection of python libraries for performant access, alignment, and processing of uni- and multi-modal spatial omics datasets. 
This page provides documentation on how to install, use, and extend the core `spatialdata` library. See the links below to learn more about other packages in the SpatialData ecosystem. -- `spatialdata-io`: load data from common spatial omics technologies into `spatialdata` ([repository][spatialdata-io-repo], [documentation][spatialdata-io-docs]). -- `spatialdata-plot`: Static plotting library for `spatialdata` ([repository][spatialdata-plot-repo], [documentation][spatialdata-plot-docs]). -- `napari-spatialdata-repo`: napari plugin for interactive exploration and annotation of `spatialdata` ([repository][napari-spatialdata-repo], [documentation][napari-spatialdata-docs]). +- `spatialdata-io`: load data from common spatial omics technologies into `spatialdata` ([repository][spatialdata-io-repo], [documentation][spatialdata-io-docs]). +- `spatialdata-plot`: Static plotting library for `spatialdata` ([repository][spatialdata-plot-repo], [documentation][spatialdata-plot-docs]). +- `napari-spatialdata-repo`: napari plugin for interactive exploration and annotation of `spatialdata` ([repository][napari-spatialdata-repo], [documentation][napari-spatialdata-docs]). Please see our publication {cite}`marconatoSpatialDataOpenUniversal2024` for citation and to learn more. diff --git a/docs/installation.md b/docs/installation.md index 7d25ef78c..23c25d858 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -14,9 +14,9 @@ Install `spatialdata` by running: The SpatialData ecosystem is designed to work with the following packages: -- [spatialdata-io][]: `spatialdata` readers and converters for common spatial omics technologies. -- [spatialdata-plot][]: Static plotting library for `spatialdata`. -- [napari-spatialdata][]: napari plugin for `spatialdata`. +- [spatialdata-io][]: `spatialdata` readers and converters for common spatial omics technologies. +- [spatialdata-plot][]: Static plotting library for `spatialdata`. 
+- [napari-spatialdata][]: napari plugin for `spatialdata`. They can be installed with: diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index 110f72e15..a8a8fc251 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -477,7 +477,6 @@ def bounding_box_query( ) -> SpatialElement | SpatialData | None: """ Query a SpatialData object or SpatialElement within a bounding box. - If the object has `points` element, depending on the number of points, it MAY suffer from performance issues. This function can also be accessed as a method of a `SpatialData` object, via `sdata.query.bounding_box(...)`, without specifying `element`. @@ -505,6 +504,11 @@ def bounding_box_query( ------- The SpatialData object or SpatialElement containing the requested data. Eventual empty Elements are omitted by the SpatialData object. + + Notes + ----- + If the object has `points` element, depending on the number of points, it MAY suffer from performance issues. Please + consider filtering the object before calling this function by calling the `subset()` method of `SpatialData`. """ raise RuntimeError("Unsupported type for bounding_box_query: " + str(type(element)) + ".") @@ -523,8 +527,13 @@ def _( new_elements = {} if sdata.points: warnings.warn( - "Your SpatialData object has points element. Thus maybe suffer from performance issues when querying.", + ( + "The object has `points` element. Depending on the number of points, querying MAY suffer from " + "performance issues. Please consider filtering the object before calling this function by calling the " + "`subset()` method of `SpatialData`." 
+ ), UserWarning, + stacklevel=2, ) for element_type in ["points", "images", "labels", "shapes"]: elements = getattr(sdata, element_type) From c5e347a9b7f46a64a31c29abe4b11a07d02fcc8c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 5 Jan 2025 14:49:01 +0000 Subject: [PATCH 7/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- CHANGELOG.md | 252 ++++++++++++++++++++-------------------- README.md | 28 ++--- docs/contributing.md | 16 +-- docs/design_doc.md | 270 +++++++++++++++++++++---------------------- docs/index.md | 6 +- docs/installation.md | 6 +- 6 files changed, 289 insertions(+), 289 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b64e522b2..ecde18b30 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,140 +12,140 @@ and this project adheres to [Semantic Versioning][]. ### Major -- Added attributes at the SpatialData object level (`.attrs`) -- `rasterize_bins()` can now produce a labels element #811 @ArneDefauw +- Added attributes at the SpatialData object level (`.attrs`) +- `rasterize_bins()` can now produce a labels element #811 @ArneDefauw ### Minor -- Added `asv` benchmark code #784 @berombau -- Validate tables while parsing #808 -- fix join non matching tables #813 +- Added `asv` benchmark code #784 @berombau +- Validate tables while parsing #808 +- fix join non matching tables #813 ### Fixed -- Relaxed `fsspec` requirement #798 -- Fix for `to_polygons` when using processed instead of threads in Dask #756 @ArneDefauw -- Fix `transform_to_data_extent` converting labels to images #791 @aeisenbarth +- Relaxed `fsspec` requirement #798 +- Fix for `to_polygons` when using processed instead of threads in Dask #756 @ArneDefauw +- Fix `transform_to_data_extent` converting labels to images #791 @aeisenbarth ## [0.2.6] - 2024-11-26 ### Added -- Added `set_channel_names` method to `SpatialData` to change the channel names of an 
- image element in `SpatialData` #786 -- Added `write_channel_names` method to `SpatialData` to overwrite channel metadata on disk - without overwriting the image array itself. #786 +- Added `set_channel_names` method to `SpatialData` to change the channel names of an + image element in `SpatialData` #786 +- Added `write_channel_names` method to `SpatialData` to overwrite channel metadata on disk + without overwriting the image array itself. #786 ### Changed -- Argument `c_coords` is moved out of kwargs for the `ImageModel`s. #779 -- `get_channels` is marked for deprecation in `SpatialData` v0.3.0. Function is replaced - by `get_channel_names` #786 -- Updated dependency of `multiscale-spatial-image` #792 -- Adjust to new version of `xarray` with `DataTree` # 752 +- Argument `c_coords` is moved out of kwargs for the `ImageModel`s. #779 +- `get_channels` is marked for deprecation in `SpatialData` v0.3.0. Function is replaced + by `get_channel_names` #786 +- Updated dependency of `multiscale-spatial-image` #792 +- Adjust to new version of `xarray` with `DataTree` # 752 ### Fixed -- Updated deprecated default stages of `pre-commit` #771 -- Preserve points `feature_key` during queries #794 +- Updated deprecated default stages of `pre-commit` #771 +- Preserve points `feature_key` during queries #794 ## [0.2.5] - 2024-11-06 ### Fixed -- Incompatibility issues due to newest release of `multiscale-spatial-image` #760 +- Incompatibility issues due to newest release of `multiscale-spatial-image` #760 ## [0.2.4] - 2024-11-06 ### Major -- Enable vectorization of `bounding_box_query` for all `SpatialData` elements. #699 +- Enable vectorization of `bounding_box_query` for all `SpatialData` elements. 
#699 ### Minor -- Added `shortest_path` parameter to `get_transformation_between_coordinate_systems` #714 -- Added `get_pyramid_levels()` utils API #719 -- Improved ergonomics of `concatenate()` when element names are non-unique #720 -- Improved performance of writing images with multiscales #577 +- Added `shortest_path` parameter to `get_transformation_between_coordinate_systems` #714 +- Added `get_pyramid_levels()` utils API #719 +- Improved ergonomics of `concatenate()` when element names are non-unique #720 +- Improved performance of writing images with multiscales #577 ## [0.2.3] - 2024-09-25 ### Minor -- Added `clip: bool = False` parameter to `polygon_query()` #670 -- Add `sort` parameter to `PointsModel.parse()` #672 +- Added `clip: bool = False` parameter to `polygon_query()` #670 +- Add `sort` parameter to `PointsModel.parse()` #672 ### Fixed -- Fix interpolation artifact multiscale computation for labels #697 +- Fix interpolation artifact multiscale computation for labels #697 ## [0.2.2] - 2024-08-07 ### Major -- New disk format for shapes using `GeoParquet` (the change is backward compatible) #542 +- New disk format for shapes using `GeoParquet` (the change is backward compatible) #542 ### Minor -- Add `return_background` as argument to `get_centroids` and `get_element_instances` #621 -- Ability to save data using older disk formats #542 +- Add `return_background` as argument to `get_centroids` and `get_element_instances` #621 +- Ability to save data using older disk formats #542 ### Fixed -- Circles validation now checks for inf or nan radii #653 -- Bug with table name in torch dataset #654 @LLehner +- Circles validation now checks for inf or nan radii #653 +- Bug with table name in torch dataset #654 @LLehner ## [0.2.1] - 2024-07-04 ### Minor -- Relaxing `spatial-image` package requirement #616 +- Relaxing `spatial-image` package requirement #616 ## [0.2.0] - 2024-07-03 ### Changed -- Using `DataArray` directly instead of the subclass `SpatialImage` 
(removed install constraint for the `spatial_image` package) #587 -- Using `DataTree` directly instead of the subclass `MultiscaleSpatialImage` (removed install constraint for the `multiscale_spatial_image` package) #587 -- Changed `element`parameter (deprecation in v0.3.0) of `transform_element_to_coordinate_system` to a string `element_name` #611 +- Using `DataArray` directly instead of the subclass `SpatialImage` (removed install constraint for the `spatial_image` package) #587 +- Using `DataTree` directly instead of the subclass `MultiscaleSpatialImage` (removed install constraint for the `multiscale_spatial_image` package) #587 +- Changed `element`parameter (deprecation in v0.3.0) of `transform_element_to_coordinate_system` to a string `element_name` #611 ### Major -- Added operation: `to_polygons()` @quentinblampey #560 -- Extended `rasterize()` to support all the data types @quentinblampey #566 -- Added operation: `rasterize_bins()` @quentinblampey #578 -- Added operation: `map_raster()` to apply functions block-wise to raster data @ArneDefauw #588 +- Added operation: `to_polygons()` @quentinblampey #560 +- Extended `rasterize()` to support all the data types @quentinblampey #566 +- Added operation: `rasterize_bins()` @quentinblampey #578 +- Added operation: `map_raster()` to apply functions block-wise to raster data @ArneDefauw #588 ### Minor -- Removed `pygeos` dependency @omsai #545 -- Channel coordinate annotations on images now persist through `rasterize()` @clwgg #544 -- Added `datasets` module -- Extended `get_values()` to `AnnData` tables #579 -- Added `get_element_instances()` (replaces `_get_unique_label_values_as_index()`) #582 -- Added `get_element_annotators()`, retrieving the tables that annotate a particular SpatialElement #595 +- Removed `pygeos` dependency @omsai #545 +- Channel coordinate annotations on images now persist through `rasterize()` @clwgg #544 +- Added `datasets` module +- Extended `get_values()` to `AnnData` tables #579 +- 
Added `get_element_instances()` (replaces `_get_unique_label_values_as_index()`) #582 +- Added `get_element_annotators()`, retrieving the tables that annotate a particular SpatialElement #595 ### Fixed -- Preserve channel names of multi-scale images in `transform` (#379) -- Fix `filter_by_coordinate_system` with SpatialData object having a table not annotating an element (#619) +- Preserve channel names of multi-scale images in `transform` (#379) +- Fix `filter_by_coordinate_system` with SpatialData object having a table not annotating an element (#619) ## [0.1.2] - 2024-03-30 ### Minor -- Made `get_channels()` public. -- Added utils `force_2d()` to force 3D shapes to 2D (this is a temporary solution until `.force_2d()` is available in `geopandas`). +- Made `get_channels()` public. +- Added utils `force_2d()` to force 3D shapes to 2D (this is a temporary solution until `.force_2d()` is available in `geopandas`). ## [0.1.1] - 2024-03-28 ### Added -- Added method `update_annotated_regions_metadata() which updates the `region`value automatically from the`region_key` columns +- Added method `update_annotated_regions_metadata()` which updates the `region` value automatically from the `region_key` columns ### Changed -- Renamed `join_sdata_spatialelement_table` to `join_spatialelement_table`, and made it work also without `SpatialData` objects. +- Renamed `join_sdata_spatialelement_table` to `join_spatialelement_table`, and made it work also without `SpatialData` objects. ## [0.1.0] - 2024-03-24 @@ -153,70 +153,70 @@ and this project adheres to [Semantic Versioning][]. #### Major -- Implemented support in `SpatialData` for storing multiple tables. -- These tables can annotate a `SpatialElement` but now not necessarily so.
+- Deprecated `.table` attribute in favor of `.tables` dict-like accessor. -- Added join operations -- Added SQL like joins that can be executed by calling one public function `join_sdata_spatialelement_table`. The following joins are supported: `left`, `left_exclusive`, `right`, `right_exclusive` and `inner`. The function has an option to match rows. For `left` only matching `left` is supported and for `right` join only `right` matching of rows is supported. Not all joins are supported for `Labels` elements. -- Added function `match_element_to_table` which allows the user to perform a right join of `SpatialElement`(s) with a table with rows matching the row order in the table. +- Added join operations +- Added SQL like joins that can be executed by calling one public function `join_sdata_spatialelement_table`. The following joins are supported: `left`, `left_exclusive`, `right`, `right_exclusive` and `inner`. The function has an option to match rows. For `left` only matching `left` is supported and for `right` join only `right` matching of rows is supported. Not all joins are supported for `Labels` elements. +- Added function `match_element_to_table` which allows the user to perform a right join of `SpatialElement`(s) with a table with rows matching the row order in the table. -- Incremental IO of data and metadata: -- Increased in-memory vs on-disk control: changes performed in-memory (e.g. adding a new image) are not automatically performed on-disk. -- Deprecated `add_image()`, `add_labels()`, `add_shapes()`, `add_points()` in favor of `.images`, `.labels`, `.shapes`, `.points` dict-like accessors. 
-- new methods `write_element()`, `write_transformations()`, `write_metadata()`, `remove_element_from_disk()` -- new methods `write_consolidated_metadata()` and `has_consolidated_metadata()` -- deprecated `save_transformations()` -- improved `__repr__()` with information on Zarr storage and Dask-backed files -- new utils `is_self_contained()`, `describe_elements_are_self_contained()` -- new utils `element_paths_in_memory()`, `element_paths_on_disk()` +- Incremental IO of data and metadata: +- Increased in-memory vs on-disk control: changes performed in-memory (e.g. adding a new image) are not automatically performed on-disk. +- Deprecated `add_image()`, `add_labels()`, `add_shapes()`, `add_points()` in favor of `.images`, `.labels`, `.shapes`, `.points` dict-like accessors. +- new methods `write_element()`, `write_transformations()`, `write_metadata()`, `remove_element_from_disk()` +- new methods `write_consolidated_metadata()` and `has_consolidated_metadata()` +- deprecated `save_transformations()` +- improved `__repr__()` with information on Zarr storage and Dask-backed files +- new utils `is_self_contained()`, `describe_elements_are_self_contained()` +- new utils `element_paths_in_memory()`, `element_paths_on_disk()` #### Minor -- Multiple table helper functions -- Added public helper function `get_table_keys()` in `spatialdata.models` to retrieve annotation information of a given table. -- Added public helper function `check_target_region_column_symmetry()` in `spatialdata.models` to check whether annotation - metadata in `table.uns['spatialdata_attrs']` corresponds with respective columns in `table.obs`. -- Added function `validate_table_in_spatialdata()` in SpatialData to validate the annotation target of a table being present in the `SpatialData` object. -- Added method `get_annotated_regions()` in `SpatialData` to get the regions annotated by a given table. -- Added method `get_region_key_column()` in `SpatialData` to get the region_key column in table.obs. 
-- Added method `get_instance_key_column()` in `SpatialData` to get the instance_key column in table.obs. -- Added method `set_table_annotates_spatialelement()` in `SpatialData` to either set or change the annotation metadata of a table in a given `SpatialData` object. - Added `table_name` parameter to the `aggregate()` function to allow users to give a custom table name to table resulting from aggregation. -- Added `table_name` parameter to the `get_values()` function. - -- Utils -- Added `gen_spatial_elements()` generator in SpatialData to generate the `SpatialElements` in a given `SpatialData` object. -- Added `gen_elements` generator in `SpatialData` to generate elements of a `SpatialData` object including tables. -- added `SpatialData.subset()` API -- added `SpatialData.locate_element()` API -- added utils function: `get_centroids()` -- added utils function: `deepcopy()` -- added operation: `to_circles()` -- documented previously-added `get_channels()` to retrieve the channel names of a raster element indepently of it being single or multi-scale - -- Transformations-related - - - added utils function: `transform_to_data_extent()` - - added utils function: `are_extents_equal()` - - added utils function: `postpone_transformation()` - - added utils function: `remove_transformations_to_coordinate_system()` - -- added testing utilities: `assert_spatial_data_objects_are_identical()`, `assert_elements_are_identical()`, `assert_elements_dict_are_identical()` +- Multiple table helper functions +- Added public helper function `get_table_keys()` in `spatialdata.models` to retrieve annotation information of a given table. +- Added public helper function `check_target_region_column_symmetry()` in `spatialdata.models` to check whether annotation + metadata in `table.uns['spatialdata_attrs']` corresponds with respective columns in `table.obs`. 
+- Added function `validate_table_in_spatialdata()` in SpatialData to validate the annotation target of a table being present in the `SpatialData` object. +- Added method `get_annotated_regions()` in `SpatialData` to get the regions annotated by a given table. +- Added method `get_region_key_column()` in `SpatialData` to get the region_key column in table.obs. +- Added method `get_instance_key_column()` in `SpatialData` to get the instance_key column in table.obs. +- Added method `set_table_annotates_spatialelement()` in `SpatialData` to either set or change the annotation metadata of a table in a given `SpatialData` object. - Added `table_name` parameter to the `aggregate()` function to allow users to give a custom table name to table resulting from aggregation. +- Added `table_name` parameter to the `get_values()` function. + +- Utils +- Added `gen_spatial_elements()` generator in SpatialData to generate the `SpatialElements` in a given `SpatialData` object. +- Added `gen_elements` generator in `SpatialData` to generate elements of a `SpatialData` object including tables. 
+- added `SpatialData.subset()` API +- added `SpatialData.locate_element()` API +- added utils function: `get_centroids()` +- added utils function: `deepcopy()` +- added operation: `to_circles()` +- documented previously-added `get_channels()` to retrieve the channel names of a raster element independently of it being single or multi-scale + +- Transformations-related + + - added utils function: `transform_to_data_extent()` + - added utils function: `are_extents_equal()` + - added utils function: `postpone_transformation()` + - added utils function: `remove_transformations_to_coordinate_system()` + +- added testing utilities: `assert_spatial_data_objects_are_identical()`, `assert_elements_are_identical()`, `assert_elements_dict_are_identical()` ### Changed/fixed #### Major -- refactored data loader for deep learning -- refactored `SpatialData.write()` to be more robust -- generalized spatial queries to any combination of 2D/3D data and 2D/3D query region #409 +- refactored data loader for deep learning +- refactored `SpatialData.write()` to be more robust +- generalized spatial queries to any combination of 2D/3D data and 2D/3D query region #409 #### Minor -- Changed the string representation of `SpatialData` to reflect the changes in regard to multiple tables and incremental IO. -- improved usability and robustness of `sdata.write()` when `overwrite=True` @aeisenbarth -- fixed warnings for categorical dtypes in tables in `TableModel` and `PointsModel` -- fixed wrong order of points after spatial queries +- Changed the string representation of `SpatialData` to reflect the changes in regard to multiple tables and incremental IO. +- improved usability and robustness of `sdata.write()` when `overwrite=True` @aeisenbarth +- fixed warnings for categorical dtypes in tables in `TableModel` and `PointsModel` +- fixed wrong order of points after spatial queries ## [0.0.14] - 2023-10-11 @@ -224,105 +224,105 @@ and this project adheres to [Semantic Versioning][].
#### Minor -- new API: sdata.rename_coordinate_systems() +- new API: sdata.rename_coordinate_systems() #### Technical -- decompose affine transformation into simpler transformations -- remove padding for blobs() +- decompose affine transformation into simpler transformations +- remove padding for blobs() #### Major -- get_extent() function to compute bounding box of the data +- get_extent() function to compute bounding box of the data #### Minor -- testing against pre-release packages +- testing against pre-release packages ### Fixed -- Fixed bug with get_values(): ignoring background channel in labels +- Fixed bug with get_values(): ignoring background channel in labels ## [0.0.13] - 2023-10-02 ### Added -- polygon_query() support for images #358 +- polygon_query() support for images #358 ### Fixed -- Fix missing c_coords argument in blobs multiscale #342 -- Replaced hardcoded string with instance_key #346 +- Fix missing c_coords argument in blobs multiscale #342 +- Replaced hardcoded string with instance_key #346 ## [0.0.12] - 2023-06-24 ### Added -- Add multichannel blobs sample data (by @melonora) +- Add multichannel blobs sample data (by @melonora) ## [0.0.11] - 2023-06-21 ### Improved -- Aggregation APIs. +- Aggregation APIs. ## [0.0.10] - 2023-06-06 ### Fixed -- Fix blobs (#282) +- Fix blobs (#282) ## [0.0.9] - 2023-05-23 ### Updated -- Update napari-spatialdata pin (#279) -- pin typing-extensions +- Update napari-spatialdata pin (#279) +- pin typing-extensions ## [0.0.8] - 2023-05-22 ### Merged -- Merge pull request #271 from scverse/fix/aggregation +- Merge pull request #271 from scverse/fix/aggregation ## [0.0.7] - 2023-05-20 ### Updated -- Update readme +- Update readme ## [0.0.6] - 2023-05-10 ### Added -- This release adds polygon spatial query. +- This release adds polygon spatial query. 
## [0.0.5] - 2023-05-05 ### Fixed -- fix tests badge (#242) +- fix tests badge (#242) ## [0.0.4] - 2023-05-04 ### Tested -- This release tests distribution via pypi +- This release tests distribution via pypi ## [0.0.3] - 2023-05-02 ### Added -- This is an alpha release to test the release process. +- This is an alpha release to test the release process. ## [0.0.2] - 2023-05-02 ### Added -- make version dynamic +- make version dynamic ## [0.0.1.dev1] - 2023-03-25 ### Added -- Dev version, not official release yet +- Dev version, not official release yet diff --git a/README.md b/README.md index bd451203f..38d3a16e0 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,9 @@ SpatialData is a data framework that comprises a FAIR storage format and a collection of python libraries for performant access, alignment, and processing of uni- and multi-modal spatial omics datasets. This repository contains the core spatialdata library. See the links below to learn more about other packages in the SpatialData ecosystem. -- [spatialdata-io](https://github.com/scverse/spatialdata-io): load data from common spatial omics technologies into spatialdata. -- [spatialdata-plot](https://github.com/scverse/spatialdata-plot): Static plotting library for spatialdata. -- [napari-spatialdata](https://github.com/scverse/napari-spatialdata): napari plugin for interactive exploration and annotation of spatial data. +- [spatialdata-io](https://github.com/scverse/spatialdata-io): load data from common spatial omics technologies into spatialdata. +- [spatialdata-plot](https://github.com/scverse/spatialdata-plot): Static plotting library for spatialdata. +- [napari-spatialdata](https://github.com/scverse/napari-spatialdata): napari plugin for interactive exploration and annotation of spatial data. [//]: # "numfocus-fiscal-sponsor-attribution" @@ -33,16 +33,16 @@ The spatialdata project also received support by the Chan Zuckerberg Initiative. 
![SpatialDataOverview](https://github.com/scverse/spatialdata/assets/1120672/cb91071f-12a7-4b8e-9430-2b3a0f65e52f) -- **The library is currently under review.** We expect there to be changes as the community provides feedback. We have an announcement channel for communicating these changes, please see the contact section below. -- The SpatialData storage format is built on top of the [OME-NGFF](https://ngff.openmicroscopy.org/latest/) specification. +- **The library is currently under review.** We expect there to be changes as the community provides feedback. We have an announcement channel for communicating these changes, please see the contact section below. +- The SpatialData storage format is built on top of the [OME-NGFF](https://ngff.openmicroscopy.org/latest/) specification. ## Getting started Please refer to the [documentation][link-docs]. In particular: -- [API documentation][link-api]. -- [Design doc][link-design-doc]. -- [Example notebooks][link-notebooks]. +- [API documentation][link-api]. +- [Design doc][link-design-doc]. +- [Example notebooks][link-notebooks]. Another useful resource to get started is the source code of the [`spatialdata-io`](https://github.com/scverse/spatialdata-io) package, which shows example of how to read data from common technologies. @@ -62,20 +62,20 @@ mamba install -c conda-forge spatialdata napari-spatialdata spatialdata-io spati ## Limitations -- Code only manually tested for Windows machines. Currently the framework is being developed using Linux, macOS and Windows machines, but it is automatically tested only for Linux and macOS machines. +- Code only manually tested for Windows machines. Currently the framework is being developed using Linux, macOS and Windows machines, but it is automatically tested only for Linux and macOS machines. ## Contact To get involved in the discussion, or if you need help to get started, you are welcome to use the following options. 
-- Chat via [`scverse` Zulip](https://scverse.zulipchat.com/#narrow/stream/315824-spatial) (public or 1 to 1). -- Forum post in the [scverse discourse forum](https://discourse.scverse.org/). -- Bug report/feature request via the [GitHub issue tracker][issue-tracker]. -- Zoom call as part of the SpatialData Community Meetings, held every 2 weeks on Thursday, [schedule here](https://hackmd.io/enWU826vRai-JYaL7TZaSw). +- Chat via [`scverse` Zulip](https://scverse.zulipchat.com/#narrow/stream/315824-spatial) (public or 1 to 1). +- Forum post in the [scverse discourse forum](https://discourse.scverse.org/). +- Bug report/feature request via the [GitHub issue tracker][issue-tracker]. +- Zoom call as part of the SpatialData Community Meetings, held every 2 weeks on Thursday, [schedule here](https://hackmd.io/enWU826vRai-JYaL7TZaSw). Finally, especially relevant for for developers that are building a library upon `spatialdata`, please follow this channel for: -- Announcements on new features and important changes [Zulip](https://imagesc.zulipchat.com/#narrow/stream/329057-scverse/topic/spatialdata.20announcements). +- Announcements on new features and important changes [Zulip](https://imagesc.zulipchat.com/#narrow/stream/329057-scverse/topic/spatialdata.20announcements). ## Citation diff --git a/docs/contributing.md b/docs/contributing.md index 7c88b1637..a2aad91c7 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -143,10 +143,10 @@ in the cookiecutter-scverse template. Please write documentation for new or changed features and use-cases. This project uses [sphinx][] with the following features: -- the [myst][] extension allows to write documentation in markdown/Markedly Structured Text -- [Numpy-style docstrings][numpydoc] (through the [napoloen][numpydoc-napoleon] extension). 
-- Jupyter notebooks as tutorials through [myst-nb][] (See [Tutorials with myst-nb](#tutorials-with-myst-nb-and-jupyter-notebooks)) -- [Sphinx autodoc typehints][], to automatically reference annotated input and output types +- the [myst][] extension allows writing documentation in markdown/Markedly Structured Text +- [Numpy-style docstrings][numpydoc] (through the [napoleon][numpydoc-napoleon] extension). +- Jupyter notebooks as tutorials through [myst-nb][] (See [Tutorials with myst-nb](#tutorials-with-myst-nb-and-jupyter-notebooks)) +- [Sphinx autodoc typehints][], to automatically reference annotated input and output types See the [scanpy developer docs](https://scanpy.readthedocs.io/en/latest/dev/documentation.html) for more information on how to write documentation. @@ -163,10 +163,10 @@ repository. #### Hints -- If you refer to objects from other packages, please add an entry to `intersphinx_mapping` in `docs/conf.py`. Only - if you do so can sphinx automatically create a link to the external documentation. -- If building the documentation fails because of a missing link that is outside your control, you can add an entry to - the `nitpick_ignore` list in `docs/conf.py` +- If you refer to objects from other packages, please add an entry to `intersphinx_mapping` in `docs/conf.py`. Only + if you do so can sphinx automatically create a link to the external documentation. +- If building the documentation fails because of a missing link that is outside your control, you can add an entry to + the `nitpick_ignore` list in `docs/conf.py` #### Building the docs locally diff --git a/docs/design_doc.md b/docs/design_doc.md index 65fbfc96f..8dc37b4f7 100644 --- a/docs/design_doc.md +++ b/docs/design_doc.md @@ -10,72 +10,72 @@ Recent advances in molecular profiling technologies allow to measure abundance o The goals define _what_ SpatialData will be able to do (as opposed to _how_).
Goals can have the following priority levels: -- P0: highest priority, required for successful implementation (i.e., must have) -- P1: high priority, but not required (i.e., nice to have) -- P2: nice to have, but not a priority +- P0: highest priority, required for successful implementation (i.e., must have) +- P1: high priority, but not required (i.e., nice to have) +- P2: nice to have, but not a priority **1. Load data from modern spatial multiomics experiments** -- P0. Data can be loaded from the OME-NGFF and saved to OME-NGFF. - - [x] multiscale images and labels, 2d and 3d - - [x] point clouds - - [x] polygon-shaped regions of interest - - [x] circle-shaped regions of interest - - [x] tables - - [x] graphs -- P0. Data can be loaded lazily. - - [x] Images - - [x] Points - - [ ] (P1) Shapes https://github.com/scverse/spatialdata/issues/359 -- P1. - - [x] Loaded data can be iterated over to generate tiles for multiprocessing and deep learning. +- P0. Data can be loaded from the OME-NGFF and saved to OME-NGFF. + - [x] multiscale images and labels, 2d and 3d + - [x] point clouds + - [x] polygon-shaped regions of interest + - [x] circle-shaped regions of interest + - [x] tables + - [x] graphs +- P0. Data can be loaded lazily. + - [x] Images + - [x] Points + - [ ] (P1) Shapes https://github.com/scverse/spatialdata/issues/359 +- P1. + - [x] Loaded data can be iterated over to generate tiles for multiprocessing and deep learning. **2. Align different datasets via affine transformations** -- [x] P0. Transformations can be loaded from and written to OME-NGFF. -- [x] P0. Identity transformation -- [x] P0. Affine transformations. - - [x] scale - - [x] translation - - [x] rotation -- [x] P0. Support definition of common coordinate systems across datasets (i.e., extrinsic coordinate systems). -- [x] P0. Sequence of transformation. -- Utils - - [x] P0 permute axis -- [ ] P2. non-linear - - [ ] coordinates and displacements +- [x] P0. 
Transformations can be loaded from and written to OME-NGFF. +- [x] P0. Identity transformation +- [x] P0. Affine transformations. + - [x] scale + - [x] translation + - [x] rotation +- [x] P0. Support definition of common coordinate systems across datasets (i.e., extrinsic coordinate systems). +- [x] P0. Sequence of transformation. +- Utils + - [x] P0 permute axis +- [ ] P2. non-linear + - [ ] coordinates and displacements **3. Performant spatial query of multimodal spatial datasets** -- [x] P0. Support querying a multimodal dataset for all data in a specified region (at the cost of creating spatial index every time). - - [x] Arbitrary bounding boxes - - [x] Polygons or regions of interest (ball, shape) +- [x] P0. Support querying a multimodal dataset for all data in a specified region (at the cost of creating spatial index every time). + - [x] Arbitrary bounding boxes + - [x] Polygons or regions of interest (ball, shape) **4. Aggregate observations by regions of interest** -- [x] P0. Support aggregation functions with standard summary statistics - - [x] mean - - [x] sum - - [x] count -- [x] P1. User-defined aggregation function +- [x] P0. Support aggregation functions with standard summary statistics + - [x] mean + - [x] sum + - [x] count +- [x] P1. User-defined aggregation function ### Non-goals -- _SpatialData_ is not an analysis library. Instead the aim is to provide an infrastructure to analysis libraries for IO and spatial queries. -- _SpatialData_ is not a format converter. We should not support converting to/from too many formats and instead use OME-NGFF as the interchange format. Nevertheless,[spatialdata-io][] offers a place for some common data conversions (external contributions are highly encouraged). -- _SpatialData_ is based on standard on-disk storage formats (Zarr and Parquet) and on existing specifications (NGFF, AnnData) and uses existing solutions when possible. 
The resulting storage objects which brings together these technologies defines the _SpatialData on-disk format_, which is described in this document and finely characterized in [this online resource](https://github.com/scverse/spatialdata-notebooks/tree/main/notebooks/developers_resources/storage_format). +- _SpatialData_ is not an analysis library. Instead the aim is to provide an infrastructure to analysis libraries for IO and spatial queries. +- _SpatialData_ is not a format converter. We should not support converting to/from too many formats and instead use OME-NGFF as the interchange format. Nevertheless, [spatialdata-io][] offers a place for some common data conversions (external contributions are highly encouraged). +- _SpatialData_ is based on standard on-disk storage formats (Zarr and Parquet) and on existing specifications (NGFF, AnnData) and uses existing solutions when possible. The resulting storage object, which brings together these technologies, defines the _SpatialData on-disk format_, which is described in this document and finely characterized in [this online resource](https://github.com/scverse/spatialdata-notebooks/tree/main/notebooks/developers_resources/storage_format). ## Satellite projects We strongly encourage collaborations and community supports in all of these projects. -- [x] P0. _Visualization_: we are developing a napari plugin for interactive visualization of _SpatialData_ objects @ [napari-spatialdata][]. -- [x] P0. _Raw data IO_: we are implementing readers for raw data of common spatial omics technologies @ [spatialdata-io][]. -- [x] P1. _Static plotting_: a static plotting library for _SpatialData_ @ [spatialdata-plot][]. -- [ ] P2. _Image analysis_: Library to perform image analysis, wrapping common analysis library in python such as skimage. - Once ready, we will deprecate such functionalities in [squidpy][]. -- [ ] P2. _Spatial and graph analysis_: [squidpy][] will be refactor to accept SpatialData objects as input. -- [ ] P2.
_Database_: Some form of update on released datasets with updated specs as development progresses. A temporary sandbox where we store downloader and converter scripts for representative datasets is available @ [spatialdata-sandbox][]. +- [x] P0. _Visualization_: we are developing a napari plugin for interactive visualization of _SpatialData_ objects @ [napari-spatialdata][]. +- [x] P0. _Raw data IO_: we are implementing readers for raw data of common spatial omics technologies @ [spatialdata-io][]. +- [x] P1. _Static plotting_: a static plotting library for _SpatialData_ @ [spatialdata-plot][]. +- [ ] P2. _Image analysis_: Library to perform image analysis, wrapping common analysis libraries in Python such as skimage. + Once ready, we will deprecate such functionalities in [squidpy][]. +- [ ] P2. _Spatial and graph analysis_: [squidpy][] will be refactored to accept SpatialData objects as input. +- [ ] P2. _Database_: Some form of update on released datasets with updated specs as development progresses. A temporary sandbox where we store downloader and converter scripts for representative datasets is available @ [spatialdata-sandbox][]. @@ -107,12 +107,12 @@ The `SpatialData` object contains a set of Elements to be used for analysis. Ele We model a spatial dataset as a composition of distinct elements, of any type.
The elements correspond to: -- Pixel-based _Images_, 2D or 3D -- Regions of interest - - _Shapes_ (circles, polygons, multipolygons), 2D - - Pixel masks (such as segmentation masks), aka _Labels_, 2D, or 3D -- Points (such as transcript locations, point clouds, ...), 2D or 3D -- _Tables_ of annotations +- Pixel-based _Images_, 2D or 3D +- Regions of interest + - _Shapes_ (circles, polygons, multipolygons), 2D + - Pixel masks (such as segmentation masks), aka _Labels_, 2D, or 3D +- Points (such as transcript locations, point clouds, ...), 2D or 3D +- _Tables_ of annotations Each of these elements should be useful by itself, and in combination with other relevant elements. All elements are stored in the Zarr container in hierarchy store that MAY be flat; currently Zarr hierarchies are not supported, [see here](https://github.com/scverse/spatialdata/issues/340)). @@ -124,29 +124,29 @@ By decomposing the data model into building blocks (i.e. Elements) we support th _SpatialData_ follows the OME-NGFF specifications whenever possible and therefore much of its assumptions are inherited from it. Extra assumptions will be discussed with the OME-NGFF community and adapted to the community-agreed design. The key assumptions are the following: -- `Images`, `Labels`, `Points` and `Shapes` MUST have one or more _coordinate systems_ and _coordinate transformations_. -- `Tables` CAN NOT have a _coordinate system_ or _coordinate transforms_. Tables should not contain spatial coordinate: the user can decided to store them there, but they will not be processed by the library and needs to placed in a element and a coordiante system to be recognized by the framework. -- `Labels` and `Shapes` are both instances of `Regions`, `Regions` are `Elements`. -- Any `Element` MAY be annotated by `Tables`; also `Shapes` and `Points` MAY contain annotations within themselves as additional dataframe columns (e.g. intensity of point spread function of a each point, or gene id). 
-- `Tables` CAN NOT be annotated by other `Tables`. +- `Images`, `Labels`, `Points` and `Shapes` MUST have one or more _coordinate systems_ and _coordinate transformations_. +- `Tables` CAN NOT have a _coordinate system_ or _coordinate transforms_. Tables should not contain spatial coordinates: the user can decide to store them there, but they will not be processed by the library and need to be placed in an element and a coordinate system to be recognized by the framework. +- `Labels` and `Shapes` are both instances of `Regions`, `Regions` are `Elements`. +- Any `Element` MAY be annotated by `Tables`; also `Shapes` and `Points` MAY contain annotations within themselves as additional dataframe columns (e.g. intensity of point spread function of each point, or gene id). +- `Tables` CAN NOT be annotated by other `Tables`. #### Images Images of a sample. Should conform to the [OME-NGFF concept of an image](https://ngff.openmicroscopy.org/latest/#image-layout). Images are n-dimensional arrays where each element of an array is a pixel of an image. These arrays have labelled dimensions which correspond to: -- Spatial dimensions (height and width). -- Imaging or feature channels. -- Z-stacks. +- Spatial dimensions (height and width). +- Imaging or feature channels. +- Z-stacks. We require the following axes (in the following order): -- 2D images: cyx -- 3D images: czyx +- 2D images: cyx +- 3D images: czyx Other ordering or axes neames are currently not supported. -- [ ] P2 We will support also time-point axes in the future. Furthermore, thanks to NGFF specs v0.5, such axes will not have name constraints (although they do for first iteration due to NGFF specs v0.4). +- [ ] P2 We will support also time-point axes in the future. Furthermore, thanks to NGFF specs v0.5, such axes will not have name constraints (although they do for first iteration due to NGFF specs v0.4). The image object itself builds on prior art in image analysis, in particular the [xarray library][].
@@ -172,19 +172,19 @@ The xarray coordinates are not saved in the NGFF storage. APIs to take into acco Regions of interest define distinct regions of space that can be used to select and aggregate observations. For instance, regions can correspond to -- Tissues -- Tissue structures -- Clinical annotations -- Multi-cellular communities -- Cells -- Subcellular structures -- Physical structures from the assay (e.g. Visium "spots") -- Synthetic regions created by analysts (e.g. output of algorithms) +- Tissues +- Tissue structures +- Clinical annotations +- Multi-cellular communities +- Cells +- Subcellular structures +- Physical structures from the assay (e.g. Visium "spots") +- Synthetic regions created by analysts (e.g. output of algorithms) As an example, regions can be used for: -- subsetting observations (e.g., get all observations in a given region) -- aggregating observations (e.g., count all observations in an region) +- subsetting observations (e.g., get all observations in a given region) +- aggregating observations (e.g., count all observations in an region) Regions can be defined in multiple ways. @@ -197,8 +197,8 @@ The Python data structures used for Labels are the same one that we discussed fo We require the following axes (in the following order): -- 2D labels: yx -- 3D labels: zyx +- 2D labels: yx +- 3D labels: zyx ##### Shapes @@ -216,24 +216,24 @@ Coordinates of points for single molecule data. Each observation is a point, and Current implementation represent points as a Parquet file and a [`dask.dataframe.DataFrame`](https://docs.dask.org/en/stable/dataframe.html) in memory. The requirements are the following: -- The table MUST contains axis name to represent the axes. - - If it's 2D, the axes should be `["x","y"]`. - - If it's 3D, the axes should be `["x","y","z"]`. -- It MUST also contains coordinates transformations in `dask.dataframe.DataFrame().attrs["transform"]`. +- The table MUST contains axis name to represent the axes. 
+ - If it's 2D, the axes should be `["x","y"]`. + - If it's 3D, the axes should be `["x","y","z"]`. +- It MUST also contains coordinates transformations in `dask.dataframe.DataFrame().attrs["transform"]`. Additional information is stored in `dask.dataframe.DataFrame().attrs["spatialdata_attrs"]` -- It MAY also contains `"feature_key"`, that is, the column name of the table that refers to the features. This `Series` MAY be of type `pandas.Categorical`. -- It MAY contains additional information in `dask.dataframe.DataFrame().attrs["spatialdata_attrs"]`, specifically: - - `"instance_key"`: the column name of the table where unique instance ids that this point refers to are stored, if available. +- It MAY also contains `"feature_key"`, that is, the column name of the table that refers to the features. This `Series` MAY be of type `pandas.Categorical`. +- It MAY contains additional information in `dask.dataframe.DataFrame().attrs["spatialdata_attrs"]`, specifically: + - `"instance_key"`: the column name of the table where unique instance ids that this point refers to are stored, if available. #### Table (table of annotations for regions) Annotations of regions of interest. Each row in this table corresponds to a single region on the coordinate space. This is represented as an `AnnData` object to allow for complex annotations on the data. This includes: -- multivariate feature support, e.g. a matrix of dimensions regions x variables; -- annotations on top of the features or of the observations. E.g. calculated statistic, prior knowledge based annotations, cell types etc. -- graphs of observations or variables. These can be spatial graphs, nearest neighbor networks based on feature similarity, etc. +- multivariate feature support, e.g. a matrix of dimensions regions x variables; +- annotations on top of the features or of the observations. E.g. calculated statistic, prior knowledge based annotations, cell types etc. +- graphs of observations or variables. 
These can be spatial graphs, nearest neighbor networks based on feature similarity, etc. One region table can refer to multiple sets of Regions. But each row can map to only one region in its Regions element. For example, one region table can store annotation for multiple slides, though each slide would have its own label element. @@ -247,21 +247,21 @@ If any of `region`, `region_key` and `instance_key` are defined, they all MUST b In `spatialdata-io` we use a consistent naming scheme for the `region_key` and `instance_key` column, which is suggested (but not required): -- we use the name `'region'` as the default name for the `region_key` column; -- we use the name `'instance_id'` as the default name for the `instance_key` column. +- we use the name `'region'` as the default name for the `region_key` column; +- we use the name `'instance_id'` as the default name for the `instance_key` column. ### Summary -- Image `type: Image` -- Regions `type: Union[Labels, Shapes]` - - Labels `type: Labels` - - Shapes `type: Shapes` -- Points `type: Points` -- Tables `type: Table` +- Image `type: Image` +- Regions `type: Union[Labels, Shapes]` + - Labels `type: Labels` + - Shapes `type: Shapes` +- Points `type: Points` +- Tables `type: Table` #### Open discussions -- Points vs Circles [discussion](https://github.com/scverse/spatialdata/issues/46) +- Points vs Circles [discussion](https://github.com/scverse/spatialdata/issues/46) ### Transforms and coordinate systems @@ -276,14 +276,14 @@ There are two types of coordinate systems: intrinsic (called also implicit) and The NGFF specification only operates with images and labels, so it specifies rules for the coordinate systems only for these two types of elements. 
The main points are the following: -- each image/labels MUST have one and only one intrinsic coordinate system; -- each image/labels MAY have a transformation mapping them to one (at last one MUST be present) or more extrinsic coordinate systems; -- a transformation MAY be defined between any two coordinate systems, including intrinsic and extrinsic coordinate systems. +- each image/labels MUST have one and only one intrinsic coordinate system; +- each image/labels MAY have a transformation mapping them to one (at least one MUST be present) or more extrinsic coordinate systems; +- a transformation MAY be defined between any two coordinate systems, including intrinsic and extrinsic coordinate systems. Furthermore, acoording to NGFF, a coordinate system: -- MUST have a name; -- MUST specify all the axes. +- MUST have a name; +- MUST specify all the axes. #### SpatialData approach @@ -292,18 +292,18 @@ Since elements are allowed to have only (a subset of the) c, x, y, z axes and mu In details: -- we don't need to specify the intrinsic coordinate systems, these are inferred from the element schema -- each element MAY have a transformation mapping them to one or more extrinsic coordinate systems +- we don't need to specify the intrinsic coordinate systems, these are inferred from the element schema +- each element MAY have a transformation mapping them to one or more extrinsic coordinate systems Each coordinate system -- MUST have a name -- MAY specify its axes +- MUST have a name +- MAY specify its axes We also have a constraint (that we will relax in the future, [see here](https://github.com/scverse/spatialdata/issues/308)): -- a transformation MAY be defined only between an intrinsic coordinate system and an extrinsic coordinate system -- each element MUST be mapped at least to an extrinsic coordinate system. When no mapping is specified, we define a mapping to the "global" coordinate system via an "Identity" transformation.
+- a transformation MAY be defined only between an intrinsic coordinate system and an extrinsic coordinate system +- each element MUST be mapped at least to an extrinsic coordinate system. When no mapping is specified, we define a mapping to the "global" coordinate system via an "Identity" transformation. #### In-memory representation @@ -344,19 +344,19 @@ This section describes a more detailed timeline of future developments, includin #### Early 2024 -- [ ] Simplify data models - - [x] Use `xarray.DataArray` instead of the subclass `SpatialImage` and `xarray.DataTree` instad of the subclass `MultiscaleSpatialImage` - - [ ] Use `GeoDataFrame` for points -- [ ] More performant disk storage - - [ ] Use `geoparquet` for shapes and points -- [ ] Support for nested hierarchies in NGFF stores -- [x] Start working on multiple tables -- [x] Start working on the transformations refactoring +- [ ] Simplify data models + - [x] Use `xarray.DataArray` instead of the subclass `SpatialImage` and `xarray.DataTree` instead of the subclass `MultiscaleSpatialImage` + - [ ] Use `GeoDataFrame` for points +- [ ] More performant disk storage + - [ ] Use `geoparquet` for shapes and points +- [ ] Support for nested hierarchies in NGFF stores +- [x] Start working on multiple tables +- [x] Start working on the transformations refactoring #### Late 2024 -- [x] Finalize multiple tables support -- [ ] Finalize transformations refactoring +- [x] Finalize multiple tables support +- [ ] Finalize transformations refactoring --- @@ -386,32 +386,32 @@ The layout of some common datasets.
**Layout of [MERFISH example](https://github.com/giovp/spatialdata-sandbox/tree/main/merfish)** -- points (coordinates of spots); -- each point has features (e.g., gene, size, cell assignment); -- segmented cell locations are saved as labels (missing in this example) or approximated as circles of variable diameter; -- gene expression for cells, obtained by counting the points inside each cell; -- large anatomical regions saved as polygons; -- rasterized version of the single molecule points (to mimic the original hires image, missing in this example). +- points (coordinates of spots); +- each point has features (e.g., gene, size, cell assignment); +- segmented cell locations are saved as labels (missing in this example) or approximated as circles of variable diameter; +- gene expression for cells, obtained by counting the points inside each cell; +- large anatomical regions saved as polygons; +- rasterized version of the single molecule points (to mimic the original hires image, missing in this example). **Layout of [Visium example](https://github.com/giovp/spatialdata-sandbox/tree/main/visium)** -- The datasets include multiple slides from the same individual, or slides from multiple samples; -- "Visium spots" (circular regions) where sequences are captured; -- each spot has RNA expression; -- H&E image (multiscale 2D); -- (optional) large microscopy (e.g. 40x magnification, 50K x 50K pixels) images may be available, which would need to be aligned to the rest of spatial elements; -- (optional) cell segmentation labels can be derived from the H&E images; -- (optional) the cell segmentation can be annotated with image-derived features (image features/statistics). +- The datasets include multiple slides from the same individual, or slides from multiple samples; +- "Visium spots" (circular regions) where sequences are captured; +- each spot has RNA expression; +- H&E image (multiscale 2D); +- (optional) large microscopy (e.g. 
40x magnification, 50K x 50K pixels) images may be available, which would need to be aligned to the rest of spatial elements; +- (optional) cell segmentation labels can be derived from the H&E images; +- (optional) the cell segmentation can be annotated with image-derived features (image features/statistics). #### Code/pseudo-code workflows **Workflows to show** -- [x] loading multiple samples visium data from disk (SpaceRanger), concatenating and saving them to .zarr -- [x] loading a generic NGFF dataset -- [ ] calling the SpatialData constructor with some transformations on it -- [x] accumulation with multiple types of elements -- [x] subsetting/querying by coordinate system, bounding box, spatial region, table rows +- [x] loading multiple samples visium data from disk (SpaceRanger), concatenating and saving them to .zarr +- [x] loading a generic NGFF dataset +- [ ] calling the SpatialData constructor with some transformations on it +- [x] accumulation with multiple types of elements +- [x] subsetting/querying by coordinate system, bounding box, spatial region, table rows #### Loading multiple Visium samples from the SpaceRanger output and saving them to NGFF using the SpatialData APIs @@ -538,5 +538,5 @@ sdata1 = sdata.query.table(...) #### Related notes/issues/PRs -- [Issue discussing SpatialData layout](https://github.com/scverse/spatialdata/issues/12) -- [Notes from Basel Hackathon](https://hackmd.io/MPeMr2mbSRmeIzOCgwKbxw) +- [Issue discussing SpatialData layout](https://github.com/scverse/spatialdata/issues/12) +- [Notes from Basel Hackathon](https://hackmd.io/MPeMr2mbSRmeIzOCgwKbxw) diff --git a/docs/index.md b/docs/index.md index 8a0aff416..085a4839a 100644 --- a/docs/index.md +++ b/docs/index.md @@ -8,9 +8,9 @@ SpatialData is a data framework that comprises a FAIR storage format and a collection of python libraries for performant access, alignment, and processing of uni- and multi-modal spatial omics datasets. 
This page provides documentation on how to install, use, and extend the core `spatialdata` library. See the links below to learn more about other packages in the SpatialData ecosystem. -- `spatialdata-io`: load data from common spatial omics technologies into `spatialdata` ([repository][spatialdata-io-repo], [documentation][spatialdata-io-docs]). -- `spatialdata-plot`: Static plotting library for `spatialdata` ([repository][spatialdata-plot-repo], [documentation][spatialdata-plot-docs]). -- `napari-spatialdata-repo`: napari plugin for interactive exploration and annotation of `spatialdata` ([repository][napari-spatialdata-repo], [documentation][napari-spatialdata-docs]). +- `spatialdata-io`: load data from common spatial omics technologies into `spatialdata` ([repository][spatialdata-io-repo], [documentation][spatialdata-io-docs]). +- `spatialdata-plot`: Static plotting library for `spatialdata` ([repository][spatialdata-plot-repo], [documentation][spatialdata-plot-docs]). +- `napari-spatialdata`: napari plugin for interactive exploration and annotation of `spatialdata` ([repository][napari-spatialdata-repo], [documentation][napari-spatialdata-docs]). Please see our publication {cite}`marconatoSpatialDataOpenUniversal2024` for citation and to learn more. diff --git a/docs/installation.md b/docs/installation.md index 23c25d858..7d25ef78c 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -14,9 +14,9 @@ Install `spatialdata` by running: The SpatialData ecosystem is designed to work with the following packages: -- [spatialdata-io][]: `spatialdata` readers and converters for common spatial omics technologies. -- [spatialdata-plot][]: Static plotting library for `spatialdata`. -- [napari-spatialdata][]: napari plugin for `spatialdata`. +- [spatialdata-io][]: `spatialdata` readers and converters for common spatial omics technologies. +- [spatialdata-plot][]: Static plotting library for `spatialdata`.
+- [napari-spatialdata][]: napari plugin for `spatialdata`. They can be installed with: