diff --git a/CHANGELOG.md b/CHANGELOG.md index fa8fcae..7071519 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,24 @@ +# toasty 0.7.0 (2021-08-06) + +- Add the `toasty pipeline ignore-rejects` command to allow you to tell the + Toasty pipeline system to ignore certain images going forward. This will be + helpful if your pipeline provides some images that, say, aren't actually + images of the sky (#51, @pkgw). +- Start requiring and using version 0.10 of the [wwt_data_formats] support + library. This version includes important improvements to how image coordinates + (WCS) are handled. Previously, some kinds of coordinates weren't handled + completely correctly. While it's better to do this correctly, the new code may + break some existing workflows if they accidentally relied on the broken + behavior. +- Implement end-to-end support for tiling FITS data in the backend (#52, @pkgw)! + These datasets can be displayed using the very latest version of the WWT + rendering engine. Toasty's support for creating these datasets needs to be + exposed in a more user-friendly way, including documentation and examples, + but the core algorithms should generate working datasets. + +[wwt_data_formats]: https://wwt-data-formats.readthedocs.io/ + + # toasty 0.6.4 (2021-02-09) - Properly handle CLI glob arguments on Windows. It turns out that we need to diff --git a/README.md b/README.md index 48a7d4e..30a1bd0 100644 --- a/README.md +++ b/README.md @@ -105,7 +105,7 @@ Telescope Team. It is licensed under the [MIT License](./LICENSE). [toasty] is part of the AAS WorldWide Telescope system, a [.NET Foundation] project managed by the non-profit [American Astronomical Society] (AAS). Work on WWT has been supported by the AAS, the US [National Science Foundation] -(grants [1550701] and [1642446]), the [Gordon and Betty Moore Foundation], and +(grants [1550701], [1642446], and [2004840]), the [Gordon and Betty Moore Foundation], and [Microsoft]. 
[.NET Foundation]: https://dotnetfoundation.org/ @@ -113,5 +113,6 @@ on WWT has been supported by the AAS, the US [National Science Foundation] [National Science Foundation]: https://www.nsf.gov/ [1550701]: https://www.nsf.gov/awardsearch/showAward?AWD_ID=1550701 [1642446]: https://www.nsf.gov/awardsearch/showAward?AWD_ID=1642446 +[2004840]: https://www.nsf.gov/awardsearch/showAward?AWD_ID=2004840 [Gordon and Betty Moore Foundation]: https://www.moore.org/ [Microsoft]: https://www.microsoft.com/ diff --git a/ci/azure-build-and-test.yml b/ci/azure-build-and-test.yml index 36ad949..87a773b 100644 --- a/ci/azure-build-and-test.yml +++ b/ci/azure-build-and-test.yml @@ -134,7 +134,7 @@ jobs: source activate-conda.sh conda activate build set -x - \conda install -y numpydoc sphinx sphinx-automodapi + \conda install -y astropy numpydoc sphinx sphinx-automodapi pip install astropy-sphinx-theme cd docs make html diff --git a/docs/api.rst b/docs/api.rst index 7a995a2..e26a68d 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -14,6 +14,10 @@ Python API Reference :no-inheritance-diagram: :no-inherited-members: +.. automodapi:: toasty.collection + :no-inheritance-diagram: + :no-inherited-members: + .. automodapi:: toasty.image :no-inheritance-diagram: :no-inherited-members: diff --git a/docs/api/toasty.builder.Builder.rst b/docs/api/toasty.builder.Builder.rst index 642565f..04468a5 100644 --- a/docs/api/toasty.builder.Builder.rst +++ b/docs/api/toasty.builder.Builder.rst @@ -21,6 +21,7 @@ Builder ~Builder.apply_avm_info ~Builder.apply_wcs_info ~Builder.cascade + ~Builder.create_wtml_folder ~Builder.default_tiled_study_astrometry ~Builder.load_from_wwtl ~Builder.make_placeholder_thumbnail @@ -41,6 +42,7 @@ Builder .. automethod:: apply_avm_info .. automethod:: apply_wcs_info .. automethod:: cascade + .. automethod:: create_wtml_folder .. automethod:: default_tiled_study_astrometry .. automethod:: load_from_wwtl .. 
automethod:: make_placeholder_thumbnail diff --git a/docs/api/toasty.collection.ImageCollection.rst b/docs/api/toasty.collection.ImageCollection.rst new file mode 100644 index 0000000..c2d5235 --- /dev/null +++ b/docs/api/toasty.collection.ImageCollection.rst @@ -0,0 +1,19 @@ +ImageCollection +=============== + +.. currentmodule:: toasty.collection + +.. autoclass:: ImageCollection + :show-inheritance: + + .. rubric:: Methods Summary + + .. autosummary:: + + ~ImageCollection.descriptions + ~ImageCollection.images + + .. rubric:: Methods Documentation + + .. automethod:: descriptions + .. automethod:: images diff --git a/docs/api/toasty.collection.RubinDirectoryCollection.rst b/docs/api/toasty.collection.RubinDirectoryCollection.rst new file mode 100644 index 0000000..da651e9 --- /dev/null +++ b/docs/api/toasty.collection.RubinDirectoryCollection.rst @@ -0,0 +1,19 @@ +RubinDirectoryCollection +======================== + +.. currentmodule:: toasty.collection + +.. autoclass:: RubinDirectoryCollection + :show-inheritance: + + .. rubric:: Methods Summary + + .. autosummary:: + + ~RubinDirectoryCollection.descriptions + ~RubinDirectoryCollection.images + + .. rubric:: Methods Documentation + + .. automethod:: descriptions + .. automethod:: images diff --git a/docs/api/toasty.collection.SimpleFitsCollection.rst b/docs/api/toasty.collection.SimpleFitsCollection.rst new file mode 100644 index 0000000..1dce190 --- /dev/null +++ b/docs/api/toasty.collection.SimpleFitsCollection.rst @@ -0,0 +1,19 @@ +SimpleFitsCollection +==================== + +.. currentmodule:: toasty.collection + +.. autoclass:: SimpleFitsCollection + :show-inheritance: + + .. rubric:: Methods Summary + + .. autosummary:: + + ~SimpleFitsCollection.descriptions + ~SimpleFitsCollection.images + + .. rubric:: Methods Documentation + + .. automethod:: descriptions + .. 
automethod:: images diff --git a/docs/api/toasty.image.Image.rst b/docs/api/toasty.image.Image.rst index b5b6580..4b8c09e 100644 --- a/docs/api/toasty.image.Image.rst +++ b/docs/api/toasty.image.Image.rst @@ -25,9 +25,13 @@ Image ~Image.asarray ~Image.aspil ~Image.clear + ~Image.ensure_negative_parity ~Image.fill_into_maskable_buffer + ~Image.flip_parity ~Image.from_array ~Image.from_pil + ~Image.get_parity_sign + ~Image.has_wcs ~Image.make_thumbnail_bitmap ~Image.save ~Image.update_into_maskable_buffer @@ -47,9 +51,13 @@ Image .. automethod:: asarray .. automethod:: aspil .. automethod:: clear + .. automethod:: ensure_negative_parity .. automethod:: fill_into_maskable_buffer + .. automethod:: flip_parity .. automethod:: from_array .. automethod:: from_pil + .. automethod:: get_parity_sign + .. automethod:: has_wcs .. automethod:: make_thumbnail_bitmap .. automethod:: save .. automethod:: update_into_maskable_buffer diff --git a/docs/api/toasty.image.ImageDescription.rst b/docs/api/toasty.image.ImageDescription.rst new file mode 100644 index 0000000..e59b43e --- /dev/null +++ b/docs/api/toasty.image.ImageDescription.rst @@ -0,0 +1,41 @@ +ImageDescription +================ + +.. currentmodule:: toasty.image + +.. autoclass:: ImageDescription + :show-inheritance: + + .. rubric:: Attributes Summary + + .. autosummary:: + + ~ImageDescription.dtype + ~ImageDescription.height + ~ImageDescription.mode + ~ImageDescription.shape + ~ImageDescription.wcs + ~ImageDescription.width + + .. rubric:: Methods Summary + + .. autosummary:: + + ~ImageDescription.ensure_negative_parity + ~ImageDescription.flip_parity + ~ImageDescription.get_parity_sign + + .. rubric:: Attributes Documentation + + .. autoattribute:: dtype + .. autoattribute:: height + .. autoattribute:: mode + .. autoattribute:: shape + .. autoattribute:: wcs + .. autoattribute:: width + + .. rubric:: Methods Documentation + + .. automethod:: ensure_negative_parity + .. automethod:: flip_parity + .. 
automethod:: get_parity_sign diff --git a/docs/api/toasty.image.ImageMode.rst b/docs/api/toasty.image.ImageMode.rst index cccb6e3..e8e8550 100644 --- a/docs/api/toasty.image.ImageMode.rst +++ b/docs/api/toasty.image.ImageMode.rst @@ -20,6 +20,7 @@ ImageMode .. autosummary:: + ~ImageMode.from_array_info ~ImageMode.make_maskable_buffer ~ImageMode.try_as_pil @@ -33,5 +34,6 @@ ImageMode .. rubric:: Methods Documentation + .. automethod:: from_array_info .. automethod:: make_maskable_buffer .. automethod:: try_as_pil diff --git a/docs/api/toasty.image.get_format_vertical_parity_sign.rst b/docs/api/toasty.image.get_format_vertical_parity_sign.rst new file mode 100644 index 0000000..885d7b9 --- /dev/null +++ b/docs/api/toasty.image.get_format_vertical_parity_sign.rst @@ -0,0 +1,6 @@ +get_format_vertical_parity_sign +=============================== + +.. currentmodule:: toasty.image + +.. autofunction:: get_format_vertical_parity_sign diff --git a/docs/api/toasty.multi_tan.MultiTanDataSource.rst b/docs/api/toasty.multi_tan.MultiTanDataSource.rst deleted file mode 100644 index 5b3ea40..0000000 --- a/docs/api/toasty.multi_tan.MultiTanDataSource.rst +++ /dev/null @@ -1,21 +0,0 @@ -MultiTanDataSource -================== - -.. currentmodule:: toasty.multi_tan - -.. autoclass:: MultiTanDataSource - :show-inheritance: - - .. rubric:: Methods Summary - - .. autosummary:: - - ~MultiTanDataSource.compute_global_pixelization - ~MultiTanDataSource.create_wtml - ~MultiTanDataSource.generate_deepest_layer_numpy - - .. rubric:: Methods Documentation - - .. automethod:: compute_global_pixelization - .. automethod:: create_wtml - .. automethod:: generate_deepest_layer_numpy diff --git a/docs/api/toasty.multi_tan.MultiTanProcessor.rst b/docs/api/toasty.multi_tan.MultiTanProcessor.rst new file mode 100644 index 0000000..69f8c78 --- /dev/null +++ b/docs/api/toasty.multi_tan.MultiTanProcessor.rst @@ -0,0 +1,19 @@ +MultiTanProcessor +================= + +.. 
currentmodule:: toasty.multi_tan + +.. autoclass:: MultiTanProcessor + :show-inheritance: + + .. rubric:: Methods Summary + + .. autosummary:: + + ~MultiTanProcessor.compute_global_pixelization + ~MultiTanProcessor.tile + + .. rubric:: Methods Documentation + + .. automethod:: compute_global_pixelization + .. automethod:: tile diff --git a/docs/api/toasty.pyramid.PyramidIO.rst b/docs/api/toasty.pyramid.PyramidIO.rst index 60cb065..59eb748 100644 --- a/docs/api/toasty.pyramid.PyramidIO.rst +++ b/docs/api/toasty.pyramid.PyramidIO.rst @@ -10,6 +10,9 @@ PyramidIO .. autosummary:: + ~PyramidIO.clean_lockfiles + ~PyramidIO.get_default_format + ~PyramidIO.get_default_vertical_parity_sign ~PyramidIO.get_path_scheme ~PyramidIO.open_metadata_for_read ~PyramidIO.open_metadata_for_write @@ -20,6 +23,9 @@ PyramidIO .. rubric:: Methods Documentation + .. automethod:: clean_lockfiles + .. automethod:: get_default_format + .. automethod:: get_default_vertical_parity_sign .. automethod:: get_path_scheme .. automethod:: open_metadata_for_read .. automethod:: open_metadata_for_write diff --git a/docs/cli.rst b/docs/cli.rst index 3c0fad4..82c3ff4 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -9,6 +9,7 @@ CLI Reference cli/make-thumbnail cli/pipeline-approve cli/pipeline-fetch + cli/pipeline-ignore-rejects cli/pipeline-init cli/pipeline-process-todos cli/pipeline-publish diff --git a/docs/cli/pipeline-ignore-rejects.rst b/docs/cli/pipeline-ignore-rejects.rst new file mode 100644 index 0000000..4ade860 --- /dev/null +++ b/docs/cli/pipeline-ignore-rejects.rst @@ -0,0 +1,50 @@ +.. _cli-pipeline-ignore-rejects: + +================================== +``toasty pipeline ignore-rejects`` +================================== + +The ``ignore-rejects`` :ref:`pipeline command ` marks all of the +rejected images in a workspace to be ignored by future processing runs. + + +Usage +===== + +.. 
code-block:: shell + + toasty pipeline ignore-rejects [--workdir=WORKDIR] + +The ``WORKDIR`` argument optionally specifies the location of the pipeline +workspace directory. The default is the current directory. + + +Example +======= + +Ignore every image in the ``rejects`` directory: + +.. code-block:: shell + + toasty pipeline ignore-rejects + +Subsequent invocations of :ref:`cli-pipeline-refresh` will ignore these images +when searching for new processing candidates. + + +Notes +===== + +This command will ignore every image in the “reject” state. That is, each file +inside the ``rejects`` subfolder of the pipeline workspace will be taken to be +an image ID that should be ignored. If you have rejects that should *not* be +permanently ignored (maybe you can't solve their coordinates right now, but will +be able to later), delete their files from the ``rejects`` subfolder before +running this command. + + +See Also +======== + +- :ref:`The toasty pipeline processing overview ` +- :ref:`cli-pipeline-refresh` diff --git a/docs/cli/pipeline-refresh.rst b/docs/cli/pipeline-refresh.rst index 3d23de6..f365655 100644 --- a/docs/cli/pipeline-refresh.rst +++ b/docs/cli/pipeline-refresh.rst @@ -31,9 +31,14 @@ image, a file will be created in the ``candidates`` subdirectory. The next step is to select candidates for processing and download them using :ref:`cli-pipeline-fetch`. +If there are candidates that should *never* be processed, they can be marked to +be ignored by subsequent refreshes. Currently you can only do this with images +that are rejected during processing, using :ref:`cli-pipeline-ignore-rejects`. 
+ See Also ======== - :ref:`The toasty pipeline processing overview ` - :ref:`cli-pipeline-fetch` +- :ref:`cli-pipeline-ignore-rejects` diff --git a/docs/conf.py b/docs/conf.py index 85f8585..098a937 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -43,12 +43,12 @@ ), 'astropy': ( - 'http://docs.astropy.org/en/stable/', + 'https://docs.astropy.org/en/stable/', None ), 'numpy': ( - 'https://docs.scipy.org/doc/numpy/', + 'https://numpy.org/doc/stable/', (None, 'http://data.astropy.org/intersphinx/numpy.inv') ), diff --git a/setup.py b/setup.py index 4f6bafa..527079d 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,7 @@ def get_long_desc(): setup_args = dict( name = 'toasty', # cranko project-name - version = '0.6.4', # cranko project-version + version = '0.7.0', # cranko project-version description = 'Generate TOAST image tile pyramids from existing image data', long_description = get_long_desc(), long_description_content_type = 'text/markdown', @@ -78,7 +78,7 @@ def get_long_desc(): 'pillow>=7.0', 'PyYAML>=5.0', 'tqdm>=4.0', - 'wwt_data_formats>=0.9.1', + 'wwt_data_formats>=0.10.0', ], extras_require = { diff --git a/toasty/builder.py b/toasty/builder.py index a5cedda..11edfbd 100644 --- a/toasty/builder.py +++ b/toasty/builder.py @@ -47,25 +47,42 @@ class Builder(object): def __init__(self, pio): self.pio = pio + self.imgset = ImageSet() self.imgset.name = 'Toasty' + self.imgset.file_type = '.' + pio.get_default_format() + self.imgset.url = pio.get_path_scheme() + self.imgset.file_type + self.place = Place() self.place.foreground_image_set = self.imgset self.place.name = 'Toasty' + def _check_no_wcs_yet(self): + """ + The astrometric fields of ImageSet change their meaning depending on + whether the image in question is tiled or not. Therefore, you'll get + bogus results if change the tiling status *after* setting the + astrometric information. 
This method should be called by other methods + that control tiling in order to catch the issue if the user does things + backwards. + """ + if self.imgset.center_x != 0 or self.imgset.center_y != 0: + raise Exception('order-of-operations error: you must apply WCS after applying tiling settings') + + def tile_base_as_study(self, image, **kwargs): from .study import tile_study_image + self._check_no_wcs_yet() tiling = tile_study_image(image, self.pio, **kwargs) tiling.apply_to_imageset(self.imgset) - self.imgset.url = self.pio.get_path_scheme() + '.png' - self.imgset.file_type = '.png' return self def default_tiled_study_astrometry(self): + self._check_no_wcs_yet() self.imgset.data_set_type = DataSetType.SKY self.imgset.base_degrees_per_tile = 1.0 self.imgset.projection = ProjectionType.TAN @@ -73,7 +90,7 @@ def default_tiled_study_astrometry(self): return self - def load_from_wwtl(self, cli_settings, wwtl_path): + def load_from_wwtl(self, cli_settings, wwtl_path, cli_progress=False): from contextlib import closing from io import BytesIO @@ -95,13 +112,16 @@ def load_from_wwtl(self, cli_settings, wwtl_path): img_data = lc.read_layer_file(layer, layer.extension) img = loader.load_stream(BytesIO(img_data)) + self.imgset = imgset + self.place.foreground_image_set = self.imgset + # Transmogrify untiled image info to tiled image info. We reuse the # existing imageset as much as possible, but update the parameters that # change in the tiling process. 
- self.imgset = imgset - self.place.foreground_image_set = self.imgset + wcs_keywords = self.imgset.wcs_headers_from_position() - self.tile_base_as_study(img) + self.imgset.center_x = self.imgset.center_y = 0 # hack to satisfy _check_no_wcs_yet() + self.tile_base_as_study(img, cli_progress=cli_progress) self.imgset.set_position_from_wcs(wcs_keywords, img.width, img.height, place=self.place) return img @@ -109,6 +129,8 @@ def load_from_wwtl(self, cli_settings, wwtl_path): def toast_base(self, sampler, depth, is_planet=False, **kwargs): from .toast import sample_layer + + self._check_no_wcs_yet() sample_layer(self.pio, sampler, depth, **kwargs) if is_planet: @@ -117,10 +139,8 @@ def toast_base(self, sampler, depth, is_planet=False, **kwargs): self.imgset.data_set_type = DataSetType.SKY self.imgset.base_degrees_per_tile = 180 - self.imgset.file_type = '.png' self.imgset.projection = ProjectionType.TOAST self.imgset.tile_levels = depth - self.imgset.url = self.pio.get_path_scheme() + '.png' self.place.zoom_level = 360 return self @@ -189,8 +209,11 @@ def set_name(self, name): return self - def write_index_rel_wtml(self): - from wwt_data_formats import write_xml_doc + def create_wtml_folder(self): + """ + Create a one-item :class:`wwt_data_formats.folder.Folder` object + capturing this image. 
+ """ from wwt_data_formats.folder import Folder self.place.name = self.imgset.name @@ -210,6 +233,14 @@ def write_index_rel_wtml(self): else: folder.children = [self.place] + return folder + + + def write_index_rel_wtml(self): + from wwt_data_formats import write_xml_doc + + folder = self.create_wtml_folder() + with self.pio.open_metadata_for_write('index_rel.wtml') as f: write_xml_doc(folder.to_xml(), dest_stream=f, dest_wants_bytes=True) diff --git a/toasty/cli.py b/toasty/cli.py index 66dc73a..f1e0e69 100644 --- a/toasty/cli.py +++ b/toasty/cli.py @@ -111,50 +111,6 @@ def make_thumbnail_impl(settings): thumb.save(f, format='JPEG') -# "multi_tan_make_data_tiles" subcommand - -def multi_tan_make_data_tiles_getparser(parser): - parser.add_argument( - '--hdu-index', - metavar = 'INDEX', - type = int, - default = 0, - help = 'Which HDU to load in each input FITS file', - ) - parser.add_argument( - '--outdir', - metavar = 'PATH', - default = '.', - help = 'The root directory of the output tile pyramid', - ) - parser.add_argument( - 'paths', - metavar = 'PATHS', - action = EnsureGlobsExpandedAction, - nargs = '+', - help = 'The FITS files with image data', - ) - -def multi_tan_make_data_tiles_impl(settings): - from .multi_tan import MultiTanDataSource - from .pyramid import PyramidIO - - pio = PyramidIO(settings.outdir, default_format='npy') - ds = MultiTanDataSource(settings.paths, hdu_index=settings.hdu_index) - ds.compute_global_pixelization() - - print('Generating Numpy-formatted data tiles in directory {!r} ...'.format(settings.outdir)) - percentiles = ds.generate_deepest_layer_numpy(pio) - - if len(percentiles): - print() - print('Median percentiles in the data:') - for p in sorted(percentiles.keys()): - print(' {} = {}'.format(p, percentiles[p])) - - # TODO: this should populate and emit a stub index_rel.wtml file. 
- - # "pipeline" subcommands from .pipeline.cli import pipeline_getparser, pipeline_impl @@ -298,6 +254,63 @@ def tile_healpix_impl(settings): print(f' toasty cascade --start {builder.imgset.tile_levels} {settings.outdir}') +# "tile_multi_tan" subcommand + +def tile_multi_tan_getparser(parser): + parser.add_argument( + '--parallelism', '-j', + metavar = 'COUNT', + type = int, + help = 'The parallelization level (default: use all CPUs; specify `1` to force serial processing)', + ) + parser.add_argument( + '--hdu-index', + metavar = 'INDEX', + type = int, + default = 0, + help = 'Which HDU to load in each input FITS file', + ) + parser.add_argument( + '--outdir', + metavar = 'PATH', + default = '.', + help = 'The root directory of the output tile pyramid', + ) + parser.add_argument( + 'paths', + metavar = 'PATHS', + action = EnsureGlobsExpandedAction, + nargs = '+', + help = 'The FITS files with image data', + ) + +def tile_multi_tan_impl(settings): + from .builder import Builder + from .collection import SimpleFitsCollection + from .image import ImageMode + from .multi_tan import MultiTanProcessor + from .pyramid import PyramidIO + + pio = PyramidIO(settings.outdir, default_format='fits') + builder = Builder(pio) + + collection = SimpleFitsCollection(settings.paths, hdu_index=settings.hdu_index) + + mtp = MultiTanProcessor(collection) + mtp.compute_global_pixelization(builder) + mtp.tile( + pio, + parallel=settings.parallelism, + cli_progress=True, + ) + builder.write_index_rel_wtml() + + print(f'Successfully tiled inputs at level {builder.imgset.tile_levels}.') + print('To create parent tiles, consider running:') + print() + print(f' toasty cascade --start {builder.imgset.tile_levels} {settings.outdir}') + + # "tile_study" subcommand def tile_study_getparser(parser): @@ -389,7 +402,7 @@ def tile_wwtl_impl(settings): pio = PyramidIO(settings.outdir) builder = Builder(pio) - img = builder.load_from_wwtl(settings, settings.wwtl_path) + img = 
builder.load_from_wwtl(settings, settings.wwtl_path, cli_progress=True) # Do the thumbnail first since for large inputs it can be the memory high-water mark! if settings.placeholder_thumbnail: @@ -397,7 +410,6 @@ else: builder.make_thumbnail_from_other(img) - builder.tile_base_as_study(img, cli_progress=True) builder.write_index_rel_wtml() print(f'Successfully tiled input "{settings.wwtl_path}" at level {builder.imgset.tile_levels}.') diff --git a/toasty/collection.py b/toasty/collection.py new file mode 100644 index 0000000..1292779 --- /dev/null +++ b/toasty/collection.py @@ -0,0 +1,182 @@ +# -*- mode: python; coding: utf-8 -*- +# Copyright 2021 the AAS WorldWide Telescope project +# Licensed under the MIT License. + +""" +Collections of related input images. + +Some Toasty processing tasks operate over collections of input images that are +related in some way. This module provides standardized mechanisms for +manipulating such collections. In particular, it provides a framework for +scanning over image "descriptions" without reading in the complete image data. +This can be useful because many collection-related operations want to do an +initial pass over the collection to gather some global information, then a +second pass with the actual data processing. +""" + +__all__ = ''' +ImageCollection +RubinDirectoryCollection +SimpleFitsCollection +'''.split() + +from abc import ABC +from glob import glob +import numpy as np +from os.path import join +import warnings + +from .image import Image, ImageDescription, ImageMode +from .study import StudyTiling + + +class ImageCollection(ABC): + def descriptions(self): + """ + Generate a sequence of :class:`toasty.image.ImageDescription` items + associated with this collection. + + Each description will have an added string attribute ``collection_id`` + that gives a unique textual identifier for the item in the collection.
+ + Unlike :meth:`ImageCollection.images`, this function does not cause the full + data for each image to be loaded. + """ + raise NotImplementedError() + + def images(self): + """ + Generate a sequence of :class:`toasty.image.Image` items associated with + this collection. + + Each image will have an added string attribute ``collection_id`` that + gives a unique textual identifier for the item in the collection. + """ + raise NotImplementedError() + + +class SimpleFitsCollection(ImageCollection): + def __init__(self, paths, hdu_index=None): + self._paths = list(paths) + self._hdu_index = hdu_index + + def _load(self, actually_load_data): + from astropy.io import fits + from astropy.wcs import WCS + + for fits_path in self._paths: + with fits.open(fits_path) as hdul: + if self._hdu_index is not None: + hdu = hdul[self._hdu_index] + else: + for hdu in hdul: + if len(hdu.shape) > 1: + break + + wcs = WCS(hdu.header) + + if actually_load_data: + result = Image.from_array(hdu.data, wcs=wcs, default_format='fits') + else: + shape = hdu.shape + + if hasattr(hdu, 'dtype'): + mode = ImageMode.from_array_info(shape, hdu.dtype) + else: + mode = None # CompImageHDU doesn't have dtype + + result = ImageDescription(mode=mode, shape=shape, wcs=wcs) + + result.collection_id = fits_path + yield result + + def descriptions(self): + return self._load(False) + + def images(self): + return self._load(True) + + +class RubinDirectoryCollection(ImageCollection): + """ + Load up imagery from a directory containing FITS files capturing a Rubin + Observatory tract. + + The directory will be searched for files whose names end in ``.fits``. In + each of those files, the HDUs beyond the first will be treated as separate + science images that should be individually loaded. The images will be + trimmed according to their ``DATASEC`` specification before being returned. + + This class requires the ``ccdproc`` package to trim FITS CCD datasets.
+ """ + + def __init__(self, dirname, unit=None): + self._dirname = dirname + self._unit = unit + + def _load(self, actually_load_data): + from astropy.io import fits + from astropy.nddata import ccddata + import ccdproc + + for fits_path in glob(join(self._dirname, '*.fits')): + # `astropy.nddata.ccddata.fits_ccddata_reader` only opens FITS from + # filenames, not from an open HDUList, which means that creating + # multiple CCDDatas from the same FITS file rapidly becomes + # inefficient. So, we emulate its logic. + + with fits.open(fits_path) as hdu_list: + for idx, hdu in enumerate(hdu_list): + if idx == 0: + header0 = hdu.header + else: + hdr = hdu.header + hdr.extend(header0, unique=True) + + # This ccddata function often generates annoying warnings + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + hdr, wcs = ccddata._generate_wcs_and_update_header(hdr) + + # We can't use `ccdproc.trim_image()` without having a + # CCDData in hand, so we have to create a fake empty + # array even when we're not actually loading any data. + # + # Note: we skip all the unit-handling logic from + # `fits_ccddata_reader` here basically since the LSST + # sim data I'm using don't have anything useful. 
+ + if actually_load_data: + data = hdu.data + + if data.dtype.kind == 'i': + data = data.astype(np.float32) + else: + data = np.empty(hdu.shape, dtype=np.void) + + ccd = ccddata.CCDData(data, meta=hdr, unit=self._unit, wcs=wcs) + ccd = ccdproc.trim_image(ccd, fits_section=ccd.header['DATASEC']) + data = ccd.data + shape = data.shape + wcs = ccd.wcs + + if actually_load_data: + mode = ImageMode.from_array_info(shape, data.dtype) + elif hasattr(hdu, 'dtype'): + mode = ImageMode.from_array_info(shape, hdu.dtype) + else: + mode = None # CompImageHDU doesn't have dtype + + if actually_load_data: + result = Image.from_array(data, wcs=wcs, default_format='fits') + else: + result = ImageDescription(mode=mode, shape=shape, wcs=wcs) + + result.collection_id = f'{fits_path}:{idx}' + yield result + + def descriptions(self): + return self._load(False) + + def images(self): + return self._load(True) diff --git a/toasty/image.py b/toasty/image.py index 9176028..54f7273 100644 --- a/toasty/image.py +++ b/toasty/image.py @@ -1,5 +1,5 @@ # -*- mode: python; coding: utf-8 -*- -# Copyright 2020 the AAS WorldWide Telescope project +# Copyright 2020-2021 the AAS WorldWide Telescope project # Licensed under the MIT License. """ @@ -13,9 +13,12 @@ from __future__ import absolute_import, division, print_function __all__ = ''' +get_format_vertical_parity_sign Image +ImageDescription ImageLoader ImageMode +SUPPORTED_FORMATS '''.split() from enum import Enum @@ -44,6 +47,38 @@ def _validate_format(name, fmt): raise ValueError('{0} should be one of {1}'.format(name, '/'.join(sorted(SUPPORTED_FORMATS)))) +def get_format_vertical_parity_sign(format): + """ + Get the vertical data layout associated with an image format, expressed as a + coordinate system parity sign. + + Parameters + ---------- + format : :class:`str` + The format name; one of ``SUPPORTED_FORMATS`` + + Returns + ------- + Either +1 or -1, depending on the format's preferred parity. 
FITS has +1 and + everything else has -1 (for now). + + Notes + ----- + See :meth:`Image.get_parity_sign` for a discussion of general parity + concepts. While parity should be associated with coordinate systems, rather + than image file formats, there is a connection in the vertical data layout + that different file formats are assumed to use. In particular, WWT assumes + that most image file formats use a "top-down" data layout: if the data + buffer is a 2D array expressed in standard C/Python conventions, the zeroth + row (``data[0,:]``) is located at the top of the image. FITS formats, on the + other hand, are assumed to be bottoms-up: the zeroth row is at the bottom. + While proper WCS coordinates can convey *either* parity in a FITS file, + WWT's renderer assumes bottoms-up. + """ + if format == 'fits': + return +1 + return -1 + def _array_to_mode(array): if array.ndim == 2 and array.dtype.kind == 'f': if array.dtype.itemsize == 4: @@ -72,10 +107,45 @@ class ImageMode(Enum): """ RGB = 'RGB' + "24-bit color with three uint8 channels for red, green, and blue." + RGBA = 'RGBA' + "32-bit color with four uint8 channels for red, green, blue, and alpha (transparency)." + F32 = 'F' + "32-bit floating-point scalar data." + F64 = 'D' + "64-bit floating-point scalar data." + F16x3 = 'F16x3' + """ + 48-bit color with three 16-bit floating-point channels for red, green, and + blue. + + This mode is useful for high-dynamic-range image processing and can be + stored in the OpenEXR file format. 
+ """ + + @classmethod + def from_array_info(cls, shape, dtype): + if len(shape) == 2 and dtype.kind == 'f': + if dtype.itemsize == 4: + return cls.F32 + elif dtype.itemsize == 8: + return cls.F64 + elif len(shape) == 3: + if shape[2] == 3: + if dtype.kind == 'f' and dtype.itemsize == 2: + return cls.F16x3 + elif dtype.kind == 'u' and dtype.itemsize == 1: + return cls.RGB + elif shape[2] == 4: + if dtype.kind == 'u' and dtype.itemsize == 1: + return cls.RGBA + + raise ValueError('Could not determine mode for array with dtype {0} and shape {1}'.format(dtype, shape)) + def make_maskable_buffer(self, buf_height, buf_width): """ @@ -128,6 +198,140 @@ def try_as_pil(self): return self.value +def _wcs_to_parity_sign(wcs): + h = wcs.to_header() + + cd1_1 = h['CDELT1'] * h.get('PC1_1', 1.0) + cd1_2 = h['CDELT1'] * h.get('PC1_2', 0.0) + cd2_1 = h['CDELT2'] * h.get('PC2_1', 0.0) + cd2_2 = h['CDELT2'] * h.get('PC2_2', 1.0) + + det = cd1_1 * cd2_2 - cd1_2 * cd2_1 + + if det < 0: + return 1 # yes! *negative* determinant is *positive* parity sign + return -1 + + +def _flip_wcs_parity(wcs, image_height): + from astropy.wcs import WCS + + h = wcs.to_header() + h['CD1_1'] = h['CDELT1'] * h.setdefault('PC1_1', 1.0) + h['CD1_2'] = h['CDELT1'] * h.setdefault('PC1_2', 0.0) + h['CD2_1'] = h['CDELT2'] * h.setdefault('PC2_1', 0.0) + h['CD2_2'] = h['CDELT2'] * h.setdefault('PC2_2', 1.0) + + for hn in 'CDELT1 CDELT2 PC1_1 PC1_2 PC2_1 PC2_2'.split(): + del h[hn] + + # Here's what we need to flip: + + h['CD1_2'] *= -1 + h['CD2_2'] *= -1 + h['CRPIX2'] = image_height + 1 - h['CRPIX2'] # this is FITS, so pixel indices are 1-based + return WCS(h) + + +class ImageDescription(object): + """ + Information about an image, without its actual bitmap data. + + This comes in handy when processing a large block of images -- often, one + wants to analyze them all to determine some global settings, then do the + actual processing on an image-by-image basis. 
+ """ + + mode = None + "The image's pixel data format." + + shape = None + "The shape of the image's data array." + + wcs = None + "The WCS information associated with the image, if available." + + + def __init__(self, mode=None, shape=None, wcs=None): + self.mode = mode + self.shape = shape + self.wcs = wcs + + # Stuff redundant with plain Image. I would like to reduce the duplication + # here but I'm not liking the idea of making image data-optional. + + @property + def dtype(self): + return self.asarray().dtype + + @property + def width(self): + return self.shape[1] + + @property + def height(self): + return self.shape[0] + + + def get_parity_sign(self): + """ + Get this ImageDescription's parity, based on its WCS. + + Returns + ------- + Either +1 or -1, depending on the image parity. + + Notes + ----- + See :meth:`Image.get_parity_sign` for detailed discussion. + """ + if self.wcs is None: + raise ValueError('cannot determine parity of an ImageDescription without WCS') + + return _wcs_to_parity_sign(self.wcs) + + + def flip_parity(self): + """ + Invert the parity of this ImageDescription's WCS. + + Returns + ------- + *self*, for chaining convenience. + + Notes + ----- + See :meth:`Image.flip_parity` for detailed discussion. This method + inverts the WCS but, because image descriptions do not come with + associated data, doesn't do that. + + """ + if self.wcs is None: + raise ValueError('cannot flip the parity of an ImageDescription without WCS') + + self.wcs = _flip_wcs_parity(self.wcs, self.height) + return self + + + def ensure_negative_parity(self): + """ + Ensure that this ImageDescription has negative parity. + + Returns + ------- + *self*, for chaining convenience. + + Notes + ----- + See :meth:`Image.ensure_negative_parity` for detailed discussion. This + method mgiht invert the WCS but, because image descriptions do not come + with associated data, doesn't do that. 
+ """ + if self.get_parity_sign() == 1: + self.flip_parity() + return self + + class ImageLoader(object): """ A class defining how to load an image. @@ -437,12 +641,13 @@ def load_path(self, path): class Image(object): - """A 2D data array stored in memory. + """ + A 2D data array stored in memory, potential with spatial positioning information. This class primarily exists to help us abstract between the cases where we have "bitmap" RGB(A) images and "science" floating-point images. - """ + _pil = None _array = None _mode = None @@ -611,6 +816,109 @@ def default_format(self, value): else: raise ValueError('Unrecognized format: {0}'.format(value)) + + def has_wcs(self): + """ + Return whether this image has attached WCS information. + + Returns + ------- + True if this image has WCS, False otherwise. + """ + return self._wcs is not None + + + def get_parity_sign(self): + """ + Get this image's parity, based on its WCS. + + Returns + ------- + Either +1 or -1, depending on the image parity as defined below. + + Notes + ----- + Images of the sky can have one of two "parities". An image's parity + relates its pixel buffer coordinate system to the sky coordinate system. + + If you point a digital camera at the sky, take a picture, and save it as + a JPEG, the resulting image will have **negative** parity: if the image + is not rotated, an increasing pixel X value will mean a *decreasing* RA + (longitudinal) coordinate (because RA increases to the left), and an + increasing pixel Y value will mean a decreasing declination + (latitudinal) coordinate as well. Negative parity is also called + "top-down" in WWT, or "JPEG-like", and is associated with a *positive* + determinant of the FITS "CD" matrix. If this image's WCS coordinates + indicate negative parity, this method returns -1. 
+ + FITS images of the sky are flipped in comparison: when no rotation is in + effect, an increasing pixel X value still means a decreasing RA, but an + increasing pixel Y value mean an *increasing* declination. No rotation + operation can convert FITS parity to JPEG parity. This is called + positive parity, AKA "bottoms-up", and is associated with a *negative* + determinant of the FITS CD matrix. If this image's WCS coordinates + indicate positive parity, this method returns +1. + + If the image has no WCS, :exc:`ValueError` will be raised. + + This is all relevant because in WWT, images must be tiled in a + coordinate system that has negative parity. + + """ + if self._wcs is None: + raise ValueError('cannot determine parity of an image without WCS') + + return _wcs_to_parity_sign(self._wcs) + + + def flip_parity(self): + """ + Invert the parity of this image without changing its appearance. + + Returns + ------- + *self*, for chaining convenience. + + Notes + ----- + See :meth:`get_parity_sign` for an introduction to image parity. This + method flips the parity of the current image by vertically flipping both + its pixel data *and* its WCS, so that the image's appearance on the sky + will remain the same. If the image has no WCS, :exc:`ValueError` will be + raised. + + """ + if self._wcs is None: + raise ValueError('cannot flip the parity of an image without WCS') + + self._wcs = _flip_wcs_parity(self._wcs, self.height) + self._array = self.asarray()[::-1] + return self + + + def ensure_negative_parity(self): + """ + Ensure that this image has negative parity. + + Returns + ------- + *self*, for chaining convenience. + + Notes + ----- + See :meth:`get_parity_sign` for an introduction to image parity. This + method ensures that the current image has negative (JPEG-like, top-down) + parity, flipping it from positive parity if needed. Only images with + negative parity may be tiled in WWT. + + This operation requires the image to have WCS information. 
If none is + available, :exc:`ValueError` will be raised. + """ + if self.get_parity_sign() == 1: + self.flip_parity() + return self + + def fill_into_maskable_buffer(self, buffer, iy_idx, ix_idx, by_idx, bx_idx): """ Fill a maskable buffer with a rectangle of data from this image. @@ -726,8 +1034,7 @@ def save(self, path_or_stream, format=None, mode=None): elif format == 'npy': np.save(path_or_stream, self.asarray()) elif format == 'fits': - if self._wcs is None: - header = None if self._wcs is None else self._wcs.toheader() + header = None if self._wcs is None else self._wcs.to_header() fits.writeto(path_or_stream, self.asarray(), header=header, overwrite=True) diff --git a/toasty/merge.py b/toasty/merge.py index 5a340ea..9ae5a50 100644 --- a/toasty/merge.py +++ b/toasty/merge.py @@ -37,6 +37,21 @@ from .image import Image +SLICES_MATCHING_PARITY = [ + (slice(None, 256), slice(None, 256)), + (slice(None, 256), slice(256, None)), + (slice(256, None), slice(None, 256)), + (slice(256, None), slice(256, None)), +] + +SLICES_OPPOSITE_PARITY = [ + (slice(256, None), slice(None, 256)), + (slice(256, None), slice(256, None)), + (slice(None, 256), slice(None, 256)), + (slice(None, 256), slice(256, None)), +] + + def averaging_merger(data): """A merger function that averages quartets of pixels. @@ -96,12 +111,18 @@ def cascade_images(pio, start, merger, parallel=None, cli_progress=False): def _cascade_images_serial(pio, start, merger, cli_progress): buf = None - SLICES = [ - (slice(None, 256), slice(None, 256)), - (slice(None, 256), slice(256, None)), - (slice(256, None), slice(None, 256)), - (slice(256, None), slice(256, None)), - ] + + # Pyramids always follow a negative-parity (JPEG-like) coordinate system: + # tile X=0,Y=0 is at the top left. The file formats for individual tiles may + # share the same parity, or they may be negative: pixel x=0,y=0 is at the + # bottom-left. In particular, this is the case for FITS files. 
When this + # happens, we can pretty much cascade as normal, but when putting tile + # quartets together we need to y-flip at the tile level. + + if pio.get_default_vertical_parity_sign() == 1: + slices = SLICES_OPPOSITE_PARITY + else: + slices = SLICES_MATCHING_PARITY with tqdm(total=pyramid.depth2tiles(start - 1), disable=not cli_progress) as progress: for pos in pyramid.generate_pos(start): @@ -124,7 +145,7 @@ def _cascade_images_serial(pio, start, merger, cli_progress): if buf is not None: buf.clear() - for slidx, subimg in zip(SLICES, (img0, img1, img2, img3)): + for slidx, subimg in zip(slices, (img0, img1, img2, img3)): if subimg is not None: if buf is None: buf = subimg.mode.make_maskable_buffer(512, 512) @@ -256,12 +277,12 @@ def _mp_cascade_worker(done_queue, ready_queue, pio, merger): from queue import Empty buf = None - SLICES = [ - (slice(None, 256), slice(None, 256)), - (slice(None, 256), slice(256, None)), - (slice(256, None), slice(None, 256)), - (slice(256, None), slice(256, None)), - ] + + # See discussion in the serial implementation. + if pio.get_default_vertical_parity_sign() == 1: + slices = SLICES_OPPOSITE_PARITY + else: + slices = SLICES_MATCHING_PARITY while True: try: @@ -286,7 +307,7 @@ def _mp_cascade_worker(done_queue, ready_queue, pio, merger): if buf is not None: buf.clear() - for slidx, subimg in zip(SLICES, (img0, img1, img2, img3)): + for slidx, subimg in zip(slices, (img0, img1, img2, img3)): if subimg is not None: if buf is None: buf = subimg.mode.make_maskable_buffer(512, 512) diff --git a/toasty/multi_tan.py b/toasty/multi_tan.py index 093c423..d6c7c2e 100644 --- a/toasty/multi_tan.py +++ b/toasty/multi_tan.py @@ -1,415 +1,310 @@ # -*- mode: python; coding: utf-8 -*- -# Copyright 2019-2020 the AAS WorldWide Telescope project +# Copyright 2019-2021 the AAS WorldWide Telescope project # Licensed under the MIT License. -"""Generate tiles from a collection of images on a common TAN projection. 
- -TODO: shuld be migrated to wwt_data_formats. - """ -from __future__ import absolute_import, division, print_function +Generate tiles from a collection of images on a common TAN projection. +""" __all__ = ''' -MultiTanDataSource +MultiTanProcessor '''.split() +from astropy.wcs import WCS import numpy as np +from tqdm import tqdm +import warnings -from .pyramid import Pos, next_highest_power_of_2 +from .study import StudyTiling MATCH_HEADERS = [ 'CTYPE1', 'CTYPE2', 'CRVAL1', 'CRVAL2', + 'CDELT1', 'CDELT2', ] # In my sample set, some of these files vary minutely from one header to the # next, so we save them but don't demand exact matching. We should use # np.isclose() or whatever it is. SAVE_HEADERS = [ - 'CD1_1', 'CD2_2', + 'PC1_1', 'PC2_2', ] -class MultiTanDataSource(object): - """Generate tiles from a collection of images on a common TAN projection. +class MultiTanDescriptor(object): + ident = None + in_shape = None + + crxmin = None + crxmax = None + crymin = None + crymax = None - Some large astronomical images are stored as a collection of sub-images - that share a common tangential projection, a format is that is nice and - easy to convert into a WWT "study" tile pyramid. This class can process a - collection of such images and break them into the highest-resolution layer - of such a tile pyramid. + imin = None + imax = None + jmin = None + jmax = None + + sub_tiling = None + + +class MultiTanProcessor(object): + """ + Generate tiles from a collection of images on a common TAN projection. + Some large astronomical images are stored as a collection of sub-images that + share a common tangential projection, a format is that is nice and easy to + convert into a WWT "study" tile pyramid. This class can process a collection + of such images and break them into the highest-resolution layer of such a + tile pyramid. """ - _ra_deg = None - "The RA of the center of the TAN projection on the sky, in degrees." 
- _dec_deg = None - "The declination of the center of the TAN projection on the sky, in degrees." - _rot_rad = None - "The rotation of the TAN projection on the sky, in radians." - _width = None - "The width of the region in which image data are available, in pixels." - _height = None - "The height of the region in which image data are available, in pixels." - _scale_x = None - "The horizontal pixel scale, in degrees per pixel. May be negative." - _scale_y = None - "The vertical pixel scale, in degrees per pixel. Always positive." - _tile_levels = None - "How many levels there are in the tile pyramid for this study." - _p2w = None - """The width of the highest-resolution tiling, in pixels. This is the - first power of 2 greater than or equal to _width.""" - _p2h = None - """The height of the highest-resolution tiling, in pixels. This is the - first power of 2 greater than or equal to _height.""" - _center_crx = None - """The X position of the image center, in pixels relative to the center of - the tangential projection.""" - _center_cry = None - """The Y position of the image center, in pixels relative to the center of - the tangential projection.""" - _crxmin = None - "The minimum observed pixel X index, relative to the CRPIX of the projection." - _crymin = None - "The minimum observed pixel Y index, relative to the CRPIX of the projection." - - def __init__(self, paths, hdu_index=0): - self._paths = paths - self._hdu_index = hdu_index - - - def _input_hdus(self): - from astropy.io import fits - - for path in self._paths: - with fits.open(path) as hdu_list: - yield path, hdu_list[self._hdu_index] - - - def compute_global_pixelization(self): - """Read the input images to determine the global pixelation of this data set. + + def __init__(self, collection): + self._collection = collection + + + def compute_global_pixelization(self, builder): + """ + Read the input images to determine the global pixelation of this data + set. 
This function reads the FITS headers of all of the input data files to determine the overall image size and the parameters of its tiling as a - WWT study. - + WWT study. This should be pretty easy, because we've been assured that + everything is on a nice common TAN, which is exactly what we need to end + up with anyway. """ + + self._descs = [] ref_headers = None - crxmin = None + global_crxmin = None + + for desc in self._collection.descriptions(): + # For figuring out the tiling properties, we need to ensure that + # we're working in top-down mode everywhere. + desc.ensure_negative_parity() + + # Build up information for the common TAN projection. + header = desc.wcs.to_header() - for path, hdu in self._input_hdus(): if ref_headers is None: ref_headers = {} - for header in MATCH_HEADERS: - ref_headers[header] = hdu.header[header] + for h in MATCH_HEADERS: + ref_headers[h] = header[h] - for header in SAVE_HEADERS: - ref_headers[header] = hdu.header[header] + for h in SAVE_HEADERS: + ref_headers[h] = header[h] if ref_headers['CTYPE1'] != 'RA---TAN': - raise Exception('all inputs must be in a TAN projection, but {} is not'.format(path)) + raise Exception('all inputs must be in a TAN projection, but {} is not'.format(input_id)) if ref_headers['CTYPE2'] != 'DEC--TAN': - raise Exception('all inputs must be in a TAN projection, but {} is not'.format(path)) + raise Exception('all inputs must be in a TAN projection, but {} is not'.format(input_id)) else: - for header in MATCH_HEADERS: - expected = ref_headers[header] - observed = hdu.header[header] + for h in MATCH_HEADERS: + expected = ref_headers[h] + observed = header[h] if observed != expected: raise Exception('inputs are not on uniform WCS grid; in file {}, expected ' - 'value {} for header {} but observed {}'.format(path, expected, header, observed)) + 'value {} for header {} but observed {}'.format(input_id, expected, h, observed)) + + this_crpix1 = header['CRPIX1'] - 1 + this_crpix2 = header['CRPIX2'] - 1 - 
crpix1 = hdu.header['CRPIX1'] - 1 - crpix2 = hdu.header['CRPIX2'] - 1 + mtdesc = MultiTanDescriptor() + mtdesc.ident = desc.collection_id + mtdesc.in_shape = desc.shape - this_crxmin = 0 - crpix1 - this_crxmax = (hdu.shape[1] - 1) - crpix1 - this_crymin = 0 - crpix2 - this_crymax = (hdu.shape[0] - 1) - crpix2 + mtdesc.crxmin = 0 - this_crpix1 + mtdesc.crxmax = (desc.shape[1] - 1) - this_crpix1 + mtdesc.crymin = 0 - this_crpix2 + mtdesc.crymax = (desc.shape[0] - 1) - this_crpix2 - if crxmin is None: - crxmin = this_crxmin - crxmax = this_crxmax - crymin = this_crymin - crymax = this_crymax + if global_crxmin is None: + global_crxmin = mtdesc.crxmin + global_crxmax = mtdesc.crxmax + global_crymin = mtdesc.crymin + global_crymax = mtdesc.crymax else: - crxmin = min(crxmin, this_crxmin) - crxmax = max(crxmax, this_crxmax) - crymin = min(crymin, this_crymin) - crymax = max(crymax, this_crymax) - - # Figure out the global properties of the tiled TAN representation - self._crxmin = crxmin - self._crymin = crymin - - self._width = int(crxmax - self._crxmin) + 1 - self._height = int(crymax - self._crymin) + 1 - self._p2w = next_highest_power_of_2(self._width) - self._p2h = next_highest_power_of_2(self._height) - - if self._p2w != self._p2h: # TODO: figure this out and make it work. 
- raise Exception('TODO: we don\'t properly handle non-square-ish images; got {},{}'.format(self._p2w, self._p2h)) - p2max = max(self._p2w, self._p2h) - self._tile_levels = int(np.log2(p2max / 256)) - nfullx = self._p2w // 256 - nfully = self._p2h // 256 - - self._ra_deg = ref_headers['CRVAL1'] - self._dec_deg = ref_headers['CRVAL2'] - - cd1_1 = ref_headers['CD1_1'] - cd2_2 = ref_headers['CD2_2'] - cd1_2 = ref_headers.get('CD1_2', 0.0) - cd2_1 = ref_headers.get('CD2_1', 0.0) - - if cd1_1 * cd2_2 - cd1_2 * cd2_1 < 0: - cd_sign = -1 - else: - cd_sign = 1 + global_crxmin = min(global_crxmin, mtdesc.crxmin) + global_crxmax = max(global_crxmax, mtdesc.crxmax) + global_crymin = min(global_crymin, mtdesc.crymin) + global_crymax = max(global_crymax, mtdesc.crymax) - self._rot_rad = np.arctan2(-cd_sign * cd1_2, cd2_2) - self._scale_x = np.sqrt(cd1_1**2 + cd2_1**2) * cd_sign - self._scale_y = np.sqrt(cd1_2**2 + cd2_2**2) + self._descs.append(mtdesc) - self._center_crx = self._width // 2 + self._crxmin - self._center_cry = self._height // 2 + self._crymin + # We can now compute the global properties of the tiled TAN representation: - return self # chaining convenience + width = int(global_crxmax - global_crxmin) + 1 + height = int(global_crymax - global_crymin) + 1 + self._tiling = StudyTiling(width, height) - def create_wtml( - self, - name = 'MultiTan', - url_prefix = './', - fov_factor = 1.7, - bandpass = 'Visible', - description_text = '', - credits_text = 'Created by toasty, part of the AAS WorldWide Telescope.', - credits_url = '', - thumbnail_url = '', - ): - """Create a WTML document with the proper metadata for this data set. - - :meth:`compute_global_pixelization` must have been called first. 
+ ref_headers['CRPIX1'] = this_crpix1 + 1 + (mtdesc.crxmin - global_crxmin) + ref_headers['CRPIX2'] = this_crpix2 + 1 + (mtdesc.crymin - global_crymin) + wcs = WCS(ref_headers) - Parameters - ---------- - name : str, defaults to "MultiTan" - The dataset name to embed in the WTML file. - url_prefix : str, default to "./" - The beginning of the URL path to the tile data. The URL given in - the WTML will be "URL_PREFIX{1}/{3}/{3}_{2}.png" - fov_factor : float, defaults to 1.7 - The WTML file specifies the height of viewport that the client should - zoom to when viewing this image. The value used is *fov_factor* times - the height of the image. - bandpass : str, defaults to "Visible" - The bandpass of the image, as chosen from a menu of options - supported by the WTML format: "Gamma", "HydrogenAlpha", "IR", - "Microwave", "Radio", "Ultraviolet", "Visible", "VisibleNight", - "XRay". - description_text : str, defaults to "" - Free text describing what this image is. - credits_text : str, defaults to a toasty credit - A brief textual credit of who created and processed the image data. - credits_url: str, defaults to "" - A URL with additional image credit information, or the empty string - if no such URL is available. - thumbnail_url : str, defaults to "" - A URL of a thumbnail image (96x45 JPEG) representing this image data - set, or the empty string for a default to be used. - - Returns - ------- - folder : xml.etree.ElementTree.Element - An XML element containing the WWT metadata. - - Examples - -------- - To convert the returned XML structure into text, use - :func:`xml.etree.ElementTree.tostring`: - >>> from xml.etree import ElementTree as etree - >>> folder = data_source.create_wtml() - >>> print(etree.tostring(folder)) + self._tiling.apply_to_imageset(builder.imgset) + builder.apply_wcs_info(wcs, width, height) + + # While we're here, figure out how each input will map onto the global + # tiling. 
This makes sure that nothing funky happened during the + # computation and allows us to know how many tiles we'll have to visit. + + self._n_todo = 0 + + for desc in self._descs: + desc.imin = int(np.floor(desc.crxmin - global_crxmin)) + desc.imax = int(np.ceil(desc.crxmax - global_crxmin)) + desc.jmin = int(np.floor(desc.crymin - global_crymin)) + desc.jmax = int(np.ceil(desc.crymax - global_crymin)) + + # Compute the sub-tiling now so that we can count how many total + # tiles we'll need to process. + + if desc.imax < desc.imin or desc.jmax < desc.jmin: + raise Exception(f'segment {desc.ident} maps to zero size in the global mosaic') + + desc.sub_tiling = self._tiling.compute_for_subimage( + desc.imin, + desc.jmin, + desc.imax - desc.imin, + desc.jmax - desc.jmin, + ) + + self._n_todo += desc.sub_tiling.count_populated_positions() + + return self # chaining convenience + + def tile(self, pio, parallel=None, cli_progress=False, **kwargs): """ - from xml.etree import ElementTree as etree - - folder = etree.Element('Folder') - folder.set('Name', name) - folder.set('Group', 'Explorer') - - place = etree.SubElement(folder, 'Place') - place.set('Name', name) - place.set('DataSetType', 'Sky') - place.set('Rotation', str(self._rot_rad * 180 / np.pi)) - place.set('Angle', '0') - place.set('Opacity', '100') - place.set('RA', str(self._ra_deg / 15)) # this RA is in hours - place.set('Dec', str(self._dec_deg)) - place.set('ZoomLevel', str(self._height * self._scale_y * fov_factor * 6)) # zoom = 6 * (FOV height in deg) - # skipped: Constellation, Classification, Magnitude, AngularSize - - fgimgset = etree.SubElement(place, 'ForegroundImageSet') - - imgset = etree.SubElement(fgimgset, 'ImageSet') - imgset.set('Name', name) - imgset.set('Url', url_prefix + '{1}/{3}/{3}_{2}.png') - imgset.set('WidthFactor', '2') - imgset.set('BaseTileLevel', '0') - imgset.set('TileLevels', str(self._tile_levels)) - imgset.set('BaseDegreesPerTile', str(self._scale_y * self._p2h)) - 
imgset.set('FileType', '.png') - imgset.set('BottomsUp', 'False') - imgset.set('Projection', 'Tan') - imgset.set('CenterX', str(self._ra_deg)) - imgset.set('CenterY', str(self._dec_deg)) - imgset.set('OffsetX', str(self._center_crx * np.abs(self._scale_x))) - imgset.set('OffsetY', str(self._center_cry * self._scale_y)) - imgset.set('Rotation', str(self._rot_rad * 180 / np.pi)) - imgset.set('DataSetType', 'Sky') - imgset.set('BandPass', bandpass) - imgset.set('Sparse', 'True') - - credits = etree.SubElement(imgset, 'Credits') - credits.text = credits_text - - credurl = etree.SubElement(imgset, 'CreditsUrl') - credurl.text = credits_url - - thumburl = etree.SubElement(imgset, 'ThumbnailUrl') - thumburl.text = thumbnail_url - - desc = etree.SubElement(imgset, 'Description') - desc.text = description_text - - return folder - - - def generate_deepest_layer_numpy( - self, - pio, - percentiles = [1, 99], - ): - """Fill in the deepest layer of the tile pyramid with Numpy-format data. + Tile the input images into the deepest layer of the pyramid. Parameters ---------- pio : :class:`toasty.pyramid.PyramidIO` - A :class:`~toasty.pyramid.PyramidIO` instance to manage the I/O with - the tiles in the tile pyramid. - percentiles : iterable of numbers - This is a list of percentile points to calculate while reading the - data. Each number should be between 0 and 100. For each - high-resolution tile, the percentiles are computed; then the *median* - across all tiles is computed and returned. - - Returns - ------- - percentiles : dict mapping numbers to numbers - This dictionary contains the result of the median-percentile - computation. The keys are the values provided in the *percentiles* - parameter. The values are the median of each percentile across - all of the tiles. - - Notes - ----- - The implementation assumes that if individual images overlap, we can - just use the pixels from any one of them without caring which - particular one we choose. 
- - Because this operation involves reading the complete image data set, - it offers a convenient opportunity to do some statistics on the data. - This motivates the presence of the *percentiles* feature. - + A :class:`~toasty.pyramid.PyramidIO` instance to manage the I/O with + the tiles in the tile pyramid. + parallel : integer or None (the default) + The level of parallelization to use. If unspecified, defaults to using + all CPUs. If the OS does not support fork-based multiprocessing, + parallel processing is not possible and serial processing will be + forced. Pass ``1`` to force serial processing. + cli_progress : optional boolean, defaults False + If true, a progress bar will be printed to the terminal using tqdm. """ - crxofs = (self._p2w - self._width) // 2 - cryofs = (self._p2h - self._height) // 2 - - percentile_data = {p: [] for p in percentiles} - - for path, hdu in self._input_hdus(): - crpix1 = hdu.header['CRPIX1'] - 1 - crpix2 = hdu.header['CRPIX2'] - 1 - - # (inclusive) image bounds in global pixel coords, which range - # from 0 to p2{w,h} (non-inclusive), and have y=0 at the top. The FITS - # data have y=0 at the bottom, so we need to flip them vertically. - img_gx0 = int((crxofs - self._crxmin) - crpix1) - img_gx1 = img_gx0 + hdu.shape[1] - 1 - img_gy1 = self._p2h - 1 - int((cryofs - self._crymin) - crpix2) - img_gy0 = img_gy1 - (hdu.shape[0] - 1) - - assert img_gx0 >= 0 - assert img_gy0 >= 0 - assert img_gx1 < self._p2w - assert img_gy1 < self._p2h - - # Tile indices at the highest resolution. - - ix0 = img_gx0 // 256 - iy0 = img_gy0 // 256 - ix1 = img_gx1 // 256 - iy1 = img_gy1 // 256 - - # OK, load up the data, with a vertical flip, and grid them. While - # we're doing that, calculate percentiles to inform the RGB-ification. 
- - data = hdu.data[::-1] - n_tiles = 0 - - pvals = np.nanpercentile(data, percentiles) - for pct, pv in zip(percentiles, pvals): - percentile_data[pct].append(pv) - - for iy in range(iy0, iy1 + 1): - for ix in range(ix0, ix1 + 1): - # (inclusive) tile bounds in global pixel coords - tile_gx0 = ix * 256 - tile_gy0 = iy * 256 - tile_gx1 = tile_gx0 + 255 - tile_gy1 = tile_gy0 + 255 - - # overlap (= intersection) of the image and the tile in global pixel coords - overlap_gx0 = max(tile_gx0, img_gx0) - overlap_gy0 = max(tile_gy0, img_gy0) - overlap_gx1 = min(tile_gx1, img_gx1) - overlap_gy1 = min(tile_gy1, img_gy1) - - # coordinates of the overlap in image pixel coords - img_overlap_x0 = overlap_gx0 - img_gx0 - img_overlap_x1 = overlap_gx1 - img_gx0 - img_overlap_xslice = slice(img_overlap_x0, img_overlap_x1 + 1) - img_overlap_y0 = overlap_gy0 - img_gy0 - img_overlap_y1 = overlap_gy1 - img_gy0 - img_overlap_yslice = slice(img_overlap_y0, img_overlap_y1 + 1) - - # coordinates of the overlap in this tile's coordinates - tile_overlap_x0 = overlap_gx0 - tile_gx0 - tile_overlap_x1 = overlap_gx1 - tile_gx0 - tile_overlap_xslice = slice(tile_overlap_x0, tile_overlap_x1 + 1) - tile_overlap_y0 = overlap_gy0 - tile_gy0 - tile_overlap_y1 = overlap_gy1 - tile_gy0 - tile_overlap_yslice = slice(tile_overlap_y0, tile_overlap_y1 + 1) - - ###print(f' {ix} {iy} -- {overlap_gx0} {overlap_gy0} {overlap_gx1} {overlap_gy1} -- ' - ### f'{tile_overlap_x0} {tile_overlap_y0} {tile_overlap_x1} {tile_overlap_y1} -- ' - ### f'{img_overlap_x0} {img_overlap_y0} {img_overlap_x1} {img_overlap_y1}') - - pos = Pos(self._tile_levels, ix, iy) - p = pio.tile_path(pos) - - try: - a = np.load(p) - except IOError as e: - if e.errno == 2: - a = np.empty((256, 256)) - a.fill(np.nan) - else: - raise - - a[tile_overlap_yslice,tile_overlap_xslice] = data[img_overlap_yslice,img_overlap_xslice] - np.save(p, a) - - percentile_data = {p: np.median(a) for p, a in percentile_data.items()} - return percentile_data + + 
from .par_util import resolve_parallelism + parallel = resolve_parallelism(parallel) + + if parallel > 1: + self._tile_parallel(pio, cli_progress, parallel, **kwargs) + else: + self._tile_serial(pio, cli_progress, **kwargs) + + # Since we used `pio.update_image()`, we should clean up the lockfiles + # that were generated. + pio.clean_lockfiles(self._tiling._tile_levels) + + + def _tile_serial(self, pio, cli_progress, **kwargs): + tile_parity_sign = pio.get_default_vertical_parity_sign() + + with tqdm(total=self._n_todo, disable=not cli_progress) as progress: + for image, desc in zip(self._collection.images(), self._descs): + # Ensure that the image and the eventual tile agree on parity + if image.get_parity_sign() != tile_parity_sign: + image.flip_parity() + + for pos, width, height, image_x, image_y, tile_x, tile_y in desc.sub_tiling.generate_populated_positions(): + # Tiling coordinate systems are always negative (top-down) + # parity, with the Y=0 tile at the top. But the actual data + # buffers might be positive (bottoms-up) parity -- this is + # the case for FITS. In those situations, we have to flip + # the vertical positioning. 
Because we have ensured that the + # source image and tile layouts agree, the needed tweak is + # actually quite minimal: + + if tile_parity_sign == 1: + image_y = image.height - (image_y + height) + tile_y = 256 - (tile_y + height) + + ix_idx = slice(image_x, image_x + width) + bx_idx = slice(tile_x, tile_x + width) + iy_idx = slice(image_y, image_y + height) + by_idx = slice(tile_y, tile_y + height) + + with pio.update_image(pos, masked_mode=image.mode, default='masked') as basis: + image.update_into_maskable_buffer(basis, iy_idx, ix_idx, by_idx, bx_idx) + + progress.update(1) + + if cli_progress: + print() + + + def _tile_parallel(self, pio, cli_progress, parallel, **kwargs): + import multiprocessing as mp + + # Start up the workers + + queue = mp.Queue(maxsize = 2 * parallel) + workers = [] + + for _ in range(parallel): + w = mp.Process(target=_mp_tile_worker, args=(queue, pio, kwargs)) + w.daemon = True + w.start() + workers.append(w) + + # Send out them segments + + with tqdm(total=len(self._descs), disable=not cli_progress) as progress: + for image, desc in zip(self._collection.images(), self._descs): + queue.put((image, desc)) + progress.update(1) + + queue.close() + + for w in workers: + w.join() + + if cli_progress: + print() + + +def _mp_tile_worker(queue, pio, kwargs): + """ + Generate and enqueue the tiles that need to be processed. + """ + from queue import Empty + + tile_parity_sign = pio.get_default_vertical_parity_sign() + + while True: + try: + # un-pickling WCS objects always triggers warnings right now + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + image, desc = queue.get(True, timeout=1) + except (OSError, ValueError, Empty): + # OSError or ValueError => queue closed. This signal seems not to + # cross multiprocess lines, though. 
+ break + + if image.get_parity_sign() != tile_parity_sign: + image.flip_parity() + + for pos, width, height, image_x, image_y, tile_x, tile_y in desc.sub_tiling.generate_populated_positions(): + if tile_parity_sign == 1: + image_y = image.height - (image_y + height) + tile_y = 256 - (tile_y + height) + + ix_idx = slice(image_x, image_x + width) + bx_idx = slice(tile_x, tile_x + width) + iy_idx = slice(image_y, image_y + height) + by_idx = slice(tile_y, tile_y + height) + + with pio.update_image(pos, masked_mode=image.mode, default='masked') as basis: + image.update_into_maskable_buffer(basis, iy_idx, ix_idx, by_idx, bx_idx) diff --git a/toasty/multi_wcs.py b/toasty/multi_wcs.py index 3e930b9..7cd4a1b 100644 --- a/toasty/multi_wcs.py +++ b/toasty/multi_wcs.py @@ -1,5 +1,5 @@ # -*- mode: python; coding: utf-8 -*- -# Copyright 2020 the AAS WorldWide Telescope project +# Copyright 2020-2021 the AAS WorldWide Telescope project # Licensed under the MIT License. """ @@ -9,14 +9,11 @@ This module has the following Python package dependencies: - astropy -- ccdproc (to trim FITS CCD datasets) - reproject - shapely (to optimize the projection in reproject) - """ __all__ = ''' -make_lsst_directory_loader_generator MultiWcsProcessor '''.split() @@ -24,58 +21,10 @@ from tqdm import tqdm import warnings -from .image import Image, ImageMode +from .image import Image, ImageDescription, ImageMode from .study import StudyTiling -def make_lsst_directory_loader_generator(dirname, unit=None): - from astropy.io import fits - from astropy.nddata import ccddata - import ccdproc - from glob import glob - from os.path import join - - def loader_generator(actually_load_data): - # Ideally we would just return a shape instead of an empty data array in - # the case where actually_load_data is false, but we can't use - # `ccdproc.trim_image()` without having a CCDData in hand, so to get the - # right shape we're going to need to create a full CCDData anyway. 
- - for fits_path in glob(join(dirname, '*.fits')): - # `astropy.nddata.ccddata.fits_ccddata_reader` only opens FITS from - # filenames, not from an open HDUList, which means that creating - # multiple CCDDatas from the same FITS file rapidly becomes - # inefficient. So, we emulate its logic. - - with fits.open(fits_path) as hdu_list: - for idx, hdu in enumerate(hdu_list): - if idx == 0: - header0 = hdu.header - else: - hdr = hdu.header - hdr.extend(header0, unique=True) - - # This ccddata function often generates annoying warnings - with warnings.catch_warnings(): - warnings.simplefilter('ignore') - hdr, wcs = ccddata._generate_wcs_and_update_header(hdr) - - # Note: we skip all the unit-handling logic here since the LSST - # sim data I'm using don't have anything useful. - - if actually_load_data: - data = hdu.data - else: - data = np.empty(hdu.shape, dtype=np.void) - - ccd = ccddata.CCDData(data, meta=hdr, unit=unit, wcs=wcs) - - ccd = ccdproc.trim_image(ccd, fits_section=ccd.header['DATASEC']) - yield (f'{fits_path}:{idx}', ccd) - - return loader_generator - - class MultiWcsDescriptor(object): ident = None in_shape = None @@ -90,33 +39,43 @@ class MultiWcsDescriptor(object): class MultiWcsProcessor(object): - def __init__(self, loader_generator): - self._loader_generator = loader_generator + def __init__(self, collection): + self._collection = collection - def compute_global_pixelization(self): + def compute_global_pixelization(self, builder): from reproject.mosaicking.wcs_helpers import find_optimal_celestial_wcs # Load up current WCS information for all of the inputs - def create_descriptor(loader_data): + def create_mwcs_descriptor(coll_desc): desc = MultiWcsDescriptor() - desc.ident = loader_data[0] - desc.in_shape = loader_data[1].shape - desc.in_wcs = loader_data[1].wcs + desc.ident = coll_desc.collection_id + desc.in_shape = coll_desc.shape + desc.in_wcs = coll_desc.wcs return desc - self._descs = [create_descriptor(tup) for tup in 
self._loader_generator(False)] + self._descs = [create_mwcs_descriptor(d) for d in self._collection.descriptions()] - # Compute the optimal tangential tiling that fits all of them. + # Compute the optimal tangential tiling that fits all of them. Since WWT + # tilings must be done in a negative-parity coordinate system, we use an + # ImageDescription helper to ensure we get that. - self._combined_wcs, self._combined_shape = find_optimal_celestial_wcs( + wcs, shape = find_optimal_celestial_wcs( ((desc.in_shape, desc.in_wcs) for desc in self._descs), auto_rotate = True, projection = 'TAN', ) - self._tiling = StudyTiling(self._combined_shape[1], self._combined_shape[0]) + desc = ImageDescription(wcs=wcs, shape=shape) + desc.ensure_negative_parity() + self._combined_wcs = desc.wcs + self._combined_shape = desc.shape + height, width = self._combined_shape + + self._tiling = StudyTiling(width, height) + self._tiling.apply_to_imageset(builder.imgset) + builder.apply_wcs_info(self._combined_wcs, width, height) # While we're here, figure out how each input will map onto the global # tiling. This makes sure that nothing funky happened during the @@ -146,17 +105,14 @@ def create_descriptor(loader_data): desc.jmax = min(self._combined_shape[0], int(np.ceil(yc_out.max() + 0.5))) # Compute the sub-tiling now so that we can count how many total - # tiles we'll need to process. Note that the combined WCS coordinate - # system has y=0 on the bottom, whereas the tiling coordinate system - # has y=0 at the top. So we need to invert the coordinates - # vertically when determining the sub-tiling. + # tiles we'll need to process. 
if desc.imax < desc.imin or desc.jmax < desc.jmin: raise Exception(f'segment {desc.ident} maps to zero size in the global mosaic') desc.sub_tiling = self._tiling.compute_for_subimage( desc.imin, - self._combined_shape[0] - desc.jmax, + desc.jmin, desc.imax - desc.imin, desc.jmax - desc.jmin, ) @@ -194,10 +150,16 @@ def tile(self, pio, reproject_function, parallel=None, cli_progress=False, **kwa else: self._tile_serial(pio, reproject_function, cli_progress, **kwargs) + # Since we used `pio.update_image()`, we should clean up the lockfiles + # that were generated. + pio.clean_lockfiles(self._tiling._tile_levels) + def _tile_serial(self, pio, reproject_function, cli_progress, **kwargs): + invert_into_tiles = pio.get_default_vertical_parity_sign() == 1 + with tqdm(total=self._n_todo, disable=not cli_progress) as progress: - for (ident, ccd), desc in zip(self._loader_generator(True), self._descs): + for image, desc in zip(self._collection.images(), self._descs): # XXX: more copying from # `reproject.mosaicking.coadd.reproject_and_coadd`. @@ -205,21 +167,38 @@ def _tile_serial(self, pio, reproject_function, cli_progress, **kwargs): shape_out_indiv = (desc.jmax - desc.jmin, desc.imax - desc.imin) array = reproject_function( - (ccd.data, ccd.wcs), + (image.asarray(), image.wcs), output_projection=wcs_out_indiv, shape_out=shape_out_indiv, return_footprint=False, **kwargs ) - # Once again, FITS coordinates have y=0 at the bottom and our - # coordinates have y=0 at the top, so we need a vertical flip. - image = Image.from_array(array.astype(np.float32)[::-1]) + image = Image.from_array(array.astype(np.float32)) for pos, width, height, image_x, image_y, tile_x, tile_y in desc.sub_tiling.generate_populated_positions(): + # Because we are doing an arbitrary WCS reprojection anyway, + # we can ensure that our source image is stored with a + # top-down vertical data layout, AKA negative image parity, + # which is what the overall "study" coordinate system needs. 
+ # But if we're writing to FITS format tiles, those need to + # end up with a bottoms-up format. So we need to flip the + # vertical orientation of how we put the data into the tile + # buffer. + + if invert_into_tiles: + flip_tile_y1 = 255 - tile_y + flip_tile_y0 = flip_tile_y1 - height + + if flip_tile_y0 == -1: + flip_tile_y0 = None # with a slice, -1 does the wrong thing + + by_idx = slice(flip_tile_y1, flip_tile_y0, -1) + else: + by_idx = slice(tile_y, tile_y + height) + iy_idx = slice(image_y, image_y + height) ix_idx = slice(image_x, image_x + width) - by_idx = slice(tile_y, tile_y + height) bx_idx = slice(tile_x, tile_x + width) with pio.update_image(pos, masked_mode=image.mode, default='masked') as basis: @@ -248,9 +227,9 @@ def _tile_parallel(self, pio, reproject_function, cli_progress, parallel, **kwar # Send out them segments with tqdm(total=len(self._descs), disable=not cli_progress) as progress: - for (ident, ccd), desc in zip(self._loader_generator(True), self._descs): + for image, desc in zip(self._collection.images(), self._descs): wcs_out_indiv = self._combined_wcs[desc.jmin:desc.jmax, desc.imin:desc.imax] - queue.put((ident, ccd, desc, wcs_out_indiv)) + queue.put((image, desc, wcs_out_indiv)) progress.update(1) queue.close() @@ -268,12 +247,14 @@ def _mp_tile_worker(queue, pio, reproject_function, kwargs): """ from queue import Empty + invert_into_tiles = pio.get_default_vertical_parity_sign() == 1 + while True: try: # un-pickling WCS objects always triggers warnings right now with warnings.catch_warnings(): warnings.simplefilter('ignore') - ident, ccd, desc, wcs_out_indiv = queue.get(True, timeout=1) + image, desc, wcs_out_indiv = queue.get(True, timeout=1) except (OSError, ValueError, Empty): # OSError or ValueError => queue closed. This signal seems not to # cross multiprocess lines, though. 
@@ -282,21 +263,29 @@ def _mp_tile_worker(queue, pio, reproject_function, kwargs): shape_out_indiv = (desc.jmax - desc.jmin, desc.imax - desc.imin) array = reproject_function( - (ccd.data, ccd.wcs), + (image.asarray(), image.wcs), output_projection=wcs_out_indiv, shape_out=shape_out_indiv, return_footprint=False, **kwargs ) - # Once again, FITS coordinates have y=0 at the bottom and our - # coordinates have y=0 at the top, so we need a vertical flip. - image = Image.from_array(array.astype(np.float32)[::-1]) + image = Image.from_array(array.astype(np.float32)) for pos, width, height, image_x, image_y, tile_x, tile_y in desc.sub_tiling.generate_populated_positions(): + if invert_into_tiles: + flip_tile_y1 = 255 - tile_y + flip_tile_y0 = flip_tile_y1 - height + + if flip_tile_y0 == -1: + flip_tile_y0 = None # with a slice, -1 does the wrong thing + + by_idx = slice(flip_tile_y1, flip_tile_y0, -1) + else: + by_idx = slice(tile_y, tile_y + height) + iy_idx = slice(image_y, image_y + height) ix_idx = slice(image_x, image_x + width) - by_idx = slice(tile_y, tile_y + height) bx_idx = slice(tile_x, tile_x + width) with pio.update_image(pos, masked_mode=image.mode, default='masked') as basis: diff --git a/toasty/pipeline/__init__.py b/toasty/pipeline/__init__.py index 459a541..cb84d65 100644 --- a/toasty/pipeline/__init__.py +++ b/toasty/pipeline/__init__.py @@ -462,3 +462,21 @@ def publish(self): self._pipeio.put_item(*sub_components[1:], source=f) os.rename(os.path.join(todo_dir, uniq_id), os.path.join(done_dir, uniq_id)) + + def ignore_rejects(self): + from io import BytesIO + + rejects_dir = self._path('rejects') + n = 0 + + # maybe one day this will be JSON with data? 
+ flag_content = BytesIO(b'{}') + + for uniq_id in os.listdir(rejects_dir): + print(f'ignoring {uniq_id} ...') + self._pipeio.put_item(uniq_id, 'skip.flag', source=flag_content) + n += 1 + + if n > 1: + print() + print(f'marked a total of {n} images to be permanently ignored') diff --git a/toasty/pipeline/astropix.py b/toasty/pipeline/astropix.py index de4ff11..2663f9f 100644 --- a/toasty/pipeline/astropix.py +++ b/toasty/pipeline/astropix.py @@ -8,6 +8,11 @@ TODO: update metadata tomfoolery to match the new things that I've learned. Cf. the ``wwtdatatool wtml report`` utility and the djangoplicity implementation. +NOTE: AstroPix seems to have its image parity backwards. Standard JPEGs are +reported with ``wcs_scale[0] < 0``, which is *positive* AKA bottoms-up AKA FITS +parity. If we pass their parameters more-or-less straight through to WWT, we get +the right appearance onscreen. + """ __all__ = ''' @@ -261,7 +266,10 @@ def as_wcs_headers(self, width, height): headers['CRPIX1'] = (self.wcs_reference_pixel[0] - 0.5) * factor0 + 0.5 headers['CRPIX2'] = (self.wcs_reference_pixel[1] - 0.5) * factor1 + 0.5 headers['CDELT1'] = self.wcs_scale[0] / factor0 - headers['CDELT2'] = self.wcs_scale[1] / factor1 + + # We need the negation here to properly express the parity of this + # JPEG-type image in WCS: + headers['CDELT2'] = -self.wcs_scale[1] / factor1 return headers diff --git a/toasty/pipeline/cli.py b/toasty/pipeline/cli.py index c788a8f..e7e35c7 100644 --- a/toasty/pipeline/cli.py +++ b/toasty/pipeline/cli.py @@ -294,28 +294,24 @@ def refresh_impl(settings): def pipeline_getparser(parser): subparsers = parser.add_subparsers(dest='pipeline_command') + + def add_manager_command(name): + subp = subparsers.add_parser(name) + subp.add_argument( + '--workdir', + nargs = '?', + metavar = 'WORKDIR', + default = '.', + help = 'The local working directory', + ) + return subp + approve_setup_parser(subparsers.add_parser('approve')) 
fetch_setup_parser(subparsers.add_parser('fetch')) + add_manager_command('ignore-rejects') init_setup_parser(subparsers.add_parser('init')) - - parser = subparsers.add_parser('process-todos') - parser.add_argument( - '--workdir', - nargs = '?', - metavar = 'WORKDIR', - default = '.', - help = 'The local working directory', - ) - - parser = subparsers.add_parser('publish') - parser.add_argument( - '--workdir', - nargs = '?', - metavar = 'WORKDIR', - default = '.', - help = 'The local working directory', - ) - + add_manager_command('process-todos') + add_manager_command('publish') refresh_setup_parser(subparsers.add_parser('refresh')) @@ -330,6 +326,9 @@ def pipeline_impl(settings): approve_impl(settings) elif settings.pipeline_command == 'fetch': fetch_impl(settings) + elif settings.pipeline_command == 'ignore-rejects': + mgr = PipelineManager(settings.workdir) + mgr.ignore_rejects() elif settings.pipeline_command == 'init': init_impl(settings) elif settings.pipeline_command == 'process-todos': diff --git a/toasty/pipeline/djangoplicity.py b/toasty/pipeline/djangoplicity.py index cb31a0d..82da5bd 100644 --- a/toasty/pipeline/djangoplicity.py +++ b/toasty/pipeline/djangoplicity.py @@ -271,6 +271,9 @@ def as_wcs_headers(self, width, height): headers['CRPIX1'] = (float(self.metadata['Spatial.ReferencePixel'][0]) - 0.5) * factor0 + 0.5 headers['CRPIX2'] = (float(self.metadata['Spatial.ReferencePixel'][1]) - 0.5) * factor1 + 0.5 headers['CDELT1'] = scale0 / factor0 - headers['CDELT2'] = scale1 / factor1 + + # We need the negation here to properly express the parity of this + # JPEG-type image in WCS: + headers['CDELT2'] = -scale1 / factor1 return headers diff --git a/toasty/pyramid.py b/toasty/pyramid.py index 0ed0be7..ec4d41f 100644 --- a/toasty/pyramid.py +++ b/toasty/pyramid.py @@ -31,7 +31,7 @@ import numpy as np import os.path -from .image import ImageLoader, SUPPORTED_FORMATS +from .image import ImageLoader, SUPPORTED_FORMATS, get_format_vertical_parity_sign Pos = 
namedtuple('Pos', 'n x y') @@ -284,6 +284,30 @@ def get_path_scheme(self): """ return self._scheme + def get_default_format(self): + """ + Get the default image storage format for this pyramid. + + Returns + ------- + The format, a string resembling ``"png"`` or ``"fits"``. + """ + return self._default_format + + def get_default_vertical_parity_sign(self): + """ + Get the parity sign (vertical data layout) of the tiles in this pyramid's + default format. + + Returns + ------- + Either +1 or -1, depending on the format. + """ + + if self._default_format is None: + raise Exception('cannot get default parity sign without a default format') + return get_format_vertical_parity_sign(self._default_format) + def read_image(self, pos, default='none', masked_mode=None, format=None): """ Read an Image for the specified tile position. @@ -341,13 +365,46 @@ def write_image(self, pos, image, format=None, mode=None): @contextmanager def update_image(self, pos, default='none', masked_mode=None, format=None): from filelock import FileLock + p = self.tile_path(pos) + with FileLock(p + '.lock'): - img = self.read_image(pos, default=default, masked_mode=masked_mode, - format=format or self._default_format) + img = self.read_image( + pos, + default=default, + masked_mode=masked_mode, + format=format or self._default_format + ) + yield img self.write_image(pos, img, format=format or self._default_format) + def clean_lockfiles(self, level): + """ + Clean up any lockfiles created during parallelized pyramid creation. + + Parameters + ---------- + level : :class:`int` A tile pyramid depth. + + Notes + ----- + If you use the :meth:`update_image` method, you should call this + function after your processing is complete to remove lockfiles that are + created to ensure that multiple processes don't stomp on each other's + work. The "cascade" stage doesn't need locking, so in general only the + deepest level of the pyramid will need to be cleaned. 
+ """ + for x in range(0, 2**level): + for y in range(0, 2**level): + pos = Pos(level, x, y) + p = self.tile_path(pos) + '.lock' + + try: + os.unlink(p) + except FileNotFoundError: + pass + def open_metadata_for_read(self, basename): """ Open a metadata file in read mode. diff --git a/toasty/tests/test_multi_tan.py b/toasty/tests/test_multi_tan.py index 8063b7e..a72bc61 100644 --- a/toasty/tests/test_multi_tan.py +++ b/toasty/tests/test_multi_tan.py @@ -11,7 +11,9 @@ import sys from . import assert_xml_elements_equal, test_path +from ..builder import Builder from .. import cli +from .. import collection from .. import multi_tan @@ -66,66 +68,27 @@ def work_path(self, *pieces): return os.path.join(self.work_dir, *pieces) def test_basic(self): - ds = multi_tan.MultiTanDataSource([test_path('wcs512.fits.gz')]) - ds.compute_global_pixelization() - - from xml.etree import ElementTree as etree - expected = etree.fromstring(self.WTML) - - folder = ds.create_wtml( - name = 'TestName', - url_prefix = 'UP', - fov_factor = 1.0, - bandpass = 'Gamma', - description_text = 'DT', - credits_text = 'CT', - credits_url = 'CU', - thumbnail_url = 'TU', - ) - assert_xml_elements_equal(folder, expected) + coll = collection.SimpleFitsCollection([test_path('wcs512.fits.gz')]) + + proc = multi_tan.MultiTanProcessor(coll) from ..pyramid import PyramidIO - pio = PyramidIO(self.work_path('basic'), default_format='npy') - percentiles = ds.generate_deepest_layer_numpy(pio) - - # These are all hardcoded parameters of this test dataset, derived - # from a manual processing with checking that the results are correct. - # Note that to make the file more compressible, I quantized its data, - # which explains why some of the samples below are identical. 
- - PERCENTILES = { - 1: 0.098039217, - 99: 0.76862746, - } - - for pct, expected in PERCENTILES.items(): - nt.assert_almost_equal(percentiles[pct], expected) - - MEAN, TLC, TRC, BLC, BRC = range(5) - SAMPLES = { - (0, 0): [0.20828014, 0.20392157, 0.22745098, 0.18431373, 0.20000000], - (0, 1): [0.22180051, 0.18431373, 0.18823530, 0.16470589, 0.18431373], - (1, 0): [0.22178716, 0.16470589, 0.18431373, 0.11372549, 0.19607843], - (1, 1): [0.21140813, 0.18431373, 0.20784314, 0.12549020, 0.14117648], - } - - for (y, x), expected in SAMPLES.items(): - data = np.load(self.work_path('basic', '1', str(y), '{}_{}.npy'.format(y, x))) - nt.assert_almost_equal(data.mean(), expected[MEAN]) - nt.assert_almost_equal(data[10,10], expected[TLC]) - nt.assert_almost_equal(data[10,-10], expected[TRC]) - nt.assert_almost_equal(data[-10,10], expected[BLC]) - nt.assert_almost_equal(data[-10,-10], expected[BRC]) + pio = PyramidIO(self.work_path('basic'), default_format='fits') + + builder = Builder(pio) + proc.compute_global_pixelization(builder) + proc.tile(pio) def test_basic_cli(self): - """Test the CLI interface. We don't go out of our way to validate the + """ + Test the CLI interface. We don't go out of our way to validate the computations in detail -- that's for the unit tests that probe the module directly. 
- """ + args = [ - 'multi-tan-make-data-tiles', + 'tile-multi-tan', '--hdu-index', '0', '--outdir', self.work_path('basic_cli'), test_path('wcs512.fits.gz') diff --git a/toasty/tests/test_pipeline.py b/toasty/tests/test_pipeline.py index 6ba4b5d..28a0bdd 100644 --- a/toasty/tests/test_pipeline.py +++ b/toasty/tests/test_pipeline.py @@ -32,7 +32,7 @@ def query_candidates(self): "wcs_coordinate_frame": "ICRS", "wcs_equinox": "J2000", "wcs_reference_value": ["187.70593075", "12.39112325"], - "wcs_reference_dimension": ["7416.0", "4320.0"], + "wcs_reference_dimension": ["2166.0", "2129.0"], "wcs_reference_pixel": ["3738.9937831", "3032.00448074"], "wcs_scale": ["-5.91663506907e-14", "5.91663506907e-14"], "wcs_rotation": "0", @@ -112,6 +112,12 @@ def test_workflow(self): ] cli.entrypoint(args) + args = [ + 'pipeline', 'ignore-rejects', + '--workdir', self.work_path('work'), + ] + cli.entrypoint(args) + def test_args(self): with pytest.raises(SystemExit): args = [