diff --git a/config/defaults.yml b/config/defaults.yml index 778c7bd6..61ebdf7d 100644 --- a/config/defaults.yml +++ b/config/defaults.yml @@ -9,12 +9,14 @@ workflow: segmentation: unmicst segmentation-recyze: false downstream: scimap + iss_decoding: false options: ashlar: -m 30 cypository: --model zeisscyto ilastik: --num_channels 1 mcquant: --masks cell*.tif naivestates: -p png + starfish: --tile-size 2000 modules: illumination: name: basic @@ -114,3 +116,7 @@ modules: name: autominerva container: labsyspharm/auto-minerva version: '2022-06-06' + iss_decoding: + name: starfish + container: spacetx/starfish + version: 0.2.2 \ No newline at end of file diff --git a/config/schema.yml b/config/schema.yml index afed0c3d..9f4f88a7 100644 --- a/config/schema.yml +++ b/config/schema.yml @@ -13,6 +13,7 @@ workflow: - downstream - ilastik-model - mesmer-model + - iss_decoding deprecated: quantification-mask: "--quant-opts '--masks ...'" illum: --start-at illumination diff --git a/docs/parameters/other.md b/docs/parameters/other.md index 0851dbee..15f88dcd 100755 --- a/docs/parameters/other.md +++ b/docs/parameters/other.md @@ -14,10 +14,14 @@ Segmentation 1. [Cypository](./other.html#cypository) 1. [Mesmer](./other.html#mesmer) -Clsutering and cell type inference +Clustering and cell type inference 1. [Clustering](./other.html#clustering) 1. [naivestates](./other.html#naivestates) +In-situ sequencing spot deconvolution +1. [Starfish](./other.html#starfish) + +
[Back to main modules](./){: .btn .btn-outline} @@ -258,3 +262,30 @@ Nextflow will write all outputs to the `cell-states/naivestates/` subdirectory w |`--mct ` | |The tool has a basic marker -> cell type (mct) mapping in `typemap.csv`. More sophisticated mct mappings can be defined by creating a `custom-map.csv` file with two columns: `Marker` and `State`. | [Back to top](./other.html#other-modules){: .btn .btn-purple} [Back to main modules](./){: .btn .btn-outline} + +## Starfish + +### Description +starfish is a Python library for processing images of image-based spatial transcriptomics. We implemented part of the starfish pipeline to process ISS images starting from ... until the final result, a table with locations of all called spots. +### Usage +Add a `iss_decoding:` field to [workflow parameters]({{site.baseurl}}/parameters/) to select starfish. + +* Example `params.yml`: + +``` yaml +workflow: + stop-at: iss_decoding + downstream: naivestates + naivestates-model: /full/path/to/mct.csv +options: + naivestates: --log no +``` +* Default naivestates options: `-p png` +* Running outside of MCMICRO: [Instructions](https://github.com/labsyspharm/mcmicro-ilastik){:target="_blank"}. +### Inputs + + +### Outputs + + +### Optional arguments diff --git a/lib/mcmicro/Flow.groovy b/lib/mcmicro/Flow.groovy index 8e91322b..720d572c 100644 --- a/lib/mcmicro/Flow.groovy +++ b/lib/mcmicro/Flow.groovy @@ -32,7 +32,8 @@ static def flowSegment(wfp) { "segmentation", // Step 4 "watershed", // Step 5 "quantification", // Step 6 - "downstream"] // Step 7 + "downstream", // Step 7 + "iss_decoding"] // Step 8 // Identify starting and stopping indices int idxStart = mcsteps.indexOf( wfp['start-at'] ) @@ -65,7 +66,8 @@ static def precomputed(wfp) { dearray: idxStart > 3 && wfp.tma, 'probability-maps': idxStart == 5, segmentation: idxStart == 6, - quantification: idxStart == 7 + quantification: idxStart == 7, + iss_decoding: idxStart == 8 ] } @@ -94,6 +96,8 @@ static def doirun(step, wfp) { return(idxStart <= 6 && idxStop >= 6) case 'downstream': return(idxStart <= 7 && idxStop >= 7) + case 'iss_decoding': + return(idxStart <= 8 && idxStop >= 8) case 'viz': return(wfp.viz) default: diff --git a/main.nf b/main.nf index db48221a..bbb34425 100755 --- a/main.nf +++ b/main.nf @@ -49,6 +49,9 @@ findFiles = { key, pattern, ife -> pre[key] ? Channel.fromPath("${params.in}/$key/$pattern").ifEmpty(ife) : Channel.empty() } +// Find spot images in starfish_input +starimgs = Channel.fromPath( "${params.in}/primary", checkIfExists: true ) + // Some image formats store multiple fields of view in a single file. Other // formats store each field separately, typically in .tif files, with a separate // index file to tie them together. We will look for the index files from @@ -98,6 +101,7 @@ include {illumination} from "$projectDir/modules/illumination" include {registration} from "$projectDir/modules/registration" include {dearray} from "$projectDir/modules/dearray" include {segmentation} from "$projectDir/modules/segmentation" +include {starfish} from "$projectDir/modules/iss_decoding" include {quantification} from "$projectDir/modules/quantification" include {downstream} from "$projectDir/modules/downstream" include {viz} from "$projectDir/modules/viz" @@ -124,6 +128,9 @@ workflow { tmacores = dearray.out.cores.mix(pre_cores) tmamasks = dearray.out.masks.mix(pre_masks) + // Is the data type ISS? + starfish(mcp, starimgs) + // Reconcile WSI and TMA processing for downstream segmentation allimg = img.wsi.mix(tmacores) segmentation(mcp, allimg, tmamasks, pre_pmap) diff --git a/modules/iss_decoding.nf b/modules/iss_decoding.nf new file mode 100644 index 00000000..663fd79c --- /dev/null +++ b/modules/iss_decoding.nf @@ -0,0 +1,138 @@ +// Import utility functions from lib/mcmicro/*.groovy +import mcmicro.* + +process starfish_tile { + + container "${params.contPfx}${module.container}:${module.version}" + publishDir "${params.in}/iss_processing", mode: 'copy', pattern: "*.tif" + + publishDir "${Flow.QC(params.in, 'provenance')}", mode: 'copy', + pattern: '.command.{sh,log}', + saveAs: {fn -> fn.replace('.command', "${module.name}-${task.index}")} + + input: + val mcp + val module + path code_tile + path img_dir + + output: + path 'TILED' + tuple path('.command.sh'), path('.command.log') + + when: mcp.workflow["iss_decoding"] + + """ + python $code_tile --input ${img_dir} --output TILED + """ +} + +process starfish_convert { + + container "${params.contPfx}${module.container}:${module.version}" + publishDir "${params.in}/iss_processing", mode: 'copy', pattern: "*.tif" + + publishDir "${Flow.QC(params.in, 'provenance')}", mode: 'copy', + pattern: '.command.{sh,log}', + saveAs: {fn -> fn.replace('.command', "${module.name}-${task.index}")} + + input: + val mcp + val module + path code + path TILED + + output: + path 'SpaceTx' + tuple path('.command.sh'), path('.command.log') + + when: mcp.workflow["iss_decoding"] + + """ + python $code -i $TILED -o SpaceTx + """ +} + +process starfish_decode { + + // Use the container specification from the parameter file + // No change to this line is required + container "${params.contPfx}${module.container}:${module.version}" + + // Specify the project subdirectory for writing the outputs to + // The pattern: specification must match the output: files below + // TODO: replace report with the desired output directory + // TODO: replace the pattern to match the output: clause below + publishDir "${params.in}/iss_processing", mode: 'copy', pattern: "*.csv" + + // Stores .command.sh and .command.log from the work directory + // to the project provenance + // No change to this line is required + publishDir "${Flow.QC(params.in, 'provenance')}", mode: 'copy', + pattern: '.command.{sh,log}', + saveAs: {fn -> fn.replace('.command', "${module.name}-${task.index}")} + + // Inputs for the process + // mcp - MCMICRO parameters (workflow, options, etc.) + // module - module specifications (name, container, options, etc.) + // img/sft - pairs of images and their matching spatial feature tables + input: + val mcp + val module + path code + path SpaceTx + + // Process outputs that should be captured and + // a) returned as results + // b) published to the project directory + // TODO: replace *.html with the pattern of the tool output files + output: + path("*.csv"), emit: results + + // Provenance files -- no change is needed here + tuple path('.command.sh'), path('.command.log') + + // Specifies whether to run the process + // Here, we simply take the flag from the workflow parameters + // TODO: change snr to match the true/false workflow parameter in defaults.yml + when: mcp.workflow["iss_decoding"] + + // The command to be executed inside the tool container + // The command must write all outputs to the current working directory (.) + // Opts.moduleOpts() will identify and return the appropriate module options + """ + python $code -i SpaceTx + """ +} + +workflow starfish { + + // Inputs: + // mcp - MCMICRO parameters (workflow, options, etc.) + // imgs - images + // cbk - Codebook + take: + mcp + img_dir + + main: + + // Apply the process to each (image, sft) pair + + // code_tile = Channel.fromPath("$projectDir/starfish/bin/decoding.py") + ///code_convert = Channel.fromPath("$projectDir/starfish/bin/decoding.py") + + code_tile = Channel.fromPath("$projectDir/starfish/bin/tiling.py") + TILED = starfish_tile(mcp, mcp.modules['iss_decoding'], code_tile, img_dir) + + println TILED + code_convert = Channel.fromPath("$projectDir/starfish/bin/format_to_spacetx.py") + SpaceTx = starfish_convert(mcp, mcp.modules['iss_decoding'], code_convert, TILED[0]) + + code_decode = Channel.fromPath("$projectDir/starfish/bin/decoding.py") + results = starfish_decode(mcp, mcp.modules['iss_decoding'], code_decode, SpaceTx[0]) + // Return the outputs produced by the tool + + emit: + results[0] +} \ No newline at end of file diff --git a/starfish/bin/decoding.py b/starfish/bin/decoding.py new file mode 100644 index 00000000..c5714f35 --- /dev/null +++ b/starfish/bin/decoding.py @@ -0,0 +1,93 @@ +import os +import starfish +import argparse +from starfish.image import ApplyTransform, LearnTransform, Filter +from starfish.types import Axes +from starfish import data, FieldOfView +from starfish.spots import FindSpots, DecodeSpots, AssignTargets + +codebook = data.ISS(use_test_data=True).codebook + + +def get_args(): + """Get command-line arguments""" + + parser = argparse.ArgumentParser( + description='Input/output directories for data formatting', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument('-i', + '--input_dir', + default='Tiled', + type=str, + help='Input root directory') + + return parser.parse_args() + + +def iss_pipeline(fov, codebook): + #fov = experiment.fov() + primary_image = fov.get_image(FieldOfView.PRIMARY_IMAGES) + anchor = primary_image.sel({Axes.ROUND: 0}) + anchor_dots = anchor.reduce({Axes.CH, Axes.ZPLANE}, func="max") + + learn_translation = LearnTransform.Translation(reference_stack=anchor_dots, + axes=Axes.ROUND, upsampling=100) + + transforms_list = learn_translation.run( + primary_image.reduce({Axes.CH, Axes.ZPLANE}, func="max")) + + warp = ApplyTransform.Warp() + registered = warp.run(primary_image, transforms_list=transforms_list, in_place=False, verbose=True) + + # Filter raw data + masking_radius = 15 + filt = Filter.WhiteTophat(masking_radius, is_volume=False) + filtered = filt.run(registered, verbose=True, in_place=False) + print(filtered) + + bd = FindSpots.BlobDetector( + min_sigma=1, + max_sigma=3, + num_sigma=30, + threshold=0.01, + measurement_type='mean' + ) + + # Check if experiment has anchor or not + # Locate spots in a reference image: + # Old one: spots = bd.run(reference_image=fov.get_image("anchor_dots"), image_stack=filtered) + spots = bd.run(reference_image=anchor_dots, image_stack=filtered) + + # decode the pixel traces using the codebook + decoder = DecodeSpots.PerRoundMaxChannel(codebook=codebook) + decoded = decoder.run(spots=spots) + + return decoded +#imshow_plane(dots) + +# process all the fields of view: +def process_experiment(experiment: starfish.Experiment, cb: starfish.Codebook): + decoded_intensities = {} + for i, (name_, fov) in enumerate(experiment.items()): + print(name_) + decoded = iss_pipeline(fov, codebook=cb) + decoded_intensities[name_] = decoded + + return decoded_intensities + +def save_fov(fov, dataframe): + fov_df = dataframe.to_features_dataframe() + fov_df.to_csv(f'fov_{fov}.csv', index=False) + + +def main(): + args = get_args() + exp = starfish.Experiment.from_json(os.path.join(args.input_dir, 'primary', 'experiment.json')) + results = process_experiment(exp, codebook) + for fov, data in results.items(): + save_fov(fov, data) + + +if __name__ == '__main__': + main() diff --git a/starfish/bin/format_to_spacetx.py b/starfish/bin/format_to_spacetx.py new file mode 100644 index 00000000..1af4c5f7 --- /dev/null +++ b/starfish/bin/format_to_spacetx.py @@ -0,0 +1,48 @@ +from slicedimage import ImageFormat +from starfish.experiment.builder import format_structured_dataset +import argparse +import os + + +def get_args(): + """Get command-line arguments""" + + parser = argparse.ArgumentParser( + description='Input/output directories for data formatting', + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument('-i', '--input_dir', default='Tiled', type=str, help='Input root directory') + parser.add_argument('-o', '--output_dir', default='SpaceTx', type=str, help='Output root directory') + + return parser.parse_args() + + +def format_experiment( + in_dir: str = 'Tiled', + out_dir: str = 'SpaceTx', + subdirs: list = ['primary', 'nuclei', 'anchor_dots', 'anchor_nuclei'], + coordinates_filename: str = 'coordinates.csv' +) -> None: + for subdir in subdirs: + out_d = os.path.join(out_dir, subdir) + os.makedirs(out_d) + + format_structured_dataset( + in_dir, + os.path.join(in_dir, "coordinates.csv"), + out_d, + ImageFormat.TIFF, + ) + + +def main(): + args = get_args() + format_experiment( + in_dir=args.input_dir, + out_dir=args.output_dir + ) + + +if __name__ == '__main__': + main() diff --git a/starfish/bin/generate_dataset.py b/starfish/bin/generate_dataset.py new file mode 100644 index 00000000..9fd0e57c --- /dev/null +++ b/starfish/bin/generate_dataset.py @@ -0,0 +1,28 @@ +import os +from starfish import data, FieldOfView +import tifffile as tiff +from starfish.types import Axes +from pathlib import Path +from typing import Union + + +# Create sample raw dataset for tiling: +exp = data.ISS(use_test_data=False) +fov = exp['fov_000'] +primary = fov.get_image(FieldOfView.PRIMARY_IMAGES) + +# save_dir = '/Users/segonzal/Documents/Repositories/imbast/data/primary' +save_dir = Path(os.getcwd()) / "sample_dataset" +print(save_dir) + + +def generate_dataset(save_dir: Union[str, Path]): + save_dir = Path(save_dir) + for ch in range(4): + for r in range(4): + img = primary.sel({Axes.ROUND: r, Axes.CH: ch}).xarray.squeeze() + tiff.imwrite(save_dir / f"r{r}_CH{ch}.tiff", img) + + +if __name__ == '__main__': + generate_dataset(save_dir) diff --git a/starfish/bin/tiling.py b/starfish/bin/tiling.py new file mode 100644 index 00000000..5647b47b --- /dev/null +++ b/starfish/bin/tiling.py @@ -0,0 +1,131 @@ +# Tile images and generate 'coordinates.csv' file +import os +import csv +from skimage.io import imread, imsave +from collections import namedtuple +import pandas as pd +import numpy as np +import argparse +from pathlib import Path +import logging + +# input_path = '/Users/segonzal/Documents/Repositories/imbast/data/primary' +# img_input = imread(os.path.join(input_path, os.listdir(input_path)[0])) +# tile_size = 500 +# tile_overlap = 0 +channel_map = {'CH0': '0', 'CH1': '1', 'CH2': '2', 'CH3': '3'} + + +def get_tile_coordinates(img_input, tile_size: int, tile_overlap: int) -> None: + """Compute tile coordinates.""" + + tile_coordinates = {} + + RoI = namedtuple('RoI', ['row', 'col', 'nrows', 'ncols']) + tile_n = 0 + + tile_size = tile_size + tile_overlap + + def _get_size(position, tile_size, total_size): + dist_to_end = total_size - position + size = tile_size + size = size if dist_to_end > size else dist_to_end + + return size + + for r in range(0, img_input.shape[0], tile_size): + nrows = _get_size(r, tile_size, img_input.shape[0]) + + for c in range(0, img_input.shape[1], tile_size): + ncols = _get_size(c, tile_size, img_input.shape[1]) + + tile_coordinates[tile_n] = RoI(r, c, nrows, ncols) + tile_n += 1 + + return tile_coordinates + + +def select_roi(image, roi): + return image[roi.row:roi.row + roi.nrows, + roi.col:roi.col + roi.ncols] + + +def needs_padding(image_tile, tile_size): + return any([image_tile.shape[i] < tile_size for i in [0, 1]]) + + +def pad_to_size( + image, + tile_size + ) -> np.ndarray: + """Pad smaller tiles to match standard tile size.""" + # Images must be same size + # Pad with zeros to default size + if needs_padding(image, tile_size): + pad_width = tuple((0, tile_size - image.shape[i]) for i in [0, 1]) + image = np.pad(image, pad_width, 'constant') + + return image + + +def tile_image_set(in_dir, tile_size, tile_overlap, img_type, + coordinates, output_dir): + # File names and coordinates table according to + # SpaceTx Structured Data + tile_coordinates = get_tile_coordinates(imread(Path(in_dir) / os.listdir(in_dir)[0]), tile_size, tile_overlap) + coordinates = [] + for image_name in os.listdir(in_dir): + image = imread(os.path.join(in_dir, image_name)) + for tile_id in tile_coordinates: + coords = tile_coordinates[tile_id] + tile = select_roi( + image, roi=coords) + #tile = self._pad_to_size(tile, tile_size) + + r = image_name.split('_')[0][1:] + + c = channel_map[image_name.split('_')[1].split('.')[0]] + + file_name = f'primary-f{tile_id}-r{r}-c{c}-z0.tiff' + imsave(os.path.join(output_dir, file_name), pad_to_size(tile, tile_size)) + + coordinates.append([ + tile_id, r, c, 0, + coords.col, coords.row, 0, + coords.col + tile_size, coords.row + tile_size, 0.0001]) + + return coordinates + + +def write_coords_file(coordinates, file_path) -> None: + coords_df = pd.DataFrame( + coordinates, + columns=('fov', 'round', 'ch', 'zplane', + 'xc_min', 'yc_min', 'zc_min', + 'xc_max', 'yc_max', 'zc_max')) + coords_df.to_csv(file_path, index=False) + + +def main(input_path, output_path, tile_size=500, tile_overlap=0): + input_path = Path(input_path) + output_path = Path(output_path) + + output_path.mkdir(exist_ok=True) + + coords = tile_image_set( + input_path, tile_size, tile_overlap, "primary", + get_tile_coordinates(imread(input_path / os.listdir(input_path)[0]), tile_size=tile_size, tile_overlap=tile_overlap), output_path + ) + write_coords_file(coords, output_path / 'coordinates.csv') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("-i", "--input", help="input path for images", type=str) + parser.add_argument("-o", "--output", help="output folder for tiled images", type=str) + + parser.add_argument("-ts", "--tile-size", type=int, help="the size of tiles", default=500) + parser.add_argument("-to", "--tile-overlap", type=int, help="the overlay for tiles", default=0) + args = parser.parse_args() + + main(args.input, args.output, args.tile_size, args.tile_overlap) diff --git a/starfish/nf-issdecoder.nf b/starfish/nf-issdecoder.nf new file mode 100755 index 00000000..8e2b0f73 --- /dev/null +++ b/starfish/nf-issdecoder.nf @@ -0,0 +1,77 @@ +/* + * pipeline input parameters +*/ +// params.datadir = "$projectDir/data/input/" +params.input_dir = "$projectDir/data/Registered" +params.register_dir = "/Registered" +params.codebook = "$projectDir/data/codebook.json" + + +log.info """\ + SPATIAL TRANSCRIPTOMICS PIPELINE + ================================ + test : ${params.test} + input datadir: ${params.input_dir} + """ + .stripIndent() + +// nextflow.enable.dsl=2 + +process TILING { + + input: + path x + + output: + path 'Tiled' +// path '*_results' + + script: + """ + python $projectDir/bin/tiling.py --input ${x} --output TILED + """ +} + +process TO_SPACETX { + + input: + path TILED + + output: + path SpaceTx +// path 'data_results' + + script: + """ + python $projectDir/bin/format_to_spacetx.py -i $TILED + """ +} + +process DECODE { + + input: + path SpaceTx + + output: + stdout +// path 'SpaceTx' +// path 'data_results' + + script: + """ + python $projectDir/bin/decoding.py + """ +} +workflow { + tiling_ch = TILING(params.input_dir) + tiling_ch.view() + spacetx_ch = TO_SPACETX(tiling_ch) +// tiling_ch = TO_SPACETX() +// decoding_ch = DECODE() +// decoding_ch.view() +// test_name = Channel.from(params.test) +// sample_in = Channel.from(params.sample_name) +// script_file = Channel.fromPath(params.script_Python_Registration) +// last_channel = Channel.from(image_registration(test_name)) +// TILING(result_ch) +} diff --git a/starfish/test_iss_decoding.yml b/starfish/test_iss_decoding.yml new file mode 100644 index 00000000..15c7a526 --- /dev/null +++ b/starfish/test_iss_decoding.yml @@ -0,0 +1,9 @@ +workflow: + start-at: iss_decoding + stop-at: iss_decoding + iss_decoding: true +# modules: +# iss_decoding: +# name: starfish +# container: spacetx/starfish +# version: 0.2.2 \ No newline at end of file