forked from labsyspharm/ome-tiff-pyramid-tools
-
Notifications
You must be signed in to change notification settings - Fork 1
/
pyramid_upgrade.py
224 lines (200 loc) · 8.46 KB
/
pyramid_upgrade.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
import argparse
import io
import re
import sys
import tiffsurgeon
import xml.etree.ElementTree
def fix_attrib_namespace(elt):
    """Qualify bare attribute names with the namespace of their own element.

    Walks `elt` and all of its descendants. For every element whose tag is
    namespace-qualified (``{uri}local``), each attribute that carries no
    namespace of its own is renamed to ``{uri}name``. Attributes that are
    already qualified, and elements without a namespace, are left alone.
    The tree is modified in place; nothing is returned.

    This works around ElementTree's inability to round-trip XML with a
    default namespace ("cannot use non-qualified names with
    default_namespace option" error). See https://bugs.python.org/issue17088
    Code inspired by https://gist.github.com/provegard/1381912 .
    """
    # iter() yields `elt` first and then its descendants in document order.
    for node in elt.iter():
        tag = node.tag
        if tag[0] != "{":
            # Element has no namespace, so there is nothing to copy from.
            continue
        # The namespace URI is everything between the leading "{" and the
        # last "}" of the qualified tag.
        namespace = tag[1 : tag.rindex("}")]
        node.attrib = {
            (key if key[0] == "{" else f"{{{namespace}}}{key}"): val
            for key, val in node.attrib.items()
        }
def parse_args():
    """Parse the command line and return the resulting namespace.

    The namespace has two attributes:
      image          -- path of the OME-TIFF file to convert (required).
      channel_names  -- list of names given with -n; empty list if omitted.
    """
    description_text = (
        "Convert an OME-TIFF legacy pyramid to the BioFormats 6"
        " OME-TIFF pyramid format in-place."
    )
    names_help = (
        "Channel names to be inserted into OME metadata. Number of names"
        " must match number of channels in image. Be sure to put quotes"
        " around names containing spaces or other special shell characters."
    )

    cli = argparse.ArgumentParser(description=description_text)
    cli.add_argument("image", help="OME-TIFF file to convert")
    cli.add_argument(
        "-n",
        dest="channel_names",
        nargs="+",
        default=[],
        metavar="NAME",
        help=names_help,
    )
    return cli.parse_args()
def _filler_bytes(length, filler=b"unused "):
    """Return exactly `length` bytes of human-recognizable filler.

    The result is "[" + repeated `filler` + space padding + "]". Ranges too
    short to hold both brackets are filled with spaces only, so the caller
    never writes past the end of the range it is clearing.
    """
    if length < 2:
        return b" " * max(length, 0)
    repeats, padding = divmod(length - 2, len(filler))
    return b"[" + filler * repeats + b" " * padding + b"]"


def main():
    """Convert a legacy OME-TIFF pyramid to the SubIFD-based layout in place.

    Steps, in order:
      1. Parse the command line and open the TIFF for writing.
      2. Validate that the file is a UTF-8 OME-TIFF written as a legacy
         ("Faas") pyramid, with one OME-XML Image per pyramid level, no
         Z-stacks or timepoints, and consistent page sizes per level.
      3. Rewrite the OME-XML so it contains only the first Image with a
         single TiffData element, optionally renaming channels.
      4. Append new IFDs in which each full-size page references its
         sub-resolution pages through tag 330, then repoint the file's
         first-IFD offset at them.
      5. Overwrite the old IFDs and replaced tag values with filler.

    Any validation failure prints a message and exits with status 1.
    """

    args = parse_args()

    # Use the parsed positional argument rather than sys.argv[1], which is
    # "-n" whenever the channel names are given before the image path.
    image_path = args.image
    try:
        tiff = tiffsurgeon.TiffSurgeon(
            image_path, encoding="utf-8", writeable=True
        )
    except tiffsurgeon.FormatError as e:
        print(f"TIFF format error: {e}")
        sys.exit(1)

    tiff.read_ifds()

    # ElementTree doesn't parse xml declarations so we'll just run some sanity
    # checks that we do have UTF-8 and give it a decoded string instead of raw
    # bytes. We need to both ensure that the raw tag bytes decode properly and
    # that the declaration encoding is UTF-8 if present.
    try:
        omexml = tiff.ifds[0].tags.get_value(270, "")
    except tiffsurgeon.FormatError:
        # NOTE(review): assumes tiffsurgeon reports undecodable tag bytes as
        # FormatError, as the original (unqualified) handler intended.
        print("ImageDescription tag is not a valid UTF-8 string (not an OME-TIFF?)")
        sys.exit(1)
    # Encoding names are case-insensitive in XML, so "utf-8" must be accepted
    # just like "UTF-8".
    if re.match(
        r'<\?xml [^>]*encoding="(?!UTF-8)[^"]*"', omexml, re.IGNORECASE
    ):
        print("OME-XML is encoded with something other than UTF-8.")
        sys.exit(1)

    xml_ns = {"ome": "http://www.openmicroscopy.org/Schemas/OME/2016-06"}

    if xml_ns["ome"] not in omexml:
        print("Not an OME-TIFF.")
        sys.exit(1)
    # A legacy pyramid is identified by "Faas" in the Software tag (305) and
    # the absence of a SubIFDs tag (330) on the first page.
    if (
        "Faas" not in tiff.ifds[0].tags.get_value(305, "")
        or 330 in tiff.ifds[0].tags
    ):
        print("Not a legacy OME-TIFF pyramid.")
        sys.exit(1)

    # All XML manipulation assumes the document is valid OME-XML!
    root = xml.etree.ElementTree.fromstring(omexml)
    image = root.find("ome:Image", xml_ns)
    pixels = image.find("ome:Pixels", xml_ns)
    size_x = int(pixels.get("SizeX"))
    size_y = int(pixels.get("SizeY"))
    size_c = int(pixels.get("SizeC"))
    size_z = int(pixels.get("SizeZ"))
    size_t = int(pixels.get("SizeT"))
    # The legacy layout stores one Image element per pyramid level.
    num_levels = len(root.findall("ome:Image", xml_ns))
    # (width, height) of every TIFF page, from tags 256 and 257.
    page_dims = [(ifd.tags[256].value, ifd.tags[257].value) for ifd in tiff.ifds]

    if len(root) != num_levels:
        print("Top-level OME-XML elements other than Image are not supported.")
        # The metadata rewrite below drops every root child after the first,
        # so continuing here would silently discard the unsupported elements.
        sys.exit(1)
    if size_z != 1 or size_t != 1:
        print("Z-stacks and multiple timepoints are not supported.")
        sys.exit(1)
    if size_c * num_levels != len(tiff.ifds):
        print("TIFF page count does not match OME-XML Image elements.")
        sys.exit(1)
    if any(dims != (size_x, size_y) for dims in page_dims[:size_c]):
        print(f"TIFF does not begin with SizeC={size_c} full-size pages.")
        sys.exit(1)
    # Pages are laid out level by level, size_c pages per level; every page
    # within one level must share the same dimensions.
    for level in range(1, num_levels):
        level_dims = page_dims[level * size_c : (level + 1) * size_c]
        if len(set(level_dims)) != 1:
            print(
                f"Pyramid level {level + 1} out of {num_levels} has inconsistent"
                f" sizes:\n{level_dims}"
            )
            sys.exit(1)
    if args.channel_names and len(args.channel_names) != size_c:
        print(
            f"Wrong number of channel names -- image has {size_c} channels but"
            f" {len(args.channel_names)} names were specified:"
        )
        for i, n in enumerate(args.channel_names, 1):
            print(f"{i:4}: {n}")
        sys.exit(1)

    print("Input image summary")
    print("===================")
    print(f"Dimensions: {size_x} x {size_y}")
    print(f"Number of channels: {size_c}")
    print(f"Pyramid sub-resolutions ({num_levels - 1} total):")
    # First page of each sub-resolution level.
    for dim_x, dim_y in page_dims[size_c::size_c]:
        print(f" {dim_x} x {dim_y}")
    software = tiff.ifds[0].tags.get_value(305, "<not set>")
    print(f"Software: {software}")
    print()

    print("Updating OME-XML metadata...")
    # We already verified there is nothing but Image elements under the root.
    for other_image in root[1:]:
        root.remove(other_image)
    for tiffdata in pixels.findall("ome:TiffData", xml_ns):
        pixels.remove(tiffdata)
    new_tiffdata = xml.etree.ElementTree.Element(
        f"{{{xml_ns['ome']}}}TiffData",
        attrib={"IFD": "0", "PlaneCount": str(size_c)},
    )
    # A valid OME-XML Pixels begins with size_c Channels; then comes TiffData.
    pixels.insert(size_c, new_tiffdata)

    if args.channel_names:
        print("Renaming channels...")
        channels = pixels.findall("ome:Channel", xml_ns)
        for channel, name in zip(channels, args.channel_names):
            channel.attrib["Name"] = name

    fix_attrib_namespace(root)
    # ElementTree.tostring would have been simpler but it only supports
    # xml_declaration and default_namespace starting with Python 3.8.
    xml_file = io.BytesIO()
    tree = xml.etree.ElementTree.ElementTree(root)
    tree.write(
        xml_file,
        encoding="utf-8",
        xml_declaration=True,
        default_namespace=xml_ns["ome"],
    )
    new_omexml = xml_file.getvalue()

    print("Writing new TIFF headers...")

    # Every existing IFD is superseded by the ones appended below.
    stale_ranges = [ifd.offset_range for ifd in tiff.ifds]

    main_ifds = tiff.ifds[:size_c]
    # For channel c, the sub-resolution pages are every size_c-th IFD after
    # the full-size page.
    channel_sub_ifds = [tiff.ifds[c + size_c :: size_c] for c in range(size_c)]
    for i, (main_ifd, sub_ifds) in enumerate(zip(main_ifds, channel_sub_ifds)):
        for ifd in sub_ifds:
            if 305 in ifd.tags:
                stale_ranges.append(ifd.tags[305].offset_range)
                del ifd.tags[305]
            # Tag 254 (NewSubfileType) = 1 on every sub-resolution page.
            ifd.tags.insert(tiff.append_tag_data(254, 3, 1))
        if i == 0:
            # Only the first page keeps Software and ImageDescription; both
            # get new values, so the old value storage becomes stale.
            stale_ranges.append(main_ifd.tags[305].offset_range)
            stale_ranges.append(main_ifd.tags[270].offset_range)
            # Break up "Faas" so the converted file no longer passes the
            # legacy-pyramid check above.
            old_software = main_ifd.tags[305].value.replace("Faas", "F*a*a*s")
            new_software = f"pyramid_upgrade.py (was {old_software})"
            main_ifd.tags.insert(tiff.append_tag_data(305, 2, new_software))
            main_ifd.tags.insert(tiff.append_tag_data(270, 2, new_omexml))
        else:
            if 305 in main_ifd.tags:
                stale_ranges.append(main_ifd.tags[305].offset_range)
                del main_ifd.tags[305]
        sub_ifds[:] = tiff.append_ifd_sequence(sub_ifds)
        offsets = [ifd.offset for ifd in sub_ifds]
        # Tag 330 (SubIFDs) lists the offsets of this channel's sub-levels.
        main_ifd.tags.insert(tiff.append_tag_data(330, 16, offsets))

    main_ifds = tiff.append_ifd_sequence(main_ifds)
    tiff.write_first_ifd_offset(main_ifds[0].offset)

    print("Clearing old headers and tag values...")
    # We overwrite all the old IFDs and referenced data values with obvious
    # "filler" as a courtesy to anyone who might need to poke around in the TIFF
    # structure down the road. A real TIFF parser wouldn't see the stale data,
    # but a human might just scan for the first thing that looks like a run of
    # OME-XML and not realize it's been replaced with something else. The filler
    # content is the repeated string "unused " with square brackets at the
    # beginning and end of each filled IFD or data value.
    for r in stale_ranges:
        tiff.file.seek(r.start)
        # One write of exactly len(r) bytes; never spills past the range.
        tiff.file.write(_filler_bytes(len(r)))

    tiff.close()

    print()
    print("Success!")
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()