pynbt.py


# vim:set sw=2 sts=2 ts=2:

"""
Named Binary Tag library. Serializes and deserializes TAG_* objects
to and from binary data. Load a Minecraft level by calling nbt.load().
Create your own TAG_* objects and set their values.
Save a TAG_* object to a file or StringIO object.

Read the test functions at the end of the file to get started.

This library requires Numpy.    Get it here:
http://new.scipy.org/download.html

Official NBT documentation is here:
http://www.minecraft.net/docs/NBT.txt


Copyright 2010 David Rio Vierra
"""
import collections
import itertools
import struct
import gzip
from cStringIO import StringIO
import os
from contextlib import closing
from numpy import array, zeros, uint8, fromstring
TAGfmt = ">b"


class NBTFormatError(RuntimeError):
    pass


class TAG_Value(object):
    """Simple values. Subclasses override fmt to change the type and size.
    Subclasses may set dataType instead of overriding setValue for automatic data type coercion"""

    fmt = ">b"
    tag = -1  # error!

    _value = None

    def getValue(self):
            return self._value

    def setValue(self, newVal):
            self._value = self.dataType(newVal)
    value = property(getValue, setValue, None, "Change the TAG's value.    Data types are checked and coerced if needed.")

    _name = None

    def getName(self):
            return self._name

    def setName(self, newVal):
            self._name = str(newVal)

    def delName(self):
            self._name = ""
    name = property(getName, setName, delName, "Change the TAG's name.    Coerced to a string.")

    @classmethod
    def load_from(cls, data, data_cursor):
        data = data[data_cursor:]
        (value,) = struct.unpack_from(cls.fmt, data)
        self = cls(value=value)
        return self, data_cursor + struct.calcsize(self.fmt)

    def __init__(self, value=0, name=None):
        self.name = name
        self.value = value

    def __repr__(self):
        return "%s( \"%s\" ): %s" % (str(self.__class__), self.name, repr(self.value))

    def __str__(self):
        return self.pretty_string()

    def pretty_string(self, indent=0):
        if self.name:
            return  " " * indent + "%s( \"%s\" ): %s" % (str(self.__class__.__name__), self.name, self.value)
        else:
            return  " " * indent + "%s: %s" % (str(self.__class__.__name__), self.value)

    def write_tag(self, buf):
        buf.write(struct.pack(TAGfmt, self.tag))

    def write_name(self, buf):
        if self.name != None:
            TAG_String(self.name).write_value(buf)

    def write_value(self, buf):
        buf.write(struct.pack(self.fmt, self.value))

    def save(self, filename="", buf=None):
        if filename:
            self.saveGzipped(filename)
            return
        "Save the tagged element to a file."
        if self.name == None:
            self.name = ""  # root tag must have name
        self.write_tag(buf)
        self.write_name(buf)
        self.write_value(buf)

    def saveGzipped(self, filename, compresslevel=1):
        sio = StringIO()
        # atomic write
        try:
            os.rename(filename, filename + ".old")
        except Exception, e:
            # print "Atomic Save: No existing file to rename"
            pass

        with closing(gzip.GzipFile(fileobj=sio, mode="wb", compresslevel=compresslevel)) as outputGz:
                self.save(buf=outputGz)
                outputGz.flush()

        # print len(sio.getvalue())
        try:
                with open(filename, 'wb') as f:
                        f.write(sio.getvalue())
        except:
            try:
                os.rename(filename + ".old", filename,)
            except Exception, e:
                print e
                return

        try:
            os.remove(filename + ".old")
        except Exception, e:
            # print "Atomic Save: No old file to remove"
            pass


class TAG_Byte(TAG_Value):
    tag = 1
    fmt = ">b"
    dataType = int


class TAG_Short(TAG_Value):
    tag = 2
    fmt = ">h"
    dataType = int


class TAG_Int(TAG_Value):
    tag = 3
    fmt = ">i"
    dataType = int


class TAG_Long(TAG_Value):
    tag = 4
    fmt = ">q"
    dataType = long


class TAG_Float(TAG_Value):
    tag = 5
    fmt = ">f"
    dataType = float


class TAG_Double(TAG_Value):
    tag = 6
    fmt = ">d"
    dataType = float


class TAG_Byte_Array(TAG_Value):
    """Like a string, but for binary data.    four length bytes instead of
    two.    value is a numpy array, and you can change its elements"""

    tag = 7
    fmt = ">i%ds"

    def dataType(self, value):
        return array(value, uint8)

    def __repr__(self):
        return "<%s: length %d> ( %s )" % (self.__class__, len(self.value), self.name)

    def pretty_string(self, indent=0):
        if self.name:
            return  " " * indent + "%s( \"%s\" ): shape=%s dtype=%s %s" % (
                str(self.__class__.__name__),
                self.name,
                str(self.value.shape),
                str(self.value.dtype),
                self.value)
        else:
            return  " " * indent + "%s: %s %s" % (str(self.__class__.__name__), str(self.value.shape), self.value)

    @classmethod
    def load_from(cls, data, data_cursor):
        data = data[data_cursor:]
        (string_len,) = struct.unpack_from(">I", data)
        value = fromstring(data[4:string_len + 4], 'uint8')
        self = cls(value)
        return self, data_cursor + string_len + 4

    def __init__(self, value=zeros(0, uint8), name=None):
        if name:
            self.name = name
        self.value = value

    def write_value(self, buf):
        # print self.value
        valuestr = self.value.tostring()
        buf.write(struct.pack(self.fmt % (len(valuestr),), len(valuestr), valuestr))


class TAG_Int_Array(TAG_Byte_Array):
    """An array of ints"""
    tag = 11

    def dataType(self, value):
        return array(value, '>u4')

    @classmethod
    def load_from(cls, data, data_cursor):
        data = data[data_cursor:]
        (string_len,) = struct.unpack_from(">I", data)
        value = fromstring(data[4:string_len * 4 + 4], '>u4')
        self = cls(value)
        return self, data_cursor + len(self.value) * 4 + 4

    def __init__(self, value=zeros(0, ">u4"), name=None):
        self.name = name
        self.value = value

    def write_value(self, buf):
        # print self.value
        valuestr = self.value.tostring()
        buf.write(struct.pack(self.fmt % (len(valuestr),), len(valuestr) / 4, valuestr))


class TAG_Short_Array(TAG_Int_Array):
    """An array of ints"""
    tag = 12

    def dataType(self, value):
        return array(value, '>u2')

    @classmethod
    def load_from(cls, data, data_cursor):
        data = data[data_cursor:]
        (string_len,) = struct.unpack_from(">I", data)
        value = fromstring(data[4:string_len * 2 + 4], '>u2')
        self = cls(value)
        return self, data_cursor + len(self.value) * 2 + 4

    def __init__(self, value=zeros(0, ">u2"), name=None):
        self.name = name
        self.value = value

    def write_value(self, buf):
        # print self.value
        valuestr = self.value.tostring()
        buf.write(struct.pack(self.fmt % (len(valuestr),), len(valuestr) / 2, valuestr))


class TAG_String(TAG_Value):
    """String in UTF-8
    The value parameter must be a 'unicode' or a UTF-8 encoded 'str'
    """

    tag = 8
    fmt = ">h%ds"
    dataType = lambda self, s: isinstance(s, unicode) and s.encode('utf-8') or s

    @classmethod
    def load_from(cls, data, data_cursor):
        data = data[data_cursor:]
        (string_len,) = struct.unpack_from(">H", data)
        value = data[2:string_len + 2].tostring()
        self = cls(value)
        return self, data_cursor + string_len + 2

    def __init__(self, value="", name=None):
        if name:
            self.name = name
        self.value = value

    def write_value(self, buf):
        u8value = self._value
        buf.write(struct.pack(self.fmt % (len(u8value),), len(u8value), u8value))

    @property
    def unicodeValue(self):
        return self.value.decode('utf-8')


class TAG_Compound(TAG_Value, collections.MutableMapping):
    """A heterogenous list of named tags. Names must be unique within
    the TAG_Compound. Add tags to the compound using the subscript
    operator [].    This will automatically name the tags."""

    tag = 10

    def dataType(self, val):
            for i in val:
                    assert isinstance(i, TAG_Value)
                    assert i.name
            return list(val)

    def __repr__(self):
        return "%s( %s ): %s" % (str(self.__class__.__name__), self.name, self.value)

    def pretty_string(self, indent=0):
        if self.name:
            pretty = " " * indent + "%s( \"%s\" ): %d items\n" % (str(self.__class__.__name__), self.name, len(self.value))
        else:
            pretty = " " * indent + "%s(): %d items\n" % (str(self.__class__.__name__), len(self.value))
        indent += 4
        for tag in self.value:
            pretty += tag.pretty_string(indent) + "\n"
        return pretty

    @classmethod
    def load_from(cls, data, data_cursor):
        self = cls()
        while data_cursor < len(data):
            tag_type = data[data_cursor]
            data_cursor += 1
            if tag_type == 0:
                break

            tag, data_cursor = load_named(data, data_cursor, tag_type)

            self._value.append(tag)

        return self, data_cursor

    def __init__(self, value=[], name=""):

        self.name = name
        if value.__class__ == ''.__class__:
            self.name = value
            value = []
        self.value = value

    def write_value(self, buf):
        for i in self.value:
            i.save(buf=buf)
        buf.write("\x00")

    "collection functions"

    def __getitem__(self, k):
        # hits=filter(lambda x: x.name==k, self.value)
        # if(len(hits)): return hits[0]
        for key in self.value:
                if key.name == k:
                    return key
        raise KeyError("Key {0} not found in tag {1}".format(k, self))

    def __iter__(self):
        return itertools.imap(lambda x: x.name, self.value)

    def __contains__(self, k):
        return k in map(lambda x: x.name, self.value)

    def __len__(self):
        return self.value.__len__()

    def __setitem__(self, k, v):
        """Automatically wraps lists and tuples in a TAG_List, and wraps strings
        and unicodes in a TAG_String."""
        if isinstance(v, (list, tuple)):
            v = TAG_List(v)
        elif isinstance(v, basestring):
            v = TAG_String(v)

        if not (v.__class__ in tag_classes.values()):
            raise TypeError("Invalid type %s for TAG_Compound" % v.__class__)
        """remove any items already named "k".    """
        olditems = filter(lambda x: x.name == k, self.value)
        for i in olditems:
            self.value.remove(i)
        self.value.append(v)
        v.name = k

    def __delitem__(self, k):
        self.value.__delitem__(self.value.index(self[k]))

    def add(self, v):
        self[v.name] = v


class TAG_List(TAG_Value, collections.MutableSequence):

    """A homogenous list of unnamed data of a single TAG_* type.
    Once created, the type can only be changed by emptying the list
    and adding an element of the new type. If created with no arguments,
    returns a list of TAG_Compound

    Empty lists in the wild have been seen with type TAG_Byte"""

    tag = 9

    def dataType(self, val):
        if val:
            listType = val[0].__class__
            # FIXME: This is kinda weird; None as the empty tag name?
            assert all(isinstance(x, listType) and x.name in ("", "None") for x in val)
        return list(val)

    def __repr__(self):
        return "%s( %s ): %s" % (self.__class__.__name__, self.name, self.value)

    def pretty_string(self, indent=0):
        if self.name:
            pretty = " " * indent + "%s( \"%s\" ):\n" % (str(self.__class__.__name__), self.name)
        else:
            pretty = " " * indent + "%s():\n" % (str(self.__class__.__name__),)

        indent += 4
        for tag in self.value:
            pretty += tag.pretty_string(indent) + "\n"
        return pretty

    @classmethod
    def load_from(cls, data, data_cursor):
        self = cls()
        self.list_type = data[data_cursor]

        data_cursor += 1

        list_length, data_cursor = TAG_Int.load_from(data, data_cursor)
        list_length = list_length.value

        for i in range(list_length):

            tag, data_cursor = tag_classes[self.list_type].load_from(data, data_cursor)
            self.append(tag)

        return self, data_cursor

    def __init__(self, value=[], name=None, list_type=TAG_Compound):
        # can be created from a list of tags in value, with an optional
        # name, or created from raw tag data, or created with list_type
        # taken from a TAG class or instance

        self.name = name
        self.list_type = list_type.tag

        if len(value):
            self.list_type = value[0].tag
            value = filter(lambda x: x.__class__ == value[0].__class__, value)

        self.value = value

    """ collection methods """

    def __iter__(self):
        return iter(self.value)

    def __contains__(self, k):
        return k in self.value

    def __getitem__(self, i):
        return self.value[i]

    def __len__(self):
        return len(self.value)

    def __setitem__(self, i, v):
        if v.__class__ != tag_classes[self.list_type]:
            raise TypeError("Invalid type %s for TAG_List(%s)" % (v.__class__, tag_classes[self.list_type]))
        v.name = ""
        self.value[i] = v

    def __delitem__(self, i):
        del self.value[i]

    def insert(self, i, v):
            if not v.tag in tag_classes:
                raise TypeError("Not a tag type: %s" % (v,))
            if len(self) == 0:
                    self.list_type = v.tag
            else:
                    if v.__class__ != tag_classes[self.list_type]:
                        raise TypeError("Invalid type %s for TAG_List(%s)" % (v.__class__, tag_classes[self.list_type]))

            v.name = ""
            self.value.insert(i, v)

    def write_value(self, buf):
        buf.write(struct.pack(TAGfmt, self.list_type))
        TAG_Int(len(self)).write_value(buf)
        for i in self.value:
            i.write_value(buf)

tag_classes = {
    1: TAG_Byte,
    2: TAG_Short,
    3: TAG_Int,
    4: TAG_Long,
    5: TAG_Float,
    6: TAG_Double,
    7: TAG_Byte_Array,
    8: TAG_String,
    9: TAG_List,
    10: TAG_Compound,
    11: TAG_Int_Array,
    12: TAG_Short_Array,
    }

import zlib


def gunzip(data):
    # strip off the header and use negative WBITS to tell zlib there's no header
    return zlib.decompress(data[10:], -zlib.MAX_WBITS)


def loadFile(filename):
    with file(filename, "rb") as f:
        inputdata = f.read()
    data = inputdata
    try:
        data = gunzip(inputdata)
    except (zlib.error, IOError):
        print "File %s not zipped" % filename

    return load(buf=fromstring(data, 'uint8'))


def load_named(data, data_cursor, tag_type):
    tag_name, data_cursor = TAG_String.load_from(data, data_cursor)
    tag_name = tag_name.value

    tag, data_cursor = tag_classes[tag_type].load_from(data, data_cursor)
    tag.name = tag_name

    return tag, data_cursor


def load(filename="", buf=None):
    """Unserialize data from an entire NBT file and return the
    root TAG_Compound object. Argument can be a string containing a
    filename or an array of integers containing TAG_Compound data. """

    if filename and isinstance(filename, (str, unicode)):
        return loadFile(filename)
    if isinstance(buf, str):
        buf = fromstring(buf, uint8)
    data = buf
    # if buf != None:
    #    data = buf
    if not len(buf):
        raise NBTFormatError("Asked to load root tag of zero length")

    data_cursor = 0
    tag_type = data[data_cursor]
    if tag_type != 10:
        raise NBTFormatError('Not an NBT file with a root TAG_Compound (found {0})'.format(tag_type))
    data_cursor += 1

    tag, data_cursor = load_named(data, data_cursor, tag_type)

    return tag

__all__ = [a.__name__ for a in tag_classes.itervalues()] + ["load", "loadFile", "gunzip"]