From bc203dafa109550b90131431a6fb77401a8b06be Mon Sep 17 00:00:00 2001 From: Jeremy Cole Date: Mon, 18 Nov 2024 15:15:58 -0800 Subject: [PATCH] Refactor Innodb::DataType; add SET/ENUM; fix DECIMAL --- Gemfile.lock | 2 + innodb_ruby.gemspec | 1 + lib/innodb.rb | 16 + lib/innodb/data_type.rb | 484 +++--------------- lib/innodb/data_type/bit.rb | 30 ++ lib/innodb/data_type/blob.rb | 28 + lib/innodb/data_type/character.rb | 53 ++ lib/innodb/data_type/date.rb | 23 + lib/innodb/data_type/datetime.rb | 28 + lib/innodb/data_type/decimal.rb | 133 +++++ lib/innodb/data_type/enum.rb | 33 ++ lib/innodb/data_type/floating_point.rb | 30 ++ lib/innodb/data_type/innodb_roll_pointer.rb | 39 ++ lib/innodb/data_type/innodb_transaction_id.rb | 18 + lib/innodb/data_type/integer.rb | 55 ++ lib/innodb/data_type/set.rb | 32 ++ lib/innodb/data_type/time.rb | 22 + lib/innodb/data_type/timestamp.rb | 25 + lib/innodb/data_type/year.rb | 35 ++ lib/innodb/field.rb | 40 +- lib/innodb/page/index.rb | 2 +- spec/innodb/data_type_spec.rb | 77 ++- 22 files changed, 714 insertions(+), 492 deletions(-) create mode 100644 lib/innodb/data_type/bit.rb create mode 100644 lib/innodb/data_type/blob.rb create mode 100644 lib/innodb/data_type/character.rb create mode 100644 lib/innodb/data_type/date.rb create mode 100644 lib/innodb/data_type/datetime.rb create mode 100644 lib/innodb/data_type/decimal.rb create mode 100644 lib/innodb/data_type/enum.rb create mode 100644 lib/innodb/data_type/floating_point.rb create mode 100644 lib/innodb/data_type/innodb_roll_pointer.rb create mode 100644 lib/innodb/data_type/innodb_transaction_id.rb create mode 100644 lib/innodb/data_type/integer.rb create mode 100644 lib/innodb/data_type/set.rb create mode 100644 lib/innodb/data_type/time.rb create mode 100644 lib/innodb/data_type/timestamp.rb create mode 100644 lib/innodb/data_type/year.rb diff --git a/Gemfile.lock b/Gemfile.lock index 7e932e2..d7a7040 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -4,6 +4,7 @@ PATH 
innodb_ruby (0.14.0) bigdecimal (~> 3.1.8) bindata (>= 1.4.5, < 3.0) + csv (~> 3.3) digest-crc (~> 0.4, >= 0.4.1) getoptlong (~> 0.2.1) histogram (~> 0.2) @@ -14,6 +15,7 @@ GEM ast (2.4.2) bigdecimal (3.1.8) bindata (2.5.0) + csv (3.3.0) diff-lcs (1.5.1) digest-crc (0.6.5) rake (>= 12.0.0, < 14.0.0) diff --git a/innodb_ruby.gemspec b/innodb_ruby.gemspec index 4c20227..3a0bef6 100644 --- a/innodb_ruby.gemspec +++ b/innodb_ruby.gemspec @@ -23,6 +23,7 @@ Gem::Specification.new do |s| s.add_dependency("bigdecimal", "~> 3.1.8") s.add_dependency("bindata", ">= 1.4.5", "< 3.0") + s.add_dependency("csv", "~> 3.3") s.add_dependency("digest-crc", "~> 0.4", ">= 0.4.1") s.add_dependency("getoptlong", "~> 0.2.1") s.add_dependency("histogram", "~> 0.2") diff --git a/lib/innodb.rb b/lib/innodb.rb index 99dbcc8..d3ab183 100644 --- a/lib/innodb.rb +++ b/lib/innodb.rb @@ -45,6 +45,22 @@ def self.debug=(value) require "innodb/page/sdi_blob" require "innodb/page/sys" require "innodb/page/undo_log" +require "innodb/data_type" +require "innodb/data_type/bit" +require "innodb/data_type/blob" +require "innodb/data_type/character" +require "innodb/data_type/date" +require "innodb/data_type/datetime" +require "innodb/data_type/decimal" +require "innodb/data_type/enum" +require "innodb/data_type/floating_point" +require "innodb/data_type/innodb_roll_pointer" +require "innodb/data_type/innodb_transaction_id" +require "innodb/data_type/integer" +require "innodb/data_type/set" +require "innodb/data_type/time" +require "innodb/data_type/timestamp" +require "innodb/data_type/year" require "innodb/record" require "innodb/field" require "innodb/space" diff --git a/lib/innodb/data_type.rb b/lib/innodb/data_type.rb index bfe9e1f..1964bcf 100644 --- a/lib/innodb/data_type.rb +++ b/lib/innodb/data_type.rb @@ -1,469 +1,121 @@ # frozen_string_literal: true -require "stringio" -require "bigdecimal" -require "date" +require "csv" module Innodb class DataType - # MySQL's Bit-Value Type (BIT). 
- class BitType - attr_reader :name - attr_reader :width + class InvalidSpecificationError < StandardError; end - def initialize(base_type, modifiers, properties) - nbits = modifiers.fetch(0, 1) - raise "Unsupported width for BIT type." unless nbits >= 0 && nbits <= 64 + # A hash of data types to specialized classes to handle them. Normally + # subclasses will register themselves in this list. + @specialized_classes = {} - @width = (nbits + 7) / 8 - @name = Innodb::DataType.make_name(base_type, modifiers, properties) - end - - def value(data) - "0b%b" % BinData.const_get("Uint%dbe" % (@width * 8)).read(data) - end + class << self + attr_reader :specialized_classes end - class IntegerType - attr_reader :name - attr_reader :width - - def initialize(base_type, modifiers, properties) - @width = base_type_width_map[base_type] - @unsigned = properties.include?(:UNSIGNED) - @name = Innodb::DataType.make_name(base_type, modifiers, properties) - end - - def base_type_width_map - { - BOOL: 1, - BOOLEAN: 1, - TINYINT: 1, - SMALLINT: 2, - MEDIUMINT: 3, - INT: 4, - INT6: 6, - BIGINT: 8, - } - end - - def value(data) - nbits = @width * 8 - @unsigned ? get_uint(data, nbits) : get_int(data, nbits) - end - - def get_uint(data, nbits) - BinData.const_get("Uint%dbe" % nbits).read(data) - end - - def get_int(data, nbits) - BinData.const_get("Int%dbe" % nbits).read(data) ^ (-1 << (nbits - 1)) - end + def self.register_specialization(data_type, specialized_class) + @specialized_classes[data_type] = specialized_class end - class FloatType - attr_reader :name - attr_reader :width - - def initialize(base_type, modifiers, properties) - @width = 4 - @name = Innodb::DataType.make_name(base_type, modifiers, properties) - end - - # Read a little-endian single-precision floating-point number. 
- def value(data) - BinData::FloatLe.read(data) - end + def self.specialization_for(data_type) + # This needs to intentionally use Innodb::DataType because we need to register + # in the class instance variable in *that* class. + Innodb::DataType.register_specialization(data_type, self) end - class DoubleType - attr_reader :name - attr_reader :width - - def initialize(base_type, modifiers, properties) - @width = 8 - @name = Innodb::DataType.make_name(base_type, modifiers, properties) - end - - # Read a little-endian double-precision floating-point number. - def value(data) - BinData::DoubleLe.read(data) - end + def self.specialization_for?(data_type) + Innodb::DataType.specialized_classes.include?(data_type) end - # MySQL's Fixed-Point Type (DECIMAL), stored in InnoDB as a binary string. - class DecimalType - attr_reader :name - attr_reader :width - - # The value is stored as a sequence of signed big-endian integers, each - # representing up to 9 digits of the integral and fractional parts. The - # first integer of the integral part and/or the last integer of the - # fractional part might be compressed (or packed) and are of variable - # length. The remaining integers (if any) are uncompressed and 32 bits - # wide. - MAX_DIGITS_PER_INTEGER = 9 - BYTES_PER_DIGIT = [0, 1, 1, 2, 2, 3, 3, 4, 4, 4].freeze - - def initialize(base_type, modifiers, properties) - precision, scale = sanity_check(modifiers) - integral = precision - scale - @uncomp_integral = integral / MAX_DIGITS_PER_INTEGER - @uncomp_fractional = scale / MAX_DIGITS_PER_INTEGER - @comp_integral = integral - (@uncomp_integral * MAX_DIGITS_PER_INTEGER) - @comp_fractional = scale - (@uncomp_fractional * MAX_DIGITS_PER_INTEGER) - @width = (@uncomp_integral * 4) + BYTES_PER_DIGIT[@comp_integral] + - (@comp_fractional * 4) + BYTES_PER_DIGIT[@comp_fractional] - @name = Innodb::DataType.make_name(base_type, modifiers, properties) - end - - def value(data) - # Strings representing the integral and fractional parts. 
- intg = "".dup - frac = "".dup - - stream = StringIO.new(data) - mask = sign_mask(stream) - - intg << get_digits(stream, mask, @comp_integral) - - (1..@uncomp_integral).each do - intg << get_digits(stream, mask, MAX_DIGITS_PER_INTEGER) - end - - (1..@uncomp_fractional).each do - frac << get_digits(stream, mask, MAX_DIGITS_PER_INTEGER) - end - - frac << get_digits(stream, mask, @comp_fractional) - frac = "0" if frac.empty? - - # Convert to something resembling a string representation. - str = "#{mask.to_s.chop}#{intg}.#{frac}" - - BigDecimal(str).to_s("F") - end - - private - - # Ensure width specification (if any) is compliant. - def sanity_check(modifiers) - raise "Invalid width specification" unless modifiers.size <= 2 - - precision = modifiers.fetch(0, 10) - raise "Unsupported precision for DECIMAL type" unless precision >= 1 && precision <= 65 - - scale = modifiers.fetch(1, 0) - raise "Unsupported scale for DECIMAL type" unless scale >= 0 && scale <= 30 && scale <= precision - - [precision, scale] - end - - # The sign is encoded in the high bit of the first byte/digit. The byte - # might be part of a larger integer, so apply the bit-flipper and push - # back the byte into the stream. - def sign_mask(stream) - byte = BinData::Uint8.read(stream) - sign = byte & 0x80 - byte.assign(byte ^ 0x80) - stream.rewind - byte.write(stream) - stream.rewind - sign.zero? ? -1 : 0 - end - - # Return a string representing an integer with a specific number of digits. - def get_digits(stream, mask, digits) - nbits = BYTES_PER_DIGIT[digits] * 8 - return "" unless nbits.positive? - - value = (BinData.const_get("Int%dbe" % nbits).read(stream) ^ mask) - # Preserve leading zeros. - "%0#{digits}d" % value - end + def self.ceil_to(value, multiple) + ((value + (multiple - 1)) / multiple) * multiple end - # Fixed-length character type. 
- class CharacterType - attr_reader :name - attr_reader :width - - def initialize(base_type, modifiers, properties) - @width = modifiers.fetch(0, 1) - @name = Innodb::DataType.make_name(base_type, modifiers, properties) - end - - def value(data) - # The SQL standard defines that CHAR fields should have end-spaces - # stripped off. - data.sub(/ +$/, "") + module HasNumericModifiers + def coerce_modifiers(modifiers) + modifiers = modifiers&.split(",") if modifiers.is_a?(String) + modifiers&.map(&:to_i) end end - class VariableCharacterType - attr_reader :name - attr_reader :width - - def initialize(base_type, modifiers, properties) - @width = modifiers[0] - raise "Invalid width specification" unless modifiers.size == 1 - - @name = Innodb::DataType.make_name(base_type, modifiers, properties) - end - - def value(data) - # The SQL standard defines that VARCHAR fields should have end-spaces - # stripped off. - data.sub(/ +$/, "") + module HasStringListModifiers + def coerce_modifiers(modifiers) + CSV.parse_line(modifiers, quote_char: "'")&.map(&:to_s) end - end - - # Fixed-length binary type. 
- class BinaryType - attr_reader :name - attr_reader :width - def initialize(base_type, modifiers, properties) - @width = modifiers.fetch(0, 1) - @name = Innodb::DataType.make_name(base_type, modifiers, properties) + def formatted_modifiers + CSV.generate_line(modifiers, quote_char: "'", force_quotes: true, row_sep: "") end end - class VariableBinaryType - attr_reader :name - attr_reader :width - - def initialize(base_type, modifiers, properties) - @width = modifiers[0] - raise "Invalid width specification" unless modifiers.size == 1 + attr_reader :type_name + attr_reader :modifiers + attr_reader :properties - @name = Innodb::DataType.make_name(base_type, modifiers, properties) - end + def initialize(type_name, modifiers = nil, properties = nil) + @type_name = type_name + @modifiers = Array(coerce_modifiers(modifiers)) + @properties = Array(properties) end - class BlobType - attr_reader :name - - def initialize(base_type, modifiers, properties) - @name = Innodb::DataType.make_name(base_type, modifiers, properties) - end + def variable? + false end - class YearType - attr_reader :name - attr_reader :width - - def initialize(base_type, modifiers, properties) - @width = 1 - @display_width = modifiers.fetch(0, 4) - @name = Innodb::DataType.make_name(base_type, modifiers, properties) - end - - def value(data) - year = BinData::Uint8.read(data) - return (year % 100).to_s if @display_width != 4 - return (year + 1900).to_s if year != 0 - - "0000" - end + def blob? + false end - class TimeType - attr_reader :name - attr_reader :width - - def initialize(base_type, modifiers, properties) - @width = 3 - @name = Innodb::DataType.make_name(base_type, modifiers, properties) - end - - def value(data) - time = BinData::Int24be.read(data) ^ (-1 << 23) - sign = "-" if time.negative? 
- time = time.abs - "%s%02d:%02d:%02d" % [sign, time / 10_000, (time / 100) % 100, time % 100] - end + def value(data) + data end - class DateType - attr_reader :name - attr_reader :width - - def initialize(base_type, modifiers, properties) - @width = 3 - @name = Innodb::DataType.make_name(base_type, modifiers, properties) - end - - def value(data) - date = BinData::Int24be.read(data) ^ (-1 << 23) - day = date & 0x1f - month = (date >> 5) & 0xf - year = date >> 9 - "%04d-%02d-%02d" % [year, month, day] - end + def coerce_modifiers(modifiers) + modifiers end - class DatetimeType - attr_reader :name - attr_reader :width - - def initialize(base_type, modifiers, properties) - @width = 8 - @name = Innodb::DataType.make_name(base_type, modifiers, properties) - end - - def value(data) - datetime = BinData::Int64be.read(data) ^ (-1 << 63) - date = datetime / 1_000_000 - year = date / 10_000 - month = (date / 100) % 100 - day = date % 100 - time = datetime - (date * 1_000_000) - hour = time / 10_000 - min = (time / 100) % 100 - sec = time % 100 - "%04d-%02d-%02d %02d:%02d:%02d" % [year, month, day, hour, min, sec] - end + def formatted_modifiers + modifiers.join(",") end - class TimestampType - attr_reader :name - attr_reader :width - - def initialize(base_type, modifiers, properties) - @width = 4 - @name = Innodb::DataType.make_name(base_type, modifiers, properties) - end - - # Returns the UTC timestamp as a value in 'YYYY-MM-DD HH:MM:SS' format. - def value(data) - timestamp = BinData::Uint32be.read(data) - return "0000-00-00 00:00:00" if timestamp.zero? - - DateTime.strptime(timestamp.to_s, "%s").strftime "%Y-%m-%d %H:%M:%S" - end + def format_type_name + [ + [ + type_name.to_s, + modifiers&.any? ? 
"(#{formatted_modifiers})" : nil, + ].compact.join, + *properties&.map { |p| p.to_s.sub("_", " ") }, + ].compact.join(" ") end - class EnumType - attr_reader :name - attr_reader :width - - def initialize(base_type, modifiers, properties) - @width = 1 - @name = Innodb::DataType.make_name(base_type, modifiers, properties) - end - - def value(data) - nbits = @width * 8 - BinData.const_get("Int%dbe" % nbits).read(data) ^ (-1 << (nbits - 1)) - end + def name + @name ||= format_type_name end - # - # Data types for InnoDB system columns. - # - - # Transaction ID. - class TransactionIdType - attr_reader :name - attr_reader :width - - def initialize(base_type, modifiers, properties) - @width = 6 - @name = Innodb::DataType.make_name(base_type, modifiers, properties) - end - - def read(cursor) - cursor.name("transaction_id") { cursor.read_uint48 } - end + def length + raise NotImplementedError end - # Rollback data pointer. - class RollPointerType - extend ReadBitsAtOffset - - Pointer = Struct.new( - :is_insert, - :rseg_id, - :undo_log, - keyword_init: true - ) - - attr_reader :name - attr_reader :width + # Parse a data type definition and extract the base type and any modifiers. 
+ def self.parse_type_name_and_modifiers(type_string) + matches = /^(?[a-zA-Z0-9_]+)(?:\((?.+)\))?(?\s+unsigned)?$/.match(type_string) + raise "Unparseable type #{type_string}" unless matches - def initialize(base_type, modifiers, properties) - @width = 7 - @name = Innodb::DataType.make_name(base_type, modifiers, properties) - end - - def self.parse_roll_pointer(roll_ptr) - Pointer.new( - is_insert: read_bits_at_offset(roll_ptr, 1, 55) == 1, - rseg_id: read_bits_at_offset(roll_ptr, 7, 48), - undo_log: Innodb::Page::Address.new( - page: read_bits_at_offset(roll_ptr, 32, 16), - offset: read_bits_at_offset(roll_ptr, 16, 0) - ) - ) - end + type_name = matches[:type_name].upcase.to_sym + return [type_name, []] unless matches[:modifiers] - def value(data) - roll_ptr = BinData::Uint56be.read(data) - self.class.parse_roll_pointer(roll_ptr) - end + # Use the CSV parser since it can understand quotes properly. + [type_name, matches[:modifiers]] end - # Maps base type to data type class. - TYPES = { - BIT: BitType, - BOOL: IntegerType, - BOOLEAN: IntegerType, - TINYINT: IntegerType, - SMALLINT: IntegerType, - MEDIUMINT: IntegerType, - INT: IntegerType, - INT6: IntegerType, - BIGINT: IntegerType, - FLOAT: FloatType, - DOUBLE: DoubleType, - DECIMAL: DecimalType, - NUMERIC: DecimalType, - CHAR: CharacterType, - VARCHAR: VariableCharacterType, - BINARY: BinaryType, - VARBINARY: VariableBinaryType, - TINYBLOB: BlobType, - BLOB: BlobType, - MEDIUMBLOB: BlobType, - LONGBLOB: BlobType, - TINYTEXT: BlobType, - TEXT: BlobType, - MEDIUMTEXT: BlobType, - LONGTEXT: BlobType, - YEAR: YearType, - TIME: TimeType, - DATE: DateType, - DATETIME: DatetimeType, - TIMESTAMP: TimestampType, - TRX_ID: TransactionIdType, - ROLL_PTR: RollPointerType, - ENUM: EnumType, - JSON: BlobType, - }.freeze - - def self.make_name(base_type, modifiers, properties) - name = base_type.to_s.dup - name << "(#{modifiers.join(',')})" unless modifiers.empty? 
- name << " " - name << properties.join(" ") - name.strip - end + def self.parse(type_string, properties = nil) + type_name, modifiers = parse_type_name_and_modifiers(type_string.to_s) - def self.new(base_type, modifiers, properties) - raise "Data type '#{base_type}' is not supported" unless TYPES.key?(base_type) + type_class = Innodb::DataType.specialized_classes[type_name] + raise "Unrecognized type #{type_name}" unless type_class - TYPES[base_type].new(base_type, modifiers, properties) + type_class.new(type_name, modifiers, properties) end end end diff --git a/lib/innodb/data_type/bit.rb b/lib/innodb/data_type/bit.rb new file mode 100644 index 0000000..5a47a59 --- /dev/null +++ b/lib/innodb/data_type/bit.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module Innodb + class DataType + # MySQL's Bit-Value Type (BIT). + class Bit < DataType + specialization_for :BIT + + include HasNumericModifiers + + DEFAULT_SIZE = 1 + SUPPORTED_SIZE_RANGE = (1..64).freeze + + def initialize(type_name, modifiers, properties) + super + + @size = @modifiers.fetch(0, DEFAULT_SIZE) + raise "Unsupported width for #{@type_name} type" unless SUPPORTED_SIZE_RANGE.include?(@size) + end + + def value(data) + "0b%b" % BinData.const_get("Uint%dbe" % Innodb::DataType.ceil_to(@size, 8)).read(data) + end + + def length + @length = Innodb::DataType.ceil_to(@size, 8) / 8 + end + end + end +end diff --git a/lib/innodb/data_type/blob.rb b/lib/innodb/data_type/blob.rb new file mode 100644 index 0000000..e4bfc94 --- /dev/null +++ b/lib/innodb/data_type/blob.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +module Innodb + class DataType + class Blob < DataType + specialization_for :TINYBLOB + specialization_for :BLOB + specialization_for :MEDIUMBLOB + specialization_for :LONGBLOB + specialization_for :TINYTEXT + specialization_for :TEXT + specialization_for :MEDIUMTEXT + specialization_for :LONGTEXT + specialization_for :JSON + specialization_for :GEOMETRY + + include HasNumericModifiers + + 
def variable? + true + end + + def blob? + true + end + end + end +end diff --git a/lib/innodb/data_type/character.rb b/lib/innodb/data_type/character.rb new file mode 100644 index 0000000..df1521d --- /dev/null +++ b/lib/innodb/data_type/character.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +module Innodb + class DataType + # Fixed-length character type. + class Character < DataType + specialization_for :CHAR + specialization_for :VARCHAR + specialization_for :BINARY + specialization_for :VARBINARY + + include HasNumericModifiers + + VALID_LENGTH_RANGE = (0..65_535).freeze # 1..255 characters, up to 4 bytes each + DEFAULT_LENGTH = 1 + + def initialize(type_name, modifiers, properties) + super + + @variable = false + @binary = false + + if %i[VARCHAR VARBINARY].include?(@type_name) + @variable = true + if @modifiers.empty? + raise InvalidSpecificationError, "Missing length specification for variable-length type #{@type_name}" + elsif @modifiers.size > 1 + raise InvalidSpecificationError, "Invalid length specification for variable-length type #{@type_name}" + end + end + + @binary = true if %i[BINARY VARBINARY].include?(@type_name) + + @length = @modifiers.fetch(0, DEFAULT_LENGTH) + return if VALID_LENGTH_RANGE.include?(@length) + + raise InvalidSpecificationError, "Length #{@length} out of range for #{@type_name}" + end + + def variable? + @variable + end + + def value(data) + # The SQL standard defines that CHAR fields should have end-spaces + # stripped off. + @binary ? 
data : data.sub(/ +$/, "") + end + + attr_reader :length + end + end +end diff --git a/lib/innodb/data_type/date.rb b/lib/innodb/data_type/date.rb new file mode 100644 index 0000000..2e92a87 --- /dev/null +++ b/lib/innodb/data_type/date.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +module Innodb + class DataType + class Date < DataType + specialization_for :DATE + + include HasNumericModifiers + + def value(data) + date = BinData::Int24be.read(data) ^ (-1 << 23) + day = date & 0x1f + month = (date >> 5) & 0xf + year = date >> 9 + "%04d-%02d-%02d" % [year, month, day] + end + + def length + 3 + end + end + end +end diff --git a/lib/innodb/data_type/datetime.rb b/lib/innodb/data_type/datetime.rb new file mode 100644 index 0000000..7b6b2a6 --- /dev/null +++ b/lib/innodb/data_type/datetime.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +module Innodb + class DataType + class Datetime < DataType + specialization_for :DATETIME + + include HasNumericModifiers + + def value(data) + datetime = BinData::Int64be.read(data) ^ (-1 << 63) + date = datetime / 1_000_000 + year = date / 10_000 + month = (date / 100) % 100 + day = date % 100 + time = datetime - (date * 1_000_000) + hour = time / 10_000 + min = (time / 100) % 100 + sec = time % 100 + "%04d-%02d-%02d %02d:%02d:%02d" % [year, month, day, hour, min, sec] + end + + def length + 8 + end + end + end +end diff --git a/lib/innodb/data_type/decimal.rb b/lib/innodb/data_type/decimal.rb new file mode 100644 index 0000000..e6ff669 --- /dev/null +++ b/lib/innodb/data_type/decimal.rb @@ -0,0 +1,133 @@ +# frozen_string_literal: true + +require "bigdecimal" +require "stringio" + +module Innodb + class DataType + # MySQL's Fixed-Point Type (DECIMAL), stored in InnoDB as a binary string. 
+ class Decimal < DataType + specialization_for :DECIMAL + specialization_for :NUMERIC + + include HasNumericModifiers + + # The value is stored as a sequence of signed big-endian integers, each + # representing up to 9 digits of the integral and fractional parts. The + # first integer of the integral part and/or the last integer of the + # fractional part might be compressed (or packed) and are of variable + # length. The remaining integers (if any) are uncompressed and 32 bits + # wide. + MAX_DIGITS_PER_INTEGER = 9 + BYTES_PER_DIGIT = [0, 1, 1, 2, 2, 3, 3, 4, 4, 4].freeze + + DEFAULT_PRECISION = 10 + VALID_PRECISION_RANGE = (1..65).freeze + + DEFAULT_SCALE = 0 + VALID_SCALE_RANGE = (0..30).freeze + + def self.length_attributes(precision, scale) + integral = precision - scale + + integral_count_full_parts = integral / MAX_DIGITS_PER_INTEGER + integral_first_part_length = integral - (integral_count_full_parts * MAX_DIGITS_PER_INTEGER) + + fractional_count_full_parts = scale / MAX_DIGITS_PER_INTEGER + fractional_first_part_length = scale - (fractional_count_full_parts * MAX_DIGITS_PER_INTEGER) + + integral_length = (integral_count_full_parts * 4) + BYTES_PER_DIGIT[integral_first_part_length] + fractional_length = (fractional_count_full_parts * 4) + BYTES_PER_DIGIT[fractional_first_part_length] + + { + length: integral_length + fractional_length, + integral: { + length: integral_length, + first_part_length: integral_first_part_length, + count_full_parts: integral_count_full_parts, + }, + fractional: { + length: fractional_length, + first_part_length: fractional_first_part_length, + count_full_parts: fractional_count_full_parts, + }, + } + end + + def initialize(type_name, modifiers, properties) + super + + raise "Invalid #{@type_name} specification: #{@modifiers}" unless @modifiers.size <= 2 + + @precision = @modifiers.fetch(0, DEFAULT_PRECISION) + @scale = @modifiers.fetch(1, DEFAULT_SCALE) + + unless VALID_PRECISION_RANGE.include?(@precision) + raise "Unsupported 
precision #{@precision} for #{@type_name} type" + end + + unless VALID_SCALE_RANGE.include?(@scale) && @scale <= @precision + raise "Unsupported scale #{@scale} for #{@type_name} type" + end + + @length_attributes = self.class.length_attributes(@precision, @scale) + end + + def length + @length_attributes[:length] + end + + def value(data) + # Strings representing the integral and fractional parts. + intg = "".dup + frac = "".dup + + stream = StringIO.new(data) + mask = sign_mask(stream) + + intg << get_digits(stream, mask, @length_attributes[:integral][:first_part_length]) + + @length_attributes[:integral][:count_full_parts].times do + intg << get_digits(stream, mask, MAX_DIGITS_PER_INTEGER) + end + + @length_attributes[:fractional][:count_full_parts].times do + frac << get_digits(stream, mask, MAX_DIGITS_PER_INTEGER) + end + + frac << get_digits(stream, mask, @length_attributes[:fractional][:first_part_length]) + frac = "0" if frac.empty? + + # Convert to something resembling a string representation. + str = "#{mask.to_s.chop}#{intg}.#{frac}" + + BigDecimal(str).to_s("F") + end + + private + + # The sign is encoded in the high bit of the first byte/digit. The byte + # might be part of a larger integer, so apply the bit-flipper and push + # back the byte into the stream. + def sign_mask(stream) + byte = BinData::Uint8.read(stream) + sign = byte & 0x80 + byte.assign(byte ^ 0x80) + stream.rewind + byte.write(stream) + stream.rewind + sign.zero? ? -1 : 0 + end + + # Return a string representing an integer with a specific number of digits. + def get_digits(stream, mask, digits) + nbits = BYTES_PER_DIGIT[digits] * 8 + return "" unless nbits.positive? + + value = (BinData.const_get("Int%dbe" % nbits).read(stream) ^ mask) + # Preserve leading zeros. 
+ "%0#{digits}d" % value + end + end + end +end diff --git a/lib/innodb/data_type/enum.rb b/lib/innodb/data_type/enum.rb new file mode 100644 index 0000000..1ba497e --- /dev/null +++ b/lib/innodb/data_type/enum.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +module Innodb + class DataType + class Enum < DataType + specialization_for :ENUM + + include HasStringListModifiers + + attr_reader :values + + def initialize(type_name, modifiers, properties) + super + + @values = { 0 => "" } + @values.merge!(@modifiers.each_with_index.to_h { |s, i| [i + 1, s] }) + end + + def bit_length + @bit_length ||= Innodb::DataType.ceil_to(Math.log2(@values.length).ceil, 8) + end + + def value(data) + index = BinData.const_get("Int%dbe" % bit_length).read(data) + values[index] + end + + def length + bit_length / 8 + end + end + end +end diff --git a/lib/innodb/data_type/floating_point.rb b/lib/innodb/data_type/floating_point.rb new file mode 100644 index 0000000..9e7cedd --- /dev/null +++ b/lib/innodb/data_type/floating_point.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module Innodb + class DataType + class FloatingPoint < DataType + specialization_for :FLOAT + specialization_for :DOUBLE + + include HasNumericModifiers + + def value(data) + case type_name + when :FLOAT + BinData::FloatLe.read(data) + when :DOUBLE + BinData::DoubleLe.read(data) + end + end + + def length + case type_name + when :FLOAT + 4 + when :DOUBLE + 8 + end + end + end + end +end diff --git a/lib/innodb/data_type/innodb_roll_pointer.rb b/lib/innodb/data_type/innodb_roll_pointer.rb new file mode 100644 index 0000000..9208d07 --- /dev/null +++ b/lib/innodb/data_type/innodb_roll_pointer.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +module Innodb + class DataType + # Rollback data pointer. 
+ class InnodbRollPointer < DataType + specialization_for :ROLL_PTR + + extend ReadBitsAtOffset + + Pointer = Struct.new( + :is_insert, + :rseg_id, + :undo_log, + keyword_init: true + ) + + def self.parse_roll_pointer(roll_ptr) + Pointer.new( + is_insert: read_bits_at_offset(roll_ptr, 1, 55) == 1, + rseg_id: read_bits_at_offset(roll_ptr, 7, 48), + undo_log: Innodb::Page::Address.new( + page: read_bits_at_offset(roll_ptr, 32, 16), + offset: read_bits_at_offset(roll_ptr, 16, 0) + ) + ) + end + + def value(data) + roll_ptr = BinData::Uint56be.read(data) + self.class.parse_roll_pointer(roll_ptr) + end + + def length + 7 + end + end + end +end diff --git a/lib/innodb/data_type/innodb_transaction_id.rb b/lib/innodb/data_type/innodb_transaction_id.rb new file mode 100644 index 0000000..3559e78 --- /dev/null +++ b/lib/innodb/data_type/innodb_transaction_id.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +module Innodb + class DataType + # Transaction ID. + class InnodbTransactionId < DataType + specialization_for :TRX_ID + + def value(data) + BinData::Uint48be.read(data).to_i + end + + def length + 6 + end + end + end +end diff --git a/lib/innodb/data_type/integer.rb b/lib/innodb/data_type/integer.rb new file mode 100644 index 0000000..45c4d53 --- /dev/null +++ b/lib/innodb/data_type/integer.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +module Innodb + class DataType + class Integer < DataType + specialization_for :BOOL + specialization_for :BOOLEAN + specialization_for :TINYINT + specialization_for :SMALLINT + specialization_for :MEDIUMINT + specialization_for :INT + specialization_for :INT6 + specialization_for :BIGINT + + include HasNumericModifiers + + TYPE_BIT_LENGTH_MAP = { + BOOL: 8, + BOOLEAN: 8, + TINYINT: 8, + SMALLINT: 16, + MEDIUMINT: 24, + INT: 32, + INT6: 48, + BIGINT: 64, + }.freeze + + def initialize(type_name, modifiers, properties) + super + + @unsigned = properties&.include?(:UNSIGNED) + end + + def bit_length + @bit_length ||= 
TYPE_BIT_LENGTH_MAP[type_name] + end + + def unsigned? + @unsigned + end + + def value(data) + if unsigned? + BinData.const_get("Uint%dbe" % bit_length).read(data) + else + BinData.const_get("Int%dbe" % bit_length).read(data) ^ (-1 << (bit_length - 1)) + end + end + + def length + bit_length / 8 + end + end + end +end diff --git a/lib/innodb/data_type/set.rb b/lib/innodb/data_type/set.rb new file mode 100644 index 0000000..addeb3f --- /dev/null +++ b/lib/innodb/data_type/set.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +module Innodb + class DataType + class Set < DataType + specialization_for :SET + + include HasStringListModifiers + + attr_reader :values + + def initialize(type_name, modifiers, properties) + super + + @values = @modifiers.each_with_index.to_h { |s, i| [2**i, s] } + end + + def bit_length + @bit_length ||= Innodb::DataType.ceil_to(@values.length, 8) + end + + def value(data) + bitmap = BinData.const_get("Int%dbe" % bit_length).read(data) + (0...bit_length).map { |i| bitmap & (2**i) }.reject(&:zero?).map { |i| values[i] } + end + + def length + bit_length / 8 + end + end + end +end diff --git a/lib/innodb/data_type/time.rb b/lib/innodb/data_type/time.rb new file mode 100644 index 0000000..339a26e --- /dev/null +++ b/lib/innodb/data_type/time.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +module Innodb + class DataType + class Time < DataType + specialization_for :TIME + + include HasNumericModifiers + + def value(data) + time = BinData::Int24be.read(data) ^ (-1 << 23) + sign = "-" if time.negative? 
+ time = time.abs + "%s%02d:%02d:%02d" % [sign, time / 10_000, (time / 100) % 100, time % 100] + end + + def length + 3 + end + end + end +end diff --git a/lib/innodb/data_type/timestamp.rb b/lib/innodb/data_type/timestamp.rb new file mode 100644 index 0000000..cbcd845 --- /dev/null +++ b/lib/innodb/data_type/timestamp.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require "date" + +module Innodb + class DataType + class Timestamp < DataType + specialization_for :TIMESTAMP + + include HasNumericModifiers + + # Returns the UTC timestamp as a value in 'YYYY-MM-DD HH:MM:SS' format. + def value(data) + timestamp = BinData::Uint32be.read(data) + return "0000-00-00 00:00:00" if timestamp.zero? + + DateTime.strptime(timestamp.to_s, "%s").strftime "%Y-%m-%d %H:%M:%S" + end + + def length + 4 + end + end + end +end diff --git a/lib/innodb/data_type/year.rb b/lib/innodb/data_type/year.rb new file mode 100644 index 0000000..c7ad95d --- /dev/null +++ b/lib/innodb/data_type/year.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +module Innodb + class DataType + class Year < DataType + specialization_for :YEAR + + include HasNumericModifiers + + DEFAULT_DISPLAY_WIDTH = 4 + VALID_DISPLAY_WIDTHS = [2, 4].freeze + + def initialize(type_name, modifiers, properties) + super + + @display_width = modifiers.fetch(0, DEFAULT_DISPLAY_WIDTH) + return if VALID_DISPLAY_WIDTHS.include?(@display_width) + + raise InvalidSpecificationError, "Unsupported display width #{@display_width} for type #{type_name}" + end + + def value(data) + year = BinData::Uint8.read(data) + return (year % 100).to_s if @display_width != 4 + return (year + 1900).to_s if year != 0 + + "0000" + end + + def length + 1 + end + end + end +end diff --git a/lib/innodb/field.rb b/lib/innodb/field.rb index a366efe..6771786 100644 --- a/lib/innodb/field.rb +++ b/lib/innodb/field.rb @@ -27,8 +27,7 @@ def initialize(position, name, type_definition, *properties) @position = position @name = name @nullable = 
!properties.delete(:NOT_NULL) - base_type, modifiers = parse_type_definition(type_definition.to_s) - @data_type = Innodb::DataType.new(base_type, modifiers, properties) + @data_type = Innodb::DataType.parse(type_definition, properties) end # Return whether this field can be NULL. @@ -47,24 +46,26 @@ def extern?(record) end def variable? - [ - Innodb::DataType::BlobType, - Innodb::DataType::VariableBinaryType, - Innodb::DataType::VariableCharacterType, - ].any? { |c| @data_type.is_a?(c) } + @data_type.variable? + end + + def fixed? + !variable? end def blob? - @data_type.is_a?(Innodb::DataType::BlobType) + @data_type.blob? end # Return the actual length of this variable-length field. def length(record) if record.header.lengths.include?(@name) len = record.header.lengths[@name] - raise "Fixed-length mismatch" unless variable? || len == @data_type.width + if fixed? && len != @data_type.length + raise "Fixed-length mismatch; #{len} vs #{@data_type.length} for #{@data_type.name}" + end else - len = @data_type.width + len = @data_type.length end extern?(record) ? len - EXTERN_FIELD_SIZE : len end @@ -75,13 +76,7 @@ def read(cursor, field_length) end def value_by_length(cursor, field_length) - if @data_type.respond_to?(:read) - cursor.name(@data_type.name) { @data_type.read(cursor) } - elsif @data_type.respond_to?(:value) - @data_type.value(read(cursor, field_length)) - else - read(cursor, field_length) - end + @data_type.value(read(cursor, field_length)) end # Read the data value (e.g. encoded in the data). @@ -112,16 +107,5 @@ def read_extern(cursor) ) end end - - # Parse a data type definition and extract the base type and any modifiers. 
- def parse_type_definition(type_string) - matches = /^([a-zA-Z0-9_]+)(\((.+)\))?(\s+unsigned)?$/.match(type_string) - raise "Unparseable type #{type_string}" unless matches - - base_type = matches[1].upcase.to_sym - return [base_type, []] unless matches[3] - - [base_type, matches[3].sub(/ /, "").split(",").map(&:to_i)] - end end end diff --git a/lib/innodb/page/index.rb b/lib/innodb/page/index.rb index c7a4325..11e55a6 100644 --- a/lib/innodb/page/index.rb +++ b/lib/innodb/page/index.rb @@ -453,7 +453,7 @@ def record_header_compact_variable_lengths_and_externs(cursor, nulls) # Two bytes are used only if the length exceeds 127 bytes and the # maximum length exceeds 255 bytes (or the field is a BLOB type). - if len > 127 && (f.blob? || f.data_type.width > 255) + if len > 127 && (f.blob? || f.data_type.length > 255) ext = (0x40 & len) != 0 len = ((len & 0x3f) << 8) + cursor.read_uint8 end diff --git a/spec/innodb/data_type_spec.rb b/spec/innodb/data_type_spec.rb index 438e100..309625f 100644 --- a/spec/innodb/data_type_spec.rb +++ b/spec/innodb/data_type_spec.rb @@ -4,46 +4,29 @@ require "stringio" describe Innodb::DataType do - it "makes proper data type names" do - Innodb::DataType.make_name("BIGINT", [], %i[UNSIGNED]).should eql "BIGINT UNSIGNED" - Innodb::DataType.make_name("SMALLINT", [], []).should eql "SMALLINT" - Innodb::DataType.make_name("VARCHAR", [32], []).should eql "VARCHAR(32)" - Innodb::DataType.make_name("CHAR", [16], []).should eql "CHAR(16)" - Innodb::DataType.make_name("CHAR", [], []).should eql "CHAR" - Innodb::DataType.make_name("VARBINARY", [48], []).should eql "VARBINARY(48)" - Innodb::DataType.make_name("BINARY", [64], []).should eql "BINARY(64)" - Innodb::DataType.make_name("BINARY", [], []).should eql "BINARY" - end - - describe Innodb::DataType::CharacterType do - it "handles optional width" do - Innodb::DataType.new(:CHAR, [], []).width.should eql 1 - Innodb::DataType.new(:CHAR, [16], []).width.should eql 16 + describe 
Innodb::DataType::Character do + it "handles optional length" do + Innodb::DataType.parse("CHAR", []).length.should eql 1 + Innodb::DataType.parse("CHAR(16)", []).length.should eql 16 end - end - describe Innodb::DataType::VariableCharacterType do it "throws an error on invalid modifiers" do - expect { Innodb::DataType.new(:VARCHAR, [], []) }.to raise_error "Invalid width specification" - expect { Innodb::DataType.new(:VARCHAR, [1, 1], []) }.to raise_error "Invalid width specification" + expect { Innodb::DataType.parse("VARCHAR", []) }.to raise_error Innodb::DataType::InvalidSpecificationError + expect { Innodb::DataType.parse("VARCHAR(1,1)", []) }.to raise_error Innodb::DataType::InvalidSpecificationError end - end - describe Innodb::DataType::BinaryType do - it "handles optional width" do - Innodb::DataType.new(:BINARY, [], []).width.should eql 1 - Innodb::DataType.new(:BINARY, [16], []).width.should eql 16 + it "handles optional length" do + Innodb::DataType.parse("BINARY", []).length.should eql 1 + Innodb::DataType.parse("BINARY(16)", []).length.should eql 16 end - end - describe Innodb::DataType::VariableBinaryType do it "throws an error on invalid modifiers" do - expect { Innodb::DataType.new(:VARBINARY, [], []) }.to raise_error "Invalid width specification" - expect { Innodb::DataType.new(:VARBINARY, [1, 1], []) }.to raise_error "Invalid width specification" + expect { Innodb::DataType.parse("VARBINARY", []) }.to raise_error Innodb::DataType::InvalidSpecificationError + expect { Innodb::DataType.parse("VARBINARY(1,1)", []) }.to raise_error Innodb::DataType::InvalidSpecificationError end end - describe Innodb::DataType::IntegerType do + describe Innodb::DataType::Integer do before :all do @data = { offset: {}, @@ -74,8 +57,8 @@ end it "returns a TINYINT value correctly" do - data_type = Innodb::DataType.new(:TINYINT, [], []) - data_type.should be_an_instance_of Innodb::DataType::IntegerType + data_type = Innodb::DataType.parse("TINYINT", []) + 
data_type.should be_an_instance_of Innodb::DataType::Integer @buffer.seek(@data[:offset][:innodb_sint_pos]) data_type.value(@buffer.read(1)).should eql 0x00 @buffer.seek(@data[:offset][:innodb_sint_neg]) @@ -83,8 +66,8 @@ end it "returns a TINYINT UNSIGNED value correctly" do - data_type = Innodb::DataType.new(:TINYINT, [], %i[UNSIGNED]) - data_type.should be_an_instance_of Innodb::DataType::IntegerType + data_type = Innodb::DataType.parse("TINYINT", %i[UNSIGNED]) + data_type.should be_an_instance_of Innodb::DataType::Integer data_type.value(@buffer.read(1)).should eql 0x00 data_type.value(@buffer.read(1)).should eql 0x01 data_type.value(@buffer.read(1)).should eql 0x02 @@ -94,8 +77,8 @@ end it "returns a SMALLINT value correctly" do - data_type = Innodb::DataType.new(:SMALLINT, [], []) - data_type.should be_an_instance_of Innodb::DataType::IntegerType + data_type = Innodb::DataType.parse("SMALLINT", []) + data_type.should be_an_instance_of Innodb::DataType::Integer @buffer.seek(@data[:offset][:innodb_sint_pos]) data_type.value(@buffer.read(2)).should eql 0x0001 @buffer.seek(@data[:offset][:innodb_sint_neg]) @@ -103,8 +86,8 @@ end it "returns a SMALLINT UNSIGNED value correctly" do - data_type = Innodb::DataType.new(:SMALLINT, [], %i[UNSIGNED]) - data_type.should be_an_instance_of Innodb::DataType::IntegerType + data_type = Innodb::DataType.parse("SMALLINT", %i[UNSIGNED]) + data_type.should be_an_instance_of Innodb::DataType::Integer data_type.value(@buffer.read(2)).should eql 0x0001 data_type.value(@buffer.read(2)).should eql 0x0203 data_type.value(@buffer.read(2)).should eql 0x0405 @@ -114,8 +97,8 @@ end it "returns a MEDIUMINT value correctly" do - data_type = Innodb::DataType.new(:MEDIUMINT, [], []) - data_type.should be_an_instance_of Innodb::DataType::IntegerType + data_type = Innodb::DataType.parse("MEDIUMINT", []) + data_type.should be_an_instance_of Innodb::DataType::Integer @buffer.seek(@data[:offset][:innodb_sint_pos]) 
data_type.value(@buffer.read(3)).should eql 0x000102 @buffer.seek(@data[:offset][:innodb_sint_neg]) @@ -123,8 +106,8 @@ end it "returns a MEDIUMINT UNSIGNED value correctly" do - data_type = Innodb::DataType.new(:MEDIUMINT, [], %i[UNSIGNED]) - data_type.should be_an_instance_of Innodb::DataType::IntegerType + data_type = Innodb::DataType.parse("MEDIUMINT", %i[UNSIGNED]) + data_type.should be_an_instance_of Innodb::DataType::Integer data_type.value(@buffer.read(3)).should eql 0x000102 data_type.value(@buffer.read(3)).should eql 0x030405 data_type.value(@buffer.read(3)).should eql 0x060708 @@ -134,8 +117,8 @@ end it "returns an INT value correctly" do - data_type = Innodb::DataType.new(:INT, [], []) - data_type.should be_an_instance_of Innodb::DataType::IntegerType + data_type = Innodb::DataType.parse("INT", []) + data_type.should be_an_instance_of Innodb::DataType::Integer @buffer.seek(@data[:offset][:innodb_sint_pos]) data_type.value(@buffer.read(4)).should eql 0x00010203 @buffer.seek(@data[:offset][:innodb_sint_neg]) @@ -143,8 +126,8 @@ end it "returns an INT UNSIGNED value correctly" do - data_type = Innodb::DataType.new(:INT, [], %i[UNSIGNED]) - data_type.should be_an_instance_of Innodb::DataType::IntegerType + data_type = Innodb::DataType.parse("INT", %i[UNSIGNED]) + data_type.should be_an_instance_of Innodb::DataType::Integer data_type.value(@buffer.read(4)).should eql 0x00010203 data_type.value(@buffer.read(4)).should eql 0x04050607 data_type.value(@buffer.read(4)).should eql 0x08090a0b @@ -154,7 +137,7 @@ end it "returns a BIGINT value correctly" do - data_type = Innodb::DataType.new(:BIGINT, [], []) + data_type = Innodb::DataType.parse("BIGINT", []) @buffer.seek(@data[:offset][:innodb_sint_pos]) data_type.value(@buffer.read(8)).should eql 0x0001020304050607 @buffer.seek(@data[:offset][:innodb_sint_neg]) @@ -162,7 +145,7 @@ end it "returns a BIGINT UNSIGNED value correctly" do - data_type = Innodb::DataType.new(:BIGINT, [], %i[UNSIGNED]) + data_type = 
Innodb::DataType.parse("BIGINT", %i[UNSIGNED]) data_type.value(@buffer.read(8)).should eql 0x0001020304050607 data_type.value(@buffer.read(8)).should eql 0x08090a0b0c0d0e0f @buffer.seek(@data[:offset][:max_uint])