Skip to content

Commit

Permalink
fix new type and byte escapes
Browse files Browse the repository at this point in the history
  • Loading branch information
moonshadow565 committed Oct 31, 2020
1 parent 762d92b commit 567b39d
Show file tree
Hide file tree
Showing 7 changed files with 288 additions and 68 deletions.
141 changes: 135 additions & 6 deletions src/ritobin/bin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <array>
#include <vector>
#include <unordered_map>
#include <bit>

namespace ritobin {
enum class Type : uint8_t {
Expand All @@ -31,6 +32,7 @@ namespace ritobin {
RGBA = 15,
STRING = 16,
HASH = 17,
FILE = 18,
LIST = 0x80 | 0,
LIST2 = 0x80 | 1,
POINTER = 0x80 | 2,
Expand All @@ -41,7 +43,7 @@ namespace ritobin {
FLAG = 0x80 | 7,
};

constexpr inline auto MAX_PRIMITIVE = Type::HASH;
constexpr inline auto MAX_PRIMITIVE = Type::FILE;

constexpr inline auto MAX_COMPLEX = Type::FLAG;

Expand Down Expand Up @@ -79,8 +81,10 @@ namespace ritobin {
}

// Assigns a raw 32-bit hash value to this FNV1a and clears the cached string.
// NOTE(review): the two unconditional statements at the top make the
// `hash_ != h` guard below always false, so the guarded branch is dead code
// as written. This looks like the pre-change and post-change lines of the
// diff shown together — confirm which form is intended (the guarded form
// would keep str_ when the same hash value is assigned again).
inline FNV1a& operator=(uint32_t h) noexcept {
hash_ = h;
str_.clear();
if (hash_ != h) {
hash_ = h;
str_.clear();
}
return *this;
}

Expand All @@ -97,6 +101,123 @@ namespace ritobin {
}
};

/// A 64-bit XXH64 hash together with the (optional) string it was computed from.
///
/// Hashing is ASCII case-insensitive: the letters 'A'..'Z' are folded to
/// lower case before being mixed into the hash. The stored string keeps the
/// spelling it was given. A value built from a raw hash carries no string.
struct XXH64 {
private:
    uint64_t hash_ = {};
    std::string str_ = {};

    /// Computes XXH64 over `size` bytes starting at `data`, folding ASCII
    /// upper-case letters to lower case first.
    ///
    /// @param data first byte of the input (may be null only when size == 0)
    /// @param size number of bytes to hash
    /// @param seed XXH64 seed value
    /// @return the 64-bit hash
    static constexpr uint64_t xxh64(char const* data, size_t size, uint64_t seed = 0) noexcept {
        constexpr uint64_t Prime1 = 11400714785074694791U;
        constexpr uint64_t Prime2 = 14029467366897019727U;
        constexpr uint64_t Prime3 = 1609587929392839161U;
        constexpr uint64_t Prime4 = 9650029242287828579U;
        constexpr uint64_t Prime5 = 2870177450012600261U;
        // One input byte, lower-cased if it is an ASCII capital, widened to 64 bits.
        constexpr auto Char = [](char c) constexpr -> uint64_t {
            return static_cast<uint8_t>(c >= 'A' && c <= 'Z' ? c - 'A' + 'a' : c);
        };
        // Little-endian load of 4 case-folded bytes.
        constexpr auto HalfBlock = [Char](char const* data) constexpr -> uint64_t {
            return Char(*data)
                | (Char(*(data + 1)) << 8)
                | (Char(*(data + 2)) << 16)
                | (Char(*(data + 3)) << 24);
        };
        // Little-endian load of 8 case-folded bytes.
        constexpr auto Block = [Char](char const* data) constexpr -> uint64_t {
            return Char(*data)
                | (Char(*(data + 1)) << 8)
                | (Char(*(data + 2)) << 16)
                | (Char(*(data + 3)) << 24)
                | (Char(*(data + 4)) << 32)
                | (Char(*(data + 5)) << 40)
                | (Char(*(data + 6)) << 48)
                | (Char(*(data + 7)) << 56);
        };
        // Rotate left. Every call site below uses an amount in 1..63, so the
        // fallback never shifts by the full width.
        constexpr auto ROL = [](uint64_t value, int amount) constexpr -> uint64_t {
#if defined(__cpp_lib_bitops)
            return std::rotl(value, amount);
#else
            return (value << amount) | (value >> (64 - amount));
#endif
        };
        char const* const end = data + size;
        uint64_t result = 0;
        // All loop conditions compare the remaining byte count (end - data)
        // instead of forming `data + N`, which could point beyond
        // one-past-the-end: that is undefined behaviour and is rejected
        // during constant evaluation.
        if (size >= 32u) {
            uint64_t s1 = seed + Prime1 + Prime2;
            uint64_t s2 = seed + Prime2;
            uint64_t s3 = seed;
            uint64_t s4 = seed - Prime1;
            for (; end - data >= 32; data += 32) {
                s1 = ROL(s1 + Block(data) * Prime2, 31) * Prime1;
                s2 = ROL(s2 + Block(data + 8) * Prime2, 31) * Prime1;
                s3 = ROL(s3 + Block(data + 16) * Prime2, 31) * Prime1;
                s4 = ROL(s4 + Block(data + 24) * Prime2, 31) * Prime1;
            }
            result = ROL(s1, 1) +
                     ROL(s2, 7) +
                     ROL(s3, 12) +
                     ROL(s4, 18);
            result ^= ROL(s1 * Prime2, 31) * Prime1;
            result = result * Prime1 + Prime4;
            result ^= ROL(s2 * Prime2, 31) * Prime1;
            result = result * Prime1 + Prime4;
            result ^= ROL(s3 * Prime2, 31) * Prime1;
            result = result * Prime1 + Prime4;
            result ^= ROL(s4 * Prime2, 31) * Prime1;
            result = result * Prime1 + Prime4;
        } else {
            result = seed + Prime5;
        }
        result += static_cast<uint64_t>(size);
        for (; end - data >= 8; data += 8) {
            result ^= ROL(Block(data) * Prime2, 31) * Prime1;
            result = ROL(result, 27) * Prime1 + Prime4;
        }
        for (; end - data >= 4; data += 4) {
            result ^= HalfBlock(data) * Prime1;
            result = ROL(result, 23) * Prime2 + Prime3;
        }
        for (; data != end; ++data) {
            result ^= Char(*data) * Prime5;
            result = ROL(result, 11) * Prime1;
        }
        // Final avalanche.
        result ^= result >> 33;
        result *= Prime2;
        result ^= result >> 29;
        result *= Prime3;
        result ^= result >> 32;
        return result;
    }

    /// Convenience overload hashing the bytes of a string view.
    static constexpr uint64_t xxh64(std::string_view data, uint64_t seed = 0) noexcept {
        return xxh64(data.data(), data.size(), seed);
    }

public:
    /// Zero hash, empty string.
    inline XXH64() noexcept = default;

    /// Hashes `str` and keeps the string. hash_ is declared before str_, so
    /// the hash is computed before the argument is moved from.
    inline XXH64(std::string str) noexcept : hash_(xxh64(str)), str_(std::move(str)) {}

    /// Wraps a raw hash value; no string is associated with it.
    inline XXH64(uint64_t h) noexcept : hash_(h), str_() {}

    /// Replaces both the hash and the stored string with those of `str`.
    inline XXH64& operator=(std::string str) noexcept {
        hash_ = xxh64(str);
        str_ = std::move(str);
        return *this;
    }

    /// Assigns a raw hash. The stored string is dropped only when the hash
    /// actually changes, so re-assigning the current hash keeps the string.
    inline XXH64& operator=(uint64_t h) noexcept {
        if (hash_ != h) {
            hash_ = h;
            str_.clear();
        }
        return *this;
    }

    /// The 64-bit hash value.
    inline uint64_t hash() const noexcept {
        return hash_;
    }

    /// View of the stored string (empty when only a hash is known).
    inline std::string_view str() const& noexcept {
        return str_;
    }

    /// Moves the stored string out of an expiring object.
    inline std::string str() && noexcept {
        return std::move(str_);
    }
};

struct Element;
struct Field;
struct Pair;
Expand Down Expand Up @@ -212,6 +333,12 @@ namespace ritobin {
FNV1a value;
};

/// Value wrapper for elements tagged Type::FILE (text name "file");
/// the payload is an XXH64 hash.
struct File {
    XXH64 value;
    static constexpr char type_name[] = "file";
    static constexpr Type type = Type::FILE;
};

struct List {
static inline constexpr Type type = Type::LIST;
static inline constexpr char type_name[] = "list";
Expand Down Expand Up @@ -286,6 +413,7 @@ namespace ritobin {
RGBA,
String,
Hash,
File,
List,
List2,
Pointer,
Expand Down Expand Up @@ -360,12 +488,13 @@ namespace ritobin {
void write_text_file(std::string const& filename, size_t ident_size = 2) const;
};

// Map from a 32-bit hash value to a string.
using HashTable = std::unordered_map<uint32_t, std::string>;
// Holds hash -> string tables and the entry points that use them on a Bin.
// NOTE(review): `lookup` / `load_CDTB` appear next to the per-hash-type
// members `fnv1a` / `xxh64` and their loaders; this looks like old and new
// diff lines shown together — confirm which members are current.
struct BinUnhasher {
HashTable lookup;
// Table keyed by 32-bit hash (presumably FNV1a values — confirm against the loader).
std::unordered_map<uint32_t, std::string> fnv1a;
// Table keyed by 64-bit hash (presumably XXH64 values — confirm against the loader).
std::unordered_map<uint64_t, std::string> xxh64;

// Operates on `bin` without modifying the tables (const member); presumably
// fills in strings for known hashes — implementation is not visible here.
void unhash(Bin& bin) const noexcept;
// Loaders taking a file name and reporting success as bool. The file format
// and which table each one fills are not visible here — TODO confirm.
bool load_CDTB(std::string const& filename) noexcept;
bool load_fnv1a_CDTB(std::string const& filename) noexcept;
bool load_xxh64_CDTB(std::string const& filename) noexcept;
};
}

Expand Down
12 changes: 11 additions & 1 deletion src/ritobin/binary_read.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,17 @@ namespace ritobin {
if (!read(h)) {
return false;
}
value = { h };
value = FNV1a{ h };
return true;
}


// Reads a 64-bit value through the integer read overload and stores it in
// `value` as a raw XXH64 hash. `value` is left untouched when the read fails.
bool read(XXH64& value) noexcept {
    uint64_t raw_hash;
    bool const ok = read(raw_hash);
    if (ok) {
        value = XXH64{ raw_hash };
    }
    return ok;
}
};
Expand Down
4 changes: 4 additions & 0 deletions src/ritobin/binary_write.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ namespace ritobin {
write(value.hash());
}

// Writes the 64-bit hash of `value`; the associated string is not written.
void write(XXH64 const& value) {
    auto const raw_hash = value.hash();
    write(raw_hash);
}

// Current size of buffer_.
inline size_t position() const noexcept {
    auto const current_size = buffer_.size();
    return current_size;
}
Expand Down
60 changes: 36 additions & 24 deletions src/ritobin/text_read.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,6 @@ namespace ritobin {
Escape_CariageReturn,
Escape_Byte_0,
Escape_Byte_1,
Escape_Unicode_0,
Escape_Unicode_1,
Escape_Unicode_2,
Escape_Unicode_3,
} state = State::Take;
char escaped[8];
while (cur_ != cap_) {
Expand Down Expand Up @@ -165,7 +161,6 @@ namespace ritobin {
state = State::Escape_CariageReturn;
} else if (c == 'u') {
// FIXME: implement unicode code points
state = State::Escape_Unicode_0;
return false;
} else if (c == 'x') {
state = State::Escape_Byte_0;
Expand All @@ -188,30 +183,13 @@ namespace ritobin {
uint8_t value = 0;
if (to_num({escaped, 2}, value, 16)) {
result.push_back(static_cast<char>(value));
return true;
}
} else if (state == State::Escape_Unicode_0) {
state = State::Escape_Unicode_1;
escaped[0] = c;
} else if (state == State::Escape_Unicode_1) {
state = State::Escape_Unicode_2;
escaped[1] = c;
} else if (state == State::Escape_Unicode_2) {
state = State::Escape_Unicode_3;
escaped[2] = c;
} else if (state == State::Escape_Unicode_3) {
state = State::Take;
escaped[3] = c;
uint16_t value = 0;
if (to_num({escaped, 4}, value, 16)) {
// FIXME: encode unicode
} else {
return false;
}
return false;
} else {
if (c == '\\') {
state = State::Escape;
}else if (c < ' ') {
} else if (c < ' ') {
return false;
} else if (c == term) {
cur_++;
Expand Down Expand Up @@ -241,6 +219,22 @@ namespace ritobin {
return false;
}

// Parses the next word as a hexadecimal literal prefixed with "0x" or "0X"
// and stores it in `value` as a raw XXH64 hash.
// Returns false (leaving `value` unchanged) when the prefix is missing or
// to_num rejects the digits.
bool read_hash(XXH64& value) noexcept {
    auto const token = read_word();
    bool const has_hex_prefix = token.size() >= 2
        && token[0] == '0'
        && (token[1] == 'x' || token[1] == 'X');
    if (!has_hex_prefix) {
        return false;
    }
    uint64_t parsed = 0;
    if (!to_num({token.data() + 2, token.size() - 2}, parsed, 16)) {
        return false;
    }
    value = XXH64{ parsed };
    return true;
}

bool read_name(std::string& value) noexcept {
auto const word = read_word();
if (word.empty()) {
Expand Down Expand Up @@ -287,6 +281,19 @@ namespace ritobin {
return false;
}

// Reads either a hex hash literal or a string into `value`.
// The hash form is tried first; if it fails the cursor is rewound to where
// it started and the input is re-read as a string, which is then hashed.
bool read_hash_string(XXH64& value) noexcept {
    auto const start = cur_;
    if (!read_hash(value)) {
        cur_ = start;
        std::string text;
        if (!read_string(text)) {
            return false;
        }
        value = { text };
    }
    return true;
}

bool read_bool(bool& value) noexcept {
auto const word = read_word();
if (word.empty()) {
Expand Down Expand Up @@ -510,6 +517,11 @@ namespace ritobin {
return true;
}

// Reads the text form of a File element: its payload is parsed with
// reader.read_hash_string (hex hash literal or string).
// NOTE(review): bin_assert is defined outside this view; presumably it makes
// this function return false when the wrapped read fails — confirm.
bool read_value_visit(File& value) noexcept {
bin_assert(reader.read_hash_string(value.value));
return true;
}

bool read_value_visit(Vec2& value) noexcept {
bin_assert(read_array<float, 2>(value.value));
return true;
Expand Down
22 changes: 22 additions & 0 deletions src/ritobin/text_write.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,16 @@ namespace ritobin {
write_raw(std::string_view{ result, sizeof(result) });
}

// Emits `hex` as "0x" followed by exactly 16 lower-case hexadecimal digits
// (zero padded, most significant nibble first).
void write_hex(uint64_t hex) noexcept {
    constexpr char alphabet[] = "0123456789abcdef";
    constexpr size_t prefix_len = 2;
    constexpr size_t nibble_count = 16;
    char text[prefix_len + nibble_count] = { '0', 'x' };
    for (size_t n = 0; n != nibble_count; ++n) {
        // Nibble n counted from the most significant end.
        size_t const shift = 4 * (nibble_count - 1 - n);
        text[prefix_len + n] = alphabet[(hex >> shift) & 0x0Fu];
    }
    write_raw(std::string_view{ text, sizeof(text) });
}

void write_name(FNV1a const& value) noexcept {
if (!value.str().empty()) {
write_raw(value.str());
Expand All @@ -140,6 +150,14 @@ namespace ritobin {
write_hex(value.hash());
}
}

// Writes an XXH64 value: its string via write() when one is stored,
// otherwise its hash via write_hex().
void write_string(XXH64 const& value) noexcept {
    auto const text = value.str();
    if (text.empty()) {
        write_hex(value.hash());
        return;
    }
    write(text);
}
};

struct BinTextWriter {
Expand Down Expand Up @@ -310,6 +328,10 @@ namespace ritobin {
writer.write_string(value.value);
}

// Text output for a File element: forwards its XXH64 payload to
// writer.write_string.
void write_value_visit(File const& value) noexcept {
    auto const& payload = value.value;
    writer.write_string(payload);
}

// Text output for a Link element: forwards its payload to
// writer.write_string.
void write_value_visit(Link const& value) noexcept {
    auto const& payload = value.value;
    writer.write_string(payload);
}
Expand Down
Loading

0 comments on commit 567b39d

Please sign in to comment.