Skip to content

Commit

Permalink
Merge pull request #402 from openzim/no_0part
Browse files Browse the repository at this point in the history
  • Loading branch information
mgautierfr authored May 2, 2024
2 parents 9abf37a + 3eee552 commit 9894a5c
Showing 1 changed file with 15 additions and 26 deletions.
41 changes: 15 additions & 26 deletions src/zimsplit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,25 +37,25 @@ class ZimSplitter
private:
zim::Archive archive;
const std::string prefix;
zim::size_type partSize;
zim::size_type maxPartSize;

char first_index, second_index;

std::ifstream ifile;
std::ofstream ofile;
std::string part_name;
zim::size_type out_size;
zim::size_type currentPartSize;
char* batch_buffer;

public:
ZimSplitter(const std::string& fname, const std::string& out_prefix, zim::size_type partSize)
ZimSplitter(const std::string& fname, const std::string& out_prefix, zim::size_type maxPartSize)
: archive(fname),
prefix(out_prefix),
partSize(partSize),
maxPartSize(maxPartSize),
first_index(0),
second_index(0),
ifile(fname, std::ios::binary),
out_size(0)
currentPartSize(0)
{
batch_buffer = new char[BUFFER_SIZE];
}
Expand All @@ -80,9 +80,9 @@ class ZimSplitter
}

void close_file() {
if (out_size > partSize) {
if (currentPartSize > maxPartSize) {
std::cout << "WARNING: Part " << part_name << " is bigger that max part size."
<< " (" << out_size << ">" << partSize << ")" << std::endl;
<< " (" << currentPartSize << ">" << maxPartSize << ")" << std::endl;
}
ofile.close();
}
Expand All @@ -92,7 +92,7 @@ class ZimSplitter
part_name = prefix + get_new_suffix();
std::cout << "opening new file " << part_name << std::endl;
ofile.open(part_name, std::ios::binary);
out_size = 0;
currentPartSize = 0;
}

void copy_out(zim::size_type size) {
Expand All @@ -106,7 +106,7 @@ class ZimSplitter
if (!ofile) {
throw std::runtime_error("Error while writing zim part");
}
out_size += size_to_copy;
currentPartSize += size_to_copy;
size -= size_to_copy;
}
}
Expand All @@ -129,22 +129,11 @@ class ZimSplitter

zim::offset_type last(0);
for(auto offset:offsets) {
auto currentSize = offset-last;
if (currentSize > partSize) {
// One part is bigger than what we want :/
// Still have to write it.
if (out_size) {
new_file();
}
copy_out(currentSize);
new_file();
} else {
if (out_size+currentSize > partSize) {
// It would be too much to write the current part in the current file.
auto chunkSize = offset-last;
if (currentPartSize > 0 && currentPartSize + chunkSize > maxPartSize) {
new_file();
}
copy_out(currentSize);
}
copy_out(chunkSize);
last = offset;
}
}
Expand All @@ -156,12 +145,12 @@ class ZimSplitter

zim::offset_type last(0);
for(auto offset:offsets) {
auto currentSize = offset-last;
if (currentSize > partSize) {
auto chunkSize = offset-last;
if (chunkSize > maxPartSize) {
// One part is bigger than what we want :/
// Still have to write it.
std::cout << "The part (probably a cluster) is to big to fit in one part." << std::endl;
std::cout << " size is " << currentSize << "(" << offset << "-" << last << ")." << std::endl;
std::cout << " size is " << chunkSize << "(" << offset << "-" << last << ")." << std::endl;
error = true;
}
last = offset;
Expand Down

0 comments on commit 9894a5c

Please sign in to comment.