Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not create extra 0 sized file if last part is bigger than expected. #402

Merged
merged 4 commits into from
May 2, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 15 additions & 26 deletions src/zimsplit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,25 +37,25 @@
private:
zim::Archive archive;
const std::string prefix;
zim::size_type partSize;
zim::size_type maxPartSize;

char first_index, second_index;

std::ifstream ifile;
std::ofstream ofile;
std::string part_name;
zim::size_type out_size;
zim::size_type currentPartSize;
char* batch_buffer;

public:
ZimSplitter(const std::string& fname, const std::string& out_prefix, zim::size_type partSize)
ZimSplitter(const std::string& fname, const std::string& out_prefix, zim::size_type maxPartSize)

Check warning on line 51 in src/zimsplit.cpp

View check run for this annotation

Codecov / codecov/patch

src/zimsplit.cpp#L51

Added line #L51 was not covered by tests
: archive(fname),
prefix(out_prefix),
partSize(partSize),
maxPartSize(maxPartSize),
first_index(0),
second_index(0),
ifile(fname, std::ios::binary),
out_size(0)
currentPartSize(0)
{
batch_buffer = new char[BUFFER_SIZE];
}
Expand All @@ -80,9 +80,9 @@
}

void close_file() {
if (out_size > partSize) {
if (currentPartSize > maxPartSize) {
std::cout << "WARNING: Part " << part_name << " is bigger that max part size."
<< " (" << out_size << ">" << partSize << ")" << std::endl;
<< " (" << currentPartSize << ">" << maxPartSize << ")" << std::endl;

Check warning on line 85 in src/zimsplit.cpp

View check run for this annotation

Codecov / codecov/patch

src/zimsplit.cpp#L85

Added line #L85 was not covered by tests
}
ofile.close();
}
Expand All @@ -92,7 +92,7 @@
part_name = prefix + get_new_suffix();
std::cout << "opening new file " << part_name << std::endl;
ofile.open(part_name, std::ios::binary);
out_size = 0;
currentPartSize = 0;

Check warning on line 95 in src/zimsplit.cpp

View check run for this annotation

Codecov / codecov/patch

src/zimsplit.cpp#L95

Added line #L95 was not covered by tests
}

void copy_out(zim::size_type size) {
Expand All @@ -106,7 +106,7 @@
if (!ofile) {
throw std::runtime_error("Error while writing zim part");
}
out_size += size_to_copy;
currentPartSize += size_to_copy;

Check warning on line 109 in src/zimsplit.cpp

View check run for this annotation

Codecov / codecov/patch

src/zimsplit.cpp#L109

Added line #L109 was not covered by tests
size -= size_to_copy;
}
}
Expand All @@ -129,22 +129,11 @@

zim::offset_type last(0);
for(auto offset:offsets) {
auto currentSize = offset-last;
if (currentSize > partSize) {
// One part is bigger than what we want :/
// Still have to write it.
if (out_size) {
new_file();
}
copy_out(currentSize);
new_file();
} else {
if (out_size+currentSize > partSize) {
// It would be too much to write the current part in the current file.
auto chunkSize = offset-last;

Check warning on line 132 in src/zimsplit.cpp

View check run for this annotation

Codecov / codecov/patch

src/zimsplit.cpp#L132

Added line #L132 was not covered by tests
if (currentPartSize > 0 && currentPartSize + chunkSize > maxPartSize) {
new_file();
}
copy_out(currentSize);
}
copy_out(chunkSize);
last = offset;
}
}
Expand All @@ -156,12 +145,12 @@

zim::offset_type last(0);
for(auto offset:offsets) {
auto currentSize = offset-last;
if (currentSize > partSize) {
auto chunkSize = offset-last;

Check warning on line 148 in src/zimsplit.cpp

View check run for this annotation

Codecov / codecov/patch

src/zimsplit.cpp#L148

Added line #L148 was not covered by tests
if (chunkSize > maxPartSize) {
// One part is bigger than what we want :/
// Still have to write it.
std::cout << "The part (probably a cluster) is to big to fit in one part." << std::endl;
std::cout << " size is " << currentSize << "(" << offset << "-" << last << ")." << std::endl;
std::cout << " size is " << chunkSize << "(" << offset << "-" << last << ")." << std::endl;
error = true;
}
last = offset;
Expand Down
Loading