Commit

[BugFix] fix potential illegal memory access when partial update by column (StarRocks#31235)

Fixes StarRocks#20436
Remove `_update_chunk_cache.reset()`, because `_update_chunk_cache` may be reused later if another segment also needs data from the same update file. The old code happened to work only because `_update_chunk_cache.reset()` ultimately calls `std::vector::clear`, which does not actually free the memory; relying on that is undefined behavior and still carries a risk of illegal memory access.

Signed-off-by: luohaha <[email protected]>
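For context, a minimal standalone C++ sketch (not from this patch; the vector and values are purely illustrative) of why the old behavior only worked by accident: `clear()` drops the size but keeps the allocation, so stale reads may appear to succeed even though they are undefined behavior.

```cpp
// Illustrative sketch, not StarRocks code: why clearing a cache that a later
// round still reads from "works" by accident but is undefined behavior.
#include <cassert>
#include <vector>

int main() {
    std::vector<int> cache = {1, 2, 3, 4};  // stand-in for a cached chunk

    // Chunk::reset() ends up in std::vector::clear(): the size drops to
    // zero, but the capacity (and the old heap buffer) is typically kept.
    cache.clear();
    assert(cache.empty());
    assert(cache.capacity() >= 4);

    // A later consumer that still reads the old rows through data() may
    // appear to see 1..4, because the memory was not freed -- but the
    // elements are gone, so the access is undefined behavior: the
    // "potential illegal memory access" the commit message refers to.
    // int stale = cache.data()[0];  // UB: do not rely on this.

    return 0;
}
```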
luohaha authored Sep 18, 2023
1 parent eab4553 commit 48c56b0
Showing 2 changed files with 59 additions and 2 deletions.
6 changes: 4 additions & 2 deletions be/src/storage/rowset_column_update_state.cpp
@@ -394,11 +394,12 @@ Status RowsetColumnUpdateState::_read_chunk_from_update(const RowidsToUpdateRowi
// meet different update file, handle this round.
if (!_enable_preload_column_mode_update_data) {
_update_chunk_cache[cur_update_file_id]->reserve(4096);
// if already read from this update file, iterator will return end of file, and continue
RETURN_IF_ERROR(read_chunk_from_update_file(update_iterators[cur_update_file_id],
_update_chunk_cache[cur_update_file_id]));
DCHECK(_update_chunk_cache[cur_update_file_id]->num_rows() >= batch_append_rowids.size());
result_chunk->append_selective(*_update_chunk_cache[cur_update_file_id], batch_append_rowids.data(), 0,
batch_append_rowids.size());
_update_chunk_cache[cur_update_file_id]->reset();
} else {
result_chunk->append_selective(*_update_chunk_cache[cur_update_file_id], batch_append_rowids.data(), 0,
batch_append_rowids.size());
@@ -412,11 +413,12 @@ Status RowsetColumnUpdateState::_read_chunk_from_update(const RowidsToUpdateRowi
// finish last round.
if (!_enable_preload_column_mode_update_data) {
_update_chunk_cache[cur_update_file_id]->reserve(4096);
// if already read from this update file, iterator will return end of file, and continue
RETURN_IF_ERROR(read_chunk_from_update_file(update_iterators[cur_update_file_id],
_update_chunk_cache[cur_update_file_id]));
DCHECK(_update_chunk_cache[cur_update_file_id]->num_rows() >= batch_append_rowids.size());
result_chunk->append_selective(*_update_chunk_cache[cur_update_file_id], batch_append_rowids.data(), 0,
batch_append_rowids.size());
_update_chunk_cache[cur_update_file_id]->reset();
} else {
result_chunk->append_selective(*_update_chunk_cache[cur_update_file_id], batch_append_rowids.data(), 0,
batch_append_rowids.size());
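To make the intent of the change above easier to follow, here is a simplified sketch of the reuse pattern the fix relies on. All names (`Chunk`, `read_once_into_cache`, the row values) are illustrative stand-ins, not the StarRocks API: the per-update-file cache is filled once, the iterator reports end of file on later reads, and a later round that maps to the same update file therefore depends on the cache not being cleared in between.

```cpp
// Simplified, hypothetical model of the fixed control flow; names and data
// are illustrative only.
#include <cstdint>
#include <cstdio>
#include <vector>

struct Chunk {
    std::vector<int64_t> rows;  // stand-in for the real column data
};

// Stand-in for read_chunk_from_update_file(): fills the cache on the first
// call and behaves like "end of file" (a no-op here) on every later call.
void read_once_into_cache(bool& already_read, Chunk& cache) {
    if (already_read) return;        // the real iterator would return EOF
    cache.rows = {10, 11, 12, 13};   // pretend-decode the update file
    already_read = true;
}

int main() {
    Chunk cache;  // plays the role of _update_chunk_cache[cur_update_file_id]
    bool already_read = false;
    std::vector<uint32_t> first_batch = {0, 1};   // rowids for one round
    std::vector<uint32_t> second_batch = {2, 3};  // rowids for a later round

    // Round 1: a segment needs rows from this update file.
    read_once_into_cache(already_read, cache);
    for (uint32_t rid : first_batch) printf("A -> %ld\n", (long)cache.rows[rid]);

    // NOTE: no cache.rows.clear() here -- that is the reset() the commit
    // removes. Clearing would leave round 2 reading destroyed elements.

    // Round 2: another segment maps to the same update file; the iterator is
    // at EOF, so only the cached chunk still holds the data it needs.
    read_once_into_cache(already_read, cache);
    for (uint32_t rid : second_batch) printf("B -> %ld\n", (long)cache.rows[rid]);
    return 0;
}
```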
55 changes: 55 additions & 0 deletions be/test/storage/rowset_column_partial_update_test.cpp
@@ -830,4 +830,59 @@ TEST_F(RowsetColumnPartialUpdateTest, test_upsert) {
}
}

TEST_F(RowsetColumnPartialUpdateTest, partial_update_two_rowset_and_check) {
const int N = 100;
const int M = N / 2;
auto tablet = create_tablet(rand(), rand());
ASSERT_EQ(1, tablet->updates()->version_history_count());

// create full rowsets first
std::vector<int64_t> keys(N);
std::vector<int64_t> keys1(M);
std::vector<int64_t> keys2(M);
for (int i = 0; i < N; i++) {
keys[i] = i;
if (i < M) {
keys1[i] = i;
} else {
keys2[i - M] = i;
}
}
std::vector<RowsetSharedPtr> rowsets;
rowsets.reserve(2);
rowsets.emplace_back(create_rowset(tablet, keys1));
rowsets.emplace_back(create_rowset(tablet, keys2));
int64_t version = 1;
commit_rowsets(tablet, rowsets, version);
// check data
ASSERT_TRUE(check_tablet(tablet, version, N, [](int64_t k1, int64_t v1, int32_t v2) {
return (int16_t)(k1 % 100 + 1) == v1 && (int32_t)(k1 % 1000 + 2) == v2;
}));

std::vector<int32_t> column_indexes = {0, 1};
auto v1_func = [](int64_t k1) { return (int16_t)(k1 % 100 + 3); };
auto v2_func = [](int64_t k1) { return (int32_t)(k1 % 1000 + 4); };
std::shared_ptr<TabletSchema> partial_schema = TabletSchema::create(tablet->tablet_schema(), column_indexes);
RowsetSharedPtr partial_rowset =
create_partial_rowset(tablet, keys, column_indexes, v1_func, v2_func, partial_schema, 1);
// check data of write column
RowsetColumnUpdateState state;
state.load(tablet.get(), partial_rowset.get(), _update_mem_tracker.get());
const std::vector<ColumnPartialUpdateState>& parital_update_states = state.parital_update_states();
ASSERT_EQ(parital_update_states.size(), 1);
ASSERT_EQ(parital_update_states[0].src_rss_rowids.size(), N);
ASSERT_EQ(parital_update_states[0].rss_rowid_to_update_rowid.size(), N);
for (int upt_id = 0; upt_id < parital_update_states[0].src_rss_rowids.size(); upt_id++) {
uint64_t src_rss_rowid = parital_update_states[0].src_rss_rowids[upt_id];
ASSERT_EQ(parital_update_states[0].rss_rowid_to_update_rowid.find(src_rss_rowid)->second, upt_id);
}
// commit partial update
auto st = tablet->rowset_commit(++version, partial_rowset, 10000);
ASSERT_TRUE(st.ok()) << st.to_string();
// check data
ASSERT_TRUE(check_tablet(tablet, version, N, [](int64_t k1, int64_t v1, int32_t v2) {
return (int16_t)(k1 % 100 + 3) == v1 && (int32_t)(k1 % 1000 + 2) == v2;
}));
}

} // namespace starrocks
