Skip to content

Commit

Permalink
Merge pull request #32821 from vespa-engine/toregge/add-cache-disk-io…
Browse files Browse the repository at this point in the history
…-stats

Add CacheDiskIoStats.
  • Loading branch information
geirst authored Nov 8, 2024
2 parents 28ff629 + d5e44e1 commit d52a051
Show file tree
Hide file tree
Showing 12 changed files with 127 additions and 43 deletions.
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include "index_metrics_entry.h"
#include <vespa/searchlib/util/disk_io_stats.h>
#include <vespa/searchlib/util/cache_disk_io_stats.h>

using search::CacheDiskIoStats;
using search::DiskIoStats;

namespace proton {
Expand All @@ -12,23 +13,27 @@ namespace {
const std::string entry_name("index");
const std::string entry_description("Metrics for indexes for a given field");

// Reports one batch of read statistics to the given metric:
// total bytes, number of operations, and smallest/largest read size.
void update_helper(metrics::LongValueMetric &metric, const DiskIoStats &stats) {
    const auto bytes_total = stats.read_bytes_total();
    const auto operations = stats.read_operations();
    const auto bytes_min = stats.read_bytes_min();
    const auto bytes_max = stats.read_bytes_max();
    metric.addTotalValueBatch(bytes_total, operations, bytes_min, bytes_max);
}

}

// Creates the "search" metric set as a child of parent and registers the
// value metrics declared below (read_bytes, cached_read_bytes) with this set.
IndexMetricsEntry::DiskIoMetrics::SearchMetrics::SearchMetrics(metrics::MetricSet* parent)
: MetricSet("search", {}, "The search io for a given component", parent),
_read_bytes("read_bytes", {}, "Bytes read in posting list files as part of search", this)
_read_bytes("read_bytes", {}, "Bytes read in posting list files as part of search", this),
_cached_read_bytes("cached_read_bytes", {}, "Bytes read from posting list files cache as part of search", this)
{
}

IndexMetricsEntry::DiskIoMetrics::SearchMetrics::~SearchMetrics() = default;

// Feeds sampled disk io stats into the metrics of this set.
// With CacheDiskIoStats, the read() part is reported to _read_bytes and the
// cached_read() part is reported to _cached_read_bytes, both via update_helper().
void
IndexMetricsEntry::DiskIoMetrics::SearchMetrics::update(const DiskIoStats& disk_io_stats)
IndexMetricsEntry::DiskIoMetrics::SearchMetrics::update(const CacheDiskIoStats& cache_disk_io_stats)
{
_read_bytes.addTotalValueBatch(disk_io_stats.read_bytes_total(),
disk_io_stats.read_operations(),
disk_io_stats.read_bytes_min(),
disk_io_stats.read_bytes_max());
update_helper(_read_bytes, cache_disk_io_stats.read());
update_helper(_cached_read_bytes, cache_disk_io_stats.cached_read());
}

IndexMetricsEntry::DiskIoMetrics::DiskIoMetrics(metrics::MetricSet* parent)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

#include "field_metrics_entry.h"

namespace search { class DiskIoStats; }
namespace search { class CacheDiskIoStats; }

namespace proton {

Expand All @@ -16,26 +16,27 @@ class IndexMetricsEntry : public FieldMetricsEntry {
// Metric set for disk io. It owns a nested SearchMetrics set and forwards
// update() calls to it.
class DiskIoMetrics : public metrics::MetricSet {
// Metric set holding the value metrics for bytes read as part of search.
class SearchMetrics : public metrics::MetricSet {
metrics::LongValueMetric _read_bytes;
metrics::LongValueMetric _cached_read_bytes;
public:
explicit SearchMetrics(metrics::MetricSet* parent);
~SearchMetrics() override;
void update(const search::DiskIoStats& disk_io_stats);
void update(const search::CacheDiskIoStats& cache_disk_io_stats);
};

SearchMetrics _search;

public:
explicit DiskIoMetrics(metrics::MetricSet* parent);
~DiskIoMetrics() override;
// Forwards the sampled stats to the nested search metrics.
void update(const search::DiskIoStats& disk_io_stats) { _search.update(disk_io_stats); }
void update(const search::CacheDiskIoStats& cache_disk_io_stats) { _search.update(cache_disk_io_stats); }
};

DiskIoMetrics _disk_io;

public:
explicit IndexMetricsEntry(const std::string& field_name);
~IndexMetricsEntry() override;
void update_disk_io(const search::DiskIoStats& disk_io_stats) { _disk_io.update(disk_io_stats); }
void update_disk_io(const search::CacheDiskIoStats& cache_disk_io_stats) { _disk_io.update(cache_disk_io_stats); }
};

} // namespace proton
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ updateIndexMetrics(DocumentDBTaggedMetrics &metrics, const search::SearchableSta
if (entry) {
entry->memoryUsage.update(field.second.memory_usage());
entry->size_on_disk.set(field.second.size_on_disk());
entry->update_disk_io(field.second.disk_io_stats());
entry->update_disk_io(field.second.cache_disk_io_stats());
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,23 @@ TEST(SearchableStatsTest, stats_can_be_merged)
TEST(SearchableStatsTest, field_stats_can_be_merged)
{
SearchableStats base_stats;
auto read_2_once_stats = DiskIoStats().read_operations(1).read_bytes_total(2).read_bytes_min(2).read_bytes_max(2);
auto read_1000_once_stats = DiskIoStats().read_operations(1).read_bytes_total(1000).read_bytes_min(1000).
read_bytes_max(1000);
auto read_mixed_4_stats = DiskIoStats().read_operations(4).read_bytes_total(6000).read_bytes_min(1100).
read_bytes_max(2700);
auto read_mixed_5_stats = DiskIoStats().read_operations(5).read_bytes_total(7000).read_bytes_min(1000).
read_bytes_max(2700);
auto f1_stats = FieldIndexStats().memory_usage({100, 40, 10, 5}).size_on_disk(1000).
disk_io_stats(DiskIoStats().read_operations(1).read_bytes_total(1000).read_bytes_min(1000).read_bytes_max(1000));
cache_disk_io_stats(CacheDiskIoStats().read(read_1000_once_stats));
auto f2_stats1 = FieldIndexStats().memory_usage({400, 200, 60, 10}).size_on_disk(1500).
disk_io_stats(DiskIoStats().read_operations(1).read_bytes_total(1000).read_bytes_min(1000).read_bytes_max(1000));
cache_disk_io_stats(CacheDiskIoStats().read(read_1000_once_stats));
auto f2_stats2 = FieldIndexStats().memory_usage({300, 100, 40, 5}).size_on_disk(500).
disk_io_stats(DiskIoStats().read_operations(4).read_bytes_total(6000).read_bytes_min(1100).read_bytes_max(2700));
cache_disk_io_stats(CacheDiskIoStats().read(read_mixed_4_stats).cached_read(read_2_once_stats));
auto f2_stats3 = FieldIndexStats().memory_usage({700, 300, 100, 15}).size_on_disk(2000).
disk_io_stats(DiskIoStats().read_operations(5).read_bytes_total(7000).read_bytes_min(1000).read_bytes_max(2700));
cache_disk_io_stats(CacheDiskIoStats().read(read_mixed_5_stats).cached_read(read_2_once_stats));
auto f3_stats = FieldIndexStats().memory_usage({110, 50, 20, 12}).size_on_disk(500).
disk_io_stats(DiskIoStats().read_operations(1).read_bytes_total(1000).read_bytes_min(1000).read_bytes_max(1000));
cache_disk_io_stats(CacheDiskIoStats().read(read_1000_once_stats));
base_stats.add_field_stats("f1", f1_stats).add_field_stats("f2", f2_stats1);
SearchableStats added_stats;
added_stats.add_field_stats("f2", f2_stats2).add_field_stats("f3", f3_stats);
Expand Down
24 changes: 15 additions & 9 deletions searchlib/src/vespa/searchlib/diskindex/field_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,21 @@ const std::vector<std::string> field_file_names{

std::atomic<uint64_t> FieldIndex::_file_id_source(0);

// Default-constructs the stats and the mutex; no other setup is done here.
FieldIndex::LockedDiskIoStats::LockedDiskIoStats() noexcept
: DiskIoStats(),
FieldIndex::LockedCacheDiskIoStats::LockedCacheDiskIoStats() noexcept
: _stats(),
_mutex()
{
}

FieldIndex::LockedDiskIoStats::~LockedDiskIoStats() = default;
FieldIndex::LockedCacheDiskIoStats::~LockedCacheDiskIoStats() = default;

// Default constructor: file handles and the posting list cache are
// default-initialised, file id and size on disk start at 0, and a fresh
// shared stats object is allocated for this field index.
FieldIndex::FieldIndex()
: _posting_file(),
_bit_vector_dict(),
_dict(),
_file_id(0),
_size_on_disk(0),
_disk_io_stats(std::make_shared<LockedDiskIoStats>()),
_cache_disk_io_stats(std::make_shared<LockedCacheDiskIoStats>()),
_posting_list_cache()
{
}
Expand Down Expand Up @@ -157,15 +157,15 @@ FieldIndex::reuse_files(const FieldIndex& rhs)
_bit_vector_dict = rhs._bit_vector_dict;
_file_id = rhs._file_id;
_size_on_disk = rhs._size_on_disk;
_disk_io_stats = rhs._disk_io_stats;
_cache_disk_io_stats = rhs._cache_disk_io_stats;
}

// Reads the posting list for lookup_result directly from the posting file.
// A read that reports a non-zero byte count is recorded in the disk io stats
// (as an uncached read operation when CacheDiskIoStats is used).
PostingListHandle
FieldIndex::read_uncached_posting_list(const DictionaryLookupResult& lookup_result) const
{
auto handle = _posting_file->read_posting_list(lookup_result);
// Reads of zero bytes are not counted as read operations.
if (handle._read_bytes != 0) {
_disk_io_stats->add_read_operation(handle._read_bytes);
_cache_disk_io_stats->add_uncached_read_operation(handle._read_bytes);
}
return handle;
}
Expand All @@ -176,6 +176,7 @@ FieldIndex::read(const IPostingListCache::Key& key) const
DictionaryLookupResult lookup_result;
lookup_result.bitOffset = key.bit_offset;
lookup_result.counts._bitLength = key.bit_length;
key.backing_store_file = nullptr; // Signal cache miss back to layer above cache
return read_uncached_posting_list(lookup_result);
}

Expand All @@ -194,7 +195,12 @@ FieldIndex::read_posting_list(const DictionaryLookupResult& lookup_result) const
key.file_id = _file_id;
key.bit_offset = lookup_result.bitOffset;
key.bit_length = lookup_result.counts._bitLength;
return _posting_list_cache->read(key);
auto result = _posting_list_cache->read(key);
auto cache_hit = key.backing_store_file == this;
if (cache_hit && result._read_bytes != 0) {
_cache_disk_io_stats->add_cached_read_operation(result._read_bytes);
}
return result;
}

std::unique_ptr<BitVector>
Expand Down Expand Up @@ -224,8 +230,8 @@ FieldIndex::get_field_length_info() const
// Returns the size on disk together with the disk io stats gathered so far.
// The stats are fetched with read_and_clear(), so fetching them also resets
// the shared stats object.
FieldIndexStats
FieldIndex::get_stats() const
{
auto disk_io_stats = _disk_io_stats->read_and_clear();
return FieldIndexStats().size_on_disk(_size_on_disk).disk_io_stats(disk_io_stats);
auto cache_disk_io_stats = _cache_disk_io_stats->read_and_clear();
return FieldIndexStats().size_on_disk(_size_on_disk).cache_disk_io_stats(cache_disk_io_stats);
}

}
21 changes: 13 additions & 8 deletions searchlib/src/vespa/searchlib/diskindex/field_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,26 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking {
using DiskPostingFileReal = Zc4PosOccRandRead;
using DiskPostingFileDynamicKReal = ZcPosOccRandRead;

// Disk io stats paired with a mutex. Every member function below takes
// _mutex with a std::lock_guard before touching the stats.
class LockedDiskIoStats : public DiskIoStats {
class LockedCacheDiskIoStats {
CacheDiskIoStats _stats;
std::mutex _mutex;

public:
LockedDiskIoStats() noexcept;
~LockedDiskIoStats();
LockedCacheDiskIoStats() noexcept;
~LockedCacheDiskIoStats();

// Records one read of the given number of bytes while holding the mutex.
void add_read_operation(uint64_t bytes) {
void add_uncached_read_operation(uint64_t bytes) {
std::lock_guard guard(_mutex);
DiskIoStats::add_read_operation(bytes);
_stats.add_uncached_read_operation(bytes);
}
// Records one cached read of the given number of bytes while holding the mutex.
void add_cached_read_operation(uint64_t bytes) {
std::lock_guard guard(_mutex);
_stats.add_cached_read_operation(bytes);
}

// Returns the result of read_and_clear() on the stats, called while holding the mutex.
DiskIoStats read_and_clear() {
CacheDiskIoStats read_and_clear() {
std::lock_guard guard(_mutex);
return DiskIoStats::read_and_clear();
return _stats.read_and_clear();
}
};

Expand All @@ -47,7 +52,7 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking {
std::unique_ptr<index::DictionaryFileRandRead> _dict;
uint64_t _file_id;
uint64_t _size_on_disk;
std::shared_ptr<LockedDiskIoStats> _disk_io_stats;
std::shared_ptr<LockedCacheDiskIoStats> _cache_disk_io_stats;
std::shared_ptr<IPostingListCache> _posting_list_cache;
static std::atomic<uint64_t> _file_id_source;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class IPostingListCache {
public:
class IPostingListFileBacking;
struct Key {
const IPostingListFileBacking* backing_store_file; // Used by backing store on cache miss
mutable const IPostingListFileBacking* backing_store_file; // Used by backing store on cache miss
uint64_t file_id;
uint64_t bit_offset;
uint64_t bit_length;
Expand Down
1 change: 1 addition & 0 deletions searchlib/src/vespa/searchlib/util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
vespa_add_library(searchlib_util OBJECT
SOURCES
bufferwriter.cpp
cache_disk_io_stats.cpp
comprbuffer.cpp
comprfile.cpp
data_buffer_writer.cpp
Expand Down
13 changes: 13 additions & 0 deletions searchlib/src/vespa/searchlib/util/cache_disk_io_stats.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include "cache_disk_io_stats.h"
#include <ostream>

namespace search {

// Writes the stats as "{read: <read stats>, cached_read: <cached read stats>}"
// and returns the stream to allow chaining.
std::ostream& operator<<(std::ostream& os, const CacheDiskIoStats& stats) {
    os << "{read: ";
    os << stats.read();
    os << ", cached_read: ";
    os << stats.cached_read();
    return os << "}";
}

}
46 changes: 46 additions & 0 deletions searchlib/src/vespa/searchlib/util/cache_disk_io_stats.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include "disk_io_stats.h"

namespace search {

/*
 * Class tracking disk io when using a cache.
 *
 * Two DiskIoStats instances are kept: one for reads recorded as uncached
 * (cache miss) and one for reads recorded as cached (cache hit).
 */
class CacheDiskIoStats {
    DiskIoStats _read;        // cache miss
    DiskIoStats _cached_read; // cache hit

public:
    CacheDiskIoStats() noexcept
        : _read(),
          _cached_read()
    {
    }

    // Chainable setters. Both take a const reference so that const objects
    // and temporaries can be passed, not only mutable lvalues.
    CacheDiskIoStats& read(const DiskIoStats& value) { _read = value; return *this; }
    CacheDiskIoStats& cached_read(const DiskIoStats& value) { _cached_read = value; return *this; }

    // Read-only access to the two parts.
    const DiskIoStats& read() const noexcept { return _read; }
    const DiskIoStats& cached_read() const noexcept { return _cached_read; }

    // Merges each part of rhs into the corresponding part of this object.
    void merge(const CacheDiskIoStats& rhs) noexcept {
        _read.merge(rhs.read());
        _cached_read.merge(rhs.cached_read());
    }

    // Equal when both the read part and the cached read part are equal.
    bool operator==(const CacheDiskIoStats &rhs) const noexcept {
        return _read == rhs.read() &&
               _cached_read == rhs.cached_read();
    }

    // Returns a copy of the current stats, then clears this object.
    CacheDiskIoStats read_and_clear() noexcept { auto result = *this; clear(); return result; }

    // Clears both parts.
    void clear() noexcept {
        _read.clear();
        _cached_read.clear();
    }

    // Records a single read of the given number of bytes in the cache miss part.
    void add_uncached_read_operation(uint64_t bytes) noexcept { _read.add_read_operation(bytes); }
    // Records a single read of the given number of bytes in the cache hit part.
    void add_cached_read_operation(uint64_t bytes) noexcept { _cached_read.add_read_operation(bytes); }
};

std::ostream& operator<<(std::ostream& os, const CacheDiskIoStats& stats);

}
2 changes: 1 addition & 1 deletion searchlib/src/vespa/searchlib/util/field_index_stats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ namespace search {

// Writes the stats as "{memory: <memory usage>, disk: <size on disk>, diskio: <disk io stats>}"
// and returns the stream to allow chaining.
std::ostream& operator<<(std::ostream& os, const FieldIndexStats& stats) {
os << "{memory: " << stats.memory_usage() << ", disk: " << stats.size_on_disk() <<
", diskio: " << stats.disk_io_stats() << "}";
", diskio: " << stats.cache_disk_io_stats() << "}";
return os;
}

Expand Down
14 changes: 7 additions & 7 deletions searchlib/src/vespa/searchlib/util/field_index_stats.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include "disk_io_stats.h"
#include "cache_disk_io_stats.h"
#include <vespa/vespalib/util/memoryusage.h>
#include <iosfwd>

Expand All @@ -15,13 +15,13 @@ class FieldIndexStats
private:
vespalib::MemoryUsage _memory_usage;
size_t _size_on_disk; // in bytes
DiskIoStats _disk_io_stats;
CacheDiskIoStats _cache_disk_io_stats;

public:
FieldIndexStats() noexcept
: _memory_usage(),
_size_on_disk(0),
_disk_io_stats()
_cache_disk_io_stats()
{}
FieldIndexStats &memory_usage(const vespalib::MemoryUsage &usage) noexcept {
_memory_usage = usage;
Expand All @@ -34,19 +34,19 @@ class FieldIndexStats
}
size_t size_on_disk() const noexcept { return _size_on_disk; }

FieldIndexStats& disk_io_stats(const DiskIoStats& stats) { _disk_io_stats = stats; return *this; }
const DiskIoStats& disk_io_stats() const noexcept { return _disk_io_stats; }
FieldIndexStats& cache_disk_io_stats(const CacheDiskIoStats& stats) { _cache_disk_io_stats = stats; return *this; }
const CacheDiskIoStats& cache_disk_io_stats() const noexcept { return _cache_disk_io_stats; }

void merge(const FieldIndexStats &rhs) noexcept {
_memory_usage.merge(rhs._memory_usage);
_size_on_disk += rhs._size_on_disk;
_disk_io_stats.merge(rhs._disk_io_stats);
_cache_disk_io_stats.merge(rhs._cache_disk_io_stats);
}

bool operator==(const FieldIndexStats& rhs) const noexcept {
return _memory_usage == rhs._memory_usage &&
_size_on_disk == rhs._size_on_disk &&
_disk_io_stats == rhs._disk_io_stats;
_cache_disk_io_stats == rhs._cache_disk_io_stats;
}
};

Expand Down

0 comments on commit d52a051

Please sign in to comment.