Skip to content

Commit

Permalink
arc_read()/arc_access() refactoring and cleanup
Browse files Browse the repository at this point in the history
The ARC code has been significantly modified many times over the years,
which created a significant amount of tangled and potentially broken code.
This should make the arc_access()/arc_read() code somewhat more readable.

 - Decouple prefetch status tracking from b_refcnt.  It made sense
originally, but became highly cryptic over the years.  Move all the
logic into arc_access().  While there, clean up and comment state
transitions in arc_access().  Some transitions were weird IMO.
 - Unify the arc_access() calls in arc_read() instead of sometimes calling
it from arc_read_done().  To avoid extra state changes and checks, add
one more b_refcnt for ARC_FLAG_IO_IN_PROGRESS.
 - Reimplement ARC_FLAG_WAIT in the case of ARC_FLAG_IO_IN_PROGRESS with
the same callback mechanism, to avoid falsely accounting such requests as
hits.  Count those as "iohits", an intermediate category between "hits"
and "misses".  While there, call read callbacks in the original request
order, which should be good for fairness and random
speculations/allocations/aggregations.
 - Introduce additional statistic counters for prefetch, accounting
predictive vs prescient and hits vs iohits vs misses.
 - Remove hash_lock argument from functions not needing it.
 - Remove ARC_FLAG_PREDICTIVE_PREFETCH, since it should be the opposite
of ARC_FLAG_PRESCIENT_PREFETCH if ARC_FLAG_PREFETCH is set.  We may
wish to add ARC_FLAG_PRESCIENT_PREFETCH to a few more places.
 - Fix a few false positive tests found in the process.

Reviewed-by: George Wilson <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Richard Yao <[email protected]>
Reviewed-by: Ryan Moeller <[email protected]>
Signed-off-by:	Alexander Motin <[email protected]>
Sponsored by:	iXsystems, Inc.
Closes #14123
  • Loading branch information
amotin authored Dec 22, 2022
1 parent dc8c2f6 commit c935fe2
Show file tree
Hide file tree
Showing 11 changed files with 310 additions and 292 deletions.
4 changes: 2 additions & 2 deletions include/os/linux/zfs/sys/trace_arc.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,12 @@ DEFINE_EVENT(zfs_arc_buf_hdr_class, name, \
TP_PROTO(arc_buf_hdr_t *ab), \
TP_ARGS(ab))
DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__hit);
DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__iohit);
DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__evict);
DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__delete);
DEFINE_ARC_BUF_HDR_EVENT(zfs_new_state__mru);
DEFINE_ARC_BUF_HDR_EVENT(zfs_new_state__mfu);
DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__async__upgrade__sync);
DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__demand__hit__predictive__prefetch);
DEFINE_ARC_BUF_HDR_EVENT(zfs_l2arc__hit);
DEFINE_ARC_BUF_HDR_EVENT(zfs_l2arc__miss);

Expand Down Expand Up @@ -387,12 +387,12 @@ DEFINE_ARC_WAIT_FOR_EVICTION_EVENT(zfs_arc__wait__for__eviction);
#else

DEFINE_DTRACE_PROBE1(arc__hit);
DEFINE_DTRACE_PROBE1(arc__iohit);
DEFINE_DTRACE_PROBE1(arc__evict);
DEFINE_DTRACE_PROBE1(arc__delete);
DEFINE_DTRACE_PROBE1(new_state__mru);
DEFINE_DTRACE_PROBE1(new_state__mfu);
DEFINE_DTRACE_PROBE1(arc__async__upgrade__sync);
DEFINE_DTRACE_PROBE1(arc__demand__hit__predictive__prefetch);
DEFINE_DTRACE_PROBE1(l2arc__hit);
DEFINE_DTRACE_PROBE1(l2arc__miss);
DEFINE_DTRACE_PROBE2(l2arc__read);
Expand Down
1 change: 0 additions & 1 deletion include/sys/arc.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ typedef enum arc_flags
ARC_FLAG_PREFETCH = 1 << 2, /* I/O is a prefetch */
ARC_FLAG_CACHED = 1 << 3, /* I/O was in cache */
ARC_FLAG_L2CACHE = 1 << 4, /* cache in L2ARC */
ARC_FLAG_PREDICTIVE_PREFETCH = 1 << 5, /* I/O from zfetch */
ARC_FLAG_PRESCIENT_PREFETCH = 1 << 6, /* long min lifespan */

/*
Expand Down
36 changes: 36 additions & 0 deletions include/sys/arc_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,14 @@ struct arc_callback {
boolean_t acb_compressed;
boolean_t acb_noauth;
boolean_t acb_nobuf;
boolean_t acb_wait;
int acb_wait_error;
kmutex_t acb_wait_lock;
kcondvar_t acb_wait_cv;
zbookmark_phys_t acb_zb;
zio_t *acb_zio_dummy;
zio_t *acb_zio_head;
arc_callback_t *acb_prev;
arc_callback_t *acb_next;
};

Expand Down Expand Up @@ -511,15 +516,27 @@ struct arc_buf_hdr {
};

typedef struct arc_stats {
/* Number of requests that were satisfied without I/O. */
kstat_named_t arcstat_hits;
/* Number of requests for which I/O was already running. */
kstat_named_t arcstat_iohits;
/* Number of requests for which I/O has to be issued. */
kstat_named_t arcstat_misses;
/* Same three, but specifically for demand data. */
kstat_named_t arcstat_demand_data_hits;
kstat_named_t arcstat_demand_data_iohits;
kstat_named_t arcstat_demand_data_misses;
/* Same three, but specifically for demand metadata. */
kstat_named_t arcstat_demand_metadata_hits;
kstat_named_t arcstat_demand_metadata_iohits;
kstat_named_t arcstat_demand_metadata_misses;
/* Same three, but specifically for prefetch data. */
kstat_named_t arcstat_prefetch_data_hits;
kstat_named_t arcstat_prefetch_data_iohits;
kstat_named_t arcstat_prefetch_data_misses;
/* Same three, but specifically for prefetch metadata. */
kstat_named_t arcstat_prefetch_metadata_hits;
kstat_named_t arcstat_prefetch_metadata_iohits;
kstat_named_t arcstat_prefetch_metadata_misses;
kstat_named_t arcstat_mru_hits;
kstat_named_t arcstat_mru_ghost_hits;
Expand Down Expand Up @@ -844,8 +861,18 @@ typedef struct arc_stats {
kstat_named_t arcstat_meta_max;
kstat_named_t arcstat_meta_min;
kstat_named_t arcstat_async_upgrade_sync;
/* Number of predictive prefetch requests. */
kstat_named_t arcstat_predictive_prefetch;
/* Number of requests for which predictive prefetch has completed. */
kstat_named_t arcstat_demand_hit_predictive_prefetch;
/* Number of requests for which predictive prefetch was running. */
kstat_named_t arcstat_demand_iohit_predictive_prefetch;
/* Number of prescient prefetch requests. */
kstat_named_t arcstat_prescient_prefetch;
/* Number of requests for which prescient prefetch has completed. */
kstat_named_t arcstat_demand_hit_prescient_prefetch;
/* Number of requests for which prescient prefetch was running. */
kstat_named_t arcstat_demand_iohit_prescient_prefetch;
kstat_named_t arcstat_need_free;
kstat_named_t arcstat_sys_free;
kstat_named_t arcstat_raw_size;
Expand All @@ -855,14 +882,19 @@ typedef struct arc_stats {

typedef struct arc_sums {
wmsum_t arcstat_hits;
wmsum_t arcstat_iohits;
wmsum_t arcstat_misses;
wmsum_t arcstat_demand_data_hits;
wmsum_t arcstat_demand_data_iohits;
wmsum_t arcstat_demand_data_misses;
wmsum_t arcstat_demand_metadata_hits;
wmsum_t arcstat_demand_metadata_iohits;
wmsum_t arcstat_demand_metadata_misses;
wmsum_t arcstat_prefetch_data_hits;
wmsum_t arcstat_prefetch_data_iohits;
wmsum_t arcstat_prefetch_data_misses;
wmsum_t arcstat_prefetch_metadata_hits;
wmsum_t arcstat_prefetch_metadata_iohits;
wmsum_t arcstat_prefetch_metadata_misses;
wmsum_t arcstat_mru_hits;
wmsum_t arcstat_mru_ghost_hits;
Expand Down Expand Up @@ -936,8 +968,12 @@ typedef struct arc_sums {
wmsum_t arcstat_prune;
aggsum_t arcstat_meta_used;
wmsum_t arcstat_async_upgrade_sync;
wmsum_t arcstat_predictive_prefetch;
wmsum_t arcstat_demand_hit_predictive_prefetch;
wmsum_t arcstat_demand_iohit_predictive_prefetch;
wmsum_t arcstat_prescient_prefetch;
wmsum_t arcstat_demand_hit_prescient_prefetch;
wmsum_t arcstat_demand_iohit_prescient_prefetch;
wmsum_t arcstat_raw_size;
wmsum_t arcstat_cached_only_in_progress;
wmsum_t arcstat_abd_chunk_waste_size;
Expand Down
Loading

0 comments on commit c935fe2

Please sign in to comment.