Skip to content

Commit

Permalink
admin: Calculate the CPU utilization based on the average tasklet loop time
Browse files Browse the repository at this point in the history

This provides more accurate results than the NIC statistics method.

Test with:
./build/app/RxTxApp --config_file tests/script/loop_json/1080p_16v_migrate.json

Signed-off-by: Frank Du <[email protected]>
  • Loading branch information
frankdjx committed Dec 28, 2023
1 parent 92c5d73 commit bf13c9d
Show file tree
Hide file tree
Showing 10 changed files with 69 additions and 107 deletions.
4 changes: 2 additions & 2 deletions lib/src/mt_admin.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ static int admin_cal_cpu_busy(struct mtl_main_impl* impl) {
for (int j = 0; j < tx_mgr->max_idx; j++) {
tx_s = tx_video_session_get(tx_mgr, j);
if (tx_s) {
tx_video_session_cal_cpu_busy(tx_s);
tx_video_session_cal_cpu_busy(sch, tx_s);
tx_video_session_put(tx_mgr, j);
}
}
Expand All @@ -39,7 +39,7 @@ static int admin_cal_cpu_busy(struct mtl_main_impl* impl) {
for (int j = 0; j < rx_mgr->max_idx; j++) {
rx_s = rx_video_session_get(rx_mgr, j);
if (rx_s) {
rx_video_session_cal_cpu_busy(rx_s);
rx_video_session_cal_cpu_busy(sch, rx_s);
rx_video_session_put(rx_mgr, j);
}
}
Expand Down
2 changes: 2 additions & 0 deletions lib/src/mt_main.h
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,8 @@ struct mtl_sch_impl {
pthread_cond_t sleep_wake_cond;
pthread_mutex_t sleep_wake_mutex;

uint64_t avg_ns_per_loop;

/* the sch sleep ratio */
float sleep_ratio_score;
uint64_t sleep_ratio_start_ns;
Expand Down
16 changes: 14 additions & 2 deletions lib/src/mt_sch.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ static int sch_tasklet_func(void* args) {
struct mt_sch_tasklet_impl* tasklet;
bool time_measure = mt_user_tasklet_time_measure(impl);
uint64_t tsc_s = 0;
uint64_t loop_cal_start_ns;
uint64_t loop_cnt = 0;

num_tasklet = sch->max_tasklet_idx;
info("%s(%d), start with %d tasklets\n", __func__, idx, num_tasklet);
Expand All @@ -125,6 +127,7 @@ static int sch_tasklet_func(void* args) {
}

sch->sleep_ratio_start_ns = mt_get_tsc(impl);
loop_cal_start_ns = mt_get_tsc(impl);

while (rte_atomic32_read(&sch->request_stop) == 0) {
int pending = MTL_TASKLET_ALL_DONE;
Expand Down Expand Up @@ -153,6 +156,15 @@ static int sch_tasklet_func(void* args) {
if (sch->allow_sleep && (pending == MTL_TASKLET_ALL_DONE)) {
sch_tasklet_sleep(impl, sch);
}

loop_cnt++;
/* cal avg_ns_per_loop per 5s */
uint64_t delta_loop_ns = mt_get_tsc(impl) - loop_cal_start_ns;
if (delta_loop_ns > ((uint64_t)NS_PER_S * 5)) {
sch->avg_ns_per_loop = delta_loop_ns / loop_cnt;
loop_cnt = 0;
loop_cal_start_ns = mt_get_tsc(impl);
}
}

num_tasklet = sch->max_tasklet_idx;
Expand Down Expand Up @@ -376,8 +388,8 @@ static int sch_stat(void* priv) {

if (!mt_sch_is_active(sch)) return 0;

notice("SCH(%d:%s): tasklets %d max idx %d, lcore %u\n", idx, sch->name, num_tasklet,
sch->max_tasklet_idx, sch->lcore);
notice("SCH(%d:%s): tasklets %d, lcore %u, avg loop %" PRIu64 " ns\n", idx, sch->name,
num_tasklet, sch->lcore, mt_sch_avg_ns_loop(sch));
if (mt_user_tasklet_time_measure(sch->parent)) {
for (int i = 0; i < num_tasklet; i++) {
tasklet = sch->tasklet[i];
Expand Down
4 changes: 4 additions & 0 deletions lib/src/mt_sch.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ static inline void mt_sch_set_cpu_busy(struct mtl_sch_impl* sch, bool busy) {
sch->cpu_busy = busy;
}

/*
 * Read the scheduler's average time per tasklet loop, in nanoseconds.
 * The value is whatever the scheduler loop last stored in avg_ns_per_loop
 * (elapsed time divided by loop count, refreshed roughly every 5 seconds).
 */
static inline uint64_t mt_sch_avg_ns_loop(struct mtl_sch_impl* sch) {
  uint64_t avg_ns = sch->avg_ns_per_loop;
  return avg_ns;
}

int mt_sch_put_lcore(struct mtl_main_impl* impl, unsigned int lcore);
int mt_sch_get_lcore(struct mtl_main_impl* impl, unsigned int* lcore,
enum mt_lcore_type type);
Expand Down
22 changes: 9 additions & 13 deletions lib/src/st2110/st_header.h
Original file line number Diff line number Diff line change
Expand Up @@ -344,11 +344,7 @@ struct st_tx_video_session_impl {
struct rte_mbuf* pad[MTL_SESSION_PORT_MAX][ST20_PKT_TYPE_MAX];

/* the cpu resource to handle tx, 0: full, 100: cpu is very busy */
float cpu_busy_score;
int pri_nic_burst_cnt; /* sync to atomic if this reach a threshold */
int pri_nic_inflight_cnt;
rte_atomic32_t nic_burst_cnt;
rte_atomic32_t nic_inflight_cnt;
double cpu_busy_score;

/* info for st22 */
struct st22_tx_video_info* st22_info;
Expand Down Expand Up @@ -393,6 +389,8 @@ struct st_tx_video_session_impl {
/* interlace */
uint32_t stat_interlace_first_field;
uint32_t stat_interlace_second_field;
/* for display */
double stat_cpu_busy_score;
};

struct st_tx_video_sessions_mgr {
Expand Down Expand Up @@ -629,6 +627,8 @@ struct st_rx_video_session_impl {
struct st20_pgroup st20_pg;
double frame_time; /* time of the frame in nanoseconds */
double frame_time_sampling; /* time of the frame in sampling(90k) */
/* time in ns between 2 consecutive packets, T-Frame / N-Packets */
double trs;

size_t st20_uframe_size; /* size per user frame */
struct st20_rx_uframe_pg_meta pg_meta;
Expand Down Expand Up @@ -674,15 +674,9 @@ struct st_rx_video_session_impl {
rte_atomic32_t pkt_lcore_stopped;

/* the cpu resource to handle rx, 0: full, 100: cpu is very busy */
float cpu_busy_score;
float dma_busy_score;
int pri_nic_burst_cnt; /* sync to atomic if this reach a threshold */
int pri_nic_inflight_cnt;
rte_atomic32_t nic_burst_cnt;
rte_atomic32_t nic_inflight_cnt;
double cpu_busy_score;
double dma_busy_score;
rte_atomic32_t dma_previous_busy_cnt;
rte_atomic32_t cbs_frame_slot_cnt;
rte_atomic32_t cbs_incomplete_frame_cnt;

struct mt_rtcp_rx* rtcp_rx[MTL_SESSION_PORT_MAX];
uint16_t burst_loss_max;
Expand Down Expand Up @@ -741,6 +735,8 @@ struct st_rx_video_session_impl {
uint32_t stat_interlace_second_field;
/* for st22 */
uint32_t stat_st22_boxes;
/* for stat display */
double stat_cpu_busy_score;
};

struct st_rx_video_sessions_mgr {
Expand Down
80 changes: 28 additions & 52 deletions lib/src/st2110/st_rx_video_session.c
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,10 @@ static void rv_frame_notify(struct st_rx_video_session_impl* s,
rv_put_frame(s, slot->frame);
slot->frame = NULL;
}

/* update trs */
double reactive = 1080.0 / 1125.0;
s->trs = s->frame_time * reactive / meta->pkts_total;
} else {
dbg("%s(%d): frame_recv_size %" PRIu64 ", frame_total_size %" PRIu64 ", tmstamp %u\n",
__func__, s->idx, meta->frame_recv_size, meta->frame_total_size, slot->tmstamp);
Expand All @@ -793,7 +797,6 @@ static void rv_frame_notify(struct st_rx_video_session_impl* s,
}
#endif

rte_atomic32_inc(&s->cbs_incomplete_frame_cnt);
/* notify the incomplete frame if user required */
if (ops->flags & ST20_RX_FLAG_RECEIVE_INCOMPLETE_FRAME) {
rv_notify_frame_ready(s, slot->frame->addr, meta);
Expand Down Expand Up @@ -839,6 +842,9 @@ static void rv_st22_frame_notify(struct st_rx_video_session_impl* s,
rv_put_frame(s, slot->frame);
slot->frame = NULL;
}
/* update trs */
double reactive = 1080.0 / 1125.0;
s->trs = s->frame_time * reactive / meta->pkts_total;
} else {
s->stat_frames_dropped++;
/* record the miss pkts */
Expand All @@ -857,7 +863,6 @@ static void rv_st22_frame_notify(struct st_rx_video_session_impl* s,
}
#endif

rte_atomic32_inc(&s->cbs_incomplete_frame_cnt);
/* notify the incomplete frame if user required */
if (ops->flags & ST20_RX_FLAG_RECEIVE_INCOMPLETE_FRAME) {
st22_notify_frame_ready(s, slot->frame->addr, meta);
Expand Down Expand Up @@ -1036,8 +1041,6 @@ static struct st_rx_video_slot_impl* rv_slot_by_tmstamp(
memset(slot->frame_bitmap, 0x0, s->st20_frame_bitmap_size);
if (slot->slice_info) memset(slot->slice_info, 0x0, sizeof(*slot->slice_info));

rte_atomic32_inc(&s->cbs_frame_slot_cnt);

dbg("%s(%d): assign slot %d framebuff %p for tmstamp %u\n", __func__, s->idx, slot_idx,
slot->frame->addr, tmstamp);
return slot;
Expand Down Expand Up @@ -2384,14 +2387,12 @@ static int rv_init_sw(struct mtl_main_impl* impl, struct st_rx_video_sessions_mg
s->vsync.meta.frame_time = (double)1000000000.0 * fps_tm.den / fps_tm.mul;
st_vsync_calculate(impl, &s->vsync);
s->vsync.init = true;

/* init advice sleep us */
int estimated_total_pkts = s->st20_frame_size / ST_VIDEO_BPM_SIZE;
double trs = s->vsync.meta.frame_time / estimated_total_pkts;
double sleep_ns = trs * 128;
double sleep_ns = s->trs * 128;
s->advice_sleep_us = sleep_ns / NS_PER_US;
if (mt_user_tasklet_sleep(impl)) {
info("%s(%d), advice sleep us %" PRIu64 ", trs %fns, total pkts %d\n", __func__, idx,
s->advice_sleep_us, trs, estimated_total_pkts);
info("%s(%d), advice sleep us %" PRIu64 "\n", __func__, idx, s->advice_sleep_us);
}

return 0;
Expand Down Expand Up @@ -2596,8 +2597,6 @@ static int rv_handle_mbuf(void* priv, struct rte_mbuf** mbuf, uint16_t nb) {
}
if (!nb) return 0;

s->pri_nic_inflight_cnt++;

/* now dispatch the pkts to handler */
for (uint16_t i = 0; i < nb; i++) {
if ((s->ops.flags & ST20_RX_FLAG_SIMULATE_PKT_LOSS) && rv_simulate_pkt_loss(s))
Expand Down Expand Up @@ -2643,14 +2642,6 @@ static int rv_pkt_rx_tasklet(struct st_rx_video_session_impl* s) {
rte_pktmbuf_free_bulk(&mbuf[0], rv);
}

s->pri_nic_burst_cnt++;
if (s->pri_nic_burst_cnt > ST_VIDEO_STAT_UPDATE_INTERVAL) {
rte_atomic32_add(&s->nic_burst_cnt, s->pri_nic_burst_cnt);
s->pri_nic_burst_cnt = 0;
rte_atomic32_add(&s->nic_inflight_cnt, s->pri_nic_inflight_cnt);
s->pri_nic_inflight_cnt = 0;
}

if (rv) done = false;
}

Expand Down Expand Up @@ -2954,6 +2945,10 @@ static int rv_attach(struct mtl_main_impl* impl, struct st_rx_video_sessions_mgr
s->st20_dst_port[i] = (ops->udp_port[i]) ? (ops->udp_port[i]) : (10000 + idx * 2);
}

/* init trs */
int estimated_total_pkts = s->st20_frame_size / ST_VIDEO_BPM_SIZE;
s->trs = s->frame_time / estimated_total_pkts;

/* init simulated packet loss for test usage */
if (s->ops.flags & ST20_RX_FLAG_SIMULATE_PKT_LOSS) {
uint16_t burst_loss_max = 32;
Expand Down Expand Up @@ -2983,20 +2978,15 @@ static int rv_attach(struct mtl_main_impl* impl, struct st_rx_video_sessions_mgr
s->stat_frames_dropped = 0;
s->stat_pkts_simulate_loss = 0;
rte_atomic32_set(&s->stat_frames_received, 0);
rte_atomic32_set(&s->cbs_incomplete_frame_cnt, 0);
rte_atomic32_set(&s->cbs_frame_slot_cnt, 0);
s->stat_last_time = mt_get_monotonic_time();
s->dma_nb_desc = 128;
s->dma_slot = NULL;
s->dma_dev = NULL;

s->pri_nic_burst_cnt = 0;
s->pri_nic_inflight_cnt = 0;
rte_atomic32_set(&s->nic_burst_cnt, 0);
rte_atomic32_set(&s->nic_inflight_cnt, 0);
rte_atomic32_set(&s->dma_previous_busy_cnt, 0);
s->cpu_busy_score = 0;
s->dma_busy_score = 0;

s->st22_expect_frame_size = 0;
s->burst_loss_cnt = 0;
if (s->ops.flags & ST20_RX_FLAG_ENABLE_TIMING_PARSER) {
Expand Down Expand Up @@ -3130,37 +3120,23 @@ static int rvs_ctl_tasklet_handler(void* priv) {
}

void rx_video_session_clear_cpu_busy(struct st_rx_video_session_impl* s) {
rte_atomic32_set(&s->nic_burst_cnt, 0);
rte_atomic32_set(&s->nic_inflight_cnt, 0);
rte_atomic32_set(&s->dma_previous_busy_cnt, 0);
rte_atomic32_set(&s->cbs_frame_slot_cnt, 0);
rte_atomic32_set(&s->cbs_incomplete_frame_cnt, 0);
s->cpu_busy_score = 0;
s->dma_busy_score = 0;
}

void rx_video_session_cal_cpu_busy(struct st_rx_video_session_impl* s) {
float nic_burst_cnt = rte_atomic32_read(&s->nic_burst_cnt);
float nic_inflight_cnt = rte_atomic32_read(&s->nic_inflight_cnt);
float dma_previous_busy_cnt = rte_atomic32_read(&s->dma_previous_busy_cnt);
int frame_slot_cnt = rte_atomic32_read(&s->cbs_frame_slot_cnt);
int incomplete_frame_cnt = rte_atomic32_read(&s->cbs_incomplete_frame_cnt);
float cpu_busy_score = 0;
float dma_busy_score = s->dma_busy_score; /* save old */
float old_cpu_busy_score = s->cpu_busy_score; /* save old */

rx_video_session_clear_cpu_busy(s);

if (nic_burst_cnt) {
cpu_busy_score = 100.0 * nic_inflight_cnt / nic_burst_cnt;
}
if ((frame_slot_cnt > 10) && (incomplete_frame_cnt > 10)) {
/* do we need check if imiss? */
cpu_busy_score = old_cpu_busy_score + 40;
}
if (cpu_busy_score > 100.0) cpu_busy_score = 100.0;
s->cpu_busy_score = cpu_busy_score;

void rx_video_session_cal_cpu_busy(struct mtl_sch_impl* sch,
struct st_rx_video_session_impl* s) {
uint64_t avg_ns_per_loop = mt_sch_avg_ns_loop(sch);
s->cpu_busy_score = (double)avg_ns_per_loop / s->trs * 100.0;
dbg("%s(%d), avg_ns_per_loop %" PRIu64 ", trs %f, busy %f\n", __func__, s->idx,
avg_ns_per_loop, s->trs, s->cpu_busy_score);
s->stat_cpu_busy_score = s->cpu_busy_score;

/* update dma busy */
int dma_previous_busy_cnt = rte_atomic32_read(&s->dma_previous_busy_cnt);
rte_atomic32_set(&s->dma_previous_busy_cnt, 0);
float dma_busy_score = s->dma_busy_score;
if (dma_previous_busy_cnt) {
dma_busy_score += 40.0;
if (dma_busy_score > 100.0) dma_busy_score = 100.0;
Expand Down Expand Up @@ -3199,7 +3175,7 @@ static void rv_stat(struct st_rx_video_sessions_mgr* mgr,
notice("RX_VIDEO_SESSION(%d,%d:%s): throughput %f Mb/s, cpu busy %f\n", m_idx, idx,
s->ops_name,
(double)s->stat_bytes_received * 8 / dump_period_s / MTL_STAT_M_UNIT,
s->cpu_busy_score);
s->stat_cpu_busy_score);
s->stat_pkts_received = 0;
s->stat_bytes_received = 0;
s->stat_slices_received = 0;
Expand Down
3 changes: 2 additions & 1 deletion lib/src/st2110/st_rx_video_session.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ static inline void rx_video_session_put(struct st_rx_video_sessions_mgr* mgr, in
st20_rx_handle st20_rx_create_with_mask(struct mtl_main_impl* impl,
struct st20_rx_ops* ops, mt_sch_mask_t sch_mask);

void rx_video_session_cal_cpu_busy(struct st_rx_video_session_impl* s);
void rx_video_session_cal_cpu_busy(struct mtl_sch_impl* sch,
struct st_rx_video_session_impl* s);
void rx_video_session_clear_cpu_busy(struct st_rx_video_session_impl* s);

static inline bool rx_video_session_is_cpu_busy(struct st_rx_video_session_impl* s) {
Expand Down
27 changes: 6 additions & 21 deletions lib/src/st2110/st_tx_video_session.c
Original file line number Diff line number Diff line change
Expand Up @@ -3054,12 +3054,6 @@ static int tv_attach(struct mtl_main_impl* impl, struct st_tx_video_sessions_mgr
rte_atomic32_set(&s->stat_frame_cnt, 0);
s->stat_last_time = mt_get_monotonic_time();

s->pri_nic_burst_cnt = 0;
s->pri_nic_inflight_cnt = 0;
rte_atomic32_set(&s->nic_burst_cnt, 0);
rte_atomic32_set(&s->nic_inflight_cnt, 0);
s->cpu_busy_score = 0;

for (int i = 0; i < num_port; i++) {
s->inflight[i][0] = NULL;
s->inflight_cnt[i] = 0;
Expand All @@ -3083,23 +3077,14 @@ static int tv_attach(struct mtl_main_impl* impl, struct st_tx_video_sessions_mgr
}

void tx_video_session_clear_cpu_busy(struct st_tx_video_session_impl* s) {
rte_atomic32_set(&s->nic_burst_cnt, 0);
rte_atomic32_set(&s->nic_inflight_cnt, 0);
s->cpu_busy_score = 0;
}

void tx_video_session_cal_cpu_busy(struct st_tx_video_session_impl* s) {
float nic_burst_cnt = rte_atomic32_read(&s->nic_burst_cnt);
float nic_inflight_cnt = rte_atomic32_read(&s->nic_inflight_cnt);
float cpu_busy_score = 0;

tx_video_session_clear_cpu_busy(s);

if (nic_burst_cnt) {
cpu_busy_score = 100.0 * nic_inflight_cnt / nic_burst_cnt;
cpu_busy_score = 100.0 - cpu_busy_score;
}
s->cpu_busy_score = cpu_busy_score;
/*
 * Recompute the TX video session's CPU busy score.
 *
 * The score is the scheduler's average tasklet loop time (ns) divided by the
 * session's pacing trs, scaled by 100. The result is stored both as the live
 * cpu_busy_score and as stat_cpu_busy_score, which the stat dump prints.
 *
 * sch: scheduler whose average loop time is sampled.
 * s:   TX video session to update.
 */
void tx_video_session_cal_cpu_busy(struct mtl_sch_impl* sch,
                                   struct st_tx_video_session_impl* s) {
  const double loop_ns = (double)mt_sch_avg_ns_loop(sch);
  const double busy = loop_ns / s->pacing.trs * 100.0;

  s->cpu_busy_score = busy;
  /* keep a copy for the stat display */
  s->stat_cpu_busy_score = busy;
}

static void tv_stat(struct st_tx_video_sessions_mgr* mgr,
Expand All @@ -3119,7 +3104,7 @@ static void tv_stat(struct st_tx_video_sessions_mgr* mgr,
idx,
(double)s->stat_bytes_tx[MTL_SESSION_PORT_P] * 8 / time_sec / MTL_STAT_M_UNIT,
(double)s->stat_bytes_tx[MTL_SESSION_PORT_R] * 8 / time_sec / MTL_STAT_M_UNIT,
s->cpu_busy_score);
s->stat_cpu_busy_score);
s->stat_last_time = cur_time_ns;
s->stat_pkts_build = 0;
s->stat_pkts_burst = 0;
Expand Down
3 changes: 2 additions & 1 deletion lib/src/st2110/st_tx_video_session.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ static inline void tx_video_session_put(struct st_tx_video_sessions_mgr* mgr, in
rte_spinlock_unlock(&mgr->mutex[idx]);
}

void tx_video_session_cal_cpu_busy(struct st_tx_video_session_impl* s);
void tx_video_session_cal_cpu_busy(struct mtl_sch_impl* sch,
struct st_tx_video_session_impl* s);
void tx_video_session_clear_cpu_busy(struct st_tx_video_session_impl* s);

static inline bool tx_video_session_is_cpu_busy(struct st_tx_video_session_impl* s) {
Expand Down
Loading

0 comments on commit bf13c9d

Please sign in to comment.