Skip to content

Commit 37df6b6

Browse files
arekinath authored and danmcd committed
mlxcx: add tx latency timers with DEBUG
1 parent 6a16c08 commit 37df6b6

File tree

4 files changed

+114
-1
lines changed

4 files changed

+114
-1
lines changed

usr/src/uts/common/io/mlxcx/mlxcx.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,21 @@ extern uint_t mlxcx_stuck_intr_count;
246246
*/
247247
#define MLXCX_FUNC_ID_MAX 0
248248

249+
/*
 * Optional TX-path performance timers.
 *
 * DEBUG builds enable MLXCX_PERF_TIMERS automatically.  The extra
 * !defined() check lets a build that already passes -DMLXCX_PERF_TIMERS
 * (with or without DEBUG) include this header without triggering a macro
 * redefinition diagnostic.
 */
#if defined(DEBUG) && !defined(MLXCX_PERF_TIMERS)
#define	MLXCX_PERF_TIMERS
#endif

#if defined(MLXCX_PERF_TIMERS)
/*
 * Record the current gethrtime() value in slot idx of the timer array arr.
 * No bounds check is performed here; the caller must pass an index that is
 * valid for arr.
 */
static inline void
mlxcx_ptimer(hrtime_t *arr, uint_t idx)
{
	arr[idx] = gethrtime();
}
#define	MLXCX_PTIMER(A, I)	mlxcx_ptimer(A, I)
#else
/*
 * Timers compiled out: expand to a no-op expression.  The arguments are
 * deliberately not evaluated or even referenced, because callers pass
 * fields and locals that only exist when MLXCX_PERF_TIMERS is defined.
 * Using ((void)0) rather than an empty expansion keeps
 * "MLXCX_PTIMER(...);" a well-formed statement in every context, including
 * as the sole body of an if/else.
 */
#define	MLXCX_PTIMER(A, I)	((void)0)
#endif
263+
249264
/*
250265
* Forwards
251266
*/
@@ -547,6 +562,25 @@ typedef struct mlxcx_buf_shard {
547562
kcondvar_t mlbs_free_nonempty;
548563
} mlxcx_buf_shard_t;
549564

565+
/*
 * Slot indices for the performance-timer arrays: the per-buffer mlb_t[]
 * array and the on-stack times[] array in mlxcx_mac_ring_tx().  Both arrays
 * exist only when MLXCX_PERF_TIMERS is defined.
 *
 * Most slots receive a gethrtime() timestamp at one point in the code; the
 * three *_TOTAL slots instead accumulate elapsed time with "+=".
 *
 * Ordering matters: mlxcx_mac_ring_tx() copies slots 0 through
 * MLXCX_BUF_TIMER_POST_BUF_BIND_COPY (inclusive) from times[] into the
 * buffer with a simple loop, so the entries stamped before a buffer exists
 * must remain first and contiguous.
 */
typedef enum {
566+
/* mlxcx_mac_ring_tx(): taken right after the b_next VERIFY. */
MLXCX_BUF_TIMER_PRE_RING_TX,
567+
/* mlxcx_mac_ring_tx(): just before mtc_inline_hdrlen is assigned. */
MLXCX_BUF_TIMER_POST_OFFLOAD_INFO,
568+
/* mlxcx_mac_ring_tx(): just before mlxcx_buf_bind_or_copy() is called. */
MLXCX_BUF_TIMER_POST_INLINE_BCOPY,
569+
/* mlxcx_mac_ring_tx(): after mlxcx_buf_bind_or_copy() returned non-zero. */
MLXCX_BUF_TIMER_POST_BUF_BIND_COPY,
570+
/* mlxcx_buf_prepare_sqe(): just before ent0 is pointed at mlb_sqe[0]. */
MLXCX_BUF_TIMER_POST_SQE_BUF,
571+
/* mlxcx_buf_prepare_sqe(): just before the loop walking buffers from b0. */
MLXCX_BUF_TIMER_POST_PREPARE_SQE_INLINE,
572+
/* mlxcx_mac_ring_tx(): after mlxcx_buf_prepare_sqe() succeeded. */
MLXCX_BUF_TIMER_POST_PREPARE_SQE,
573+
/* mlxcx_mac_ring_tx(): after mlwq_mtx has been entered. */
MLXCX_BUF_TIMER_POST_WQ_MTX,
574+
/* mlxcx_sq_add_buffer(): just before the bufbgen counter is stashed. */
MLXCX_BUF_TIMER_POST_SQE_IN_RING,
575+
/* mlxcx_mac_ring_tx(): after mlwq_mtx is exited, before returning. */
MLXCX_BUF_TIMER_POST_SQ_ADD_BUF,
576+
/* mlxcx_process_cq(): before dispatching on the work queue type. */
MLXCX_BUF_TIMER_PRE_TX_COMP,
577+
/* mlxcx_buf_return_batch_flush_shard(): before mlxcx_buf_return_step2(). */
MLXCX_BUF_TIMER_PRE_STEP2,
578+
/*
 * Accumulated in mlxcx_bind_or_copy_mblk() after each mlxcx_copy_data()
 * call; mlxcx_buf_bind_or_copy() adds chained buffers' values into b0.
 */
MLXCX_BUF_TIMER_COPY_TOTAL,
579+
/*
 * Accumulated in mlxcx_bind_or_copy_mblk() after mlxcx_buf_take_foreign()
 * returns a buffer; also summed into b0 by mlxcx_buf_bind_or_copy().
 */
MLXCX_BUF_TIMER_TAKE_FOREIGN_TOTAL,
580+
/*
 * Accumulated in mlxcx_bind_or_copy_mblk() across mlxcx_dma_bind_mblk();
 * also summed into b0 by mlxcx_buf_bind_or_copy().
 */
MLXCX_BUF_TIMER_BIND_MBLK_TOTAL,
581+
/* Number of slots; used as the length of mlb_t[] and times[]. */
MLXCX_BUF_TIMER_MAX
582+
} mlxcx_buf_timer_t;
583+
550584
typedef struct mlxcx_buffer {
551585
mlxcx_buf_shard_t *mlb_shard;
552586
list_node_t mlb_entry;
@@ -579,6 +613,10 @@ typedef struct mlxcx_buffer {
579613
};
580614
size_t mlb_sqe_size;
581615
uint_t mlb_sqe_count;
616+
617+
#if defined(MLXCX_PERF_TIMERS)
618+
hrtime_t mlb_t[MLXCX_BUF_TIMER_MAX];
619+
#endif
582620
} mlxcx_buffer_t;
583621

584622
typedef enum {

usr/src/uts/common/io/mlxcx/mlxcx_gld.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -634,9 +634,18 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
634634
size_t take = 0;
635635
uint_t bcount;
636636
mlxcx_tx_ctx_t ctx;
637+
#if defined(MLXCX_PERF_TIMERS)
638+
hrtime_t times[MLXCX_BUF_TIMER_MAX];
639+
uint i;
640+
#endif
637641

638642
VERIFY(mp->b_next == NULL);
639643

644+
#if defined(MLXCX_PERF_TIMERS)
645+
bzero(times, sizeof (times));
646+
times[MLXCX_BUF_TIMER_PRE_RING_TX] = gethrtime();
647+
#endif
648+
640649
mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &ctx.mtc_chkflags);
641650
mac_lso_get(mp, &ctx.mtc_mss, &ctx.mtc_lsoflags);
642651

@@ -653,6 +662,10 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
653662
return (NULL);
654663
}
655664

665+
#if defined(MLXCX_PERF_TIMERS)
666+
times[MLXCX_BUF_TIMER_POST_OFFLOAD_INFO] = gethrtime();
667+
#endif
668+
656669
ctx.mtc_inline_hdrlen = meoi.meoi_l2hlen;
657670

658671
/*
@@ -698,23 +711,37 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
698711
}
699712
}
700713

714+
MLXCX_PTIMER(times, MLXCX_BUF_TIMER_POST_INLINE_BCOPY);
715+
701716
bcount = mlxcx_buf_bind_or_copy(mlxp, sq, mp, kmp, take, &b);
702717
if (bcount == 0) {
703718
atomic_or_uint(&sq->mlwq_state, MLXCX_WQ_BLOCKED_MAC);
704719
return (mp);
705720
}
706721

722+
MLXCX_PTIMER(times, MLXCX_BUF_TIMER_POST_BUF_BIND_COPY);
723+
724+
#if defined(MLXCX_PERF_TIMERS)
725+
/* Copy our temporary timers over to the buffer_t */
726+
for (i = 0; i <= MLXCX_BUF_TIMER_POST_BUF_BIND_COPY; ++i)
727+
b->mlb_t[i] = times[i];
728+
#endif
729+
707730
if (!mlxcx_buf_prepare_sqe(mlxp, sq, b, &ctx)) {
708731
mlxcx_warn(mlxp, "!tried to tx packet that couldn't fit in "
709732
"an SQE, dropping");
710733
freemsg(mp);
711734
return (NULL);
712735
}
713736

737+
MLXCX_PTIMER(b->mlb_t, MLXCX_BUF_TIMER_POST_PREPARE_SQE);
738+
714739
mutex_enter(&sq->mlwq_mtx);
715740
VERIFY3U(sq->mlwq_inline_mode, <=, MLXCX_ETH_INLINE_L2);
716741
cq = sq->mlwq_cq;
717742

743+
MLXCX_PTIMER(b->mlb_t, MLXCX_BUF_TIMER_POST_WQ_MTX);
744+
718745
/*
719746
* state is a single int, so read-only access without the CQ lock
720747
* should be fine.
@@ -756,6 +783,7 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
756783
}
757784

758785
mutex_exit(&sq->mlwq_mtx);
786+
MLXCX_PTIMER(b->mlb_t, MLXCX_BUF_TIMER_POST_SQ_ADD_BUF);
759787

760788
return (NULL);
761789

usr/src/uts/common/io/mlxcx/mlxcx_intr.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -981,6 +981,8 @@ mlxcx_process_cq(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq, mblk_t **mpp,
981981
list_remove(&mlcq->mlcq_buffers, buf);
982982
bufcnt++;
983983

984+
MLXCX_PTIMER(buf->mlb_t, MLXCX_BUF_TIMER_PRE_TX_COMP);
985+
984986
switch (mlcq->mlcq_wq->mlwq_type) {
985987
case MLXCX_WQ_TYPE_SENDQ:
986988
mlxcx_tx_completion(mlxp, mlcq, cent, buf, &rbatch);

usr/src/uts/common/io/mlxcx/mlxcx_ring.c

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1674,6 +1674,8 @@ mlxcx_sq_add_buffer(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq,
16741674
return (B_FALSE);
16751675
}
16761676

1677+
MLXCX_PTIMER(b0->mlb_t, MLXCX_BUF_TIMER_POST_SQE_IN_RING);
1678+
16771679
/*
16781680
* Stash the bufbgen counter, which is incremented every time
16791681
* buffers_b is merged into buffers. This lets us easily tell which
@@ -2285,6 +2287,11 @@ mlxcx_bind_or_copy_mblk(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
22852287
size_t sz;
22862288
boolean_t ret;
22872289

2290+
#if defined(MLXCX_PERF_TIMERS)
2291+
hrtime_t t0, t1;
2292+
t0 = gethrtime();
2293+
#endif
2294+
22882295
rptr = mp->b_rptr;
22892296
sz = MBLKL(mp);
22902297

@@ -2299,17 +2306,37 @@ mlxcx_bind_or_copy_mblk(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
22992306

23002307
if (sz < mlxp->mlx_props.mldp_tx_bind_threshold) {
23012308
b = mlxcx_copy_data(mlxp, wq, rptr, sz);
2309+
#if defined(MLXCX_PERF_TIMERS)
2310+
t1 = gethrtime();
2311+
b->mlb_t[MLXCX_BUF_TIMER_COPY_TOTAL] += t1 - t0;
2312+
#endif
23022313
} else {
23032314
b = mlxcx_buf_take_foreign(mlxp, wq);
23042315
if (b == NULL)
23052316
return (NULL);
2317+
#if defined(MLXCX_PERF_TIMERS)
2318+
t1 = gethrtime();
2319+
b->mlb_t[MLXCX_BUF_TIMER_TAKE_FOREIGN_TOTAL] += t1 - t0;
2320+
t0 = t1;
2321+
#endif
23062322

23072323
ret = mlxcx_dma_bind_mblk(mlxp, &b->mlb_dma, mp, off, B_TRUE);
23082324

2325+
#if defined(MLXCX_PERF_TIMERS)
2326+
t1 = gethrtime();
2327+
b->mlb_t[MLXCX_BUF_TIMER_BIND_MBLK_TOTAL] += t1 - t0;
2328+
t0 = t1;
2329+
#endif
2330+
23092331
if (!ret) {
23102332
mlxcx_buf_return(mlxp, b);
23112333

23122334
b = mlxcx_copy_data(mlxp, wq, rptr, sz);
2335+
2336+
#if defined(MLXCX_PERF_TIMERS)
2337+
t1 = gethrtime();
2338+
b->mlb_t[MLXCX_BUF_TIMER_COPY_TOTAL] += t1 - t0;
2339+
#endif
23132340
}
23142341
}
23152342

@@ -2338,6 +2365,8 @@ mlxcx_buf_prepare_sqe(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq,
23382365
b0->mlb_sqe = kmem_zalloc(b0->mlb_sqe_size, KM_SLEEP);
23392366
}
23402367

2368+
MLXCX_PTIMER(b0->mlb_t, MLXCX_BUF_TIMER_POST_SQE_BUF);
2369+
23412370
ents = 1;
23422371
ent0 = &b0->mlb_sqe[0];
23432372

@@ -2418,6 +2447,8 @@ mlxcx_buf_prepare_sqe(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq,
24182447
ent0->mlsqe_eth.mles_mss = to_be16(ctx->mtc_mss);
24192448
}
24202449

2450+
MLXCX_PTIMER(b0->mlb_t, MLXCX_BUF_TIMER_POST_PREPARE_SQE_INLINE);
2451+
24212452
b = b0;
24222453
while (b != NULL) {
24232454
rem = b->mlb_used;
@@ -2502,8 +2533,17 @@ mlxcx_buf_bind_or_copy(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
25022533
b->mlb_tx_head = b0;
25032534
b->mlb_used = MBLKL(mp) - offset;
25042535

2505-
if (!first)
2536+
if (!first) {
25062537
list_insert_tail(&b0->mlb_tx_chain, b);
2538+
#if defined(MLXCX_PERF_TIMERS)
2539+
b0->mlb_t[MLXCX_BUF_TIMER_COPY_TOTAL] +=
2540+
b->mlb_t[MLXCX_BUF_TIMER_COPY_TOTAL];
2541+
b0->mlb_t[MLXCX_BUF_TIMER_TAKE_FOREIGN_TOTAL] +=
2542+
b->mlb_t[MLXCX_BUF_TIMER_TAKE_FOREIGN_TOTAL];
2543+
b0->mlb_t[MLXCX_BUF_TIMER_BIND_MBLK_TOTAL] +=
2544+
b->mlb_t[MLXCX_BUF_TIMER_BIND_MBLK_TOTAL];
2545+
#endif
2546+
}
25072547
first = B_FALSE;
25082548
offset = 0;
25092549

@@ -2839,6 +2879,10 @@ mlxcx_buf_return_step2(mlxcx_t *mlxp, mlxcx_buffer_t *b)
28392879
break;
28402880
}
28412881

2882+
#if defined(MLXCX_PERF_TIMERS)
2883+
bzero(b->mlb_t, sizeof (b->mlb_t));
2884+
#endif
2885+
28422886
list_insert_tail(&s->mlbs_free, b);
28432887
cv_broadcast(&s->mlbs_free_nonempty);
28442888
}
@@ -2858,6 +2902,7 @@ mlxcx_buf_return_batch_flush_shard(mlxcx_t *mlxp,
28582902
}
28592903
mutex_enter(&mbrb->mbrb_shard[i]->mlbs_mtx);
28602904
while ((b = list_remove_head(&mbrb->mbrb_list[i]))) {
2905+
MLXCX_PTIMER(b->mlb_t, MLXCX_BUF_TIMER_PRE_STEP2);
28612906
mlxcx_buf_return_step2(mlxp, b);
28622907
}
28632908
mutex_exit(&mbrb->mbrb_shard[i]->mlbs_mtx);

0 commit comments

Comments
 (0)