@@ -473,7 +473,7 @@ class VPUNN_API(VPUNNPerformanceModel) {
473473 // CMX clock. Also considers limitations such as compression...
474474 int cmx_agregated_bytes_per_cycle_bw (const VPUTensor& tensor, VPUDevice device, bool half_duplex, bool permute,
475475 bool compression, float decompression_ratio = 1 .0F ,
476- int compressed_BW_BytesPerCycle = 0 ) const {
476+ int compressed_BW_BytesPerCycle = 1 ) const {
477477 // permute limits the bw to one element per cycle
478478 if (permute) {
479479 return dtype_to_bytes (tensor.get_dtype ()); // size of one element (what about half duplex?). should not be
@@ -548,12 +548,12 @@ class VPUNN_API(VPUNNPerformanceModel) {
548548 const float decompression_ratio{is_DDR2CMX_decompresion ? ((float )wl.output .size () / (float )wl.input .size ())
549549 : 1 .0f };
550550
551- const unsigned int input_bw_bpc =
551+ const int input_bw_bpc =
552552 get_bytes_per_cycle_read_bw (wl.input , wl.device , wl.input_location , is_half_duplex_limitation);
553553 const auto CMX_cycles_read = (float )wl.input .size () / (float )input_bw_bpc;
554554 const auto input_cycles_DPU = Cycles::toCycleInterfaceType (CMX_cycles_read * dpuPerCmx_clock_ratio);
555555
556- const unsigned int output_bw_bpc = get_bytes_per_cycle_write_bw (
556+ const int output_bw_bpc = get_bytes_per_cycle_write_bw (
557557 wl.output , wl.device , wl.output_location , is_half_duplex_limitation, is_cmx2cmx_permutation,
558558 is_DDR2CMX_decompresion, decompression_ratio, input_bw_bpc);
559559 const auto CMX_cycles_write = (float )wl.output .size () / (float )output_bw_bpc;
0 commit comments