@@ -44,6 +44,8 @@ module snitch_cc #(
44
44
parameter type hive_rsp_t = logic ,
45
45
parameter type acc_req_t = logic ,
46
46
parameter type acc_resp_t = logic ,
47
+ parameter type dca_req_t = logic ,
48
+ parameter type dca_resp_t = logic ,
47
49
parameter type dma_events_t = logic ,
48
50
parameter fpnew_pkg :: fpu_implementation_t FPUImplementation = '0 ,
49
51
// / Boot address of core.
@@ -66,6 +68,8 @@ module snitch_cc #(
66
68
parameter bit Xfrep = 1 ,
67
69
// / Has `SSR` support.
68
70
parameter bit Xssr = 1 ,
71
+ // / Has `DCA` support.
72
+ parameter bit Xdca = 0 ,
69
73
// / Has `COPIFT` support.
70
74
parameter bit Xcopift = 1 ,
71
75
// / Has `IPU` support.
@@ -106,6 +110,10 @@ module snitch_cc #(
106
110
parameter bit RegisterFPUIn = 0 ,
107
111
// / Insert Pipeline registers immediately after FPU datapath
108
112
parameter bit RegisterFPUOut = 0 ,
113
+ // / Insert pipeline register on the DCA request path (from Router to FPU)
114
+ parameter bit RegisterDCAIn = 0 ,
115
+ // / Insert pipeline register on the DCA response path (from FPU to Router)
116
+ parameter bit RegisterDCAOut = 0 ,
109
117
parameter snitch_pma_pkg :: snitch_pma_t SnitchPMACfg = '{ default: 0 } ,
110
118
// / Consistency Address Queue (CAQ) parameters.
111
119
parameter int unsigned CaqDepth = 0 ,
@@ -146,7 +154,14 @@ module snitch_cc #(
146
154
input addr_t tcdm_addr_base_i,
147
155
// Cluster HW barrier
148
156
output logic barrier_o,
149
- input logic barrier_i
157
+ input logic barrier_i,
158
+ // Direct Compute Access (DCA) interface
159
+ input dca_req_t dca_req_i,
160
+ input logic dca_req_valid_i,
161
+ output logic dca_req_ready_o,
162
+ output dca_resp_t dca_resp_o,
163
+ output logic dca_resp_valid_o,
164
+ input logic dca_resp_ready_i
150
165
);
151
166
152
167
// FMA architecture is "merged" -> mulexp and macexp instructions are supported
@@ -469,6 +484,7 @@ module snitch_cc #(
469
484
// pragma translate_off
470
485
snitch_pkg :: fpu_trace_port_t fpu_trace;
471
486
snitch_pkg :: fpu_sequencer_trace_port_t fpu_sequencer_trace;
487
+ snitch_pkg :: dca_trace_port_t dca_trace;
472
488
// pragma translate_on
473
489
474
490
logic [2 : 0 ][4 : 0 ] ssr_raddr;
@@ -485,6 +501,54 @@ module snitch_cc #(
485
501
logic ssr_streamctl_valid;
486
502
logic ssr_streamctl_ready;
487
503
504
+ // Signals for the DCA
505
+ dca_req_t dca_req_q; // Delayed Request by the (optional) Spill Register
506
+ logic dca_req_valid_q;
507
+ logic dca_req_ready_q;
508
+ dca_resp_t dca_resp; // Response from the FPU in front of the (optional) Spill Register
509
+ logic dca_resp_valid;
510
+ logic dca_resp_ready;
511
+
512
+ // Optional cut (spill register) on the DCA request interface
513
+ if (Xdca) begin : gen_spill_register
514
+ spill_register # (
515
+ .T (dca_req_t),
516
+ .Bypass (~ RegisterDCAIn)
517
+ ) i_spill_reg_dca_req (
518
+ .clk_i (clk_i),
519
+ .rst_ni (rst_ni),
520
+ .valid_i (dca_req_valid_i),
521
+ .ready_o (dca_req_ready_o),
522
+ .data_i (dca_req_i),
523
+ .valid_o (dca_req_valid_q),
524
+ .ready_i (dca_req_ready_q),
525
+ .data_o (dca_req_q)
526
+ );
527
+
528
+ // Optional cut (spill register) on the DCA response interface
529
+ spill_register # (
530
+ .T (dca_resp_t),
531
+ .Bypass (~ RegisterDCAOut)
532
+ ) i_spill_reg_dca_resp (
533
+ .clk_i (clk_i),
534
+ .rst_ni (rst_ni),
535
+ .valid_i (dca_resp_valid),
536
+ .ready_o (dca_resp_ready),
537
+ .data_i (dca_resp),
538
+ .valid_o (dca_resp_valid_o),
539
+ .ready_i (dca_resp_ready_i),
540
+ .data_o (dca_resp_o)
541
+ );
542
+ end else begin
543
+ assign dca_req_ready_o = 1'b0 ;
544
+ assign dca_req_valid_q = 1'b0 ;
545
+ assign dca_req_q = '0 ;
546
+
547
+ assign dca_resp_ready = 1'b0 ;
548
+ assign dca_resp_valid_o = 1'b0 ;
549
+ assign dca_resp_o = '0 ;
550
+ end
551
+
488
552
if (FPEn) begin : gen_fpu
489
553
snitch_pkg :: core_events_t fp_ss_core_events;
490
554
@@ -505,11 +569,14 @@ module snitch_cc #(
505
569
.drsp_t (drsp_t),
506
570
.acc_req_t (acc_req_t),
507
571
.acc_resp_t (acc_resp_t),
572
+ .dca_req_t (dca_req_t),
573
+ .dca_resp_t (dca_resp_t),
508
574
.RegisterSequencer (RegisterSequencer),
509
575
.RegisterFPUIn (RegisterFPUIn),
510
576
.RegisterFPUOut (RegisterFPUOut),
511
577
.Xfrep (Xfrep),
512
578
.Xssr (Xssr),
579
+ .Xdca (Xdca),
513
580
.Xcopift (Xcopift),
514
581
.RVF (RVF ),
515
582
.RVD (RVD ),
@@ -525,34 +592,41 @@ module snitch_cc #(
525
592
// pragma translate_off
526
593
.trace_port_o ( fpu_trace ),
527
594
.sequencer_tracer_port_o ( fpu_sequencer_trace ),
595
+ .dca_trace_port_o ( dca_trace ),
528
596
// pragma translate_on
529
- .hart_id_i ( hart_id_i ),
530
- .acc_req_i ( acc_snitch_req ),
531
- .acc_req_valid_i ( acc_qvalid ),
532
- .acc_req_ready_o ( acc_qready ),
533
- .acc_resp_o ( acc_seq ),
534
- .acc_resp_valid_o ( acc_pvalid ),
535
- .acc_resp_ready_i ( acc_pready ),
536
- .caq_pvalid_o ( caq_pvalid ),
537
- .data_req_o ( fpu_dreq ),
538
- .data_rsp_i ( fpu_drsp ),
539
- .fpu_rnd_mode_i ( fpu_rnd_mode ),
540
- .fpu_fmt_mode_i ( fpu_fmt_mode ),
541
- .fpu_status_o ( fpu_status ),
542
- .ssr_raddr_o ( ssr_raddr ),
543
- .ssr_rdata_i ( ssr_rdata ),
544
- .ssr_rvalid_o ( ssr_rvalid ),
545
- .ssr_rready_i ( ssr_rready ),
546
- .ssr_rdone_o ( ssr_rdone ),
547
- .ssr_waddr_o ( ssr_waddr ),
548
- .ssr_wdata_o ( ssr_wdata ),
549
- .ssr_wvalid_o ( ssr_wvalid ),
550
- .ssr_wready_i ( ssr_wready ),
551
- .ssr_wdone_o ( ssr_wdone ),
597
+ .hart_id_i ( hart_id_i ),
598
+ .acc_req_i ( acc_snitch_req ),
599
+ .acc_req_valid_i ( acc_qvalid ),
600
+ .acc_req_ready_o ( acc_qready ),
601
+ .acc_resp_o ( acc_seq ),
602
+ .acc_resp_valid_o ( acc_pvalid ),
603
+ .acc_resp_ready_i ( acc_pready ),
604
+ .caq_pvalid_o ( caq_pvalid ),
605
+ .data_req_o ( fpu_dreq ),
606
+ .data_rsp_i ( fpu_drsp ),
607
+ .fpu_rnd_mode_i ( fpu_rnd_mode ),
608
+ .fpu_fmt_mode_i ( fpu_fmt_mode ),
609
+ .fpu_status_o ( fpu_status ),
610
+ .ssr_raddr_o ( ssr_raddr ),
611
+ .ssr_rdata_i ( ssr_rdata ),
612
+ .ssr_rvalid_o ( ssr_rvalid ),
613
+ .ssr_rready_i ( ssr_rready ),
614
+ .ssr_rdone_o ( ssr_rdone ),
615
+ .ssr_waddr_o ( ssr_waddr ),
616
+ .ssr_wdata_o ( ssr_wdata ),
617
+ .ssr_wvalid_o ( ssr_wvalid ),
618
+ .ssr_wready_i ( ssr_wready ),
619
+ .ssr_wdone_o ( ssr_wdone ),
552
620
.streamctl_done_i ( ssr_streamctl_done ),
553
621
.streamctl_valid_i ( ssr_streamctl_valid ),
554
622
.streamctl_ready_o ( ssr_streamctl_ready ),
555
- .core_events_o ( fp_ss_core_events )
623
+ .core_events_o ( fp_ss_core_events ),
624
+ .dca_req_i ( dca_req_q ),
625
+ .dca_req_valid_i ( dca_req_valid_q ),
626
+ .dca_req_ready_o ( dca_req_ready_q ),
627
+ .dca_resp_o ( dca_resp ),
628
+ .dca_resp_valid_o ( dca_resp_valid ),
629
+ .dca_resp_ready_i ( dca_resp_ready )
556
630
);
557
631
558
632
reqrsp_mux # (
@@ -604,6 +678,10 @@ module snitch_cc #(
604
678
assign core_events_o.issue_fpu = '0 ;
605
679
assign core_events_o.issue_fpu_seq = '0 ;
606
680
assign core_events_o.issue_core_to_fpu = '0 ;
681
+
682
+ assign dca_resp_valid = 1'b0 ;
683
+ assign dca_resp = '0 ;
684
+ assign dca_req_ready_q = 1'b0 ;
607
685
end
608
686
609
687
// Decide whether to go to SoC or TCDM
@@ -902,6 +980,7 @@ module snitch_cc #(
902
980
automatic snitch_pkg :: snitch_trace_port_t extras_snitch;
903
981
automatic snitch_pkg :: fpu_trace_port_t extras_fpu;
904
982
automatic snitch_pkg :: fpu_sequencer_trace_port_t extras_fpu_seq_out;
983
+ automatic snitch_pkg :: dca_trace_port_t extras_dca;
905
984
906
985
if (rst_ni) begin
907
986
extras_snitch = '{
@@ -953,6 +1032,11 @@ module snitch_cc #(
953
1032
end
954
1033
end
955
1034
1035
+ // If dca enabled then forward the trace port
1036
+ if (Xdca) begin
1037
+ extras_dca = dca_trace;
1038
+ end
1039
+
956
1040
cycle++ ;
957
1041
// Trace snitch iff:
958
1042
// we are not stalled <==> we have issued and processed an instruction (including offloads)
@@ -988,6 +1072,16 @@ module snitch_cc #(
988
1072
end
989
1073
end
990
1074
end
1075
+ if (Xdca) begin
1076
+ // Trace DCA iff
1077
+ // either an input or an output handshake occurs
1078
+ if (extras_dca.dca_in_hs || extras_dca.dca_out_hs) begin
1079
+ $sformat (trace_entry, " %t %1d %8d 0x%h DASM(%h ) #; %s \n " ,
1080
+ $time , cycle, i_snitch.priv_lvl_q, 32'hz , extras_dca.dca_in_op_code,
1081
+ snitch_pkg :: print_dca_trace (extras_dca));
1082
+ $fwrite (f, trace_entry);
1083
+ end
1084
+ end
991
1085
end else begin
992
1086
cycle = '0 ;
993
1087
end
@@ -1000,5 +1094,8 @@ module snitch_cc #(
1000
1094
// pragma translate_on
1001
1095
1002
1096
`ASSERT_INIT (BootAddrAligned, BootAddr[1 : 0 ] == 2'b00 )
1097
+
1098
+ // The DCA extension requires that each core has the FPU D-extension (RVD) enabled
1099
+ `ASSERT_INIT (DCACoreConfiguration, (~ Xdca) || RVD )
1003
1100
1004
1101
endmodule
0 commit comments