Skip to content

Commit 9459607

Browse files
author
Raphael
committed
(feat) Add external DCA (direct compute access) to the snitch cluster.
* Access the combined FPU from outside of the cluster * Extension of the tracer (Cherry-Picked from 5d029e6)
1 parent d651676 commit 9459607

File tree

10 files changed

+739
-70
lines changed

10 files changed

+739
-70
lines changed

hw/snitch/src/snitch_pkg.sv

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,8 @@ package snitch_pkg;
184184
typedef enum logic [1:0] {
185185
SrcSnitch = 0,
186186
SrcFpu = 1,
187-
SrcFpuSeq = 2
187+
SrcFpuSeq = 2,
188+
SrcDca = 3
188189
} trace_src_e;
189190

190191
typedef struct packed {
@@ -354,6 +355,51 @@ package snitch_pkg;
354355
extras_str = $sformatf("%s}", extras_str);
355356
return extras_str;
356357
endfunction
358+
359+
// Trace record for the Direct Compute Access (DCA) port of the FPU.
// Every field is a 64-bit `longint`; print_dca_trace renders the record as text.
// Fields tagged "(see FPnew docs)" carry encodings defined by the FPU (FPnew / CVFPU).
typedef struct packed {
360+
longint source; // Trace source identifier (presumably a trace_src_e value such as SrcDca; TODO confirm)
361+
longint dca_in_hs; // Handshake indicating DCA data in
362+
longint dca_out_hs; // Handshake indicating DCA data out
363+
longint dca_in_op_code; // Operation code of the FPU (see FPnew docs)
364+
longint dca_in_op_mode; // Operation mode (modifier) of the FPU (see FPnew docs)
365+
longint dca_in_rnd_mode; // Rounding mode of the FPU (see FPnew docs)
366+
longint dca_in_vector_mode; // Vector mode of the FPU (see FPnew docs)
367+
longint dca_in_op_0; // First operand of the FPU
368+
longint dca_in_op_1; // Second operand of the FPU
369+
longint dca_in_op_2; // Third operand of the FPU
370+
longint dca_in_src_fmt; // Source (input) format (see FPnew docs)
371+
longint dca_in_dst_fmt; // Destination (output) format (see FPnew docs)
372+
longint dca_in_int_fmt; // Intermediate format per the original author; NOTE(review): FPnew's int_fmt is the integer format - confirm (see FPnew docs)
373+
longint dca_in_tag; // Unique input tag
374+
longint dca_out_tag; // Unique output tag
375+
longint dca_out_status; // Status flags of the FPU (see FPnew docs)
376+
longint dca_out_result; // Result of the FPU
377+
} dca_trace_port_t;
378+
// Documentation for all fields tagged "(see FPnew docs)" can be found here:
379+
// https://github.com/openhwgroup/cvfpu/tree/master/docs
380+
381+
// Renders a DCA trace record as a brace-delimited list of `'key': 0x<hex>` pairs.
//   dca_trace: the record to format.
//   returns:   the formatted string, e.g. "{'source': 0x3, 'dca_in_hs': 0x1, ... }".
// Keys drop the `dca_in_` / `dca_out_` prefix except for the two handshake fields.
// Every pair, including the last one, is followed by ", ", so the closing brace is
// preceded by a trailing separator.
function automatic string print_dca_trace(dca_trace_port_t dca_trace);
382+
string extras_str = "{";
383+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "source", dca_trace.source);
384+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "dca_in_hs", dca_trace.dca_in_hs);
385+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "dca_out_hs", dca_trace.dca_out_hs);
386+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "op_code", dca_trace.dca_in_op_code);
387+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "op_mode", dca_trace.dca_in_op_mode);
388+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "rnd_mode", dca_trace.dca_in_rnd_mode);
389+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "vector_mode", dca_trace.dca_in_vector_mode);
390+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "op_0", dca_trace.dca_in_op_0);
391+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "op_1", dca_trace.dca_in_op_1);
392+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "op_2", dca_trace.dca_in_op_2);
393+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "src_format", dca_trace.dca_in_src_fmt);
394+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "dst_format", dca_trace.dca_in_dst_fmt);
395+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "int_format", dca_trace.dca_in_int_fmt);
396+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "in_tag", dca_trace.dca_in_tag);
397+
// Output-side fields follow the input-side ones; out_tag is emitted last.
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "status", dca_trace.dca_out_status);
398+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "result", dca_trace.dca_out_result);
399+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "out_tag", dca_trace.dca_out_tag);
400+
// Close the brace opened in the initializer.
extras_str = $sformatf("%s}", extras_str);
401+
return extras_str;
402+
endfunction
357403
// pragma translate_on
358404

359405
endpackage

hw/snitch_cluster/src/snitch_cc.sv

Lines changed: 122 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ module snitch_cc #(
4444
parameter type hive_rsp_t = logic,
4545
parameter type acc_req_t = logic,
4646
parameter type acc_resp_t = logic,
47+
parameter type dca_req_t = logic,
48+
parameter type dca_resp_t = logic,
4749
parameter type dma_events_t = logic,
4850
parameter fpnew_pkg::fpu_implementation_t FPUImplementation = '0,
4951
/// Boot address of core.
@@ -66,6 +68,8 @@ module snitch_cc #(
6668
parameter bit Xfrep = 1,
6769
/// Has `SSR` support.
6870
parameter bit Xssr = 1,
71+
/// Has `DCA` support.
72+
parameter bit Xdca = 0,
6973
/// Has `COPIFT` support.
7074
parameter bit Xcopift = 1,
7175
/// Has `IPU` support.
@@ -106,6 +110,10 @@ module snitch_cc #(
106110
parameter bit RegisterFPUIn = 0,
107111
/// Insert Pipeline registers immediately after FPU datapath
108112
parameter bit RegisterFPUOut = 0,
113+
/// Insert Pipeline register between DCA from Router and FPU
114+
parameter bit RegisterDCAIn = 0,
115+
/// Insert Pipeline register between DCA from FPU and Router
116+
parameter bit RegisterDCAOut = 0,
109117
parameter snitch_pma_pkg::snitch_pma_t SnitchPMACfg = '{default: 0},
110118
/// Consistency Address Queue (CAQ) parameters.
111119
parameter int unsigned CaqDepth = 0,
@@ -146,7 +154,14 @@ module snitch_cc #(
146154
input addr_t tcdm_addr_base_i,
147155
// Cluster HW barrier
148156
output logic barrier_o,
149-
input logic barrier_i
157+
input logic barrier_i,
158+
// Direct Compute Access (DCA) interface
159+
input dca_req_t dca_req_i,
160+
input logic dca_req_valid_i,
161+
output logic dca_req_ready_o,
162+
output dca_resp_t dca_resp_o,
163+
output logic dca_resp_valid_o,
164+
input logic dca_resp_ready_i
150165
);
151166

152167
// FMA architecture is "merged" -> mulexp and macexp instructions are supported
@@ -469,6 +484,7 @@ module snitch_cc #(
469484
// pragma translate_off
470485
snitch_pkg::fpu_trace_port_t fpu_trace;
471486
snitch_pkg::fpu_sequencer_trace_port_t fpu_sequencer_trace;
487+
snitch_pkg::dca_trace_port_t dca_trace;
472488
// pragma translate_on
473489

474490
logic [2:0][4:0] ssr_raddr;
@@ -485,6 +501,54 @@ module snitch_cc #(
485501
logic ssr_streamctl_valid;
486502
logic ssr_streamctl_ready;
487503

504+
// Signals for the DCA
505+
dca_req_t dca_req_q; // Delayed Request by the (optional) Spill Register
506+
logic dca_req_valid_q;
507+
logic dca_req_ready_q;
508+
dca_resp_t dca_resp; // Response from the FPU in front of the (optional) Spill Register
509+
logic dca_resp_valid;
510+
logic dca_resp_ready;
511+
512+
// Cut the DCA interface request path (external port -> FPU side).
// With Xdca set, the request and the response each pass through a spill_register;
// otherwise the DCA interface is tied off (see the else branch).
if(Xdca) begin : gen_spill_register
514+
spill_register #(
515+
.T (dca_req_t),
516+
// Bypass is set when RegisterDCAIn is 0, i.e. when no input register is requested.
.Bypass (~RegisterDCAIn)
517+
) i_spill_reg_dca_req (
518+
.clk_i (clk_i),
519+
.rst_ni (rst_ni),
520+
.valid_i (dca_req_valid_i),
521+
.ready_o (dca_req_ready_o),
522+
.data_i (dca_req_i),
523+
.valid_o (dca_req_valid_q),
524+
.ready_i (dca_req_ready_q),
525+
.data_o (dca_req_q)
526+
);
527+
528+
// Cut the DCA interface response path (FPU side -> external port).
529+
spill_register #(
530+
.T (dca_resp_t),
531+
// Bypass is set when RegisterDCAOut is 0, i.e. when no output register is requested.
.Bypass (~RegisterDCAOut)
532+
) i_spill_reg_dca_resp (
533+
.clk_i (clk_i),
534+
.rst_ni (rst_ni),
535+
.valid_i (dca_resp_valid),
536+
.ready_o (dca_resp_ready),
537+
.data_i (dca_resp),
538+
.valid_o (dca_resp_valid_o),
539+
.ready_i (dca_resp_ready_i),
540+
.data_o (dca_resp_o)
541+
);
542+
end else begin
543+
// DCA disabled: never accept an external request and never present one to the FPU side.
assign dca_req_ready_o = 1'b0;
544+
assign dca_req_valid_q = 1'b0;
545+
assign dca_req_q = '0;
546+
547+
// DCA disabled: never accept a response from the FPU side and never emit one externally.
assign dca_resp_ready = 1'b0;
548+
assign dca_resp_valid_o = 1'b0;
549+
assign dca_resp_o = '0;
550+
end
551+
488552
if (FPEn) begin : gen_fpu
489553
snitch_pkg::core_events_t fp_ss_core_events;
490554

@@ -505,11 +569,14 @@ module snitch_cc #(
505569
.drsp_t (drsp_t),
506570
.acc_req_t (acc_req_t),
507571
.acc_resp_t (acc_resp_t),
572+
.dca_req_t (dca_req_t),
573+
.dca_resp_t (dca_resp_t),
508574
.RegisterSequencer (RegisterSequencer),
509575
.RegisterFPUIn (RegisterFPUIn),
510576
.RegisterFPUOut (RegisterFPUOut),
511577
.Xfrep (Xfrep),
512578
.Xssr (Xssr),
579+
.Xdca (Xdca),
513580
.Xcopift (Xcopift),
514581
.RVF (RVF),
515582
.RVD (RVD),
@@ -525,34 +592,41 @@ module snitch_cc #(
525592
// pragma translate_off
526593
.trace_port_o ( fpu_trace ),
527594
.sequencer_tracer_port_o ( fpu_sequencer_trace ),
595+
.dca_trace_port_o ( dca_trace ),
528596
// pragma translate_on
529-
.hart_id_i ( hart_id_i ),
530-
.acc_req_i ( acc_snitch_req ),
531-
.acc_req_valid_i ( acc_qvalid ),
532-
.acc_req_ready_o ( acc_qready ),
533-
.acc_resp_o ( acc_seq ),
534-
.acc_resp_valid_o ( acc_pvalid ),
535-
.acc_resp_ready_i ( acc_pready ),
536-
.caq_pvalid_o ( caq_pvalid ),
537-
.data_req_o ( fpu_dreq ),
538-
.data_rsp_i ( fpu_drsp ),
539-
.fpu_rnd_mode_i ( fpu_rnd_mode ),
540-
.fpu_fmt_mode_i ( fpu_fmt_mode ),
541-
.fpu_status_o ( fpu_status ),
542-
.ssr_raddr_o ( ssr_raddr ),
543-
.ssr_rdata_i ( ssr_rdata ),
544-
.ssr_rvalid_o ( ssr_rvalid ),
545-
.ssr_rready_i ( ssr_rready ),
546-
.ssr_rdone_o ( ssr_rdone ),
547-
.ssr_waddr_o ( ssr_waddr ),
548-
.ssr_wdata_o ( ssr_wdata ),
549-
.ssr_wvalid_o ( ssr_wvalid ),
550-
.ssr_wready_i ( ssr_wready ),
551-
.ssr_wdone_o ( ssr_wdone ),
597+
.hart_id_i ( hart_id_i ),
598+
.acc_req_i ( acc_snitch_req ),
599+
.acc_req_valid_i ( acc_qvalid ),
600+
.acc_req_ready_o ( acc_qready ),
601+
.acc_resp_o ( acc_seq ),
602+
.acc_resp_valid_o ( acc_pvalid ),
603+
.acc_resp_ready_i ( acc_pready ),
604+
.caq_pvalid_o ( caq_pvalid ),
605+
.data_req_o ( fpu_dreq ),
606+
.data_rsp_i ( fpu_drsp ),
607+
.fpu_rnd_mode_i ( fpu_rnd_mode ),
608+
.fpu_fmt_mode_i ( fpu_fmt_mode ),
609+
.fpu_status_o ( fpu_status ),
610+
.ssr_raddr_o ( ssr_raddr ),
611+
.ssr_rdata_i ( ssr_rdata ),
612+
.ssr_rvalid_o ( ssr_rvalid ),
613+
.ssr_rready_i ( ssr_rready ),
614+
.ssr_rdone_o ( ssr_rdone ),
615+
.ssr_waddr_o ( ssr_waddr ),
616+
.ssr_wdata_o ( ssr_wdata ),
617+
.ssr_wvalid_o ( ssr_wvalid ),
618+
.ssr_wready_i ( ssr_wready ),
619+
.ssr_wdone_o ( ssr_wdone ),
552620
.streamctl_done_i ( ssr_streamctl_done ),
553621
.streamctl_valid_i ( ssr_streamctl_valid ),
554622
.streamctl_ready_o ( ssr_streamctl_ready ),
555-
.core_events_o ( fp_ss_core_events )
623+
.core_events_o ( fp_ss_core_events ),
624+
.dca_req_i ( dca_req_q ),
625+
.dca_req_valid_i ( dca_req_valid_q ),
626+
.dca_req_ready_o ( dca_req_ready_q ),
627+
.dca_resp_o ( dca_resp ),
628+
.dca_resp_valid_o ( dca_resp_valid ),
629+
.dca_resp_ready_i ( dca_resp_ready )
556630
);
557631

558632
reqrsp_mux #(
@@ -604,6 +678,10 @@ module snitch_cc #(
604678
assign core_events_o.issue_fpu = '0;
605679
assign core_events_o.issue_fpu_seq = '0;
606680
assign core_events_o.issue_core_to_fpu = '0;
681+
682+
assign dca_resp_valid = 1'b0;
683+
assign dca_resp = '0;
684+
assign dca_req_ready_q = 1'b0;
607685
end
608686

609687
// Decide whether to go to SoC or TCDM
@@ -902,6 +980,7 @@ module snitch_cc #(
902980
automatic snitch_pkg::snitch_trace_port_t extras_snitch;
903981
automatic snitch_pkg::fpu_trace_port_t extras_fpu;
904982
automatic snitch_pkg::fpu_sequencer_trace_port_t extras_fpu_seq_out;
983+
automatic snitch_pkg::dca_trace_port_t extras_dca;
905984

906985
if (rst_ni) begin
907986
extras_snitch = '{
@@ -953,6 +1032,11 @@ module snitch_cc #(
9531032
end
9541033
end
9551034

1035+
// If dca enabled then forward the trace port
1036+
if(Xdca) begin
1037+
extras_dca = dca_trace;
1038+
end
1039+
9561040
cycle++;
9571041
// Trace snitch iff:
9581042
// we are not stalled <==> we have issued and processed an instruction (including offloads)
@@ -988,6 +1072,16 @@ module snitch_cc #(
9881072
end
9891073
end
9901074
end
1075+
if(Xdca) begin
1076+
// Trace DCA iff
1077+
// either an input or an output handshake occurs
1078+
if(extras_dca.dca_in_hs || extras_dca.dca_out_hs) begin
1079+
$sformat(trace_entry, "%t %1d %8d 0x%h DASM(%h) #; %s\n",
1080+
$time, cycle, i_snitch.priv_lvl_q, 32'hz, extras_dca.dca_in_op_code,
1081+
snitch_pkg::print_dca_trace(extras_dca));
1082+
$fwrite(f, trace_entry);
1083+
end
1084+
end
9911085
end else begin
9921086
cycle = '0;
9931087
end
@@ -1000,5 +1094,8 @@ module snitch_cc #(
10001094
// pragma translate_on
10011095

10021096
`ASSERT_INIT(BootAddrAligned, BootAddr[1:0] == 2'b00)
1097+
1098+
// The DCA extension requires that each core has the FPU D-extension (RVD) enabled
1099+
`ASSERT_INIT(DCACoreConfiguration, (~Xdca) || RVD)
10031100

10041101
endmodule

0 commit comments

Comments
 (0)