좌측(원본)이 조금 뚜렷하고, 우측(필터링)은 일종의 블러효과를 넣은 것처럼 흐릿해졌습니다.
% Load the source image (the file name suggests a grayscale Lena image).
A = imread('lena_gray.png');
% Scale the image by a factor of 0.5 in each dimension.
B = imresize(A, 0.5);
% Dump the resized pixel matrix as delimited text into img_in.txt.
dlmwrite('img_in.txt',B);
MATLAB을 통해 512 X 512 size 원본 이미지를 256 X 256 size로 조절한 뒤 저장하였습니다.
이제 이것이 input image가 될 것입니다.
전체 Process 1. C코드로 먼저 Fixed Point로 변환 및 구현하며, 입력, 출력 예시를 준비 2. Verilog 구현 3. Double Buffering으로 데이터 충돌 방지 4. Line Buffering으로 입력과 동시에 처리해 Cycle 수 단축 5. Parameterizing, Testbench에서 DPI를 통한 C코드 활용
3. 소스코드
- 3.1 C코드 구현
#include <stdio.h>
#include <math.h>
/*
 * Apply a fixed 3x3 smoothing kernel to an 8-bit image.
 *
 * in_img  : height*width input pixels, row-major.
 * out_img : height*width output pixels, row-major (written by this function).
 * height, width : image dimensions in pixels.
 *
 * The kernel weights are integers that sum to 0x80, so the accumulated sum is
 * scaled back by a rounded right shift of 7. Neighbours that fall outside the
 * image contribute nothing (they are simply skipped), which darkens borders.
 */
void filter2d(unsigned char in_img[], unsigned char out_img[],
              int height, int width) {
    int kernel[3][3] = {
        {0x08, 0x10, 0x08},
        {0x10, 0x20, 0x10},
        {0x08, 0x10, 0x08},
    };

    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            int acc = 0;

            /* Walk the 3x3 neighbourhood, skipping taps outside the image. */
            for (int dy = -1; dy <= 1; dy++) {
                int y = row + dy;
                if (y < 0 || y >= height) {
                    continue;
                }
                for (int dx = -1; dx <= 1; dx++) {
                    int x = col + dx;
                    if (x < 0 || x >= width) {
                        continue;
                    }
                    acc += in_img[y * width + x] * kernel[dy + 1][dx + 1];
                }
            }

            /* Round to nearest and drop the 7 fractional bits. */
            acc = (acc + (1 << 6)) >> 7;

            /* Saturate to the 8-bit pixel range. */
            if (acc < 0) {
                acc = 0;
            } else if (acc > 255) {
                acc = 255;
            }
            out_img[row * width + col] = acc;
        }
    }
}
/*
 * Read a 256x256 image from img_in.txt (integers, each optionally followed by
 * a comma), dump the raw pixels as two-digit hex lines into img_in.dat, run
 * filter2d() on the image and write the result as decimal text, one image row
 * per line, into img_out.txt.
 *
 * Returns 0 on success and 1 if a file cannot be opened or the input holds
 * fewer than 256*256 readable values.
 */
int main(void) {
    enum { IMG_H = 256, IMG_W = 256, IMG_PIXELS = IMG_H * IMG_W };
    /* static: keep 128 KiB of pixel buffers off the stack */
    static unsigned char in_img[IMG_PIXELS];
    static unsigned char out_img[IMG_PIXELS];
    FILE *inf = NULL;
    FILE *outf = NULL;
    FILE *memf = NULL;
    int rc = 1;
    int i;

    inf = fopen("img_in.txt", "r");
    if (inf == NULL) {
        perror("img_in.txt");
        goto out;
    }
    outf = fopen("img_out.txt", "w");
    if (outf == NULL) {
        perror("img_out.txt");
        goto out;
    }
    memf = fopen("img_in.dat", "w");
    if (memf == NULL) {
        perror("img_in.dat");
        goto out;
    }

    for (i = 0; i < IMG_PIXELS; i++) {
        int a;

        /* A short or malformed input would otherwise leave `a` uninitialized. */
        if (fscanf(inf, "%d,", &a) != 1) {
            fprintf(stderr, "img_in.txt: expected %d pixel values, got %d\n",
                    IMG_PIXELS, i);
            goto out;
        }
        in_img[i] = (unsigned char)a;
        fprintf(memf, "%02X\n", in_img[i]);
    }

    filter2d(in_img, out_img, IMG_H, IMG_W);

    for (i = 0; i < IMG_PIXELS; i++) {
        fprintf(outf, "%3d ", out_img[i]);
        if (i % IMG_W == IMG_W - 1) {
            fprintf(outf, "\n");
        }
    }
    rc = 0;

out:
    /* Close whatever was opened, on success and on every error path. */
    if (inf != NULL) {
        fclose(inf);
    }
    if (outf != NULL) {
        fclose(outf);
    }
    if (memf != NULL) {
        fclose(memf);
    }
    return rc;
}
Process 1. Quartus II에서 사용할 Peripheral을 AHB-Lite에 적절히 연결 2. 메모리맵을 참고하여 Keil에서 ASSEMBLY 코드 작성 후 빌드 3. 완성된 code.hex를 FPGA/AHB_BRAM/에 복사 4. Quartus 에서 compile, program 후 동작 확인
Process 1. Quartus II에서 사용할 Peripheral을 AHB-Lite에 적절히 연결 2. 메모리맵을 참고하여 Keil에서 ASSEMBLY 코드 작성 후 빌드 3. 완성된 code.hex를 FPGA/AHB_BRAM/에 복사 4. Quartus 에서 compile, program 후 동작 확인
다양한 Test case들에 대해 Coverage를 올리는 Verification에 대해 학습해 봅니다.
2. 과제 정의 및 개요
위 그림처럼 원래는 Cache가 DRAM에 접근하지만,
이번 설계에서는 조금 더 간단하게 하기 위해
Bus Interface가 있다고 가정하고 설계해 보겠습니다.
CPU Interface 1. cpu_addr은 32-bit address 2. cpu_din/cpu_dout은 32-bit in/out data 3. cpu_cs가 1일 때 전송 (아래 소스코드 기준 active-high) 4. cpu_we는 Read/Write, 0일 때 Read 5. cpu_nwait는 전송이 끝나는 flag
Dram Interface 1. dram_addr은 32-bit address 2. dram_din/dram_dout은 32-bit in/out data 3. dram_cs가 1일 때 전송 (아래 소스코드 기준 active-high) 4. dram_we는 Read/Write, 0일 때 Read 5. dram_nwait는 전송이 끝나는 flag
Cache Spec. 1. 한 line에 4 word 2. 1024 lines, block address는 10-bit 3. address는 32-bit : tag는 18-bit 즉, 18(tag)+10(block addr)+2(word addr)+2(intra word addr) = 32 bit
Cache Interface 1. cache_din/cache_dout은 data(32*4) + tag(18) = 146-bit 2. valid, dirty bit는 따로 관리
State Diagram과 Timing Diagram은 아래와 같습니다.
3. 소스코드
// Direct-mapped, write-back cache between a CPU port and a DRAM port.
// Geometry, as fixed by the address slicing below:
//   - 1024 lines, indexed by addr[13:4]
//   - 4 x 32-bit words per line, selected by addr[3:2]
//   - 18-bit tag = addr[31:14]
// Each cache-memory entry is 146 bits: {tag[17:0], word3, word2, word1, word0}.
// Valid and dirty bits are kept in separate register vectors (valids/dirtys).
// On a miss to a dirty line the old line is first written back to DRAM
// (R_WMEM/W_WMEM), then the requested line is fetched (x_RMEM, x_REND).
// A write miss fetches the line and merges the CPU word into it.
module cache (
input clk,
input n_reset,
input cpu_cs,
input cpu_we,
input [31:0] cpu_addr,
input [31:0] cpu_din,
output reg [31:0] cpu_dout,
output cpu_nwait,
output dram_cs,
output dram_we,
output [31:0] dram_addr,
output [31:0] dram_din,
input [31:0] dram_dout,
input dram_nwait
);
// FSM state encoding. READ/WRITE are the tag-check states; the R_* / W_*
// states handle the read-miss / write-miss sequences respectively.
parameter IDLE = 4'b0000;
parameter READ = 4'b0001;
parameter R_WMEM = 4'b0010;
parameter R_RMEM = 4'b0011;
parameter R_REND = 4'b0100;
parameter R_OUT = 4'b0101;
parameter WRITE = 4'b1001;
parameter W_WMEM = 4'b1010;
parameter W_RMEM = 4'b1011;
parameter W_REND = 4'b1100;
reg [3:0] state, next;
wire hit, dirty, valid;
// Word counter within a 4-word DRAM burst (write-back or refill).
reg [1:0] cnt;
// Next-state logic. `next` defaults to `state`, so every branch that does
// not assign it holds the current state.
always@(*) begin
next = state;
case(state)
IDLE: begin
// A new CPU request goes to the tag-check state matching its direction.
if(cpu_cs == 1'b1) begin
if(cpu_we == 1'b1) next = WRITE;
else next = READ;
end
end
READ: begin
if(hit == 1'b1) begin
// Read hit: the next request (if any) is accepted in the same cycle.
if(cpu_cs == 1'b1) begin
if(cpu_we == 1'b1) next = WRITE;
else next = READ;
end else begin
next = IDLE;
end
end else begin
// Read miss: write the old line back first only if it is dirty.
if(dirty == 1'b1) next = R_WMEM;
else next = R_RMEM;
end
end
R_WMEM: begin
// Leave after the 4th word (cnt == 3) has been accepted by the DRAM.
if((dram_nwait == 1'b1) && (cnt == 3)) next = R_RMEM;
end
R_RMEM: begin
if((dram_nwait == 1'b1) && (cnt == 3)) next = R_REND;
end
R_REND: begin
// Wait for the DRAM to finish the last read of the burst.
if(dram_nwait == 1'b1) next = R_OUT;
end
R_OUT: begin
// Refilled data is presented to the CPU; a new request may be accepted.
if(cpu_cs == 1'b1) begin
if(cpu_we == 1'b1) next = WRITE;
else next = READ;
end else begin
next = IDLE;
end
end
WRITE: begin
if(hit == 1'b1) begin
// Write hit: the line is updated in this state, then back to IDLE.
next = IDLE;
end else begin
if(dirty == 1'b1) next = W_WMEM;
else next = W_RMEM;
end
end
W_WMEM: begin
if((dram_nwait == 1'b1) && (cnt == 3)) next = W_RMEM;
end
W_RMEM: begin
if((dram_nwait == 1'b1) && (cnt == 3)) next = W_REND;
end
W_REND: begin
if(dram_nwait == 1'b1) next = IDLE;
end
endcase
end
// State register and burst word counter.
// cnt advances once per cycle in which the DRAM is ready during a burst
// state; being 2 bits wide it wraps back to 0 after four beats.
always@(negedge n_reset or posedge clk) begin
if(n_reset == 1'b0) begin
state <= IDLE;
cnt <= 0;
end else begin
state <= next;
if((state == R_WMEM) || (state == R_RMEM) ||
(state == W_WMEM) || (state == W_RMEM)) begin
if(dram_nwait == 1'b1) begin
cnt <= cnt + 1;
end
end
end
end
// Latched copy of the accepted CPU request (address, and data for writes).
reg [31:0] cpu_addr_d;
reg [31:0] cpu_din_d;
wire [145:0] cache_dout;
// Working copy of one cache line: holds the old line during write-back and
// collects the incoming words during a refill.
reg [145:0] cache_line;
always@(negedge n_reset or posedge clk) begin
if(n_reset == 1'b0) begin
cpu_addr_d <= 'b0;
cpu_din_d <= 'b0;
end else begin
// A request is captured only when the cache is ready (cpu_nwait high).
if((cpu_cs == 1'b1) && (cpu_nwait == 1'b1)) begin
cpu_addr_d <= cpu_addr;
if(cpu_we == 1'b1) cpu_din_d <= cpu_din;
end
end
end
// The cache is ready for a new request in IDLE, on a read hit, and in R_OUT.
assign cpu_nwait = (state == IDLE) ||
((state == READ) && (hit == 1'b1)) ||
(state == R_OUT);
// Read-data mux: in READ the word comes straight from the cache memory
// output; in all other states it comes from the refilled cache_line.
always@(*) begin
if(state == READ) begin
case(cpu_addr_d[3:2])
2'b00: cpu_dout = cache_dout[31:0];
2'b01: cpu_dout = cache_dout[63:32];
2'b10: cpu_dout = cache_dout[95:64];
2'b11: cpu_dout = cache_dout[127:96];
endcase
end else begin
case(cpu_addr_d[3:2])
2'b00: cpu_dout = cache_line[31:0];
2'b01: cpu_dout = cache_line[63:32];
2'b10: cpu_dout = cache_line[95:64];
2'b11: cpu_dout = cache_line[127:96];
endcase
end
end
// Cache memory is read in the same cycles in which a CPU request is accepted.
wire cache_read = ((state == IDLE) && (cpu_cs == 1'b1))
|| ((state == READ) && (cpu_cs == 1'b1) && (hit == 1'b1))
|| ((state == R_OUT) && (cpu_cs == 1'b1));
// Cache memory is written on a write hit and when a refill completes.
wire cache_write = ((state == WRITE) && (hit == 1'b1))
|| ((state == R_REND) && (dram_nwait == 1'b1))
|| ((state == W_REND) && (dram_nwait == 1'b1));
wire cache_cs = cache_read || cache_write;
wire cache_we = cache_write;
// Index from the live CPU address in the request-accepting states, otherwise
// from the latched address of the request being serviced.
wire [9:0] cache_addr = (state == IDLE) || (state == READ) ||
(state == R_OUT) ?
cpu_addr[13:4] : cpu_addr_d[13:4];
// Cache write data: 18-bit tag in [145:128], four 32-bit words in [127:0].
reg [145:0] cache_din;
always@(*) begin
if(state == WRITE) begin
// Write hit: keep the stored line and replace only the addressed word.
cache_din[145:0] = cache_dout[145:0];
if(cpu_addr_d[3:2] == 2'b00) cache_din[31:0] = cpu_din_d;
if(cpu_addr_d[3:2] == 2'b01) cache_din[63:32] = cpu_din_d;
if(cpu_addr_d[3:2] == 2'b10) cache_din[95:64] = cpu_din_d;
if(cpu_addr_d[3:2] == 2'b11) cache_din[127:96] = cpu_din_d;
end else if(state == R_REND) begin
// Read refill: new tag, word 3 directly from DRAM, words 0..2 from cache_line.
cache_din[145:128] = cpu_addr_d[31:14];
cache_din[127:96] = dram_dout;
cache_din[95:0] = cache_line[95:0];
end else if(state == W_REND) begin
// Write refill: same as the read refill, then overlay the CPU write word.
cache_din[145:128] = cpu_addr_d[31:14];
cache_din[127:96] = dram_dout;
cache_din[95:0] = cache_line[95:0];
if(cpu_addr_d[3:2] == 2'b00) cache_din[31:0] = cpu_din_d;
if(cpu_addr_d[3:2] == 2'b01) cache_din[63:32] = cpu_din_d;
if(cpu_addr_d[3:2] == 2'b10) cache_din[95:64] = cpu_din_d;
if(cpu_addr_d[3:2] == 2'b11) cache_din[127:96] = cpu_din_d;
end else begin
cache_din = cache_line;
end
end
// Tag + data storage: 1024 entries of 146 bits.
mem_single #(
.WD(146),
.DEPTH(1024)
) i_cache_mem (
.clk(clk),
.cs(cache_cs),
.we(cache_we),
.addr(cache_addr),
.din(cache_din),
.dout(cache_dout)
);
// One valid and one dirty flag per cache line.
reg [1023:0] valids;
reg [1023:0] dirtys;
always@(negedge n_reset or posedge clk) begin
if(n_reset == 1'b0) begin
cache_line <= 'b0;
valids <= 1024'b0;
dirtys <= 1024'b0;
end else begin
// cache_line: snapshot of the stored line during tag check (used as the
// write-back source on a dirty miss), then filled word by word on refill.
// During x_RMEM the word captured at cnt == k is word k-1; word 3 is
// captured in x_REND.
if(state == READ) begin
cache_line <= cache_dout;
end else if(state == WRITE) begin
cache_line <= cache_dout;
end else if((state == R_RMEM) && (dram_nwait == 1'b1)) begin
if(cnt == 2'b01) cache_line[31:0] <= dram_dout;
if(cnt == 2'b10) cache_line[63:32] <= dram_dout;
if(cnt == 2'b11) cache_line[95:64] <= dram_dout;
end else if((state == R_REND) && (dram_nwait == 1'b1)) begin
cache_line[127:96] <= dram_dout;
end else if((state == W_RMEM) && (dram_nwait == 1'b1)) begin
if(cnt == 2'b01) cache_line[31:0] <= dram_dout;
if(cnt == 2'b10) cache_line[63:32] <= dram_dout;
if(cnt == 2'b11) cache_line[95:64] <= dram_dout;
end else if((state == W_REND) && (dram_nwait == 1'b1)) begin
cache_line[127:96] <= dram_dout;
end
// Dirty flag: set by a write hit or a completed write refill, cleared by a
// completed read refill.
if((state == WRITE) && (hit == 1'b1)) begin
dirtys[cpu_addr_d[13:4]] <= 1'b1;
end else if((state == R_REND) && (dram_nwait == 1'b1)) begin
dirtys[cpu_addr_d[13:4]] <= 1'b0;
end else if((state == W_REND) && (dram_nwait == 1'b1)) begin
dirtys[cpu_addr_d[13:4]] <= 1'b1;
end
// Valid flag: set once a line has been refilled; never cleared except by reset.
if((state == R_REND) && (dram_nwait == 1'b1)) begin
valids[cpu_addr_d[13:4]] <= 1'b1;
end else if((state == W_REND) && (dram_nwait == 1'b1)) begin
valids[cpu_addr_d[13:4]] <= 1'b1;
end
end
end
// Status of the line addressed by the latched request.
assign valid = valids[cpu_addr_d[13:4]];
assign dirty = dirtys[cpu_addr_d[13:4]];
wire [17:0] tag = cache_dout[145:128];
// Hit = stored tag matches the request tag and the line is valid.
assign hit = (tag == cpu_addr_d[31:14]) &&
(valid == 1'b1);
// DRAM port: reads during refill states, writes during write-back states.
wire dram_read = (state == R_RMEM) || (state == W_RMEM);
wire dram_write = (state == R_WMEM) || (state == W_WMEM);
assign dram_cs = dram_read || dram_write;
assign dram_we = dram_write;
// Refill addresses use the requested line; write-back addresses use the old
// tag held in cache_line together with the same line index. cnt picks the word.
assign dram_addr = (state == R_RMEM) || (state == W_RMEM) ?
{cpu_addr_d[31:4], cnt, 2'b00} :
{cache_line[145:128], cpu_addr_d[13:4],cnt, 2'b00};
// Write-back data: word `cnt` of the old line.
assign dram_din = (cnt == 2'b00) ? cache_line[31:0] :
(cnt == 2'b01) ? cache_line[63:32] :
(cnt == 2'b10) ? cache_line[95:64] :
cache_line[127:96];
endmodule
// Single-port memory with synchronous write and registered read address.
//   WD    : data width in bits
//   DEPTH : number of entries
//   WA    : address width, derived from DEPTH
// While cs is high the address is captured on each rising clock edge (and
// din is stored there when we is high); dout continuously shows the entry
// at the most recently captured address.
module mem_single #(
    parameter WD    = 128,
    parameter DEPTH = 64,
    parameter WA    = $clog2(DEPTH)
) (
    input           clk,
    input           cs,
    input           we,
    input  [WA-1:0] addr,
    input  [WD-1:0] din,
    output [WD-1:0] dout
);
    reg [WD-1:0] mem [DEPTH-1:0];
    reg [WA-1:0] rd_addr;

    always @(posedge clk) begin
        if (cs == 1'b1) begin
            // The address is latched on every access, reads and writes alike.
            rd_addr <= addr;
            if (we == 1'b1) begin
                mem[addr] <= din;
            end
        end
    end

    assign dout = mem[rd_addr];
endmodule
편의를 위해 system-verilog가 사용되었습니다.
verilog-HDL로 컴파일하기 위해서는
Parameter 자리에 WA 부분을 localparam으로 옮기고,
이에 따라 input 정의에 WA가 아닌 직접 기술해줘야 합니다.
- top0 (Test case 직접 구상)
// Directed testbench for `cache`: drives six hand-written CPU accesses
// against a simple DRAM model and dumps waveforms. There is no self-checking
// in this bench; results are inspected in the waveform.
module top_cache;
reg clk, n_reset;
// Free-running clock with a period of 10 time units.
initial clk = 1'b0;
always #5 clk = ~clk;
// Backing store of the DRAM model, one 32-bit word per entry.
reg [31:0] dram_data[0:64*1024*1024-1];
// Waveform dump (these system tasks are simulator-specific).
initial begin
$vcdplusfile("cache.vpd");
$vcdpluson(0,top_cache);
end
// CPU-side signals driven by the stimulus below.
reg cpu_cs;
reg cpu_we;
reg [31:0] cpu_addr;
reg [31:0] cpu_din;
wire [31:0] cpu_dout;
wire cpu_nwait;
initial begin
// Fill the DRAM with random data, then pulse the active-low reset.
n_reset = 1'b1;
for(int i=0;i<64*1024*1024;i++) dram_data[i] = $random;
#3;
n_reset = 1'b0;
#20;
n_reset = 1'b1;
cpu_cs = 1'b0;
@(posedge clk);
@(posedge clk);
@(posedge clk);
// Each access below asserts cpu_cs, then on every clock edge (plus a delay
// of 6) drops cpu_cs and leaves the loop once cpu_nwait is high.
// first miss
cpu_cs = 1'b1;
cpu_we = 1'b0;
cpu_addr = 32'h00A37B9C;
while(1) begin
@(posedge clk);
#6;
cpu_cs = 1'b0;
if(cpu_nwait == 1'b1) break;
end
// hit
cpu_cs = 1'b1;
cpu_we = 1'b0;
cpu_addr = 32'h00A37B98;
while(1) begin
@(posedge clk);
#6;
cpu_cs = 1'b0;
if(cpu_nwait == 1'b1) break;
end
// miss on the same cache line
cpu_cs = 1'b1;
cpu_we = 1'b0;
cpu_addr = 32'h00A3BB98;
while(1) begin
@(posedge clk);
#6;
cpu_cs = 1'b0;
if(cpu_nwait == 1'b1) break;
end
// write hit
cpu_cs = 1'b1;
cpu_we = 1'b1;
cpu_addr = 32'h00A3BB90;
cpu_din = 32'hFFFFFFFF;
while(1) begin
@(posedge clk);
#6;
cpu_cs = 1'b0;
if(cpu_nwait == 1'b1) break;
end
// miss & write back because of dirty
cpu_cs = 1'b1;
cpu_we = 1'b0;
cpu_addr = 32'h00A37B94;
while(1) begin
@(posedge clk);
#6;
cpu_cs = 1'b0;
if(cpu_nwait == 1'b1) break;
end
// miss on the same cache line
cpu_cs = 1'b1;
cpu_we = 1'b0;
cpu_addr = 32'h00A3BB90;
while(1) begin
@(posedge clk);
#6;
cpu_cs = 1'b0;
if(cpu_nwait == 1'b1) break;
end
// Let the last access drain before ending the simulation.
@(posedge clk);
@(posedge clk);
@(posedge clk);
@(posedge clk);
$finish;
end
// DRAM-side signals between the cache and the DRAM model.
wire dram_cs;
wire dram_we;
wire [31:0] dram_addr;
wire [31:0] dram_din;
wire [31:0] dram_dout;
wire dram_nwait;
// Device under test.
cache i_cache(
.clk(clk),
.n_reset(n_reset),
.cpu_cs(cpu_cs),
.cpu_we(cpu_we),
.cpu_addr(cpu_addr),
.cpu_din(cpu_din),
.cpu_dout(cpu_dout),
.cpu_nwait(cpu_nwait),
.dram_cs(dram_cs),
.dram_we(dram_we),
.dram_addr(dram_addr),
.dram_din(dram_din),
.dram_dout(dram_dout),
.dram_nwait(dram_nwait)
);
// DRAM model. cnt counts the cycles of one access: it starts when dram_cs is
// seen and keeps running until it wraps back to 0, so each access occupies
// four cycles. The model is ready (dram_nwait high) only while cnt == 0.
reg [1:0] cnt;
reg dram_we_d;
reg [31:0] dram_addr_d;
reg [31:0] dram_din_d;
always@(negedge n_reset or posedge clk) begin
if(n_reset == 1'b0) begin
cnt <= 0;
end else begin
if((dram_cs == 1'b1) || (cnt > 0)) begin
cnt <= cnt + 1;
end
// Latch the request when it is accepted.
if((dram_cs == 1'b1) && (dram_nwait == 1'b1)) begin
dram_we_d <= dram_we;
dram_addr_d <= dram_addr;
dram_din_d <= dram_din;
end
// A latched write is committed in the last cycle of the access.
if((dram_we_d == 1'b1) && (cnt == 3)) begin
dram_data[dram_addr_d[31:2]] <= dram_din_d;
end
end
end
// Read data for the latched word address is driven only while the model is
// ready; it is X while an access is in flight.
assign dram_dout = (dram_nwait==1'b1) ? dram_data[dram_addr_d[31:2]] : 'bx;
assign dram_nwait = (cnt == 0);
endmodule
기존의 방식처럼 4가지 경우의 수를 직접 기술한 테스트벤치입니다.
편하게 확장하기 위해 system verilog로 작성하였습니다.
- top1 Random generation
// Random testbench for `cache`: 10000 randomly generated CPU cycles (idle,
// read or write to a random word address) against a simple DRAM model, with
// a checker that compares every read result against a reference memory.
module top_cache;
parameter DRAM_SIZE = 64*1024*1024;
reg clk, n_reset;
// Free-running clock with a period of 10 time units.
initial clk = 1'b0;
always #5 clk = ~clk;
// dram_data backs the DRAM model; dram_data_ref is the checker's reference
// copy, updated directly from the CPU-side writes.
reg [31:0] dram_data[0:DRAM_SIZE-1];
reg [31:0] dram_data_ref[0:DRAM_SIZE-1];
// Waveform dump (these system tasks are simulator-specific).
initial begin
$shm_open("./waveform");
$shm_probe(top_cache,"AS");
end
reg cpu_cs;
reg cpu_we;
reg [31:0] cpu_addr;
reg [31:0] cpu_din;
wire [31:0] cpu_dout;
wire cpu_nwait;
initial begin
// Randomize both memories identically, then pulse the active-low reset.
n_reset = 1'b1;
for(int i=0;i<DRAM_SIZE;i++) begin
dram_data[i] = $random;
dram_data_ref[i] = dram_data[i];
end
#3;
n_reset = 1'b0;
#20;
n_reset = 1'b1;
cpu_cs = 1'b0;
@(posedge clk);
@(posedge clk);
@(posedge clk);
#6;
repeat(10000) begin // -----> 10000 cases
cpu_cs = $random % 2; // -----> access or not
if(cpu_cs == 1'b1) begin
cpu_we = $random % 2; // -----> read or write
cpu_addr = {$random & (DRAM_SIZE-1), 2'b00}; // ----> random addr
if(cpu_we == 1'b1) cpu_din = $random; // ----> random data on write
else cpu_din = 'bx;
end else begin
// Unused inputs are driven to X during idle cycles.
cpu_we = 1'bx;
cpu_addr = 'bx;
cpu_din = 'bx;
end
// Wait until the cache is ready again; while it is busy all CPU inputs are
// driven to X.
while(1) begin
@(posedge clk);
#6;
if(cpu_nwait == 1'b1) begin
break;
end else begin
cpu_cs = 1'bx;
cpu_we = 1'bx;
cpu_addr = 'bx;
cpu_din = 'bx;
end
end
end
@(posedge clk);
@(posedge clk);
@(posedge clk);
@(posedge clk);
$finish;
end
wire dram_cs;
wire dram_we;
wire [31:0] dram_addr;
wire [31:0] dram_din;
wire [31:0] dram_dout;
reg dram_nwait;
// Device under test.
cache i_cache(
.clk(clk),
.n_reset(n_reset),
.cpu_cs(cpu_cs),
.cpu_we(cpu_we),
.cpu_addr(cpu_addr),
.cpu_din(cpu_din),
.cpu_dout(cpu_dout),
.cpu_nwait(cpu_nwait),
.dram_cs(dram_cs),
.dram_we(dram_we),
.dram_addr(dram_addr),
.dram_din(dram_din),
.dram_dout(dram_dout),
.dram_nwait(dram_nwait)
);
// DRAM model. cnt counts the cycles of one access: it starts when dram_cs is
// seen and keeps running until it wraps back to 0, so each access occupies
// four cycles. The model is ready (dram_nwait high) only while cnt == 0.
reg [1:0] cnt;
reg dram_we_d;
reg [31:0] dram_addr_d;
reg [31:0] dram_din_d;
always@(negedge n_reset or posedge clk) begin
if(n_reset == 1'b0) begin
cnt <= 0;
end else begin
if((dram_cs == 1'b1) || (cnt > 0)) begin
cnt <= cnt + 1;
end
// Latch the request when it is accepted.
if((dram_cs == 1'b1) && (dram_nwait == 1'b1)) begin
dram_we_d <= dram_we;
dram_addr_d <= dram_addr;
dram_din_d <= dram_din;
end
// A latched write is committed in the last cycle of the access.
if((dram_we_d == 1'b1) && (cnt == 3)) begin
dram_data[dram_addr_d[31:2]] <= dram_din_d;
end
end
end
// Read data is driven only while the model is ready; X while busy.
assign dram_dout = (dram_nwait==1'b1) ? dram_data[dram_addr_d[31:2]] : 'bx;
assign dram_nwait = (cnt == 0);
// Checker. On every clock edge where the cache is ready it records the
// operation being accepted ({cpu_cs, cpu_we}) and judges the previous one:
//   2'b10 (read)  -> cpu_dout must equal the reference word, else report
//                    the mismatch and stop the simulation
//   2'b11 (write) -> the reference word is updated with the written data
reg [31:0] cpu_addr_d;
reg [31:0] cpu_din_d;
reg [1:0] cpu_prev_op;
always@(posedge clk) begin
if(cpu_nwait == 1'b1) begin
cpu_prev_op <= {cpu_cs, cpu_we};
if(cpu_cs == 1'b1) begin
cpu_addr_d <= cpu_addr;
cpu_din_d <= cpu_din;
end
if(cpu_prev_op == 2'b10) begin
if(dram_data_ref[cpu_addr_d[31:2]] == cpu_dout) begin
end else begin
$display("Error!! addr = %X, dram_data = %X, cpu_dout = %X",
cpu_addr_d, dram_data_ref[cpu_addr_d[31:2]], cpu_dout);
#10; $finish;
end
end else if(cpu_prev_op == 2'b11) begin
dram_data_ref[cpu_addr_d[31:2]] <= cpu_din_d;
end
end
end
endmodule
직접 기술할 수 없을 수많은 케이스를 확인하기 위해
Random generation을 사용해 10000가지 경우의 수로 검증합니다.
- top2 (Constrained Random generation)
// Constrained-random testbench for `cache`.
// 10000 CPU cycles are generated. For each access, with 75% probability the
// address stays within 0..3 words of the previous one (to provoke hits);
// otherwise a fresh random word address is chosen. A checker compares every
// read result against a reference memory.
//
// $random returns a signed value, so `$random % 4` ranges over -3..3. The
// locality constraint therefore uses `{$random} % 4` (unsigned, 0..3) so the
// 75% split and the non-negative word offset hold as documented.
module top_cache;
  parameter DRAM_SIZE = 64*1024*1024;   // DRAM model depth in 32-bit words
  reg clk, n_reset;
  // Free-running clock with a period of 10 time units.
  initial clk = 1'b0;
  always #5 clk = ~clk;
  // dram_data backs the DRAM model; dram_data_ref is the checker's reference.
  reg [31:0] dram_data[0:DRAM_SIZE-1];
  reg [31:0] dram_data_ref[0:DRAM_SIZE-1];
  // Waveform dump (these system tasks are simulator-specific).
  initial begin
    $shm_open("./waveform");
    $shm_probe(top_cache,"AS");
  end
  reg cpu_cs;
  reg cpu_we;
  reg [31:0] cpu_addr;
  reg [31:0] cpu_din;
  wire [31:0] cpu_dout;
  wire cpu_nwait;
  reg [31:0] p_addr;                    // address of the previous access
  initial begin
    // Randomize both memories identically, then pulse the active-low reset.
    n_reset = 1'b1;
    for(int i=0;i<DRAM_SIZE;i++) begin
      dram_data[i] = $random;
      dram_data_ref[i] = dram_data[i];
    end
    #3;
    n_reset = 1'b0;
    #20;
    n_reset = 1'b1;
    cpu_cs = 1'b0;
    @(posedge clk);
    @(posedge clk);
    @(posedge clk);
    #6;
    p_addr = {$random & (DRAM_SIZE-1), 2'b00};
    repeat(10000) begin
      cpu_cs = $random % 2;
      if(cpu_cs == 1'b1) begin
        cpu_we = $random % 2;
        if({$random}%4 > 0) begin       // 75%: stay close to the previous address
          if($random%2 == 0) cpu_addr = p_addr + ({$random}%4) * 4;  // move up 0..3 words
          else cpu_addr = p_addr - ({$random}%4) * 4;                // move down 0..3 words
          // Out of range (including unsigned wrap below 0): clamp to the last word.
          if(cpu_addr >= DRAM_SIZE*4) cpu_addr = (DRAM_SIZE-1) * 4;
        end else begin                  // 25%: jump to a new random word address
          cpu_addr = {$random & (DRAM_SIZE-1), 2'b00};
        end
        p_addr = cpu_addr;              // remember for the next iteration
        if(cpu_we == 1'b1) cpu_din = $random;
        else cpu_din = 'bx;
      end else begin
        // Unused inputs are driven to X during idle cycles.
        cpu_we = 1'bx;
        cpu_addr = 'bx;
        cpu_din = 'bx;
      end
      // Wait until the cache is ready again; inputs are X while it is busy.
      while(1) begin
        @(posedge clk);
        #6;
        if(cpu_nwait == 1'b1) begin
          break;
        end else begin
          cpu_cs = 1'bx;
          cpu_we = 1'bx;
          cpu_addr = 'bx;
          cpu_din = 'bx;
        end
      end
    end
    @(posedge clk);
    @(posedge clk);
    @(posedge clk);
    @(posedge clk);
    $finish;
  end
  wire dram_cs;
  wire dram_we;
  wire [31:0] dram_addr;
  wire [31:0] dram_din;
  wire [31:0] dram_dout;
  reg dram_nwait;
  // Device under test.
  cache i_cache(
    .clk(clk),
    .n_reset(n_reset),
    .cpu_cs(cpu_cs),
    .cpu_we(cpu_we),
    .cpu_addr(cpu_addr),
    .cpu_din(cpu_din),
    .cpu_dout(cpu_dout),
    .cpu_nwait(cpu_nwait),
    .dram_cs(dram_cs),
    .dram_we(dram_we),
    .dram_addr(dram_addr),
    .dram_din(dram_din),
    .dram_dout(dram_dout),
    .dram_nwait(dram_nwait)
  );
  // DRAM model: each access occupies four cycles (cnt 0..3); the model is
  // ready (dram_nwait high) only while cnt == 0.
  reg [1:0] cnt;
  reg dram_we_d;
  reg [31:0] dram_addr_d;
  reg [31:0] dram_din_d;
  always@(negedge n_reset or posedge clk) begin
    if(n_reset == 1'b0) begin
      cnt <= 0;
    end else begin
      if((dram_cs == 1'b1) || (cnt > 0)) begin
        cnt <= cnt + 1;
      end
      // Latch the request when it is accepted.
      if((dram_cs == 1'b1) && (dram_nwait == 1'b1)) begin
        dram_we_d <= dram_we;
        dram_addr_d <= dram_addr;
        dram_din_d <= dram_din;
      end
      // A latched write is committed in the last cycle of the access.
      if((dram_we_d == 1'b1) && (cnt == 3)) begin
        dram_data[dram_addr_d[31:2]] <= dram_din_d;
      end
    end
  end
  // Read data is driven only while the model is ready; X while busy.
  assign dram_dout = (dram_nwait==1'b1) ? dram_data[dram_addr_d[31:2]] : 'bx;
  assign dram_nwait = (cnt == 0);
  // Checker: on every ready cycle, record the accepted operation and judge
  // the previous one (2'b10 = read -> compare, 2'b11 = write -> update ref).
  reg [31:0] cpu_addr_d;
  reg [31:0] cpu_din_d;
  reg [1:0] cpu_prev_op;
  always@(posedge clk) begin
    if(cpu_nwait == 1'b1) begin
      cpu_prev_op <= {cpu_cs, cpu_we};
      if(cpu_cs == 1'b1) begin
        cpu_addr_d <= cpu_addr;
        cpu_din_d <= cpu_din;
      end
      if(cpu_prev_op == 2'b10) begin
        if(dram_data_ref[cpu_addr_d[31:2]] == cpu_dout) begin
        end else begin
          $display("Error!! addr = %X, dram_data = %X, cpu_dout = %X",
                   cpu_addr_d, dram_data_ref[cpu_addr_d[31:2]], cpu_dout);
          #10; $finish;
        end
      end else if(cpu_prev_op == 2'b11) begin
        dram_data_ref[cpu_addr_d[31:2]] <= cpu_din_d;
      end
    end
  end
endmodule
무작위로 할 경우, 원하는 상황이 나올지 안 나올지 확인하기 힘듭니다.
제약조건을 걸어 75% 확률로 hit를 만들고,
이전 주소에서 word단위로 이동되도록 합니다.
이 과정에서 DRAM의 유효범위를 넘지 않게 합니다.
-top3 (task 적용)
// Task-based testbench for `cache`.
// First replays six directed accesses through mem_drive(), then runs 10000
// constrained-random cycles: with 75% probability the address stays within
// 0..3 words of the previous one, otherwise a fresh random word address is
// chosen. A checker compares every read result against a reference memory.
//
// $random returns a signed value, so `$random % 4` ranges over -3..3. The
// locality constraint therefore uses `{$random} % 4` (unsigned, 0..3) so the
// 75% split and the non-negative word offset hold as documented.
module top_cache;
  parameter DRAM_SIZE = 64*1024*1024;   // DRAM model depth in 32-bit words
  reg clk, n_reset;
  // Free-running clock with a period of 10 time units.
  initial clk = 1'b0;
  always #5 clk = ~clk;
  // dram_data backs the DRAM model; dram_data_ref is the checker's reference.
  reg [31:0] dram_data[0:DRAM_SIZE-1];
  reg [31:0] dram_data_ref[0:DRAM_SIZE-1];
  // Waveform dump (these system tasks are simulator-specific).
  initial begin
    $shm_open("./waveform");
    $shm_probe(top_cache,"AS");
  end
  reg cpu_cs;
  reg cpu_we;
  reg [31:0] cpu_addr;
  reg [31:0] cpu_din;
  wire [31:0] cpu_dout;
  wire cpu_nwait;
  // Drive one CPU cycle and block until the cache is ready again.
  //   cs   : 1 = access, 0 = idle cycle (we/addr/din are then driven to X)
  //   we   : 1 = write, 0 = read (din is driven to X on reads)
  //   addr : byte address of the access
  //   din  : write data
  // While the cache is busy all CPU inputs are driven to X.
  task mem_drive (
    input cs,
    input we,
    input [31:0] addr,
    input [31:0] din
  );
    begin
      cpu_cs = cs;
      if(cs == 1'b1) begin
        cpu_we = we;
        cpu_addr = addr;
        if(we == 1'b1) cpu_din = din;
        else cpu_din = 'bx;
      end else begin
        cpu_we = 1'bx;
        cpu_addr = 'bx;
        cpu_din = 'bx;
      end
      while(1) begin
        @(posedge clk);
        #6;
        if(cpu_nwait == 1'b1) begin
          break;
        end else begin
          cpu_cs = 1'bx;
          cpu_we = 1'bx;
          cpu_addr = 'bx;
          cpu_din = 'bx;
        end
      end
    end
  endtask
  // Scratch variables for the random phase.
  reg cs;
  reg we;
  reg [31:0] addr;
  reg [31:0] din;
  reg [31:0] p_addr;                    // address of the previous access
  initial begin
    // Randomize both memories identically, then pulse the active-low reset.
    n_reset = 1'b1;
    for(int i=0;i<DRAM_SIZE;i++) begin
      dram_data[i] = $random;
      dram_data_ref[i] = dram_data[i];
    end
    #3;
    n_reset = 1'b0;
    #20;
    n_reset = 1'b1;
    cpu_cs = 1'b0;
    @(posedge clk);
    @(posedge clk);
    @(posedge clk);
    #6;
    // first miss
    mem_drive(1'b1, 1'b0, 32'h00A37B9C, 'b0);
    // hit
    mem_drive(1'b1, 1'b0, 32'h00A37B98, 'b0);
    // miss on the same cache line
    mem_drive(1'b1, 1'b0, 32'h00A3BB98, 'b0);
    // write hit
    mem_drive(1'b1, 1'b1, 32'h00A3BB90, 32'hFFFFFFFF);
    // miss & write back because of dirty
    mem_drive(1'b1, 1'b0, 32'h00A37B94, 'b0);
    // miss on the same cache line
    mem_drive(1'b1, 1'b0, 32'h00A3BB90, 'b0);
    p_addr = {$random & (DRAM_SIZE-1), 2'b00};
    repeat(10000) begin
      cs = $random % 2;
      we = $random % 2;
      if({$random}%4 > 0) begin         // 75%: stay close to the previous address
        if($random%2 == 0) addr = p_addr + ({$random}%4) * 4;  // move up 0..3 words
        else addr = p_addr - ({$random}%4) * 4;                // move down 0..3 words
        // Out of range (including unsigned wrap below 0): clamp to the last word.
        if(addr >= DRAM_SIZE*4) addr = (DRAM_SIZE-1) * 4;
      end else begin                    // 25%: jump to a new random word address
        addr = {$random & (DRAM_SIZE-1), 2'b00};
      end
      din = $random;
      p_addr = addr;
      mem_drive(cs, we, addr, din);
    end
    @(posedge clk);
    @(posedge clk);
    @(posedge clk);
    @(posedge clk);
    $finish;
  end
  wire dram_cs;
  wire dram_we;
  wire [31:0] dram_addr;
  wire [31:0] dram_din;
  wire [31:0] dram_dout;
  reg dram_nwait;
  // Device under test.
  cache i_cache(
    .clk(clk),
    .n_reset(n_reset),
    .cpu_cs(cpu_cs),
    .cpu_we(cpu_we),
    .cpu_addr(cpu_addr),
    .cpu_din(cpu_din),
    .cpu_dout(cpu_dout),
    .cpu_nwait(cpu_nwait),
    .dram_cs(dram_cs),
    .dram_we(dram_we),
    .dram_addr(dram_addr),
    .dram_din(dram_din),
    .dram_dout(dram_dout),
    .dram_nwait(dram_nwait)
  );
  // DRAM model: each access occupies four cycles (cnt 0..3); the model is
  // ready (dram_nwait high) only while cnt == 0.
  reg [1:0] cnt;
  reg dram_we_d;
  reg [31:0] dram_addr_d;
  reg [31:0] dram_din_d;
  always@(negedge n_reset or posedge clk) begin
    if(n_reset == 1'b0) begin
      cnt <= 0;
    end else begin
      if((dram_cs == 1'b1) || (cnt > 0)) begin
        cnt <= cnt + 1;
      end
      // Latch the request when it is accepted.
      if((dram_cs == 1'b1) && (dram_nwait == 1'b1)) begin
        dram_we_d <= dram_we;
        dram_addr_d <= dram_addr;
        dram_din_d <= dram_din;
      end
      // A latched write is committed in the last cycle of the access.
      if((dram_we_d == 1'b1) && (cnt == 3)) begin
        dram_data[dram_addr_d[31:2]] <= dram_din_d;
      end
    end
  end
  // Read data is driven only while the model is ready; X while busy.
  assign dram_dout = (dram_nwait==1'b1) ? dram_data[dram_addr_d[31:2]] : 'bx;
  assign dram_nwait = (cnt == 0);
  // Checker: on every ready cycle, record the accepted operation and judge
  // the previous one (2'b10 = read -> compare, 2'b11 = write -> update ref).
  reg [31:0] cpu_addr_d;
  reg [31:0] cpu_din_d;
  reg [1:0] cpu_prev_op;
  always@(posedge clk) begin
    if(cpu_nwait == 1'b1) begin
      cpu_prev_op <= {cpu_cs, cpu_we};
      if(cpu_cs == 1'b1) begin
        cpu_addr_d <= cpu_addr;
        cpu_din_d <= cpu_din;
      end
      if(cpu_prev_op == 2'b10) begin
        if(dram_data_ref[cpu_addr_d[31:2]] == cpu_dout) begin
        end else begin
          $display("Error!! addr = %X, dram_data = %X, cpu_dout = %X",
                   cpu_addr_d, dram_data_ref[cpu_addr_d[31:2]], cpu_dout);
          #10; $finish;
        end
      end else if(cpu_prev_op == 2'b11) begin
        dram_data_ref[cpu_addr_d[31:2]] <= cpu_din_d;
      end
    end
  end
endmodule
2. 그 다음 cache에 저장된 주소중 하나에 쓰기를 진행하면(hit), 즉시 cache의 데이터가 수정됩니다.
1. 이후 miss 읽기를 진행하면, dirty bit가 1이기 때문에,
먼저 기존 주소의 dram을 update 하기 위해 쓰기를 진행합니다.
2. 그다음 원래 요청주소에 대해 dram에서 데이터를 읽어옵니다.
- Coverage_top0
직접 만든 케이스들은, 모든 경우의 수를 테스트하지 못합니다.
그래서 coverage score 역시 매우 낮습니다.
4-2. top1 (random generation)
random generation에 의해 1만 가지 case가 동작하고 있습니다.
- Coverage_top1
top0와 달리 점수가 월등하게 오른 것을 볼 수 있습니다.
4-3. top2 (Constrained random generation)
이 경우 역시 top1과 마찬가지로 waveform은 의미가 없으므로, 바로 점수를 보도록 하겠습니다.
- Coverage_top2
조금이지만 score가 더 올라간 것을 확인할 수 있습니다.
위처럼 직접 찾아 확인해 볼 수 있고, 다시 방법을 찾아 검증할 수 있습니다.
현재는 Cover 되지 않은 부분 중에 문제가 되는 경우는 없습니다.
따라서 여기까지만 테스트하도록 합니다.
※참고: 직접 확인하는 방법 말고, functional coverage를 사용하는 방법도 있습니다.
// Illustrative functional-coverage skeleton (the `...` line is a placeholder
// in the original, so this snippet is not compilable as-is). It keeps
// generating stimulus until the queried coverage reaches 99.5.
program automatic test;
covergroup fcov @(port_event); // declare a coverage group sampled on port_event
coverpoint sa;
coverpoint da;
endgroup: fcov
bit[3:0] sa, da;
event port_event;
real coverage = 0.0;
fcov port_fc = new(); // instantiate the coverage group (creates the coverage object)
initial while (coverage < 99.5) begin
...
sa = pkt_ref.sa;
da = pkt_ref.da;
->port_event; // triggers sampling of the port_fc coverage group
// port_fc.sample(); // alternative form of updating of bins
coverage = $get_coverage(); // overall coverage , coverage result query
// coverage = port_fc.get_inst_coverage(); // instance coverage
end
endprogram: test
// UART receiver.
// clk_cnt counts clock ticks within one bit slot (it wraps at 16) and rx_cnt
// counts bit slots within a frame (1..11, 0 when no frame is in progress).
// During each slot rx_score counts the ticks on which rx was high; at the end
// of the slot that score decides the sampled bit, which is shifted into
// rx_received. The data byte is copied to rx_register near the end of the frame.
// clk is presumably a 16x oversampling clock - TODO confirm against the
// clock divider that feeds this module.
module uart_rx(
input clk,
input rx,
input rst,
output reg [7:0]rx_register,
output reg rx_end,
output reg [4:0]clk_cnt,
output reg [3:0]rx_cnt
);
// Receiver FSM states.
localparam RX_IDLE = 2'b00;
localparam RX_START = 2'b01;
localparam RX_STOP = 2'b10;
reg [1:0]current_state ;
reg [1:0]next_state;
reg [15:0]rx_check;
// Tick counter within a bit slot. Reaching 16 restarts it at 1 for the next
// slot, or at 0 when the frame is finished (rx_cnt == 11). Otherwise it is
// held at 0 while next_state is RX_IDLE and increments in the other states.
always @(posedge clk or posedge rst) begin
if (rst)
clk_cnt <= 0;
else if (clk_cnt == 16 && rx_cnt != 11)
clk_cnt <= 1;
else if (clk_cnt == 16 && rx_cnt == 11)
clk_cnt <= 0;
else
case (next_state)
RX_IDLE : clk_cnt <= 0;
RX_START : clk_cnt <= clk_cnt+1;
RX_STOP : clk_cnt <= clk_cnt+1;
default : clk_cnt <= clk_cnt;
endcase
end
// Bit-slot counter. Set to 1 when rx goes low while idle (start of frame),
// incremented at the end of every slot, cleared at the end of slot 11.
always @(posedge clk or posedge rst) begin
if (rst)
rx_cnt <= 0;
else if (rx_cnt == 11 && clk_cnt == 16)
rx_cnt <= 0;
else if (current_state == RX_IDLE && rx == 0)
rx_cnt <= 1;
else if (clk_cnt == 16)
rx_cnt <= rx_cnt+1;
else
rx_cnt <= rx_cnt;
end
// current_state follows next_state on the rising clock edge.
always @(posedge clk or posedge rst) begin
if (rst)
current_state <= RX_IDLE;
else
current_state <= next_state;
end
// next_state is itself a register, updated on the FALLING clock edge:
// IDLE -> START when rx is low, START -> STOP when rx_cnt reaches 11,
// STOP -> IDLE when rx_cnt has returned to 0.
always @(negedge clk or posedge rst) begin
if (rst)
next_state <= RX_IDLE;
else
case(current_state)
RX_IDLE : if (rx == 0)
next_state <= RX_START;
else
next_state <= next_state;
RX_START : if (rx_cnt == 11)
next_state <= RX_STOP;
else
next_state <= next_state;
RX_STOP : if (rx_cnt == 0)
next_state <= RX_IDLE;
else
next_state <= next_state;
default : next_state <= next_state;
endcase
end
reg [4:0]rx_score;
// Shift register of raw rx samples. It is not read anywhere in this module.
// NOTE(review): the right-hand side is 17 bits wide, so the top bit
// (rx_check[15]) is truncated on assignment; the effect is a plain left shift.
always @(posedge clk or posedge rst) begin
if (rst)
rx_check <= 16'b1111_1111_1111_1111;
else
rx_check <= {rx_check[15:0], rx};
end
// Number of ticks in the current bit slot on which rx was high. Cleared
// when no frame is active, at each slot boundary, and during slot 11.
always @(posedge clk or posedge rst) begin
if (rst)
rx_score <= 0;
else if (rx_cnt == 0)
rx_score <= 0;
else if (clk_cnt ==16)
rx_score <= 0;
else if (rx_cnt == 11)
rx_score <= 0;
else if (rx == 1)
rx_score <= rx_score+1;
else
rx_score <= rx_score;
end
reg rx_sampling;
// Bit decision at the end of each slot: more than 8 high ticks -> 1, fewer
// than 8 -> 0. A score of exactly 8 keeps the previous decision.
always @(posedge clk or posedge rst) begin
if (rst)
rx_sampling <= 1;
else if (rx_cnt == 0)
rx_sampling <= 1;
else if (clk_cnt == 16 && rx_score > 8 && rx_cnt != 11)
rx_sampling <= 1;
else if (clk_cnt == 16 && rx_score < 8 && rx_cnt != 11)
rx_sampling <= 0;
else
rx_sampling <= rx_sampling;
end
reg [9:0]rx_received;
// Shift the decided bit in at the first tick of each slot after slot 1.
always @(posedge clk or posedge rst) begin
if (rst)
rx_received <= 10'b00_0000_0000;
else if (clk_cnt == 1 && rx_cnt >1)
rx_received <= {rx_received[8:0], rx_sampling};
else
rx_received <= rx_received;
end
// Latch bits [8:1] of the shift register as the received byte at tick 2 of
// slot 11.
always @(posedge clk or posedge rst) begin
if (rst)
rx_register <= 8'b0000_0000;
else if (rx_cnt == 11 && clk_cnt == 2)
rx_register <= rx_received[8:1];
else
rx_register <= rx_register;
end
// rx_end is high while next_state is RX_IDLE and low during a frame.
always @(posedge clk or posedge rst) begin
if (rst)
rx_end <= 1;
else
case (next_state)
RX_IDLE : rx_end <= 1;
RX_START : rx_end <= 0;
RX_STOP : rx_end <= 0;
default : rx_end <= rx_end;
endcase
end
endmodule
이전 포스팅에서 언급하였듯이,
코드를 최적화하지 않고 직관적으로 코딩한 후 그대로 두었습니다.
- UART_Clock
`timescale 1ns / 1ps
// Clock divider.
// A counter runs from 0 up to `max`; each time it reaches `max` it restarts
// at 0 and clk_out toggles, so clk_out has a period of 2*(max+1) clk_in
// cycles. rst is an asynchronous, active-high reset that clears both.
module uart_clock(
    input clk_in,
    input rst,
    output reg clk_out
);
    parameter max = 1;

    reg [10:0] clk_cnt;

    always @(posedge clk_in or posedge rst) begin
        if (rst) begin
            clk_cnt <= 0;
            clk_out <= 0;
        end
        else if (clk_cnt == max) begin
            // Terminal count: restart the counter and flip the output.
            clk_cnt <= 0;
            clk_out <= !clk_out;
        end
        else begin
            clk_cnt <= clk_cnt + 1;
        end
    end
endmodule
- LCD
`timescale 1ns / 1ps
// Character-LCD driver fed by the UART receiver.
// dspdata holds a 38-entry byte sequence that is replayed to the LCD forever:
// entries 0..3 are set-up commands, entry 4 and entry 21 are address commands
// for line 1 and line 2, and entries 5..20 / 22..37 are the 2 x 16 characters.
// Each received byte (bit-reversed) is stored at dspdata[lcd_cnt], and lcd_cnt
// then advances through the character slots, skipping entry 21.
// NOTE(review): 8'h80 / 8'hC0 are presumably the HD44780 "set DDRAM address"
// commands - confirm against the LCD controller datasheet.
module lcd(
    input clk,
    input rx_clk,
    input rst,
    input [7:0]rx_register,
    input [3:0]rx_cnt,
    input [4:0]clk_cnt,
    output reg lcd_e,
    output reg lcd_rs,
    output reg [7:0]data,
    output reg [6:0]lcd_cnt,
    output reg [7:0]lcd_register
);
    reg [7:0] dspdata [0:37];
    integer i = 0;      // tick counter that times one LCD enable pulse
    integer j = 0;      // index of the dspdata entry currently being sent
    integer k;          // loop index, used only for the reset fill

    // Capture the received byte with its bit order reversed at the end of
    // bit slot 11 of a UART frame.
    always @(posedge rx_clk or posedge rst) begin
        if (rst)
            lcd_register <= 8'h80;
        else if (rx_cnt == 11 && clk_cnt == 16)
            lcd_register <= {rx_register[0], rx_register[1], rx_register[2],
                             rx_register[3], rx_register[4], rx_register[5],
                             rx_register[6], rx_register[7]};
    end

    // Write pointer into dspdata, advanced once per received byte:
    // 20 -> 22 skips the line-2 address entry, 37 -> 5 wraps to line 1.
    always @(posedge rx_clk or posedge rst) begin
        if (rst)
            lcd_cnt <= 4;
        else if (rx_cnt == 11 && clk_cnt == 16) begin
            case (lcd_cnt)
                7'd20:   lcd_cnt <= 22;
                7'd37:   lcd_cnt <= 5;
                default: lcd_cnt <= lcd_cnt + 1;
            endcase
        end
    end

    // Display buffer. Reset loads the command bytes and fills every character
    // slot with ASCII '0'; afterwards the latest byte is stored at lcd_cnt
    // whenever no frame is in progress (rx_cnt == 0).
    always @(posedge clk or posedge rst) begin
        if (rst) begin
            dspdata[0] <= 8'b00111000; // function set: 8-bit, 2 lines, 5x7 dots
            dspdata[1] <= 8'b00001100; // display on, cursor off, blink off
            dspdata[2] <= 8'b00000110; // entry mode: increment, no display shift
            dspdata[3] <= 8'b00000001; // clear display
            dspdata[4] <= 8'h80;       // address command: start of line 1
            for (k = 5; k <= 20; k = k + 1)
                dspdata[k] <= 8'b00110000; // '0'
            dspdata[21] <= 8'hC0;      // address command: start of line 2
            for (k = 22; k <= 37; k = k + 1)
                dspdata[k] <= 8'b00110000; // '0'
        end
        else if (rx_cnt == 0)
            dspdata[lcd_cnt] <= lcd_register;
    end

    // Enable-pulse timing: lcd_e is high (and data presents dspdata[j]) while
    // i counts 0..1000000, low while i counts up to 2000000, then i restarts.
    always @(posedge clk or posedge rst) begin
        if (rst) begin
            i <= 0;
            lcd_e <= 0;
            data <= 8'b0;
        end
        else if (i <= 1000000) begin
            i <= i + 1;
            lcd_e <= 1;
            data <= dspdata[j];
        end
        else if (i < 2000000) begin
            i <= i + 1;
            lcd_e <= 0;
        end
        else if (i == 2000000) begin
            i <= 1'b0;
        end
    end

    // Entry index: advances once per enable period; after the last entry it
    // returns to entry 4 so only the address commands and characters repeat.
    always @(posedge clk or posedge rst) begin
        if (rst)
            j <= 0;
        else if (j == 38)
            j <= 4;
        else if (i == 2000000)
            j <= j + 1;
    end

    // Register select: 0 for the command entries (0..4 and 21), 1 for the
    // character entries; unchanged while j is 38.
    always @(posedge clk or posedge rst) begin
        if (rst)
            lcd_rs <= 0;
        else if (j <= 4 || j == 21)
            lcd_rs <= 0;
        else if (j < 38)
            lcd_rs <= 1;
    end
endmodule
- Nucleo Board 제어용 코드
/* Includes ------------------------------------------------------------------*/
#include "main.h"
#include "usart.h"
#include "gpio.h"
/* Private includes ----------------------------------------------------------*/
/* USER CODE BEGIN Includes */
#include "stdio.h"
#include "string.h"
/* USER CODE END Includes */
/* USER CODE BEGIN PV */
/* Single-byte receive buffer handed to HAL_UART_Receive_IT(). */
uint8_t rx_data;
/*
 * LED pattern selector (0..3). It is written from the UART receive-complete
 * callback and polled by the main loop, so it is volatile to force a fresh
 * read on every loop iteration.
 */
volatile int led_mode = 0;
/* USER CODE END PV */
/*
 * Firmware entry point: initialises the HAL, system clock, GPIO, USART3 and the
 * NVIC, arms a one-byte interrupt-driven UART receive, then loops forever
 * writing the port B output pattern selected by led_mode.
 */
int main(void)
{
  /* USER CODE BEGIN 1 */
  /* GPIOB->ODR value for each led_mode (index 0..3). */
  static const uint32_t led_pattern[4] = { 0x0000, 0x0001, 0x0080, 0x4000 };
  /* USER CODE END 1 */
  /* MCU Configuration--------------------------------------------------------*/
  /* Reset of all peripherals, Initializes the Flash interface and the Systick. */
  HAL_Init();
  /* USER CODE BEGIN Init */
  /* USER CODE END Init */
  /* Configure the system clock */
  SystemClock_Config();
  /* USER CODE BEGIN SysInit */
  /* USER CODE END SysInit */
  /* Initialize all configured peripherals */
  MX_GPIO_Init();
  MX_USART3_UART_Init();
  /* Initialize interrupts */
  MX_NVIC_Init();
  /* USER CODE BEGIN 2 */
  /* Arm reception of the first byte; the receive callback re-arms it afterwards. */
  HAL_UART_Receive_IT(&huart3, (uint8_t *)&rx_data, 1);
  /* USER CODE END 2 */
  /* Infinite loop */
  /* USER CODE BEGIN WHILE */
  while (1)
  {
    /* Read the mode once per pass; values outside 0..3 leave the port untouched. */
    const int mode = led_mode;
    if (mode >= 0 && mode <= 3)
    {
      GPIOB->ODR = led_pattern[mode];
    }
    /* USER CODE END WHILE */
    /* USER CODE BEGIN 3 */
  }
  /* USER CODE END 3 */
}
/* USER CODE BEGIN 4 */
/*
 * UART receive-complete callback. For USART3 only: re-arms the one-byte
 * interrupt receive, echoes the received byte back (blocking, 500 ms timeout),
 * and maps the characters '1'..'3' to led_mode 1..3; any other byte selects 0.
 */
void HAL_UART_RxCpltCallback(UART_HandleTypeDef *huart)
{
  if (huart->Instance != USART3)
  {
    return;
  }

  HAL_UART_Receive_IT(&huart3, (uint8_t *)&rx_data, 1);
  HAL_UART_Transmit(&huart3, (uint8_t *)&rx_data, 1, 500);

  const uint8_t received = rx_data;
  led_mode = (received >= '1' && received <= '3') ? (received - '0') : 0;
}
/* USER CODE END 4 */
1. TX, RX 2개의 핀을 사용합니다. 2. 10비트 데이터를 교환하며 구성은 다음과 같습니다. - [0] 비트는 start bit (0) - [1~8] 비트는 8비트 data - [9] 비트는 stop bit (1) 3. 16배로 오버샘플링하며, 중앙 샘플링 방법을 채택하도록 합니다. - 간단한 예시이므로 노이즈가 심하지 않을 것임을 알고 있어 채택하였습니다. - 7, 8, 9번째 샘플 중 2개 이상이 1이라면, 해당 비트를 1로 인정합니다.
`timescale 1ns / 1ps
// Clock divider: o_clk toggles every (max + 1) cycles of i_clk, so its period
// is 2 * (max + 1) input cycles.
//   i_clk : input clock
//   i_rst : asynchronous, active-high reset (clears the counter and o_clk)
//   o_clk : divided clock output
//   max   : terminal count of the divider (overridable parameter)
module uart_clock(
    input      i_clk,
    input      i_rst,
    output reg o_clk
);
    reg [10:0] r_clk_cnt;
    parameter max = 1;

    // Free-running counter 0..max.
    always @(posedge i_clk or posedge i_rst) begin
        if (i_rst) begin
            r_clk_cnt <= 0;
        end else if (r_clk_cnt == max) begin
            r_clk_cnt <= 0;
        end else
            r_clk_cnt <= r_clk_cnt + 1;
    end

    // Toggle the output each time the counter reaches its terminal count.
    always @(posedge i_clk or posedge i_rst) begin
        if (i_rst) begin
            o_clk <= 0;   // reset the declared output register
        end else if (r_clk_cnt == max) begin
            o_clk <= !o_clk;
        end else
            o_clk <= o_clk;
    end
endmodule
- Uart_TX
`timescale 1ns / 1ps
// UART transmitter that sends i_rx_register as start bit (0), eight data bits
// LSB first, and stop bit (1). One bit is emitted per i_clk cycle.
//   i_clk           : bit clock
//   i_rst           : asynchronous, active-high reset
//   i_rx_register   : byte to transmit
//   i_rx_end        : sampled while idle; 1 starts a new frame
//   o_tx            : serial output, idles high
//   o_send_cnt      : running count of bits issued in the current frame
//   o_current_state / o_next_state : FSM registers, exposed for observation
// The next-state register is updated on the falling edge of i_clk; everything
// else is updated on the rising edge.
module uart_tx(
    input            i_clk,
    input            i_rst,
    input      [7:0] i_rx_register,
    input            i_rx_end,
    output reg       o_tx,
    output reg [3:0] o_send_cnt,
    output reg [1:0] o_current_state,
    output reg [1:0] o_next_state
);
    localparam S_IDLE  = 2'b00;
    localparam S_START = 2'b01;
    localparam S_DATA  = 2'b10;
    localparam S_STOP  = 2'b11;

    // Next-state register (falling edge). Unlisted conditions hold the value.
    always @(negedge i_clk or posedge i_rst) begin
        if (i_rst) begin
            o_next_state <= S_IDLE;
        end
        else begin
            case (o_current_state)
                S_IDLE  : if (i_rx_end == 1)   o_next_state <= S_START;
                S_START :                      o_next_state <= S_DATA;
                S_DATA  : if (o_send_cnt == 8) o_next_state <= S_STOP;
                S_STOP  :                      o_next_state <= S_IDLE;
            endcase
        end
    end

    // Current-state register (rising edge) follows the next-state register.
    always @(posedge i_clk or posedge i_rst) begin
        if (i_rst) begin
            o_current_state <= S_IDLE;
        end
        else begin
            o_current_state <= o_next_state;
        end
    end

    // Serial output, selected by the upcoming state.
    always @(posedge i_clk or posedge i_rst) begin
        if (i_rst) begin
            o_tx <= 1;
        end
        else begin
            case (o_next_state)
                S_IDLE, S_STOP : o_tx <= 1;
                S_START        : o_tx <= 0;
                S_DATA         : o_tx <= i_rx_register[o_send_cnt];
            endcase
        end
    end

    // Bit counter: cleared while idle/start, incremented in data/stop.
    always @(posedge i_clk or posedge i_rst) begin
        if (i_rst) begin
            o_send_cnt <= 0;
        end
        else if (o_send_cnt == 10) begin
            o_send_cnt <= 0;
        end
        else begin
            case (o_next_state)
                S_IDLE, S_START : o_send_cnt <= 0;
                S_DATA, S_STOP  : o_send_cnt <= o_send_cnt + 1;
            endcase
        end
    end
endmodule
TX 부분은 들어온 RX 신호를 그대로 다시 전송할 수 있도록 간단하게 구현하였습니다.
- UART_RX
// UART receiver with 16x oversampling.
//   i_clk         : oversampling clock (16 cycles per bit)
//   i_rx          : serial input, idles high
//   i_rst         : asynchronous, active-high reset
//   o_rx_register : last received byte
//   o_rx_end      : 1 while the next-state register is RX_IDLE, 0 while receiving
//   o_clk_cnt     : oversampling tick counter within the current bit slot
//   o_rx_cnt      : bit-slot counter (0 when idle, 1..11 during a frame)
// The next-state register is updated on the falling edge of i_clk; everything
// else is updated on the rising edge.
module uart_rx(
    input            i_clk,
    input            i_rx,
    input            i_rst,
    output reg [7:0] o_rx_register,
    output reg       o_rx_end,
    output reg [4:0] o_clk_cnt,
    output reg [3:0] o_rx_cnt
);
    localparam RX_IDLE  = 2'b00;
    localparam RX_START = 2'b01;
    localparam RX_STOP  = 2'b10;

    reg [1:0]  r_current_state;
    reg [1:0]  r_next_state;
    reg [15:0] r_rx_check;      // shift history of i_rx; not read elsewhere in this module
    reg [4:0]  r_rx_score;      // number of high samples seen in the current bit slot
    reg        r_rx_sampling;   // decided value of the most recent bit slot
    reg [9:0]  r_rx_received;   // shift register of decided bits

    // Oversampling tick counter: wraps 16 -> 1 inside a frame and 16 -> 0 once
    // the bit-slot counter has reached 11; forced to 0 while idle.
    always @(posedge i_clk or posedge i_rst) begin
        if (i_rst) begin
            o_clk_cnt <= 0;
        end else if (o_clk_cnt == 16 && o_rx_cnt != 11) begin
            o_clk_cnt <= 1;
        end else if (o_clk_cnt == 16 && o_rx_cnt == 11) begin
            o_clk_cnt <= 0;
        end else begin
            case (r_next_state)
                RX_IDLE  : o_clk_cnt <= 0;
                RX_START : o_clk_cnt <= o_clk_cnt + 1;
                RX_STOP  : o_clk_cnt <= o_clk_cnt + 1;
                default  : o_clk_cnt <= o_clk_cnt;
            endcase
        end
    end

    // Bit-slot counter: set to 1 when a low level is seen while idle, advanced
    // every 16 ticks, cleared at the end of slot 11.
    always @(posedge i_clk or posedge i_rst) begin
        if (i_rst) begin
            o_rx_cnt <= 0;
        end else if (o_rx_cnt == 11 && o_clk_cnt == 16) begin
            o_rx_cnt <= 0;
        end else if (r_current_state == RX_IDLE && i_rx == 0) begin
            o_rx_cnt <= 1;
        end else if (o_clk_cnt == 16) begin
            o_rx_cnt <= o_rx_cnt + 1;
        end else begin
            o_rx_cnt <= o_rx_cnt;
        end
    end

    // FSM state register.
    always @(posedge i_clk or posedge i_rst) begin
        if (i_rst) begin
            r_current_state <= RX_IDLE;
        end else begin
            r_current_state <= r_next_state;
        end
    end

    // FSM next-state register (falling edge).
    always @(negedge i_clk or posedge i_rst) begin
        if (i_rst) begin
            r_next_state <= RX_IDLE;
        end else begin
            case (r_current_state)
                RX_IDLE  : if (i_rx == 0)
                               r_next_state <= RX_START;
                           else
                               r_next_state <= r_next_state;
                RX_START : if (o_rx_cnt == 11)
                               r_next_state <= RX_STOP;
                           else
                               r_next_state <= r_next_state;
                RX_STOP  : if (o_rx_cnt == 0)
                               r_next_state <= RX_IDLE;
                           else
                               r_next_state <= r_next_state;
                default  : r_next_state <= r_next_state;
            endcase
        end
    end

    // Raw sample history of the RX line (newest sample in bit 0).
    always @(posedge i_clk or posedge i_rst) begin
        if (i_rst) begin
            r_rx_check <= 16'b1111_1111_1111_1111;
        end else begin
            r_rx_check <= {r_rx_check[14:0], i_rx};
        end
    end

    // Count the high samples within a bit slot; cleared between slots, while
    // idle, and during slot 11.
    always @(posedge i_clk or posedge i_rst) begin
        if (i_rst) begin
            r_rx_score <= 0;
        end else if (o_rx_cnt == 0) begin
            r_rx_score <= 0;
        end else if (o_clk_cnt == 16) begin
            r_rx_score <= 0;
        end else if (o_rx_cnt == 11) begin
            r_rx_score <= 0;
        end else if (i_rx == 1) begin
            r_rx_score <= r_rx_score + 1;
        end else begin
            r_rx_score <= r_rx_score;
        end
    end

    // Decide the bit value at the end of each slot: more than 8 high samples
    // -> 1, fewer than 8 -> 0.
    // NOTE(review): a score of exactly 8 keeps the previous decision — confirm
    // this tie behaviour is intended.
    always @(posedge i_clk or posedge i_rst) begin
        if (i_rst) begin
            r_rx_sampling <= 1;
        end else if (o_rx_cnt == 0) begin
            r_rx_sampling <= 1;
        end else if (o_clk_cnt == 16 && r_rx_score > 8 && o_rx_cnt != 11) begin
            r_rx_sampling <= 1;
        end else if (o_clk_cnt == 16 && r_rx_score < 8 && o_rx_cnt != 11) begin
            r_rx_sampling <= 0;
        end else begin
            r_rx_sampling <= r_rx_sampling;
        end
    end

    // Shift each decided bit into the frame register on the first tick of the
    // following slot.
    always @(posedge i_clk or posedge i_rst) begin
        if (i_rst) begin
            r_rx_received <= 10'b0;
        end else if (o_clk_cnt == 1 && o_rx_cnt > 1) begin
            r_rx_received <= {r_rx_received[8:0], r_rx_sampling};
        end
    end

    // Publish the received byte during slot 11; otherwise hold the output.
    always @(posedge i_clk or posedge i_rst) begin
        if (i_rst) begin
            o_rx_register <= 8'b0;
        end else if (o_rx_cnt == 11 && o_clk_cnt == 2) begin
            o_rx_register <= r_rx_received[8:1];
        end
        else
            o_rx_register <= o_rx_register;
    end

    // Idle indicator derived from the next-state register.
    always @(posedge i_clk or posedge i_rst) begin
        if (i_rst) begin
            o_rx_end <= 1;
        end else begin
            case (r_next_state)
                RX_IDLE  : o_rx_end <= 1;
                RX_START : o_rx_end <= 0;
                RX_STOP  : o_rx_end <= 0;
                default  : o_rx_end <= o_rx_end;
            endcase
        end
    end
endmodule
여기서 o_rx_end 신호는 의미상으로만 존재할뿐, 사용하지 않습니다.
Data Update Timing 등은 설계하기 나름이며, 최대한 간단한 형태로 설계하였습니다.
- tb_rx
`timescale 1ns / 1ps
// Testbench for uart_rx: drives one UART frame (start bit, eight data bits LSB
// first, stop bit) at 16 clock cycles per bit and compares the received byte
// with the transmitted one.
module tb_rx;
    localparam CLK_HALF_PERIOD = 5;    // 10 ns period -> 100 MHz
    localparam BIT_TIME        = 160;  // 16 clock cycles per UART bit

    reg        clk;
    reg        rx;
    reg        rst;
    wire [7:0] rx_register;
    wire       rx_end;
    reg  [7:0] tx_data;                // byte to send
    integer    bit_idx;

    // Device under test.
    uart_rx uut (
        .i_clk(clk),
        .i_rx(rx),
        .i_rst(rst),
        .o_rx_register(rx_register),
        .o_rx_end(rx_end)
    );

    // Clock generation (100 MHz).
    initial clk = 0;
    always #(CLK_HALF_PERIOD) clk = ~clk;

    // Hold one level on the RX line for a full bit time.
    task drive_rx;
        input level;
        begin
            rx = level;
            #(BIT_TIME);
        end
    endtask

    // Test scenario.
    initial begin
        // Initialisation: assert reset, RX line idles high.
        rst     = 1;
        rx      = 1;
        tx_data = 8'b0101_0111;
        #100 rst = 0;                  // release reset

        // Frame: start bit, data bits (LSB first), stop bit.
        drive_rx(1'b0);
        for (bit_idx = 0; bit_idx < 8; bit_idx = bit_idx + 1) begin
            drive_rx(tx_data[bit_idx]);
        end
        drive_rx(1'b1);

        // Wait, then check the result.
        #200;
        if (rx_register == tx_data) begin
            $display("Test Passed! Received Data: %b", rx_register);
        end else begin
            $display("Test Failed! Received Data: %b, Expected Data: %b", rx_register, tx_data);
        end

        // End of simulation.
        #100;
        $finish;
    end
endmodule
앞서 언급하였듯이 RX 위주로 확인할 것이기 때문에,
RX의 테스트벤치만을 간단하게 작성하였습니다.
4. 시뮬레이션 결과
동작을 확인해보기 위해, 또 모범적인 waveform을 마련해 놓기 위해 function simulation을 합니다.
Tool은 Vivado를 사용하였습니다.
사실, 앞서 언급한 바와 같이 rx_end 신호는 의미상으로만 존재합니다.
제 구현에서는 rx_end 신호가 필요하지 않습니다.
만약, 필요하다면 처음 rx_end 신호가 Low로 내려가는 순간은
다른 신호와 마찬가지로 샘플링된 결과에 의해 작동해야 합니다.
처음에 그렸던 Timing Diagram과 비슷한 waveform을 만들고자 의도한 바입니다.
// PWM generator for a motor driver.
//   i_clk    : system clock
//   i_rst_H  : asynchronous, active-high reset
//   i_duty   : duty setting; o_out is high while the PWM step counter is below it
//   o_out    : PWM output
//   o_nsleep : 0 during reset, 1 afterwards
// r_clk2 toggles each time r_cnt reaches 500,000 and clocks the step counter
// r_duty, which runs 0..16 and wraps.
module motor(
    input            i_clk,
    input            i_rst_H,
    input      [3:0] i_duty,
    output reg       o_out,
    output reg       o_nsleep
);
    localparam DIV_TERMINAL = 500_000;  // r_cnt value at which r_clk2 toggles
    localparam STEP_WRAP    = 16;       // r_duty wraps once it reaches this value

    reg [18:0] r_cnt;
    reg [4:0]  r_duty;
    reg        r_clk2;

    // Clock divider producing r_clk2.
    always @(posedge i_clk or posedge i_rst_H) begin
        if (i_rst_H) begin
            r_clk2 <= 0;
            r_cnt  <= 0;
        end
        else if (r_cnt == DIV_TERMINAL) begin
            r_cnt  <= 0;
            r_clk2 <= ~r_clk2;
        end
        else begin
            r_cnt <= r_cnt + 1;
        end
    end

    // PWM step counter, advanced on rising edges of the divided clock.
    always @(posedge r_clk2 or posedge i_rst_H) begin
        if (i_rst_H) begin
            r_duty <= 0;
        end
        else if (r_duty >= STEP_WRAP) begin
            r_duty <= 0;
        end
        else begin
            r_duty <= r_duty + 1;
        end
    end

    // Comparator: output high while the step counter is below the duty setting.
    always @(posedge i_clk or posedge i_rst_H) begin
        if (i_rst_H) begin
            o_out <= 0;
        end
        else if (r_duty < i_duty) begin
            o_out <= 1;
        end
        else begin
            o_out <= 0;
        end
    end

    // Driver sleep control: released on the first clock after reset.
    always @(posedge i_clk or posedge i_rst_H) begin
        if (i_rst_H) begin
            o_nsleep <= 0;
        end
        else begin
            o_nsleep <= 1;
        end
    end
endmodule
`timescale 1ns / 1ps
`define IDLE 2'b00
`define READ 2'b10
`define WRITE 2'b11
// SPI slave with two 16-bit registers (D0, D1) and a 4-bit duty output.
// MOSI is shifted in on the falling edge of i_SCLK, MSB first, as one 32-bit
// frame per chip select:
//   bits 1..2  : slave ID, compared with i_ID
//   bit  3     : 1 = read, 0 = write
//   bit  4     : register select (0 = D0, 1 = D1)
//   bits 17..32: 16-bit data word
// i_CS high asynchronously resets the frame logic; i_rst_H asynchronously
// clears D0, D1 and o_duty. i_clk is not used inside this module.
module SPI_Slave(
    // Internal input
    input i_clk,
    input i_rst_H,
    input [1:0] i_ID,
    // SPI Interface
    input i_SCLK,
    input i_CS,
    input i_MOSI,
    output reg o_MISO,
    // Additional output for duty cycle
    output reg [3:0] o_duty
);
    reg [5:0]  bitcnt = 6'd0;          // number of bits shifted in so far
    reg [1:0]  current_state = 2'b00;
    reg [1:0]  next_state = 2'b00;
    reg [15:0] D0;
    reg [15:0] D1;
    reg        addr;                   // selected register: 0 = D0, 1 = D1
    reg [31:0] data_RX;                // receive shift register
    reg        id_ok;                  // set once the received ID matched i_ID

    // FSM: leaves IDLE once the ID matched and the read/write bit has been
    // shifted in. current_state trails next_state by one falling edge.
    always @(negedge i_SCLK or posedge i_CS) begin
        if (i_CS) begin
            current_state <= `IDLE;
            next_state    <= `IDLE;
        end
        else begin
            current_state <= next_state;
            case (current_state)
                `IDLE : begin
                    if (bitcnt == 6'd3 && id_ok == 1 && data_RX[0] == 1)
                        next_state <= `READ;
                    else if (bitcnt == 6'd3 && id_ok == 1 && data_RX[0] == 0)
                        next_state <= `WRITE;
                    else
                        next_state <= next_state;
                end
                `READ : begin
                    if (bitcnt == 6'd0)
                        next_state <= `IDLE;
                    else
                        next_state <= next_state;
                end
                `WRITE : begin
                    if (bitcnt == 6'd0)
                        next_state <= `IDLE;
                    else
                        next_state <= next_state;
                end
                default : next_state <= next_state; // no cases. just for synthesis
            endcase
        end
    end

    // ID check after the first two bits have been received.
    always @(posedge i_SCLK or posedge i_CS) begin
        if (i_CS) begin
            id_ok <= 0;
        end
        else if (bitcnt == 6'd2 && data_RX[1:0] == i_ID) begin
            id_ok <= 1;
        end
        else
            id_ok <= id_ok;
    end

    // Receive shift register and bit counter.
    always @(negedge i_SCLK or posedge i_CS) begin
        if (i_CS) begin
            bitcnt  <= 6'd0;
            data_RX <= 32'd0;
        end
        else begin
            bitcnt  <= bitcnt + 6'd1;
            data_RX <= {data_RX[30:0], i_MOSI};
        end
    end

    // Latch the register-select bit (fourth received bit).
    always @(negedge i_SCLK or posedge i_CS) begin
        if (i_CS) begin
            addr <= 1'd0;
        end
        else if (current_state == `IDLE && bitcnt == 6'd4) begin
            addr <= data_RX[0];
        end
        else
            addr <= addr;
    end

    // Write: store the 16-bit data word once all 32 bits have been received.
    always @(posedge i_SCLK or posedge i_rst_H) begin
        if (i_rst_H) begin
            D0 <= 16'd0;
            D1 <= 16'd0;
        end
        else if (current_state == `WRITE && addr == 1'b0 && bitcnt == 6'd32) begin
            D0 <= data_RX[15:0];
        end
        else if (current_state == `WRITE && addr == 1'b1 && bitcnt == 6'd32) begin
            D1 <= data_RX[15:0];
        end
        else begin
            D0 <= D0;
            D1 <= D1;
        end
    end

    // Duty output: lower 4 bits of the word being written. It is taken from
    // data_RX rather than D0/D1 because those registers are loaded on this
    // same edge with nonblocking assignments and would still read as their
    // previous contents here.
    always @(posedge i_SCLK or posedge i_rst_H) begin
        if (i_rst_H)
            o_duty <= 4'd0;
        else if (current_state == `WRITE && bitcnt == 6'd32)
            o_duty <= data_RX[3:0];
        else
            o_duty <= o_duty;
    end

    // Read: while bitcnt is 16..31, drive the selected register MSB first
    // (bitcnt 16 -> bit 15, ..., bitcnt 31 -> bit 0); otherwise drive 0.
    always @(posedge i_SCLK) begin
        if (current_state == `READ && bitcnt[5:4] == 2'b01) begin
            if (addr == 1'b0)
                o_MISO <= D0[4'd15 - bitcnt[3:0]];
            else if (addr == 1'b1)
                o_MISO <= D1[4'd15 - bitcnt[3:0]];
            else
                o_MISO <= 1'b0;
        end
        else
            o_MISO <= 1'b0;
    end
endmodule
기존 코드에서, 입력으로 받은 데이터중 하위 4비트 데이터를 duty로 전달하였습니다.
motor 제어 부분은 간단하므로, 추가로 설명하지 않았습니다.
- SPI_Master에 사용할 Arduino 코드
#include <SPI.h>
// Steps of the serial prompt sequence handled in loop().
enum InputState {
WAITING_FOR_MODE,
WAITING_FOR_ID, // waiting for the slave ID to be entered
WAITING_FOR_UPPER_LOWER,
WAITING_FOR_ADDRESS,
WAITING_FOR_DATA
};
InputState currentState = WAITING_FOR_MODE;
bool writeMode;
bool upperData; // fourth bit of the frame (upper/lower)
bool address;
uint16_t addr; // address field
uint16_t data; // data field
uint8_t id; // slave ID value (2 bits)
const int CS_PIN = 10; // SS (chip select) pin; fixed, cannot be changed
const int SCLK_PIN = 13; // SCLK pin
const int ID_PIN_1 = 40; // ID pin 1
const int ID_PIN_2 = 42; // ID pin 2
// One-time initialisation: opens the serial port, configures SPI (mode 2),
// sets up the chip-select, clock and ID pins, and prints the first prompt.
void setup() {
Serial.begin(9600); // serial baud rate
SPI.begin(); // initialise SPI
SPI.setClockDivider(SPI_CLOCK_DIV2); // set the clock speed
SPI.setDataMode(SPI_MODE2); // set the SPI mode (mode 2)
// SPI pin modes
pinMode(CS_PIN, OUTPUT); // SS pin as output
digitalWrite(CS_PIN, HIGH); // slave deselected
pinMode(SCLK_PIN, OUTPUT); // SCLK pin as output
digitalWrite(SCLK_PIN, LOW); // initial level LOW
// ID pin modes
pinMode(ID_PIN_1, OUTPUT);
pinMode(ID_PIN_2, OUTPUT);
digitalWrite(ID_PIN_1, LOW);
digitalWrite(ID_PIN_2, LOW);
// print the first prompt
Serial.println("Enter read/write mode (1 for read, 0 for write):"); // write to the console
}
void loop() { // 반복문
if (Serial.available() > 0) { //시리얼 통신에 값이 있으면
String input = Serial.readStringUntil('\n'); // 엔터까지 읽기
input.trim(); // 공백 제거
if (currentState == WAITING_FOR_MODE) {
// 모드 입력 받기
if (input.equals("1") || input.equals("0")) {
writeMode = input.equals("0"); // 수정: 0을 입력하면 true(0), 1을 입력하면 false(1) 0이 WRITE니까 입력해야 하니까 상태 변화
currentState = WAITING_FOR_ID; // ID 입력 대기 상태로 전환
Serial.println("Enter ID value (0 to 3):"); // WRITE일 경우 ID 값 받기
} else {
Serial.println("Invalid input. Enter 1 for read, 0 for write:"); //READ는 그냥 넘어가고 0, 1 이외의 값은 그냥 끝내기
}
} else if (currentState == WAITING_FOR_ID) { // WRITE 입력하면 상태가 얘로 바뀌니까 반복문에 의해서 얘가 실행
// ID 입력 받기
int idValue = input.toInt();
if (idValue >= 0 && idValue <= 3) {
id = idValue;
digitalWrite(ID_PIN_1, id & 0x01); // id가 입력한 값으로 들어온 만약 id로 3을 입력하면 0000 0011이랑 0000 0001이란 &연산 = 0000 0001
digitalWrite(ID_PIN_2, (id >> 1) & 0x01); // 0000 0011 >> 1 => 0000 0001 & 0000 00001
currentState = WAITING_FOR_UPPER_LOWER;
Serial.println("Enter upper or lower (1 for upper, 0 for lower):");
} else {
Serial.println("Invalid input. Enter ID value (0 to 3):");
}
} else if (currentState == WAITING_FOR_UPPER_LOWER) {
// upper/lower 입력 받기
upperData = input.toInt();
// if (input.equals("1") || input.equals("0")) {
//upperData = input.equals("1");
if(upperData == 1 || upperData == 0) {
currentState = WAITING_FOR_ADDRESS;
Serial.println("Enter address (in hex):");
} else {
Serial.println("Invalid input. Enter 1 for upper, 0 for lower:");
}
} else if (currentState == WAITING_FOR_ADDRESS) { // ADDRESS인데 그냥 그 빈공간임
// 주소 입력 받기
addr = strtol(input.c_str(), NULL, 16);
currentState = WAITING_FOR_DATA;
Serial.println("Enter data (in hex):");
} else if (currentState == WAITING_FOR_DATA) {
// 데이터 입력 받기
data = strtol(input.c_str(), NULL, 16);
// 32비트 데이터 구성
uint32_t dataToSend = ((uint32_t)id << 30) | ((uint32_t)(!writeMode) << 29) | ((uint32_t)upperData << 28) | ((uint32_t)addr << 16) | data;
// SPI 데이터 전송
SPI.beginTransaction(SPISettings(800, MSBFIRST, SPI_MODE2)); // SPI 설정 (모드 2)
digitalWrite(CS_PIN, LOW); // 슬레이브 선택
byte bytesToSend[4];
bytesToSend[0] = (dataToSend >> 24) & 0xFF;
bytesToSend[1] = (dataToSend >> 16) & 0xFF;
bytesToSend[2] = (dataToSend >> 8) & 0xFF;
bytesToSend[3] = dataToSend & 0xFF;
uint32_t receivedData = 0;
for (int i = 0; i < 4; ++i) {
receivedData = (receivedData << 8) | SPI.transfer(bytesToSend[i]);
}
digitalWrite(CS_PIN, HIGH); // 슬레이브 비선택
SPI.endTransaction(); // SPI 트랜잭션 종료
// 결과 출력
Serial.print("Sent: ID(");
Serial.print(id, BIN); // ID 값을 이진수로 출력
Serial.print("), ");
Serial.print(writeMode ? "Write" : "Read");
Serial.print(", ");
Serial.print(upperData ? "Upper" : "Lower");
Serial.print(", Address: 16'b");
Serial.print(addr, BIN);
Serial.print(", Data: 16'b");
Serial.print(data, BIN);
Serial.println();
// 읽기 모드일 때 수신된 데이터 출력
if (!writeMode) {
Serial.print("Received: 0x");
Serial.println(receivedData, HEX);
}
// 다시 모드 입력 상태로 전환
currentState = WAITING_FOR_MODE;
Serial.println("Enter read/write mode (1 for read, 0 for write):");
}
}
}
upper / lower는 각각 D1, D0 데이터에 넣는다는 의미입니다.
3. 실습결과
FPGA는 ZYNQ-7000의 것을 사용하였고,
Tool은 Vivado를 사용하였습니다.
Arduino의 제어 Tool은 Arduino IDE를 사용하였습니다.
데이터 "1000_0000_0000_0000_0000_0000_1001_1001" 이 전송되며,