- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
In a larger project I have a triple buffer, with each buffer made up of 16 individually addressable tiles/slices (word-interleaved).
Specifically, that means 48 true dual-port 4k x 20-bit memories with individual address and data buses. I organized all these buses as arrays and considered doing the same with the memories themselves, but went with a generate loop instead. The included code is for test purposes only - to get Quartus to synthesize the memory structure. Altera Quartus runs for a very long time and then fails when it runs out of memory, whereas Xilinx Vivado synthesizes the design in less than a minute. Any ideas on how to get Quartus to synthesize it?
library ieee;
use ieee.std_logic_1164.all;
-- Top-level test wrapper around the triple-buffered slice memories.
--   clk      : single clock for every memory port and address counter.
--   data_in  : eight 20-bit words per cycle; bits 159..80 are written through
--              the A ports and bits 79..0 through the B ports.
--   data_out : eight 20-bit words per cycle, laid out like data_in.
entity fec is
  port (
    clk      : in  std_logic;
    data_in  : in  std_logic_vector(159 downto 0);
    data_out : out std_logic_vector(159 downto 0)
  );
end entity fec;
-- Triple-buffered frame store built from 48 true dual-port 4096 x 20-bit
-- memories ("slices"): 3 buffers x 4 rows x 4 columns, flat index
-- buf*16 + row*4 + col.
--
-- One buffer is filled from data_in while another is read out to data_out;
-- both selectors rotate modulo 3 each time the fill address wraps.
architecture rtl of fec is
  -- Storage of a single slice: 4096 words of 20 bits.
  type FRAME_SLICE_t is array(0 to (2**12)-1) of std_logic_vector(19 downto 0);

  -- Per-slice address buses (one entry per slice, 12-bit address each).
  type SLICE_ADDR_t is array(0 to 3*4*4-1) of integer range 0 to (2**12)-1;
  signal slice_addr_a, slice_addr_b: SLICE_ADDR_t;

  -- Per-slice write enables for port A and port B.
  signal slice_we_a, slice_we_b: std_logic_vector(0 to 3*4*4-1);

  -- Per-slice data buses for both ports.
  type SLICE_DATA_t is array(0 to 3*4*4-1) of std_logic_vector(19 downto 0);
  signal slice_data_in_a, slice_data_in_b, slice_data_out_a, slice_data_out_b: SLICE_DATA_t;

  -- Buffer currently being written / currently being read.
  signal fill_buffer_sel: integer range 0 to 2 := 0;
  signal read_buffer_sel: integer range 0 to 2 := 2;

  -- Linear 16-bit addresses; the B counters run 256 ahead of the A counters.
  signal fill_addr_a: integer range 0 to (2**16)-1 := 0;
  signal fill_addr_b: integer range 0 to (2**16)-1 := 256;
  signal read_addr_a: integer range 0 to (2**16)-1 := 0;
  signal read_addr_b: integer range 0 to (2**16)-1 := 256;
begin

  -- One true dual-port RAM per slice.
  --
  -- The storage is a shared variable rather than a signal: a signal written
  -- from two processes has two drivers (it resolves to 'X' in simulation and
  -- does not match the dual-port RAM inference template), whereas a shared
  -- variable written with ":=" from one process per port is the documented
  -- true dual-port pattern.
  -- With variable assignment a same-port read-during-write returns the new
  -- data. Simultaneous writes to the same address from both ports are not
  -- arbitrated here.
  -- NOTE(review): a non-protected shared variable is VHDL-93 style; confirm
  -- the language-version setting of the target tools accepts it.
  slices: for s in 0 to 3*4*4-1 generate
    shared variable slice: FRAME_SLICE_t;
  begin
    -- Port A: synchronous write, registered read.
    port_a: process(clk)
    begin
      if rising_edge(clk) then
        if slice_we_a(s) = '1' then
          slice(slice_addr_a(s)) := slice_data_in_a(s);
        end if;
        slice_data_out_a(s) <= slice(slice_addr_a(s));
      end if;
    end process port_a;

    -- Port B: synchronous write, registered read.
    port_b: process(clk)
    begin
      if rising_edge(clk) then
        if slice_we_b(s) = '1' then
          slice(slice_addr_b(s)) := slice_data_in_b(s);
        end if;
        slice_data_out_b(s) <= slice(slice_addr_b(s));
      end if;
    end process port_b;
  end generate slices;

  -- Address sequencing.
  -- Each counter steps through 64 consecutive addresses and then skips
  -- 7*64 further, i.e. it lands on the next multiple of 512 plus its start
  -- offset (0 for the A counters, 256 for the B counters).
  addr_counters: process(clk)
  begin
    if rising_edge(clk) then
      if fill_addr_a = (2**16)-1-7*64 then
        -- Last address of the frame: fill_addr_a wraps to 0 through the
        -- modulo, the other counters are reloaded, and both buffer
        -- selectors advance.
        fill_addr_a <= (fill_addr_a + (1 + 7*64)) mod (2**16);
        fill_addr_b <= 256;
        read_addr_a <= 0;
        read_addr_b <= 256;
        fill_buffer_sel <= (fill_buffer_sel + 1) mod 3;
        read_buffer_sel <= (read_buffer_sel + 1) mod 3;
      else
        if fill_addr_a mod 64 = 63 then
          fill_addr_a <= (fill_addr_a + (1 + 7*64)) mod (2**16);
        else
          fill_addr_a <= (fill_addr_a + 1) mod (2**16);
        end if;
        if fill_addr_b mod 64 = 63 then
          fill_addr_b <= (fill_addr_b + (1 + 7*64)) mod (2**16);
        else
          fill_addr_b <= (fill_addr_b + 1) mod (2**16);
        end if;
        if read_addr_a mod 64 = 63 then
          read_addr_a <= (read_addr_a + (1 + 7*64)) mod (2**16);
        else
          read_addr_a <= (read_addr_a + 1) mod (2**16);
        end if;
        if read_addr_b mod 64 = 63 then
          read_addr_b <= (read_addr_b + (1 + 7*64)) mod (2**16);
        else
          read_addr_b <= (read_addr_b + 1) mod (2**16);
        end if;
      end if;
    end if;
  end process addr_counters;

  -- Combinational routing between the linear addresses / data buses and the
  -- per-slice ports. For a linear address x the column is x mod 4 and the
  -- word inside the slice is x/256*16 + ((x/4) mod 16). All four rows of the
  -- selected column are accessed together, one 20-bit word per row.
  slice_routing: process(fill_buffer_sel, read_buffer_sel, data_in,
                         fill_addr_a, read_addr_a, slice_data_out_a,
                         fill_addr_b, read_addr_b, slice_data_out_b)
  begin
    for buf in 0 to 2 loop
      for row in 0 to 3 loop
        for col in 0 to 3 loop
          -- Port A: idle slices get address 0, no write, zero data.
          slice_addr_a(buf*16+row*4+col) <= 0;
          if buf = fill_buffer_sel and col = fill_addr_a mod 4 then
            slice_we_a(buf*16+row*4+col) <= '1';
            slice_addr_a(buf*16+row*4+col) <= fill_addr_a/256*16 + ((fill_addr_a/4) mod 16);
            slice_data_in_a(buf*16+row*4+col) <= data_in(159-row*20 downto 140-row*20);
          else
            slice_we_a(buf*16+row*4+col) <= '0';
            slice_data_in_a(buf*16+row*4+col) <= (others => '0');
          end if;
          -- A read selection is assigned last, so it overrides the write
          -- enable and address if the same slice was also selected for fill.
          if buf = read_buffer_sel and col = read_addr_a mod 4 then
            slice_we_a(buf*16+row*4+col) <= '0';
            slice_addr_a(buf*16+row*4+col) <= read_addr_a/256*16 + ((read_addr_a/4) mod 16);
          end if;

          -- Port B: same scheme, fed from the lower half of data_in.
          slice_addr_b(buf*16+row*4+col) <= 0;
          if buf = fill_buffer_sel and col = fill_addr_b mod 4 then
            slice_we_b(buf*16+row*4+col) <= '1';
            slice_addr_b(buf*16+row*4+col) <= fill_addr_b/256*16 + ((fill_addr_b/4) mod 16);
            slice_data_in_b(buf*16+row*4+col) <= data_in(79-row*20 downto 60-row*20);
          else
            slice_we_b(buf*16+row*4+col) <= '0';
            slice_data_in_b(buf*16+row*4+col) <= (others => '0');
          end if;
          if buf = read_buffer_sel and col = read_addr_b mod 4 then
            slice_we_b(buf*16+row*4+col) <= '0';
            slice_addr_b(buf*16+row*4+col) <= read_addr_b/256*16 + ((read_addr_b/4) mod 16);
          end if;
        end loop;
      end loop;
    end loop;

    -- Output mux: pick the read buffer's slice for each row.
    -- NOTE(review): the slice outputs are registered, but the select below
    -- uses the current read address and buffer, so the selected column is
    -- the one addressed now, not the one addressed in the previous cycle -
    -- confirm whether the select should be delayed by one clock.
    for row in 0 to 3 loop
      data_out(159-row*20 downto 140-row*20) <= slice_data_out_a(read_buffer_sel*16+row*4+(read_addr_a mod 4));
      data_out(79-row*20 downto 60-row*20) <= slice_data_out_b(read_buffer_sel*16+row*4+(read_addr_b mod 4));
    end loop;
  end process slice_routing;

end rtl;
Link Copied
1 Reply
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
This looks similar to an issue I raised with Altera about five years ago. The problem was inferring RAMs inside a generate statement (i.e. it won't work). Synthesis is taking a long time because it's building the RAMs out of registers.
The workaround is to make a single RAM entity and instantiate it in the generate loop. Please raise a ticket with Altera to get this sorted.
Reply
Topic Options
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page