Initial checkin of DSP 56k VHDL code.

This commit is contained in:
Matthias Alles
2010-11-02 07:29:43 +00:00
parent ebdb8e0d71
commit af014dc0d6
56 changed files with 7279 additions and 0 deletions

6
vhdl/dsp56k/asm/asm.sh Executable file
View File

@@ -0,0 +1,6 @@
#!/bin/bash
# Assemble, link and convert one DSP56300 assembly file with the Motorola
# tool chain running under wine.
#
# Usage: asm.sh <basename>
#   reads  <basename>.asm
#   writes <basename>.lod (cldlod output) plus the intermediate files that
#   the assembler and linker produce.

# Stop at the first failing tool so a failed assembly is not followed by a
# link of stale objects and a truncated .lod file.
set -e

if [ $# -ne 1 ]; then
    echo "usage: $0 <basename (without .asm)>" >&2
    exit 1
fi

export DSP_PATH=~/.wine/drive_c/Programme/Motorola/DSP56300/clas

# All expansions are quoted so that paths containing spaces survive.
wine "$DSP_PATH/asm56300.exe" -b -g -l "$1.asm"
wine "$DSP_PATH/dsplnk.exe" "$1.cln"
wine "$DSP_PATH/cldlod.exe" "$1.cld" > "$1.lod"

View File

@@ -0,0 +1,13 @@
; AGU test stimulus: post-update (r1)+n1.
; One single update with m1 untouched, then ten updates via REP after
; loading m1=15, first with n1=1 and then with n1=10.
move #1,n1
move #10,r1
nop
move (r1)+n1
move #15,m1
rep #10
move (r1)+n1
move #10,n1
rep #10
move (r1)+n1

View File

@@ -0,0 +1,12 @@
; ABS test stimulus: positive, negative and zero operands, followed by
; a case where only the extension register a2 is loaded with $80.
; clear CCR
andi #$00,CCR
move #>0.25,a
abs a
move #>-0.25,a
abs a
move #>0,a
abs a
move #>$80,a2
abs a

View File

@@ -0,0 +1,11 @@
; ADC test stimulus: adds y (y1:y0) to b with the carry bit preset.
; Second case reuses the previous result with y1=$800000 and b2=$80.
move #>0,y0
move #>1,y1
clr b
move #>1,b0
; set only carry bit
andi #$00,ccr
ori #$01,ccr
adc y,b
move #>$800000,y1
move #>$80,b2
adc y,b

View File

@@ -0,0 +1,11 @@
; ADD test stimulus: same operands as the ADC stimulus, but using ADD.
; CCR is preset to "carry only" before the first addition.
move #>0,y0
move #>1,y1
clr b
move #>1,b0
; set only carry bit
andi #$00,ccr
ori #$01,ccr
add y,b
move #>$800000,y1
move #>$80,b2
add y,b

View File

@@ -0,0 +1,9 @@
; ADDL test stimulus: three ADDL a,b operations on an accumulating b.
; a is changed to $AA before the second, b2 is forced to $80 before the third.
move #>$55,a
clr b
move #>$55,b0
; clear CCR
andi #$00,ccr
addl a,b
move #>$AA,a
addl a,b
move #>$80,b2
addl a,b

View File

@@ -0,0 +1,9 @@
; ADDR test stimulus: same operand sequence as the ADDL stimulus,
; but using ADDR a,b.
move #>$55,a
clr b
move #>$55,b0
; clear CCR
andi #$00,ccr
addr a,b
move #>$AA,a
addr a,b
move #>$80,b2
addr a,b

View File

@@ -0,0 +1,12 @@
; AND test stimulus: b=$FFFFFF is ANDed with three masks in y0
; ($000FFF, $FFF000, $000000); CCR is cleared before each operation.
move #>$000FFF,y0
move #>$FFFFFF,b
andi #$00,ccr
and y0,b
move #>$FFF000,y0
move #>$FFFFFF,b
andi #$00,ccr
and y0,b
move #>$000000,y0
move #>$FFFFFF,b
andi #$00,ccr
and y0,b

View File

@@ -0,0 +1,8 @@
; ASL test stimulus: b0, b1 and b2 are each loaded with $A5, CCR is
; cleared, then b is shifted once.
; (the two y register loads below are disabled leftovers)
; move #>0,y0
; move #>1,y1
clr b
move #>$A5,b0
move #>$A5,b1
move #>$A5,b2
andi #$00,ccr
asl b

View File

@@ -0,0 +1,8 @@
; ASR test stimulus: b0, b1 and b2 are each loaded with $A5, CCR is
; cleared, then b is shifted once.
; (the two y register loads below are disabled leftovers)
; move #>0,y0
; move #>1,y1
clr b
move #>$A5,b0
move #>$A5,b1
move #>$A5,b2
andi #$00,ccr
asr b

View File

@@ -0,0 +1,20 @@
; ADD a,b stimulus where only the extension registers a2/b2 are non-zero.
; Three combinations are tried: $7F+$7F, $80+$7F and $80+$80.
; Both accumulators and the CCR are cleared before each case.
clr a
clr b
andi #$00,ccr
move #>$7F,a2
move #>$7F,b2
add a,b
clr a
clr b
andi #$00,ccr
move #>$80,a2
move #>$7F,b2
add a,b
clr a
clr b
andi #$00,ccr
move #>$80,a2
move #>$80,b2
add a,b

View File

@@ -0,0 +1,9 @@
; CLR test stimulus: clears a once with CCR cleared beforehand and once
; with only CCR bit 0 (carry) set beforehand.
; clear CCR
andi #$00,CCR
move #>0.25,a
clr a
move #>-0.25,a
andi #$00,CCR
ori #$01,CCR
clr a

View File

@@ -0,0 +1,20 @@
; CMP y0,b test stimulus, CCR cleared before every compare.
; Operand pairs (b, y0): ($20,$24), ($20,$20), ($24,$20),
; ($800AAA,$20) and ($20,$800AAA).
move #$20,b
move #$24,y0
andi #$00,ccr
cmp y0,b
move #$20,b
move #$20,y0
andi #$00,ccr
cmp y0,b
move #$24,b
move #$20,y0
andi #$00,ccr
cmp y0,b
move #$800AAA,b
move #$20,y0
andi #$00,ccr
cmp y0,b
move #$800AAA,y0
move #$20,b
andi #$00,ccr
cmp y0,b

View File

@@ -0,0 +1,20 @@
; CMPM y0,b test stimulus, CCR cleared before every compare.
; Uses the same operand pairs as the CMP stimulus:
; (b, y0) = ($20,$24), ($20,$20), ($24,$20), ($800AAA,$20), ($20,$800AAA).
move #$20,b
move #$24,y0
andi #$00,ccr
cmpm y0,b
move #$20,b
move #$20,y0
andi #$00,ccr
cmpm y0,b
move #$24,b
move #$20,y0
andi #$00,ccr
cmpm y0,b
move #$800AAA,b
move #$20,y0
andi #$00,ccr
cmpm y0,b
move #$800AAA,y0
move #$20,b
andi #$00,ccr
cmpm y0,b

View File

@@ -0,0 +1,8 @@
; EOR y0,b test stimulus: one case with differing bit patterns and one
; with identical operands ($FFFFFF in both); CCR cleared before each.
move #>$000FFF,y0
move #>$FF00FF,b
andi #$00,ccr
eor y0,b
move #>$FFFFFF,y0
move #>$FFFFFF,b
andi #$00,ccr
eor y0,b

View File

@@ -0,0 +1,7 @@
; LSL test stimulus: LSL a repeated 24 times via REP.
; Note: the first load of a (0.25) is overwritten by the next move.
andi #$00,CCR
move #>0.25,a
move #>$AAAAAA,a
move #>$BCDEFA,a0
rep #24
lsl a

View File

@@ -0,0 +1,7 @@
; LSR test stimulus: LSR a repeated 24 times via REP.
; Note: the first load of a (0.25) is overwritten by the next move.
andi #$00,CCR
move #>0.25,a
move #>$AAAAAA,a
move #>$BCDEFA,a0
rep #24
lsr a

View File

@@ -0,0 +1,17 @@
; MAC test stimulus: four multiply-accumulates into a, which starts
; cleared except for a2=$80. The first and last use the negated form
; (-x0,y0); the operand signs are varied between the cases.
andi #$00,CCR
clr a
move #$80,a2
move #>0.25,x0
move #>0.50,y0
mac -x0,y0,a
move #>-0.25,x0
move #>-0.55,y0
mac x0,y0,a
move #>-0.20,x0
move #>+0.55,y0
mac x0,y0,a
move #>-0.20,x0
move #>+0.55,y0
mac -x0,y0,a

View File

@@ -0,0 +1,17 @@
; MACR test stimulus: the same product ($123456 * $123456) is accumulated
; onto three different start values of a: $100000, $100001, and $100000
; with a0=$800000.
; Note: the initial clr a is immediately overwritten by the following move.
andi #$00,CCR
clr a
move #$100000,a
move #>$123456,x0
move #>$123456,y0
macr x0,y0,a
move #$100001,a
move #>$123456,x0
move #>$123456,y0
macr x0,y0,a
move #$100000,a
move #$800000,a0
move #>$123456,x0
move #>$123456,y0
macr x0,y0,a

View File

@@ -0,0 +1,15 @@
; MPY test stimulus: four products written to a with varying operand
; signs; the last case uses the negated form (-x0,y0).
andi #$00,CCR
move #>0.25,x0
move #>0.50,y0
mpy x0,y0,a
move #>-0.25,x0
move #>-0.55,y0
mpy x0,y0,a
move #>-0.20,x0
move #>+0.55,y0
mpy x0,y0,a
move #>-0.20,x0
move #>+0.55,y0
mpy -x0,y0,a

View File

@@ -0,0 +1,5 @@
; MPYR test stimulus: negated square of y0 ($654321), written to a.
andi #$00,CCR
move #>$654321,y0
mpyr -y0,y0,a

View File

@@ -0,0 +1,10 @@
; NEG test stimulus: negates a for three operands: $654321, a cleared
; accumulator with a2=$80, and $800000.
; CCR is cleared only once at the start.
andi #$00,CCR
move #>$654321,a
neg a
clr a
move #>$80,a2
neg a
move #>$800000,a
neg a

View File

@@ -0,0 +1,14 @@
; NORM test stimulus: NORM is repeated $2F times via REP on three
; accumulator values, each case using a different address register
; (R3, R1, R2).
; The first two cases execute TST a before the REP (presumably to set up
; the CCR that NORM evaluates; TODO confirm). The third case runs NORM
; directly after clr a.
clr a
move #$000001,a1
tst a
rep #$2F
norm R3,a
clr a
move #$FF0000,a
move #$84,a2
tst a
rep #$2F
norm R1,a
clr a
rep #$2F
norm R2,a

View File

@@ -0,0 +1,8 @@
; NOT b test stimulus: two operand values ($7F00FF and $FFFFFF), CCR
; cleared before each.
; Note: y0 is loaded in both cases but NOT b does not reference it.
move #>$000FFF,y0
move #>$7F00FF,b
andi #$00,ccr
not b
move #>$000000,y0
move #>$FFFFFF,b
andi #$00,ccr
not b

View File

@@ -0,0 +1,8 @@
; OR y0,b test stimulus: one case with non-zero operands and one with
; both operands zero; CCR cleared before each.
move #>$000FFF,y0
move #>$FF00FF,b
andi #$00,ccr
or y0,b
move #>$000000,y0
move #>$000000,b
andi #$00,ccr
or y0,b

View File

@@ -0,0 +1,11 @@
; RND a test stimulus with three a1:a0 pairs:
;   $123456:$789ABC, $123456:$800000 and $123455:$800000.
; The last two have a0 = $800000 with an even and an odd a1 respectively
; (presumably the tie cases of convergent rounding; TODO confirm).
andi #$00,CCR
move #>$123456,a1
move #>$789ABC,a0
rnd a
move #>$123456,a1
move #>$800000,a0
rnd a
move #>$123455,a1
move #>$800000,a0
rnd a

View File

@@ -0,0 +1,6 @@
; ROL test stimulus: ROL a repeated 24 times via REP, starting with
; a cleared CCR.
andi #$00,CCR
move #>$AAAAAA,a
move #>$BCDEFA,a0
rep #24
rol a

View File

@@ -0,0 +1,6 @@
; ROR test stimulus: ROR a repeated 24 times via REP, starting with
; a cleared CCR.
andi #$00,CCR
move #>$AAAAAA,a
move #>$BCDEFA,a0
rep #24
ror a

View File

@@ -0,0 +1,15 @@
; SBC y,b test stimulus: three subtractions. The carry bit is preset
; explicitly only before the first one; the later cases run with
; whatever CCR the previous instruction left behind.
move #>0,y0
move #>1,y1
clr b
move #>1,b0
; set only carry bit
andi #$00,ccr
ori #$01,ccr
sbc y,b
move #>$800000,y1
move #>$80,b2
sbc y,b
clr b
move #>$80,b2
move #>$1,y1
sbc y,b

View File

@@ -0,0 +1,15 @@
; SUB test stimulus: same operand sequence as the SBC stimulus.
; Note: the first two cases subtract the 48-bit register y, the last
; one subtracts y1 only.
move #>0,y0
move #>1,y1
clr b
move #>1,b0
; set only carry bit
andi #$00,ccr
ori #$01,ccr
sub y,b
move #>$800000,y1
move #>$80,b2
sub y,b
clr b
move #>$80,b2
move #>$1,y1
sub y1,b

View File

@@ -0,0 +1,15 @@
; SUBL a,b test stimulus: three cases with a1 = 1, $800000 and 1 again.
; b2 is forced to $80 before the second and third case, and b is cleared
; before the third.
move #>0,a0
move #>1,a1
clr b
move #>1,b0
; set only carry bit
andi #$00,ccr
ori #$01,ccr
subl a,b
move #>$800000,a1
move #>$80,b2
subl a,b
clr b
move #>$80,b2
move #>$1,a1
subl a,b

View File

@@ -0,0 +1,15 @@
; SUBR a,b test stimulus: same operand sequence as the SUBL stimulus,
; but using SUBR.
move #>0,a0
move #>1,a1
clr b
move #>1,b0
; set only carry bit
andi #$00,ccr
ori #$01,ccr
subr a,b
move #>$800000,a1
move #>$80,b2
subr a,b
clr b
move #>$80,b2
move #>$1,a1
subr a,b

View File

@@ -0,0 +1,10 @@
; Tcc test stimulus: conditional transfers of x0 into a/b together with
; an address register transfer (r1 to r3 or r2).
; TCS/TCC are executed with CCR cleared, TEQ/TNE with only bit 2 (zero)
; set, so each pair contains one condition that holds and one that does not.
move #20,r1
move #$ABCDEF,x0
move #$123456,b
andi #$00,ccr
tcs x0,a r1,r3
tcc x0,b r1,r2
; set Zero Flag
ori #$04,ccr
teq x0,a r1,r3
tne x0,b r1,r2

View File

@@ -0,0 +1,7 @@
; TFR test stimulus: each TFR is combined with a parallel register move
; that reads the TFR destination (a<->b, x0<->a, y1->b with b->y0).
move #$ABCDEF,a
move #$123456,b
tfr a,b b,a
move #$555555,x0
move #$AAAAAA,y1
tfr x0,a a,x0
tfr y1,b b,y0

View File

@@ -0,0 +1,9 @@
; TST b test stimulus: b cleared, then b2=$80, then b2=$7F.
; Only CCR bit 0 (carry) is set before the second TST.
clr b
tst b
; set only carry bit
andi #$00,ccr
ori #$01,ccr
move #>$80,b2
tst b
move #>$7F,b2
tst b

View File

@@ -0,0 +1,8 @@
; Memory move test stimulus: r1 is written to x:(r0), read back into a,
; written to y:(r0), and finally the same address is read as a long
; word (l:) into ab with post-increment of r0.
move #4,r0
move #20,r1
move r1,x:(r0)
move x:(r0),a
move r1,y:(r0)
move l:(r0)+,ab

View File

@@ -0,0 +1,21 @@
; L: memory move test stimulus combined with JCLR and BSET.
; x and y are written to L memory through (r0)+ and through several
; absolute addresses, then read back through -(r0) and absolute addresses.
; NOTE(review): r0 is never initialised in this program; it presumably
; relies on the reset value. Confirm.
move #>$10,x0
move #>$11,x1
move #11,a1
move #-3,a2
; conditional jump to the label at the end of the program
jclr #0,a,blubb
bset #0,x:(r0)+
move #>$26,y0
move #>$27,y1
move x,L:(r0)+
move y,L:(r0)+
move x,L:$0A
move y,L:$1F
move y,L:$00A0
move x,L:$004F
move L:-(r0),x
move L:-(r0),y
move L:$0A,x
move L:$1F,y
blubb

185
vhdl/dsp56k/doc/Change.log Normal file
View File

@@ -0,0 +1,185 @@
Done:
02.01.09
- Started work on pipeline (FE, FE2, DC, AG, EX)
- Program counter counts linearly
- Initial program memory holds program data
- Started work on instruction decoder
03.01.09
- Jump instructions work (with flushing of the pipeline)
- First version of AGU implemented
- Detection of double word instructions
- Initial version of global register file
04.01.09
- Included hardware stack
- Finished support for JSR and JSCC instructions
- RTI/RTS work
- ANDI/ORI work
- Initial work on REP instruction
10.01.09
- Initial support for X memory accesses. One stall cycle is introduced when
accessing the X memory.
- Finished implementation of REP instruction. Reading number of loops from
registers is still missing.
- Initial support for DO loops.
- Prevented writes to the R registers when a stall occurs or a jump is
performed
11.01.09
- Finished implementation of DO loops (stop looping at the end)
- Nested loops work
- Single instruction loops work
- ENDDO instruction implemented (very much the same as usual end of the loop)
12.01.09
- Included Y memory and its addressing modes for REP and DO instruction.
- Setup of a sheet showing which types of which instructions have been
implemented and how many clock cycles are needed.
16.01.09
- Integration of LUA instruction.
24.01.09
- Integrated different addressing schemes (immediate short, immediate long,
absolute address)
- Integration and test of MOVE(C) instruction. Some modes missing (writing to
memory)
- Testing of Y memory read accesses.
26.01.09
- Continued testing of different addressing modes.
- Decoding for first parallel move operations.
01.02.09
- Moved memory components to an extra entity (memory_management)
- Writing to internal X and Y memory supported. Problems are possible for
reading the same address one instruction after writing at the same address!
- Included ALU registers (x,y,a,b) into register file
- Integration of x/y/l bus started
03.02.09
- Continued testing of parallel moves (there are quite a few cases!)
07.02.09
- Fixed REP instruction for instructions that are causing a stall due to
a memory read
- Fixed fetching from program data when stalling.
- Fixed detection of double word instruction, when previous instruction
used the AGU as well (forgot instruction word in sensitivity list).
- Continued testing of parallel moves.
- First synthesis run: Changed RAM description to map to BRAMs, removed
latches, and many things are still missing, post-synthesis results:
- Xilinx Spartan3A, Speed-Grade -4
- 1488 FFs
- 4657 4-Input LUTs
- 3 BRAMs
- 71.08 MHz
08.02.09
- Implemented second address generation unit in order to access X and Y
memory at the same time
- Implemented reverse carry addressing modes for FFT addressing
- Started implementation of modulo addressing.
- Set M0-M7 to -1 in reset.
- Downloaded the assembler for DSP56300. I hope to use it in order to
generate the content of the program memory automatically, which will
boost the testing speed...
- Encoding each instruction to test by hand just sucks. I think I will
integrate some bootloader in order to use the LOD files from the
assembler to initiate the RAMs.
- Implementation of data shifter and limiter (when accessing a or b and
giving the result to XDB or YDB). Needs testing.
- Integration for L: addressing modes. Needs testing.
10.02.09
- Fixed decoding of X: and Y: addressing mode (collided with L: addressing)
- L: addressing modes are working
14.02.09
- Implemented BCHG,BCLR,BSET,BTST,JCLR,JSCLR,JSET,JSSET. A lot of testing
is still needed. Peripheral register accesses are still missing.
- Second synthesis run: Removed new latches again. Many things are still
missing; post-synthesis results:
- Xilinx Spartan3A, Speed-Grade -4
- 1519 FFs
- 6210 4-Input LUTs
- 3 BRAMs
- 51.68 MHz
* Critical path for JSCLR/JSSET=> read limited a/b, go through bit modify
unit, test whether condition met, push data to stack. Reading of
limited A/B is probably a bug (DSP56001 UM says CCR is not changed,
in DSP56300 simulator the flag is set when reading a/b!!).
15.02.09
- Started implementing the ALU.
- ABS works.
- MPY(R), MAC(R) implemented, rounding is missing.
- Clock frequency dropped to 41 MHz, but the critical path is not caused by
the MAC in the ALU! The multiplier is composed of four 18x18 multipliers
and still seems to be very fast!
16.02.09
- Implemented decoding and controlling of ALU for
ADC, ADD, ADDL, ADDR, AND, ASL, ASR, CLR, CMP, CMPM, EOR, NEG, NOT, OR
Still missing ALU instructions:
DIV, NORM, RND, ROL, ROR, SBC, SUB, SUBL, SUBR, Tcc, TFR, TST
Except for DIV and NORM this will be straight forward.
- Other things that need to be done :
* Address Generation Unit does not support modulo addressing.
* MOVEP/MOVEM/STOP/WAIT/ILLEGAL/RESET/SWI
* Interrupts
* External memory accesses
* Peripheral devices (SCI, SSI, Host port)
17.02.09
- Implemented decoding and controlling of ALU instructions for
RND, ROL, ROR, SBC, SUB, SUBL, SUBR, TFR, TST
Still missing ALU instructions:
DIV, NORM, Tcc
08.03.09
- Forgot integration of LSR and LSL instructions. TBD.
- Started integration of Condition flag generation in ALU.
- New synthesis run with ALU, register balancing:
- Xilinx Spartan3A, Speed-Grade -4
- 3115 FFs
- 7417 4-Input LUTs
- 3 BRAMs
- 39.47 MHz
13.03.09
- Integrated decoding of LSL/LSR instructions.
- Integrated rotating function into ALU.
- Included convergent rounding functionality into ALU.
- Implemented Tcc instruction.
- Implemented DIV instruction.
15.03.09
- Tested ABS,ADC,ADD,ADDL,ADDR,AND,ASL,ASR,CLR,CMP,CMPM,DIV,EOR,
LSL,LSR,MPY,MPYR,MAC,MACR,NEG,NOT,OR
- Bugs fixed:
- Detection of overflow corrected when negating most negative
value $80 000000 000000.
- Decoding of ADC and TFR clarified.
- Overflow flag generation when left shifting of 56 bit values.
- For logical operations the flag generation relied on the adder
output which was wrong. Now relies on the Logical unit output.
- Decoding of CMPM clarified in order not to conflict with NOT.
- Shifter was used for CMP(M) instructions, which is wrong.
- Hopefully calculation of carry and overflow flag work correctly now...
- MPY/MAC write result back.
- Limit Flag is not cleared by the ALU anymore (has to be reset by the
user!).
16.03.09
- Tested RND
- Bugs fixed:
- Simulator seems to misunderstand the X"1000000" where the first digit
represents a single bit. Comparing against this value fixed! RND works.
17.03.09
- Tested ROR,ROL,SBC,SUB,SUBL,SUBR,TCC,TFR,TST,NORM
- Integrated logic for NORM instruction support.
- ALU is complete now!
- Bugs fixed:
- Fixed setting of CCR for ROL/ROR
- TCC didn't read register through ALU
- Known bugs:
- Carry calculation for SBC is still buggy
- New synthesis run with ALU, register balancing:
- Xilinx Spartan3A, Speed-Grade -4
- 1801 FFs
- 7407 4-Input LUTs
- 3 BRAMs
- 30.84 MHz
Critical path is in the ALU (multiplication, adding, rounding, zero-flag
calculation). I wonder why the values changed like that since the
last synthesis run.
26.03.09
- Included support for modulo addressing in AGUs. This allows for the
integration of ring buffers. Now 7900 LUTs.
18.05.10
- Commenting of code.
- Added second memory port for p-mem (needed for movem-instruction)

View File

@@ -0,0 +1,291 @@
------------------------------------------------------------------------------
--! @file
--! @author Matthias Alles
--! @date 01/2009
--! @brief Address generation logic
--!
------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.parameter_pkg.all;
use work.types_pkg.all;
use work.constants_pkg.all;
-- Address generation stage with two address generators (X and Y).
-- Register indices are taken from instr_word: bits 10..8 for the first
-- generator, (not bit 10) & bits 14..13 for the second one.
entity adgen_stage is port(
-- NOTE(review): activate_adgen only appears in sensitivity lists of this
-- architecture; it does not gate any output.
activate_adgen : in std_logic;
-- memory space selects; they decide which generator drives address_out_y
activate_x_mem : in std_logic;
activate_y_mem : in std_logic;
activate_l_mem : in std_logic;
instr_word : in std_logic_vector(23 downto 0);
-- decoded instruction; LUA is treated specially (no register write-back)
instr_array : in instructions_type;
-- extension word, used as address in ABSOLUTE mode
optional_ea_word : in std_logic_vector(23 downto 0);
-- source of the R, N and M address registers
register_file : in register_file_type;
-- addressing mode of the first / second generator
adgen_mode_a : in adgen_mode_type;
adgen_mode_b : in adgen_mode_type;
-- generated memory addresses
address_out_x : out unsigned(BW_ADDRESS-1 downto 0);
address_out_y : out unsigned(BW_ADDRESS-1 downto 0);
-- write-back of the updated R register of the first generator
wr_R_port_A_valid : out std_logic;
wr_R_port_A : out addr_wr_port_type;
-- write-back of the updated R register of the second generator
wr_R_port_B_valid : out std_logic;
wr_R_port_B : out addr_wr_port_type
);
end entity;
architecture rtl of adgen_stage is
signal address_out_x_int : unsigned(BW_ADDRESS-1 downto 0);
signal r_reg_local_x : unsigned(BW_ADDRESS-1 downto 0);
signal n_reg_local_x : unsigned(BW_ADDRESS-1 downto 0);
signal m_reg_local_x : unsigned(BW_ADDRESS-1 downto 0);
signal r_reg_local_y : unsigned(BW_ADDRESS-1 downto 0);
signal n_reg_local_y : unsigned(BW_ADDRESS-1 downto 0);
signal m_reg_local_y : unsigned(BW_ADDRESS-1 downto 0);
-- Builds a bitmask from the modifier register: every bit position at or
-- below the most significant set bit of m_reg_local is '1', all positions
-- above it are '0'.
function calculate_modulo_bitmask(m_reg_local : in unsigned ) return std_logic_vector is
    variable mask      : std_logic_vector(BW_ADDRESS-1 downto 0);
    -- becomes '1' as soon as a set bit has been seen while scanning from the MSB
    variable seen_one  : std_logic := '0';
begin
    for bit_idx in BW_ADDRESS-1 downto 0 loop
        seen_one      := seen_one or m_reg_local(bit_idx);
        mask(bit_idx) := seen_one;
    end loop;
    return mask;
end function calculate_modulo_bitmask;
-- Computes the value written back to an R register after an address update.
--   new_r_reg_intermediate : raw adder result (bit reversed when m_reg_local = 0)
--   r_reg_local            : current R register value
--   m_reg_local            : modifier register selecting the arithmetic:
--                            all ones -> linear, zero -> bit reverse, else modulo
--   modulo_bitmask         : mask from calculate_modulo_bitmask(m_reg_local)
-- Returns the new R register content.
function calculate_new_r_reg(new_r_reg_intermediate, r_reg_local, m_reg_local: in unsigned;
modulo_bitmask: in std_logic_vector ) return unsigned is
variable modulo_result : unsigned(BW_ADDRESS-1 downto 0);
variable new_r_reg_intern : unsigned(BW_ADDRESS-1 downto 0);
begin
-- cut out the bits we are interested in
-- for modulo addressing
for i in 0 to BW_ADDRESS-1 loop
if modulo_bitmask(i) = '1' then
modulo_result(i) := new_r_reg_intermediate(i);
else
modulo_result(i) := '0';
end if;
end loop;
-- compare whether an overflow occurred and we
-- have to renormalize the result
-- NOTE(review): the wrap subtracts m_reg_local itself and only covers
-- running past the upper end of the buffer; confirm this matches the
-- intended modulo semantics (modulus = M or M+1, negative offsets).
if modulo_result > m_reg_local then
modulo_result := modulo_result - m_reg_local;
end if;
-- linear addressing
if m_reg_local = 2**BW_ADDRESS-1 then
new_r_reg_intern := new_r_reg_intermediate;
-- bit reverse operation
elsif m_reg_local = 0 then
-- undo the bit reversal applied to the adder inputs
for i in 0 to BW_ADDRESS-1 loop
new_r_reg_intern(BW_ADDRESS - 1 - i) := new_r_reg_intermediate(i);
end loop;
-- modulo arithmetic
else
-- only update the bits that are part of the bitmask!
for i in 0 to BW_ADDRESS-1 loop
if modulo_bitmask(i) = '1' then
new_r_reg_intern(i) := modulo_result(i);
else
new_r_reg_intern(i) := r_reg_local(i);
end if;
end loop;
end if;
return new_r_reg_intern;
end function calculate_new_r_reg;
-- Selects the two adder inputs for an address update.
-- For m_reg_local = 0 (bit reverse mode) both the register value and the
-- offset are mirrored bit-wise, so that one ordinary adder can be used.
-- In every other mode the inputs are passed through unchanged.
procedure set_operands(r_reg_local, m_reg_local, addr_mod : in unsigned; op1, op2 : out unsigned) is
begin
    if m_reg_local /= 0 then
        -- linear and modulo addressing: plain operands
        op1 := r_reg_local;
        op2 := addr_mod;
    else
        -- bit reverse addressing: mirror both operands
        for k in BW_ADDRESS-1 downto 0 loop
            op1(k) := r_reg_local(BW_ADDRESS - 1 - k);
            op2(k) := addr_mod(BW_ADDRESS - 1 - k);
        end loop;
    end if;
end procedure set_operands;
begin
-- drive the X address port from the internal copy (also read by the Y generator)
address_out_x <= address_out_x_int;
-- R/N/M registers of the first generator: index = instr_word(10 downto 8)
r_reg_local_x <= register_file.addr_r(to_integer(unsigned(instr_word(10 downto 8))));
n_reg_local_x <= register_file.addr_n(to_integer(unsigned(instr_word(10 downto 8))));
m_reg_local_x <= register_file.addr_m(to_integer(unsigned(instr_word(10 downto 8))));
-- R/N/M registers of the second generator: index = (not instr_word(10)) & instr_word(14 downto 13),
-- i.e. always taken from the other half of the register set
r_reg_local_y <= register_file.addr_r(to_integer(unsigned((not instr_word(10)) & instr_word(14 downto 13))));
n_reg_local_y <= register_file.addr_n(to_integer(unsigned((not instr_word(10)) & instr_word(14 downto 13))));
m_reg_local_y <= register_file.addr_m(to_integer(unsigned((not instr_word(10)) & instr_word(14 downto 13))));
-- First address generator.
-- Computes the updated R register value for the addressing mode in
-- adgen_mode_a and selects the address put on address_out_x_int.
-- instr_array and optional_ea_word are read inside the process and are
-- therefore part of the sensitivity list; without them the simulated
-- outputs would not follow changes of these inputs.
address_generator_X: process(activate_adgen, instr_word, instr_array, optional_ea_word, adgen_mode_a,
r_reg_local_x, n_reg_local_x, m_reg_local_x) is
variable op1 : unsigned(BW_ADDRESS-1 downto 0);
variable op2 : unsigned(BW_ADDRESS-1 downto 0);
variable addr_mod : unsigned(BW_ADDRESS-1 downto 0);
variable new_r_reg : unsigned(BW_ADDRESS-1 downto 0);
variable new_r_reg_interm : unsigned(BW_ADDRESS-1 downto 0);
variable modulo_bitmask : std_logic_vector(BW_ADDRESS-1 downto 0);
begin
-- select the operands for the calculation
case adgen_mode_a is
-- (Rn) - Nn
when POST_MIN_N => addr_mod := unsigned(- signed(n_reg_local_x));
-- (Rn) + Nn
when POST_PLUS_N => addr_mod := n_reg_local_x;
-- (Rn)-
when POST_MIN_1 => addr_mod := (others => '1'); -- -1
-- (Rn)+
when POST_PLUS_1 => addr_mod := to_unsigned(1, BW_ADDRESS);
-- (Rn)
when NOP => addr_mod := (others => '0');
-- (Rn + Nn)
when INDEXED_N => addr_mod := n_reg_local_x;
-- -(Rn)
when PRE_MIN_1 => addr_mod := (others => '1'); -- - 1
-- absolute address (appended to instruction word)
when ABSOLUTE => addr_mod := (others => '0');
when IMMEDIATE => addr_mod := (others => '0');
end case;
------------------------------------------------
-- set op1 and op2 according to modulo register
------------------------------------------------
set_operands(r_reg_local_x, m_reg_local_x, addr_mod, op1, op2);
-------------------------
-- Calculate new address
-------------------------
new_r_reg_interm := op1 + op2;
----------------------------------
-- Calculate new register content
-----------------------------------
modulo_bitmask := calculate_modulo_bitmask(m_reg_local_x);
new_r_reg := calculate_new_r_reg(new_r_reg_interm, r_reg_local_x, m_reg_local_x, modulo_bitmask);
-- store the updated register in the global register file
-- do not store when we do nothing or there is nothing to update
-- LUA instructions DO NOT UPDATE the source register!!
if (adgen_mode_a = NOP or adgen_mode_a = ABSOLUTE or adgen_mode_a = IMMEDIATE or instr_array = INSTR_LUA) then
wr_R_port_A_valid <= '0';
else
wr_R_port_A_valid <= '1';
end if;
wr_R_port_A.reg_number <= unsigned(instr_word(10 downto 8));
wr_R_port_A.reg_value <= new_r_reg;
-- select the output of the AGU:
-- post-update modes use the old register value, pre-update and indexed
-- modes use the freshly computed one
case adgen_mode_a is
-- (Rn) - Nn
when POST_MIN_N => address_out_x_int <= r_reg_local_x;
-- (Rn) + Nn
when POST_PLUS_N => address_out_x_int <= r_reg_local_x;
-- (Rn)-
when POST_MIN_1 => address_out_x_int <= r_reg_local_x;
-- (Rn)+
when POST_PLUS_1 => address_out_x_int <= r_reg_local_x;
-- (Rn)
when NOP => address_out_x_int <= r_reg_local_x;
-- (Rn + Nn)
when INDEXED_N => address_out_x_int <= new_r_reg;
-- -(Rn)
when PRE_MIN_1 => address_out_x_int <= new_r_reg;
-- absolute address (appended to instruction word)
when ABSOLUTE => address_out_x_int <= unsigned(optional_ea_word(BW_ADDRESS-1 downto 0));
when IMMEDIATE => address_out_x_int <= r_reg_local_x; -- Done externally, value never used
end case;
-- LUA instructions only use the updated address!
-- (this later assignment overrides the selection made above)
if instr_array = INSTR_LUA then
address_out_x_int <= new_r_reg;
end if;
end process address_generator_X;
---------------------------------------------------------
-- Second address generator
-- Used when accessing X and Y memory at the same time
---------------------------------------------------------
address_generator_Y: process(activate_adgen, activate_x_mem, activate_y_mem, activate_l_mem, instr_word,
register_file, adgen_mode_b, address_out_x_int, r_reg_local_y, n_reg_local_y, m_reg_local_y) is
    variable operand_a : unsigned(BW_ADDRESS-1 downto 0);
    variable operand_b : unsigned(BW_ADDRESS-1 downto 0);
    variable offset    : unsigned(BW_ADDRESS-1 downto 0);
    variable raw_sum   : unsigned(BW_ADDRESS-1 downto 0);
    variable updated_r : unsigned(BW_ADDRESS-1 downto 0);
    variable wrap_mask : std_logic_vector(BW_ADDRESS-1 downto 0);
begin
    -- Offset added to the R register. Only three update modes exist for
    -- the second generator, everything else leaves the register alone.
    if adgen_mode_b = POST_PLUS_N then
        offset := n_reg_local_y;                 -- (Rn) + Nn
    elsif adgen_mode_b = POST_MIN_1 then
        offset := (others => '1');               -- (Rn)-   (i.e. -1)
    elsif adgen_mode_b = POST_PLUS_1 then
        offset := to_unsigned(1, BW_ADDRESS);    -- (Rn)+
    else
        offset := (others => '0');               -- (Rn)
    end if;

    -- Adder inputs depend on the modifier register (bit reverse or not)
    set_operands(r_reg_local_y, m_reg_local_y, offset, operand_a, operand_b);
    raw_sum := operand_a + operand_b;

    -- Apply linear / bit reverse / modulo post-processing
    wrap_mask := calculate_modulo_bitmask(m_reg_local_y);
    updated_r := calculate_new_r_reg(raw_sum, r_reg_local_y, m_reg_local_y, wrap_mask);

    -- Write the register back unless the mode is NOP
    wr_R_port_B_valid <= '1';
    if adgen_mode_b = NOP then
        wr_R_port_B_valid <= '0';
    end if;
    wr_R_port_B.reg_number <= unsigned((not instr_word(10)) & instr_word(14 downto 13));
    wr_R_port_B.reg_value  <= updated_r;

    -- Address output: every mode of this generator is a post-update mode,
    -- so the locally generated address is always the unmodified register.
    address_out_y <= r_reg_local_y;
    -- When only the Y memory is accessed, or for L memory accesses, the
    -- address comes from the first generator instead (X and Y memory then
    -- see the same address).
    if (activate_y_mem = '1' and activate_x_mem = '0') or activate_l_mem = '1' then
        address_out_y <= address_out_x_int;
    end if;
end process address_generator_Y;
end architecture;

View File

@@ -0,0 +1,74 @@
------------------------------------------------------------------------------
--! @file
--! @author Matthias Alles
--! @date 01/2009
--! @brief General constants for decoding pipeline.
--!
------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.parameter_pkg.all;
use work.types_pkg.all;
-- Index constants: bit positions of the CCR flags, indices of the pipeline
-- stage registers, and bit positions of the per-instruction activation
-- signals.
package constants_pkg is
-------------------------
-- Flags in CCR register
-------------------------
constant C_FLAG : natural := 0;
constant V_FLAG : natural := 1;
constant Z_FLAG : natural := 2;
constant N_FLAG : natural := 3;
constant U_FLAG : natural := 4;
constant E_FLAG : natural := 5;
constant L_FLAG : natural := 6;
constant S_FLAG : natural := 7;
-------------------
-- Pipeline stages
-------------------
constant ST_FE_FE2 : natural := 0;
constant ST_FE2_DEC : natural := 1;
constant ST_DEC_ADG : natural := 2;
constant ST_ADG_EX : natural := 3;
----------------------
-- Activation signals
----------------------
constant ACT_ADGEN : natural := 0; -- Run the address generator
constant ACT_ALU : natural := 1; -- Activation of ALU results in modification of the status register
constant ACT_EXEC_BRA : natural := 2; -- Branch (in execute stage)
constant ACT_EXEC_CR_MOD : natural := 3; -- Control Register Modification (in execute stage)
constant ACT_EXEC_LOOP : natural := 4; -- Loop instruction (REP, DO)
constant ACT_X_MEM_RD : natural := 5; -- Init read from X memory
constant ACT_Y_MEM_RD : natural := 6; -- Init read from Y memory
constant ACT_P_MEM_RD : natural := 7; -- Init read from P memory
constant ACT_X_MEM_WR : natural := 8; -- Init write to X memory
constant ACT_Y_MEM_WR : natural := 9; -- Init write to Y memory
constant ACT_P_MEM_WR : natural := 10; -- Init write to P memory
constant ACT_REG_RD : natural := 11; -- Read from register (6 bit addressing)
constant ACT_REG_WR : natural := 12; -- Write to register (6 bit addressing)
constant ACT_IMM_8BIT : natural := 13; -- 8 bit immediate operand (in instruction word)
constant ACT_IMM_12BIT : natural := 14; -- 12 bit immediate operand (in instruction word)
constant ACT_IMM_LONG : natural := 15; -- 24 bit immediate operand (in optional instruction word)
constant ACT_X_BUS_RD : natural := 16; -- Read data via X-bus (from x0,x1,a,b)
constant ACT_X_BUS_WR : natural := 17; -- Write data via X-bus (to x0,x1,a,b)
constant ACT_Y_BUS_RD : natural := 18; -- Read data via Y-bus (from y0,y1,a,b)
constant ACT_Y_BUS_WR : natural := 19; -- Write data via Y-bus (to y0,y1,a,b)
constant ACT_L_BUS_RD : natural := 20; -- Read data via L-bus (from a10, b10,x,y,a,b,ab,ba)
constant ACT_L_BUS_WR : natural := 21; -- Write data via L-bus (to a10, b10,x,y,a,b,ab,ba)
constant ACT_BIT_MOD_WR : natural := 22; -- Bit modify write (to set for BSET, BCLR, BCHG)
constant ACT_REG_WR_CC : natural := 23; -- Write to register file conditionally (Tcc)
constant ACT_ALU_WR_CC : natural := 24; -- Write ALU result conditionally (Tcc)
constant ACT_NORM : natural := 25; -- NORM instruction needs special handling
end package constants_pkg;

File diff suppressed because it is too large Load Diff

117
vhdl/dsp56k/src/dsp56k.vhd Normal file
View File

@@ -0,0 +1,117 @@
------------------------------------------------------------------------------
--! @file
--! @author Matthias Alles
--! @date 01/2009
--! @brief Top entity of DSP
--!
------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.parameter_pkg.all;
use work.types_pkg.all;
use work.constants_pkg.all;
-- Top-level entity of the DSP. Besides clock and reset the only port is
-- the register file, which is exported so that the design has an
-- observable output; the peripheral ports are not connected yet.
entity dsp56k is port (
clk, rst : in std_logic;
-- put register file here for synthesis!
register_file : out register_file_type
-- peripheral ports (disabled):
-- port_a_in : in port_a_in_type;
-- port_a_out : out port_a_out_type;
-- port_b_in : in port_b_in_type;
-- port_b_out : out port_b_out_type;
-- port_c_in : in port_c_in_type;
-- port_c_out : out port_c_out_type;
);
end dsp56k;
-- Structural architecture: instantiates the pipeline and the memory
-- management unit and connects them one-to-one. Every *_ctrl_in record
-- runs from the pipeline to the memories, every *_ctrl_out record back.
architecture rtl of dsp56k is
component pipeline is port (
clk, rst : in std_logic;
register_file_out : out register_file_type;
stall_flags_out : out std_logic_vector(PIPELINE_DEPTH-1 downto 0);
memory_stall : in std_logic;
data_rom_enable: out std_logic;
pmem_ctrl_in : out mem_ctrl_type_in;
pmem_ctrl_out : in mem_ctrl_type_out;
pmem2_ctrl_in : out mem_ctrl_type_in;
pmem2_ctrl_out : in mem_ctrl_type_out;
xmem_ctrl_in : out mem_ctrl_type_in;
xmem_ctrl_out : in mem_ctrl_type_out;
ymem_ctrl_in : out mem_ctrl_type_in;
ymem_ctrl_out : in mem_ctrl_type_out
);
end component pipeline;
component memory_management is port (
clk, rst : in std_logic;
stall_flags : in std_logic_vector(PIPELINE_DEPTH-1 downto 0);
memory_stall : out std_logic;
data_rom_enable: in std_logic;
pmem_ctrl_in : in mem_ctrl_type_in;
pmem_ctrl_out : out mem_ctrl_type_out;
pmem2_ctrl_in : in mem_ctrl_type_in;
pmem2_ctrl_out : out mem_ctrl_type_out;
xmem_ctrl_in : in mem_ctrl_type_in;
xmem_ctrl_out : out mem_ctrl_type_out;
ymem_ctrl_in : in mem_ctrl_type_in;
ymem_ctrl_out : out mem_ctrl_type_out
);
end component memory_management;
-- interconnect between pipeline and memory management
signal stall_flags : std_logic_vector(PIPELINE_DEPTH-1 downto 0);
signal memory_stall : std_logic;
signal data_rom_enable : std_logic;
-- program memory, first and second port
signal pmem_ctrl_in : mem_ctrl_type_in;
signal pmem_ctrl_out : mem_ctrl_type_out;
signal pmem2_ctrl_in : mem_ctrl_type_in;
signal pmem2_ctrl_out : mem_ctrl_type_out;
-- X and Y data memory
signal xmem_ctrl_in : mem_ctrl_type_in;
signal xmem_ctrl_out : mem_ctrl_type_out;
signal ymem_ctrl_in : mem_ctrl_type_in;
signal ymem_ctrl_out : mem_ctrl_type_out;
begin
-------------
-- PIPELINE
-------------
pipeline_inst : pipeline port map(
clk => clk,
rst => rst,
register_file_out => register_file,
stall_flags_out => stall_flags,
memory_stall => memory_stall,
data_rom_enable => data_rom_enable,
pmem_ctrl_in => pmem_ctrl_in,
pmem_ctrl_out => pmem_ctrl_out,
pmem2_ctrl_in => pmem2_ctrl_in,
pmem2_ctrl_out => pmem2_ctrl_out,
xmem_ctrl_in => xmem_ctrl_in,
xmem_ctrl_out => xmem_ctrl_out,
ymem_ctrl_in => ymem_ctrl_in,
ymem_ctrl_out => ymem_ctrl_out
);
---------------------
-- MEMORY MANAGEMENT
---------------------
MMU_inst: memory_management port map (
clk => clk,
rst => rst,
stall_flags => stall_flags,
memory_stall => memory_stall,
data_rom_enable => data_rom_enable,
pmem_ctrl_in => pmem_ctrl_in,
pmem_ctrl_out => pmem_ctrl_out,
pmem2_ctrl_in => pmem2_ctrl_in,
pmem2_ctrl_out => pmem2_ctrl_out,
xmem_ctrl_in => xmem_ctrl_in,
xmem_ctrl_out => xmem_ctrl_out,
ymem_ctrl_in => ymem_ctrl_in,
ymem_ctrl_out => ymem_ctrl_out
);
end architecture rtl;

View File

@@ -0,0 +1,611 @@
------------------------------------------------------------------------------
--! @file
--! @author Matthias Alles
--! @date 01/2009
--! @brief ALU, including shifter, MAC unit, etc.
--!
------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.parameter_pkg.all;
use work.types_pkg.all;
use work.constants_pkg.all;
-- Combinational ALU of the execute stage (shifter, multiplier, adder,
-- rounding and condition code generation). The entity has no clock or reset.
entity exec_stage_alu is port(
alu_activate : in std_logic; -- drives modify_sr for every instruction except NORM
instr_word : in std_logic_vector(23 downto 0); -- not referenced by architecture rtl
alu_ctrl : in alu_ctrl_type; -- decoded ALU control record (operand/shift/rounding/flag control)
register_file : in register_file_type; -- current register contents (a, b, x0/x1, y0/y1, sr are read)
addr_r_in : in unsigned(BW_ADDRESS-1 downto 0); -- address register value used by the NORM logic
addr_r_out : out unsigned(BW_ADDRESS-1 downto 0); -- addr_r_in, decremented/incremented/unchanged by the NORM logic
modify_accu : out std_logic; -- copy of alu_ctrl.store_result
dst_accu : out std_logic; -- copy of alu_ctrl.dst_accu
modified_accu : out signed(55 downto 0); -- 56 bit result (logic unit output or post adder output)
modify_sr : out std_logic; -- status register update request
modified_sr : out std_logic_vector(15 downto 0) -- new status register value
);
end entity;
architecture rtl of exec_stage_alu is
-- shifter result and the flags it produces
signal alu_shifter_out : signed(55 downto 0);
signal alu_shifter_carry_out : std_logic;
signal alu_shifter_overflow_out : std_logic;
-- shifter result with the 24 bit logic operation applied to bits 47..24
signal alu_logic_conj : signed(55 downto 0);
-- fractional multiplier result, sign extended to 56 bits
signal alu_multiplier_out : signed(55 downto 0);
-- source operand selected by alu_ctrl.add_src_stage_1
signal alu_src_op : signed(55 downto 0);
-- adder result with one additional bit (56) used for overflow detection
signal alu_add_result : signed(56 downto 0);
signal alu_add_carry_out : std_logic;
-- adder result after rounding
signal alu_post_adder_result : signed(56 downto 0);
-- scaling mode bits taken from the status register (bits 11..10)
signal scaling_mode : std_logic_vector(1 downto 0);
-- internal copy of the result, needed because it is also read for the flags
signal modified_accu_int : signed(55 downto 0);
-- decisions of the NORM instruction logic
signal norm_instr_asl : std_logic;
signal norm_instr_asr : std_logic;
signal norm_instr_nop : std_logic; -- declared but never assigned or read in this architecture
signal norm_update_ccr : std_logic;
begin
-- store calculated value?
modify_accu <= alu_ctrl.store_result;
modified_accu <= modified_accu_int;
-- for the norm instruction we first need to determine whether we have to
-- update the CCR register or not
modify_sr <= alu_activate when alu_ctrl.norm_instr = '0' else
norm_update_ccr;
dst_accu <= alu_ctrl.dst_accu;
scaling_mode <= register_file.sr(11 downto 10);
-- Computes the next status register value (modified_sr).
-- Every flag is controlled by its own entry in alu_ctrl.ccr_flags_ctrl:
-- CLEAR forces '0', SET forces '1', MODIFY derives the flag from the ALU
-- results, any other value leaves the flag as it is in register_file.sr.
--
-- The sensitivity list names every signal read in the process. scaling_mode
-- and alu_shifter_overflow_out are separate signals, so without them the
-- U/E and V/L flags could keep stale values in simulation.
calcule_ccr_flags: process(register_file, alu_ctrl, scaling_mode,
                           alu_shifter_carry_out, alu_shifter_overflow_out,
                           alu_post_adder_result, modified_accu_int,
                           alu_add_carry_out) is
  -- shared overflow condition of the V and the L flag
  variable overflow_detected : boolean;
begin
  -- by default do not modify the flags in the status register
  modified_sr <= register_file.sr;

  -- There are two sources for an overflow:
  -- 1) the result cannot be represented using 56 bits, i.e. the two MSBs
  --    of the 57 bit post adder result differ (not evaluated for DIV)
  -- 2) a 56 bit left shift whose operand had two different MSBs
  overflow_detected :=
    (alu_ctrl.div_instr = '0' and
     alu_post_adder_result(56) /= alu_post_adder_result(55)) or
    (alu_ctrl.shift_mode = SHIFT_LEFT and alu_ctrl.word_24_update = '0' and
     alu_shifter_overflow_out = '1');

  -- Carry flag generation
  -------------------------
  case alu_ctrl.ccr_flags_ctrl(C_FLAG) is
    when CLEAR => modified_sr(C_FLAG) <= '0';
    when SET   => modified_sr(C_FLAG) <= '1';
    when MODIFY =>
      -- the carry flag can stem from the shifter or from the adder:
      -- when we shift and only add zero to the shift result
      -- (ASL, ASR, LSL, LSR, ROL, ROR) take the carry from the shifter
      if (alu_ctrl.shift_mode = SHIFT_LEFT or alu_ctrl.shift_mode = SHIFT_RIGHT) and
         alu_ctrl.add_src_stage_2 = "00" then
        modified_sr(C_FLAG) <= alu_shifter_carry_out;
      elsif alu_ctrl.div_instr = '1' then
        -- DIV: carry is the inverted sign of the result
        modified_sr(C_FLAG) <= not std_logic(alu_post_adder_result(55));
      else
        modified_sr(C_FLAG) <= alu_add_carry_out;
      end if;
    when others => null; -- don't touch
  end case;

  -- Overflow flag generation
  ----------------------------
  case alu_ctrl.ccr_flags_ctrl(V_FLAG) is
    when CLEAR => modified_sr(V_FLAG) <= '0';
    when SET   => modified_sr(V_FLAG) <= '1';
    when MODIFY =>
      if overflow_detected then
        modified_sr(V_FLAG) <= '1';
      else
        modified_sr(V_FLAG) <= '0';
      end if;
    when others => null; -- don't touch
  end case;

  -- Zero flag generation
  ------------------------
  case alu_ctrl.ccr_flags_ctrl(Z_FLAG) is
    when CLEAR => modified_sr(Z_FLAG) <= '0';
    when SET   => modified_sr(Z_FLAG) <= '1';
    when MODIFY =>
      -- distinguish between 24 bit and 56 bit ALU operations
      -- 24 bit instructions are LSL, LSR, ROR, ROL, OR, EOR, NOT, AND
      if (alu_ctrl.word_24_update = '1' and modified_accu_int(47 downto 24) = 0) or
         (alu_ctrl.word_24_update = '0' and modified_accu_int(55 downto 0) = 0) then
        modified_sr(Z_FLAG) <= '1';
      else
        modified_sr(Z_FLAG) <= '0';
      end if;
    when others => null; -- don't touch
  end case;

  -- Negative flag generation
  ----------------------------
  case alu_ctrl.ccr_flags_ctrl(N_FLAG) is
    when CLEAR => modified_sr(N_FLAG) <= '0';
    when SET   => modified_sr(N_FLAG) <= '1';
    when MODIFY =>
      -- sign bit of the 24 bit word or of the whole 56 bit accumulator
      if alu_ctrl.word_24_update = '1' then
        modified_sr(N_FLAG) <= std_logic(modified_accu_int(47));
      else
        modified_sr(N_FLAG) <= std_logic(modified_accu_int(55));
      end if;
    when others => null; -- don't touch
  end case;

  -- Unnormalized flag generation
  --------------------------------
  case alu_ctrl.ccr_flags_ctrl(U_FLAG) is
    when CLEAR => modified_sr(U_FLAG) <= '0';
    when SET   => modified_sr(U_FLAG) <= '1';
    when MODIFY =>
      -- the pair of bits that is compared depends on the scaling mode
      if (scaling_mode = "00" and alu_post_adder_result(47) = alu_post_adder_result(46)) or
         (scaling_mode = "01" and alu_post_adder_result(48) = alu_post_adder_result(47)) or
         (scaling_mode = "10" and alu_post_adder_result(46) = alu_post_adder_result(45)) then
        modified_sr(U_FLAG) <= '1';
      else
        modified_sr(U_FLAG) <= '0';
      end if;
    when others => null; -- don't touch
  end case;

  -- Extension flag generation
  -----------------------------
  case alu_ctrl.ccr_flags_ctrl(E_FLAG) is
    when CLEAR => modified_sr(E_FLAG) <= '0';
    when SET   => modified_sr(E_FLAG) <= '1';
    when MODIFY =>
      -- set the extension flag by default ...
      modified_sr(E_FLAG) <= '1';
      -- ... and clear it when all upper bits (range depends on the
      -- scaling mode) are equal, i.e. the extension holds no information
      case scaling_mode is
        when "00" =>
          if alu_post_adder_result(55 downto 47) = "111111111" or
             alu_post_adder_result(55 downto 47) = "000000000" then
            modified_sr(E_FLAG) <= '0';
          end if;
        when "01" =>
          if alu_post_adder_result(55 downto 48) = "11111111" or
             alu_post_adder_result(55 downto 48) = "00000000" then
            modified_sr(E_FLAG) <= '0';
          end if;
        when "10" =>
          if alu_post_adder_result(55 downto 46) = "1111111111" or
             alu_post_adder_result(55 downto 46) = "0000000000" then
            modified_sr(E_FLAG) <= '0';
          end if;
        when others =>
          modified_sr(E_FLAG) <= '0';
      end case;
    when others => null; -- don't touch
  end case;

  -- Limit flag generation (same condition as the overflow flag)
  -- The flag is sticky here: it is only ever set by MODIFY, clearing of the
  -- limit flag has to be done by the user!
  ----------------------------------------------------------------------------
  case alu_ctrl.ccr_flags_ctrl(L_FLAG) is
    when CLEAR => modified_sr(L_FLAG) <= '0';
    when SET   => modified_sr(L_FLAG) <= '1';
    when MODIFY =>
      if overflow_detected then
        modified_sr(L_FLAG) <= '1';
      end if;
    when others => null; -- don't touch
  end case;

  -- Scaling flag generation (DSP56002 and up)
  ---------------------------------------------
  -- The scaling flag is not generated in the ALU, but when A or B are read
  -- to the XDB or YDB
end process;
-- Selects the 56 bit source operand of the adder / logic unit according to
-- alu_ctrl.add_src_stage_1 (encoding similar to the JJJ field).
src_operand_select: process(register_file, alu_ctrl) is
  -- 24 bit register placed in bits 47..24, sign extended, 24 LSBs zero
  function word_operand(word : signed(23 downto 0)) return signed is
  begin
    return resize(word, 32) & to_signed(0, 24);
  end function;
  -- 48 bit register pair (upper & lower word), sign extended to 56 bits
  function long_operand(upper, lower : signed(23 downto 0)) return signed is
    variable long_word : signed(47 downto 0);
  begin
    long_word := upper & lower;
    return resize(long_word, 56);
  end function;
begin
  case alu_ctrl.add_src_stage_1 is
    -- accumulator that is also the destination
    when "000" =>
      if alu_ctrl.dst_accu = '0' then
        alu_src_op <= register_file.a;
      else
        alu_src_op <= register_file.b;
      end if;
    -- the other accumulator (A,B or B,A)
    when "001" =>
      if alu_ctrl.dst_accu = '0' then
        alu_src_op <= register_file.b;
      else
        alu_src_op <= register_file.a;
      end if;
    -- 48 bit operands X and Y
    when "010" => alu_src_op <= long_operand(register_file.x1, register_file.x0);
    when "011" => alu_src_op <= long_operand(register_file.y1, register_file.y0);
    -- 24 bit operands x0, y0, x1, y1
    when "100" => alu_src_op <= word_operand(register_file.x0);
    when "101" => alu_src_op <= word_operand(register_file.y0);
    when "110" => alu_src_op <= word_operand(register_file.x1);
    when "111" => alu_src_op <= word_operand(register_file.y1);
    -- no assignment for any other (non 0/1) selector value
    when others => null;
  end case;
end process;
-- 24 bit logic unit: applies AND / OR / EOR / NOT to bits 47..24 of the
-- shifter output. Bits 55..48 and 23..0 always pass through unchanged.
alu_logical_functions: process(alu_ctrl, alu_src_op, alu_shifter_out) is
  variable accu_word   : signed(23 downto 0);
  variable source_word : signed(23 downto 0);
  variable logic_word  : signed(23 downto 0);
begin
  accu_word   := alu_shifter_out(47 downto 24);
  source_word := alu_src_op(47 downto 24);

  case alu_ctrl.logic_function is
    when "110"  => logic_word := accu_word and source_word;
    when "010"  => logic_word := accu_word or source_word;
    when "011"  => logic_word := accu_word xor source_word;
    when "111"  => logic_word := not accu_word;
    -- no logic function selected: pass the shifter output through
    when others => logic_word := accu_word;
  end case;

  alu_logic_conj <= alu_shifter_out(55 downto 48) & logic_word & alu_shifter_out(23 downto 0);
end process;
-- Main arithmetic adder.
-- Adds three 57 bit values:
--   operand 1: zero, the multiplier output or the selected source operand
--              (alu_ctrl.add_src_stage_2), with the sign treatment selected
--              by alu_ctrl.add_src_sign
--   operand 2: the shifter output, with the sign treatment selected by
--              alu_ctrl.shift_src_sign
--   carry    : +C or -C when alu_ctrl.rounding_used is "10" or "11"
-- Produces alu_add_result (57 bit, bit 56 is used for overflow detection)
-- and alu_add_carry_out.
--
-- register_file is part of the sensitivity list because the process reads
-- the carry flag and the accumulator sign bits directly from it.
alu_adder : process(alu_ctrl, alu_src_op, alu_multiplier_out, alu_shifter_out,
                    register_file) is
  variable add_src_op_1          : signed(56 downto 0);
  variable add_src_op_2          : signed(56 downto 0);
  variable carry_const           : signed(56 downto 0);
  variable alu_shifter_out_57    : signed(56 downto 0);
  variable alu_add_result_interm : signed(56 downto 0);
  variable invert_carry_flag     : std_logic;
begin
  -- by default do not invert the carry
  invert_carry_flag := '0';

  -- determine whether to use multiplier output, the operand selected by
  -- src_operand_select, or zeros. Resizing is done here already, so an
  -- overflow caused by negating the source operand remains visible.
  case alu_ctrl.add_src_stage_2 is
    when "00"   => add_src_op_1 := (others => '0');
    when "10"   => add_src_op_1 := resize(alu_multiplier_out, 57);
    when others => add_src_op_1 := resize(alu_src_op, 57);
  end case;

  -- determine the sign for the 1st operand
  case alu_ctrl.add_src_sign is
    -- normal operation
    when "00" =>
      null;
    -- negative sign
    when "01" =>
      add_src_op_1 := - add_src_op_1;
      invert_carry_flag := '1';
    -- change according to sign: performs - | operand | for CMPM
    when "10" =>
      -- we subtract in any case, so invert the carry!
      invert_carry_flag := '1';
      if add_src_op_1(55) = '0' then
        add_src_op_1 := - add_src_op_1;
      end if;
    -- div instruction:
    -- the sign depends on D[55] XOR S[23]: if 1 => add, if 0 => subtract
    -- add_src_op_1(55) holds S[23] (sign extension!)
    when others =>
      if (alu_ctrl.shift_src = '0' and add_src_op_1(55) /= register_file.a(55)) or
         (alu_ctrl.shift_src = '1' and add_src_op_1(55) /= register_file.b(55)) then
        null;
      else
        add_src_op_1 := - add_src_op_1;
      end if;
  end case;

  alu_shifter_out_57 := resize(alu_shifter_out, 57);

  -- determine the sign for the 2nd operand (coming from the shifter)
  case alu_ctrl.shift_src_sign is
    -- negative sign
    when "01" =>
      add_src_op_2 := - alu_shifter_out_57;
    -- change according to sign: builds the magnitude (ABS, CMPM)
    when "10" =>
      if alu_shifter_out(55) = '1' then
        add_src_op_2 := - alu_shifter_out_57;
      else
        add_src_op_2 := alu_shifter_out_57;
      end if;
    when others =>
      add_src_op_2 := alu_shifter_out_57;
  end case;

  -- determine whether the carry flag has to be added or subtracted
  carry_const := (others => '0');
  if alu_ctrl.rounding_used = "10" then
    -- add carry flag: +1 when C is set
    carry_const(0) := register_file.sr(C_FLAG);
  elsif alu_ctrl.rounding_used = "11" then
    -- subtract carry flag: all ones (= -1) when C is set
    carry_const := (others => register_file.sr(C_FLAG));
  end if;

  -- add the 56 bit values zero extended, so that bit 56 of the
  -- intermediate result is the carry out of bit 55
  alu_add_result_interm := ('0' & add_src_op_1(55 downto 0)) +
                           ('0' & add_src_op_2(55 downto 0)) +
                           ('0' & carry_const(55 downto 0));

  -- here pops the new carry out of the adder (inverted for subtractions)
  if invert_carry_flag = '0' then
    alu_add_carry_out <= alu_add_result_interm(56);
  else
    alu_add_carry_out <= not alu_add_result_interm(56);
  end if;

  -- calculate the last bit (56) of the signed sum from the operand bits 56
  -- and the carry into that position, in order to test for overflow later on
  alu_add_result(55 downto 0) <= alu_add_result_interm(55 downto 0);
  alu_add_result(56) <= add_src_op_1(56) xor add_src_op_2(56)
                        xor carry_const(56) xor alu_add_result_interm(56);
end process alu_adder;
-- Adder after the normal arithmetic adder.
-- This adder is responsible for convergent rounding only; adding or
-- subtracting the carry flag is done in alu_adder (carry_const).
-- When alu_ctrl.rounding_used is not "01" the adder result is passed
-- through unchanged.
alu_post_adder: process(alu_add_result, scaling_mode, alu_ctrl) is
-- value that is added to the adder result (a single bit at the rounding position)
variable post_adder_constant : signed(56 downto 0);
-- bit at the rounding position (MSB) followed by all bits below it,
-- left aligned to 25 bits
variable testing_constant : signed(24 downto 0);
begin
-- by default add nothing
post_adder_constant := (others => '0');
case alu_ctrl.rounding_used is
-- rounding dependent on scaling bits
when "01" =>
case scaling_mode is
-- no scaling: rounding position is bit 23
when "00" => testing_constant := alu_add_result(23 downto 0) & '0';
-- scale down: rounding position is bit 24
when "01" => testing_constant := alu_add_result(24 downto 0);
-- scale up: rounding position is bit 22
when "10" => testing_constant := alu_add_result(22 downto 0) & "00";
when others =>
testing_constant := alu_add_result(23 downto 0) & '0';
end case;
-- Special case: the discarded part is exactly one half
-- (rounding bit set, all bits below it are zero)
if testing_constant(24) = '1' and testing_constant(23 downto 0) = X"000000" then
-- add depending on bit left to the rounding position
-- (only rounds up when that bit is set, i.e. round to even)
case scaling_mode is
-- no scaling
when "00" => post_adder_constant(23) := alu_add_result(24);
-- scale down
when "01" => post_adder_constant(24) := alu_add_result(25);
-- scale up
when "10" => post_adder_constant(22) := alu_add_result(23);
-- scaling mode "11": nothing is added
when others =>
end case;
else -- testing_constant /= X"1000000"
-- add rounding constant depending on scaling mode
-- results in round up if MSB of testing constant is set, else nothing happens
case scaling_mode is
-- no scaling
when "00" => post_adder_constant(23) := '1';
-- scale down
when "01" => post_adder_constant(24) := '1';
-- scale up
when "10" => post_adder_constant(22) := '1';
-- scaling mode "11": nothing is added
when others =>
end case;
end if;
-- no rounding
when others =>
post_adder_constant := (others => '0');
end case;
-- Add the rounding constant (zero when no rounding is used) to the result of the first adder
alu_post_adder_result <= alu_add_result + post_adder_constant;
-- When rounding is used set 24 LSBs to zero!
-- This later assignment overrides bits 23..0 of the assignment above.
-- NOTE(review): the same 24 bits are cleared for every scaling mode - confirm
-- that this is intended for the scale up / scale down modes.
if alu_ctrl.rounding_used = "01" then
alu_post_adder_result(23 downto 0) <= (others => '0');
end if;
end process;
-- Result multiplexer: the logic unit output is taken whenever a logic
-- function is selected, otherwise the 56 LSBs of the post adder result.
alu_select_new_accu:
  modified_accu_int <= alu_logic_conj when alu_ctrl.logic_function /= "000" else
                       alu_post_adder_result(55 downto 0);
-- 24*24 bit fractional multiplier.
-- Both factors are chosen from x0, x1, y0, y1 via alu_ctrl.mul_op1 and
-- alu_ctrl.mul_op2; the product is delivered as a 56 bit value.
alu_multiplier : process(register_file, alu_ctrl) is
  variable factor_a : signed(23 downto 0);
  variable factor_b : signed(23 downto 0);
  variable product  : signed(47 downto 0);
begin
  -- first factor
  if alu_ctrl.mul_op1 = "00" then
    factor_a := register_file.x0;
  elsif alu_ctrl.mul_op1 = "01" then
    factor_a := register_file.x1;
  elsif alu_ctrl.mul_op1 = "10" then
    factor_a := register_file.y0;
  else
    factor_a := register_file.y1;
  end if;

  -- second factor
  if alu_ctrl.mul_op2 = "00" then
    factor_b := register_file.x0;
  elsif alu_ctrl.mul_op2 = "01" then
    factor_b := register_file.x1;
  elsif alu_ctrl.mul_op2 = "10" then
    factor_b := register_file.y0;
  else
    factor_b := register_file.y1;
  end if;

  -- plain signed integer multiplication
  product := factor_a * factor_b;

  -- Conversion to the fractional format: the integer product carries the
  -- sign twice, so the result is moved up by one bit and a zero is appended.
  -- Bits 55..48 are filled with the sign of the integer product (bit 47),
  -- bits 47..1 with product(46..0).
  alu_multiplier_out <= resize(product, 55) & '0';
end process alu_multiplier;
-- Data shifter.
-- Shifts accumulator a or b (alu_ctrl.shift_src) by one bit, either over
-- the full 56 bits or only within bits 47..24 (alu_ctrl.word_24_update),
-- and reports the bit shifted out as carry.
alu_shifter: process(register_file, alu_ctrl, norm_instr_asl, norm_instr_asr) is
  variable operand   : signed(55 downto 0);
  variable shift_sel : alu_shift_mode;
  variable shift_in  : std_logic; -- bit that enters the vacated position
begin
  -- flags are only raised by the shift cases below
  alu_shifter_carry_out    <= '0';
  alu_shifter_overflow_out <= '0';

  -- source accumulator
  if alu_ctrl.shift_src = '0' then
    operand := register_file.a;
  else
    operand := register_file.b;
  end if;

  -- The NORM instruction decides just in time which shift to perform, so
  -- for this instruction the mode from alu_ctrl is replaced
  shift_sel := alu_ctrl.shift_mode;
  if alu_ctrl.norm_instr /= '0' then
    if norm_instr_asl = '1' then
      shift_sel := SHIFT_LEFT;
    elsif norm_instr_asr = '1' then
      shift_sel := SHIFT_RIGHT;
    else
      shift_sel := NO_SHIFT;
    end if;
  end if;

  case shift_sel is
    when ZEROS =>
      alu_shifter_out <= (others => '0');

    when NO_SHIFT =>
      alu_shifter_out <= operand;

    when SHIFT_LEFT =>
      if alu_ctrl.word_24_update = '0' then
        -- 56 bit shift (ASL, ADDL, DIV); DIV shifts the carry flag in
        if alu_ctrl.div_instr = '1' then
          shift_in := register_file.sr(C_FLAG);
        else
          shift_in := '0';
        end if;
        alu_shifter_out       <= operand(54 downto 0) & shift_in;
        alu_shifter_carry_out <= operand(55);
        -- the sign changes when the two MSBs differ: overflow
        if operand(55) /= operand(54) then
          alu_shifter_overflow_out <= '1';
        end if;
      elsif alu_ctrl.word_24_update = '1' then
        -- 24 bit shift: LSL shifts a zero in, ROL the carry flag
        if alu_ctrl.rotate = '0' then
          shift_in := '0';
        else
          shift_in := register_file.sr(C_FLAG);
        end if;
        alu_shifter_out       <= operand(55 downto 48) &
                                 operand(46 downto 24) & shift_in &
                                 operand(23 downto 0);
        alu_shifter_carry_out <= operand(47);
      end if;

    when SHIFT_RIGHT =>
      if alu_ctrl.word_24_update = '0' then
        -- 56 bit arithmetic shift (ASR): the sign bit is replicated
        alu_shifter_out       <= operand(55) & operand(55 downto 1);
        alu_shifter_carry_out <= operand(0);
      elsif alu_ctrl.word_24_update = '1' then
        -- 24 bit shift: LSR shifts a zero in, ROR the carry flag
        if alu_ctrl.rotate = '0' then
          shift_in := '0';
        else
          shift_in := register_file.sr(C_FLAG);
        end if;
        alu_shifter_out       <= operand(55 downto 48) &
                                 shift_in & operand(47 downto 25) &
                                 operand(23 downto 0);
        alu_shifter_carry_out <= operand(24);
      end if;
  end case;
end process alu_shifter;
-- Decision logic of the NORM instruction (see User's Manual), based on the
-- E, U and Z flags of the status register:
--   E = 1                 : shift right, address register + 1
--   E = 0, U = 1, Z = 0   : shift left,  address register - 1
--   otherwise             : no shift, address register and CCR untouched
norm_instr_logic: process(register_file, addr_r_in) is
begin
  -- "do nothing" defaults
  norm_instr_asl  <= '0';
  norm_instr_asr  <= '0';
  norm_update_ccr <= '0';
  addr_r_out      <= addr_r_in;

  if register_file.sr(E_FLAG) = '1' then
    -- extension in use
    norm_instr_asr  <= '1';
    norm_update_ccr <= '1';
    addr_r_out      <= addr_r_in + 1;
  elsif register_file.sr(E_FLAG) = '0' and
        register_file.sr(U_FLAG) = '1' and
        register_file.sr(Z_FLAG) = '0' then
    -- unnormalized and not zero
    norm_instr_asl  <= '1';
    norm_update_ccr <= '1';
    addr_r_out      <= addr_r_in - 1;
  end if;
end process;
end architecture;

View File

@@ -0,0 +1,86 @@
-----------------------------------------------------------------------------
--! @file
--! @author Matthias Alles
--! @date 01/2009
--! @brief Bit test and modify (BCLR, BSET, BCHG, BTST, J(S)CLR, J(S)SET)
--!
------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.parameter_pkg.all;
use work.types_pkg.all;
use work.constants_pkg.all;
-- Combinational bit test / bit modify unit of the execute stage.
entity exec_stage_bit_modify is port(
instr_word : in std_logic_vector(23 downto 0); -- bits 4..0 hold the number of the bit to test/modify
instr_array : in instructions_type; -- decoded instruction
src_operand : in std_logic_vector(23 downto 0); -- word whose bit is tested
register_file : in register_file_type; -- only the status register (sr) is read
dst_operand : out std_logic_vector(23 downto 0); -- src_operand, with the selected bit replaced for BCLR/BSET/BCHG
bit_cond_met : out std_logic; -- '1' when the bit condition of J(S)CLR / J(S)SET is fulfilled
modify_sr : out std_logic; -- '1' for BCLR, BSET, BCHG and BTST
modified_sr : out std_logic_vector(15 downto 0) -- sr with bit 0 replaced by the tested bit
);
end entity;
-- Bit test / bit modify logic.
--  * BCLR/BSET/BCHG/BTST copy the addressed bit of src_operand into bit 0
--    of the status register (the carry flag).
--  * BCLR/BSET/BCHG additionally write the cleared/set/inverted bit into
--    dst_operand.
--  * J(S)CLR / J(S)SET only evaluate the bit and report via bit_cond_met.
architecture rtl of exec_stage_bit_modify is
  signal operand_bit    : std_logic;
  signal src_operand_32 : std_logic_vector(31 downto 0);
begin
  -- The bit number field is 5 bits wide, but the operand has only 24 bits.
  -- Reading through a zero extended 32 bit helper keeps the simulator from
  -- stopping when a bit > 23 is addressed.
  src_operand_32 <= "00000000" & src_operand;

  -- read the bit we want to test (and modify)
  operand_bit <= src_operand_32(to_integer(unsigned(instr_word(4 downto 0))));

  -- modify the Carry flag only for the bit modify instructions!
  modify_sr <= '1' when instr_array = INSTR_BCLR or instr_array = INSTR_BSET or
                        instr_array = INSTR_BCHG or instr_array = INSTR_BTST else '0';
  modified_sr <= register_file.sr(15 downto 1) & operand_bit;

  bit_operation: process(instr_word, instr_array, src_operand, operand_bit) is
    variable new_bit   : std_logic;
    variable bit_index : natural range 0 to 31;
  begin
    -- do nothing by default!
    dst_operand  <= src_operand;
    bit_cond_met <= '0';

    bit_index := to_integer(unsigned(instr_word(4 downto 0)));

    -- determine which bit to write
    if instr_array = INSTR_BCLR then
      new_bit := '0';
    elsif instr_array = INSTR_BSET then
      new_bit := '1';
    else -- BCHG
      new_bit := not operand_bit;
    end if;

    if instr_array = INSTR_BCLR or instr_array = INSTR_BSET or instr_array = INSTR_BCHG then
      -- Same protection as on the read side: bit numbers 24..31 do not
      -- exist in the 24 bit operand, so leave the operand unchanged
      -- instead of indexing out of range.
      if bit_index <= 23 then
        dst_operand(bit_index) <= new_bit;
      end if;
    end if;

    -- check for the jump instructions whether condition is met or not!
    if instr_array = INSTR_JCLR or instr_array = INSTR_JSCLR then
      if operand_bit = '0' then
        bit_cond_met <= '1';
      else
        bit_cond_met <= '0';
      end if;
    end if;
    if instr_array = INSTR_JSET or instr_array = INSTR_JSSET then
      if operand_bit = '0' then
        bit_cond_met <= '0';
      else
        bit_cond_met <= '1';
      end if;
    end if;
  end process;
end architecture;

View File

@@ -0,0 +1,124 @@
-----------------------------------------------------------------------------
--! @file
--! @author Matthias Alles
--! @date 01/2009
--! @brief Branch control
--!
------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.parameter_pkg.all;
use work.types_pkg.all;
use work.constants_pkg.all;
-- Combinational branch control of the execute stage
-- (jumps, subroutine calls and returns).
entity exec_stage_branch is port(
activate_exec_bra : in std_logic; -- enables the PC update (and thereby stack push/pop)
instr_word : in std_logic_vector(23 downto 0); -- bit 18 selects the short address held in bits 11..0
instr_array : in instructions_type; -- decoded instruction
register_file : in register_file_type; -- current_ssh / current_ssl are read for RTS / RTI
jump_address : in unsigned(BW_ADDRESS-1 downto 0); -- branch target supplied from outside this unit
bit_cond_met : in std_logic; -- condition result for J(S)CLR / J(S)SET
cc_flag_set : in std_logic; -- condition result for Jcc / JScc
push_stack : out push_stack_type; -- valid for taken JSCC, JSR, JSCLR, JSSET (content PC_AND_SR)
pop_stack : out pop_stack_type; -- valid for taken RTI / RTS
modify_pc : out std_logic; -- '1' when the branch is taken
modified_pc : out unsigned(BW_ADDRESS-1 downto 0); -- branch target
modify_sr : out std_logic; -- '1' for RTI
modified_sr : out std_logic_vector(15 downto 0) -- register_file.current_ssl for RTI, else zero
);
end entity;
-- Branch control.
-- Decides whether a branch is taken, selects the branch target and issues
-- the stack push/pop requests for subroutine calls and returns.
-- All processes are combinational; each sensitivity list names every signal
-- the process reads so that simulation matches the synthesized logic.
architecture rtl of exec_stage_branch is
  signal branch_condition_met : std_logic;
  signal modify_pc_int        : std_logic;
begin
  -- a branch is only taken when this unit is activated
  modify_pc_int <= '1' when activate_exec_bra = '1' and branch_condition_met = '1' else '0';
  modify_pc     <= modify_pc_int;

  -- Evaluates the branch condition of the current instruction.
  calculate_branch_condition : process(instr_array, bit_cond_met, cc_flag_set)
  begin
    branch_condition_met <= '0';
    -- unconditional jumps
    if instr_array = INSTR_JMP or
       instr_array = INSTR_JSR or
       instr_array = INSTR_RTI or
       instr_array = INSTR_RTS then
      -- jump always
      branch_condition_met <= '1';
    end if;
    -- jumps depending on a condition code
    if instr_array = INSTR_JCC or instr_array = INSTR_JSCC then
      branch_condition_met <= cc_flag_set;
    end if;
    -- jumps that are executed according to a certain bit condition
    if instr_array = INSTR_JCLR or instr_array = INSTR_JSCLR or
       instr_array = INSTR_JSET or instr_array = INSTR_JSSET then
      branch_condition_met <= bit_cond_met;
    end if;
  end process calculate_branch_condition;

  -- Selects the branch target.
  calculate_branch_target : process(instr_array, instr_word, jump_address, register_file)
  begin
    -- Default: the address delivered via jump_address. This covers the
    -- effective address form of JMP/Jcc/JScc/JSR as well as JCLR/JSET/
    -- JSCLR/JSSET, whose target is the obligatory second instruction word.
    modified_pc <= jump_address;

    if instr_array = INSTR_JMP or
       instr_array = INSTR_JCC or
       instr_array = INSTR_JSCC or
       instr_array = INSTR_JSR then
      if instr_word(18) = '1' then
        -- short jump address included in opcode (bits 11 downto 0);
        -- the upper bits keep the value of jump_address
        modified_pc(11 downto 0) <= unsigned(instr_word(11 downto 0));
      end if;
    elsif instr_array = INSTR_RTS or
          instr_array = INSTR_RTI then
      -- target address is stored on the stack
      modified_pc <= unsigned(register_file.current_ssh);
    end if;
  end process calculate_branch_target;

  -- Subroutine functions need to store PC and SR on the stack
  push_stack.valid <= '1' when modify_pc_int = '1' and
                               (instr_array = INSTR_JSCC or instr_array = INSTR_JSR or
                                instr_array = INSTR_JSCLR or instr_array = INSTR_JSSET) else '0';
  push_stack.content <= PC_AND_SR;
  -- pc is set externally!
  push_stack.pc <= (others => '0');

  -- RTI/RTS instructions need to read from the stack
  pop_stack.valid <= '1' when modify_pc_int = '1' and
                              (instr_array = INSTR_RTI or instr_array = INSTR_RTS) else '0';

  -- RTI restores the status register from the stack (current_ssl)
  calculate_status_register : process(instr_array, register_file)
  begin
    modify_sr   <= '0';
    modified_sr <= (others => '0');
    if instr_array = INSTR_RTI then
      modify_sr   <= '1';
      modified_sr <= register_file.current_ssl;
    end if;
  end process calculate_status_register;
end architecture rtl;

View File

@@ -0,0 +1,82 @@
-----------------------------------------------------------------------------
--! @file
--! @author Matthias Alles
--! @date 01/2009
--! @brief Calculate whether cc flag condition is true
--!
------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.parameter_pkg.all;
use work.types_pkg.all;
use work.constants_pkg.all;
-- Evaluates the condition code (cc) field of the current instruction
-- against the flags in register_file.ccr.
entity exec_stage_cc_flag_calc is port(
instr_word : in std_logic_vector(23 downto 0); -- holds the 4 bit condition code (bits 3..0 or 15..12)
instr_array : in instructions_type; -- decoded instruction, selects where the condition code is located
register_file : in register_file_type; -- only the condition code register (ccr) is read
cc_flag_set : out std_logic -- '1' when the condition is true
);
end entity;
-- Condition code evaluation.
-- Extracts the 4 bit condition code from the instruction word and tests it
-- against the C, V, Z, N, U, E and L flags (ccr bits 0 to 6).
architecture rtl of exec_stage_cc_flag_calc is
begin
  calculate_cc_flag : process(instr_word, instr_array, register_file)
    variable cc_select : std_logic_vector(3 downto 0);
    variable c_flag    : std_logic;
    variable v_flag    : std_logic;
    variable z_flag    : std_logic;
    variable n_flag    : std_logic;
    variable u_flag    : std_logic;
    variable e_flag    : std_logic;
    variable l_flag    : std_logic;
    variable cond_true : boolean;
  begin
    c_flag := register_file.ccr(0);
    v_flag := register_file.ccr(1);
    z_flag := register_file.ccr(2);
    n_flag := register_file.ccr(3);
    u_flag := register_file.ccr(4);
    e_flag := register_file.ccr(5);
    l_flag := register_file.ccr(6);

    -- Rip the condition code from the instruction word.
    -- The effective address forms of Jcc and JScc (bit 18 = '0') carry it
    -- in bits 3..0. All other instructions, including the short address
    -- forms of Jcc and JScc (where bits 11..0 are the jump address), carry
    -- it in bits 15..12.
    if (instr_array = INSTR_JCC or instr_array = INSTR_JSCC) and
       instr_word(18) = '0' then
      cc_select := instr_word(3 downto 0);
    else
      cc_select := instr_word(15 downto 12);
    end if;

    -- One choice per condition code; a case statement guarantees that no
    -- code is tested twice.
    case cc_select is
      when "0000" => cond_true := c_flag = '0';                                        -- CC: carry clear
      when "0001" => cond_true := n_flag = v_flag;                                     -- GE: greater than or equal
      when "0010" => cond_true := z_flag = '0';                                        -- NE: not equal
      when "0011" => cond_true := n_flag = '0';                                        -- PL: plus
      when "0100" => cond_true := ((not u_flag and not e_flag) or z_flag) = '0';       -- NN: not normalized
      when "0101" => cond_true := e_flag = '0';                                        -- EC: extension clear
      when "0110" => cond_true := l_flag = '0';                                        -- LC: limit clear
      when "0111" => cond_true := ((n_flag xor v_flag) or z_flag) = '0';               -- GT: greater than
      when "1000" => cond_true := c_flag = '1';                                        -- CS: carry set
      when "1001" => cond_true := n_flag /= v_flag;                                    -- LT: less than
      when "1010" => cond_true := z_flag = '1';                                        -- EQ: equal
      when "1011" => cond_true := n_flag = '1';                                        -- MI: minus
      when "1100" => cond_true := ((not u_flag and not e_flag) or z_flag) = '1';       -- NR: normalized
      when "1101" => cond_true := e_flag = '1';                                        -- ES: extension set
      when "1110" => cond_true := l_flag = '1';                                        -- LS: limit set
      when "1111" => cond_true := ((n_flag xor v_flag) or z_flag) = '1';               -- LE: less or equal
      when others => cond_true := false;                                               -- undefined selector
    end case;

    if cond_true then
      cc_flag_set <= '1';
    else
      cc_flag_set <= '0';
    end if;
  end process;
end architecture;

View File

@@ -0,0 +1,79 @@
-----------------------------------------------------------------------------
--! @file
--! @author Matthias Alles
--! @date 01/2009
--! @brief Control register modification (ANDI, ORI on MR, CCR and OMR)
--!
------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.parameter_pkg.all;
use work.types_pkg.all;
use work.constants_pkg.all;
-- Combinational unit that ANDs / ORs an 8 bit immediate value with one of
-- the control registers MR, CCR or OMR.
entity exec_stage_cr_mod is port (
activate_exec_cr_mod : in std_logic; -- results are only written when '1'
instr_word : in std_logic_vector(23 downto 0); -- bits 15..8: immediate value, bits 1..0: register select
instr_array : in instructions_type; -- INSTR_ANDI selects AND, everything else OR
register_file : in register_file_type; -- mr, ccr and omr are read
modify_sr : out std_logic; -- '1' when MR or CCR is updated
modified_sr : out std_logic_vector(15 downto 0); -- new status register (MR in the upper, CCR in the lower byte)
modify_omr : out std_logic; -- '1' when OMR is updated
modified_omr : out std_logic_vector(7 downto 0) -- new operating mode register
);
end exec_stage_cr_mod;
-- ANDI / ORI on the control registers.
-- instr_word(1 downto 0) selects the register: "00" = MR, "01" = CCR,
-- "10" = OMR. The 8 bit result is merged back into the status register
-- (MR/CCR) or written to the OMR, but only while the unit is activated.
architecture rtl of exec_stage_cr_mod is
begin
  process(activate_exec_cr_mod, instr_word, instr_array, register_file) is
    variable reg_select : std_logic_vector(1 downto 0);
    variable immediate  : std_logic_vector(7 downto 0);
    variable reg_value  : std_logic_vector(7 downto 0);
    variable result     : std_logic_vector(7 downto 0);
  begin
    reg_select := instr_word(1 downto 0);
    immediate  := instr_word(15 downto 8);

    -- fetch the register to operate on (OMR for every selector but MR/CCR)
    case reg_select is
      when "00"   => reg_value := register_file.mr;
      when "01"   => reg_value := register_file.ccr;
      when others => reg_value := register_file.omr;
    end case;

    -- ANDI, otherwise ORI
    if instr_array = INSTR_ANDI then
      result := immediate and reg_value;
    else
      result := immediate or reg_value;
    end if;

    -- nothing is written unless activated
    modify_sr    <= '0';
    modified_sr  <= (others => '0');
    modify_omr   <= '0';
    modified_omr <= (others => '0');

    if activate_exec_cr_mod = '1' then
      case reg_select is
        when "00" =>
          -- new MR, CCR unchanged
          modify_sr   <= '1';
          modified_sr <= result & register_file.ccr;
        when "01" =>
          -- new CCR, MR unchanged
          modify_sr   <= '1';
          modified_sr <= register_file.mr & result;
        when "10" =>
          -- new OMR
          modify_omr   <= '1';
          modified_omr <= result;
        when others =>
          null;
      end case;
    end if;
  end process;
end architecture;

View File

@@ -0,0 +1,207 @@
------------------------------------------------------------------------------
--! @file
--! @author Matthias Alles
--! @date 01/2009
--! @brief Loop control (REP, DO, ENDDO)
--!
------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.parameter_pkg.all;
use work.types_pkg.all;
use work.constants_pkg.all;
-- Loop controller of the execute stage: sequences the DO, ENDDO and REP
-- instructions and the hardware loop end signalled by the fetch stage.
entity exec_stage_loop is port(
clk, rst : in std_logic;
-- qualifies instr_array: DO / ENDDO / REP are only acted on while this is '1'
activate_exec_loop : in std_logic;
-- not referenced by architecture rtl
instr_word : in std_logic_vector(23 downto 0);
instr_array : in instructions_type;
-- iteration count of the loop; default value of modified_lc (REP stores loop_iterations - 1)
loop_iterations : in unsigned(15 downto 0);
-- default value of modified_la
loop_address : in unsigned(BW_ADDRESS-1 downto 0);
-- drives modified_pc and the pc field of push_stack
loop_start_address: in unsigned(BW_ADDRESS-1 downto 0);
-- fields read here: lc, sr, current_ssl, current_ssh
register_file : in register_file_type;
-- request from the fetch stage; handled exactly like an ENDDO instruction
fetch_perform_enddo: in std_logic;
-- registered for one clock; while the registered copy is '1' a running REP
-- neither decrements nor restores the loop counter
memory_stall : in std_logic;
push_stack : out push_stack_type;
pop_stack : out pop_stack_type;
-- '1' when REP starts with more than one iteration and while the repeated instruction is still being re-issued
stall_rep : out std_logic;
-- '1' in the first cycle of DO and in the first cycle of ENDDO / loop end
stall_do : out std_logic;
-- '1' during REP polling while the loop counter is not yet 1
decrement_lc : out std_logic;
modify_lc : out std_logic;
modified_lc : out unsigned(15 downto 0);
modify_la : out std_logic;
-- NOTE(review): fixed 16 bit width while the other address ports use BW_ADDRESS
modified_la : out unsigned(15 downto 0);
modify_pc : out std_logic;
-- always equal to loop_start_address
modified_pc : out unsigned(BW_ADDRESS-1 downto 0);
modify_sr : out std_logic;
-- current SR with bit 15 (loop flag) set, or restored from current_ssl(15) on ENDDO
modified_sr : out std_logic_vector(15 downto 0)
);
end entity;
architecture rtl of exec_stage_loop is
signal rep_loop_polling : std_logic;
signal do_loop_polling : std_logic;
signal enddo_polling : std_logic;
signal lc_temp : unsigned(15 downto 0);
signal rf_lc_eq_1 : std_logic;
signal memory_stall_t : std_logic;
begin
modified_pc <= loop_start_address;
-- loop counter in register file equal to 1?
rf_lc_eq_1 <= '1' when register_file.lc = 1 else '0';
-- Combinational control for DO, ENDDO (instruction or loop end reported by the
-- fetch stage) and REP.
-- All outputs receive inactive defaults first; the sections below override
-- them. The sections are evaluated in order, so a later section wins when two
-- of them drive the same output.
-- The sensitivity list names every signal read in the process (including
-- do_loop_polling, enddo_polling, loop_address, memory_stall_t and lc_temp),
-- otherwise simulation would not re-evaluate the outputs when those change.
process(activate_exec_loop, instr_array, register_file, fetch_perform_enddo,
        rep_loop_polling, do_loop_polling, enddo_polling,
        loop_iterations, loop_address, loop_start_address,
        rf_lc_eq_1, memory_stall_t, lc_temp) is
begin
    -- defaults: nothing is stalled, nothing is modified
    stall_rep  <= '0';
    stall_do   <= '0';
    modify_la  <= '0';
    modify_lc  <= '0';
    modify_pc  <= '0';
    modify_sr  <= '0';
    modified_la <= loop_address;
    modified_lc <= loop_iterations;  -- default
    -- set the loop flag LF (bit 15) of Status register
    modified_sr(15)          <= '1';
    modified_sr(14 downto 0) <= register_file.sr(14 downto 0);
    -- no push by default; the content/pc defaults only matter once valid = '1'
    push_stack.valid   <= '0';
    push_stack.pc      <= loop_start_address;
    push_stack.content <= LA_AND_LC;
    pop_stack.valid    <= '0';
    decrement_lc       <= '0';

    ------------------
    -- DO instruction
    ------------------
    if activate_exec_loop = '1' and instr_array = INSTR_DO then
        if do_loop_polling = '0' then
            -- first clock cycle of the DO instruction
            stall_do  <= '1';
            modify_lc <= '1';  -- store the new loop counter
            modify_la <= '1';  -- store the new loop address
            push_stack.valid   <= '1';  -- push LA and LC on the stack
            push_stack.content <= LA_AND_LC;
        else
            -- second clock cycle of the DO instruction
            push_stack.valid   <= '1';  -- push PC and SR on the stack
            push_stack.pc      <= loop_start_address;
            push_stack.content <= PC_AND_SR;
            -- set the PC to the first instruction of the loop
            -- the already fetched instructions are flushed from the pipeline
            -- this prevents problems when the loop consists of only one or two instructions
            modify_pc <= '1';
            -- set the loop flag
            modify_sr <= '1';
        end if;
    end if;

    -----------------------------------------------
    -- ENDDO instruction / loop end in fetch stage
    -----------------------------------------------
    if (activate_exec_loop = '1' and instr_array = INSTR_ENDDO) or fetch_perform_enddo = '1' or enddo_polling = '1' then
        -- one stack entry is popped in each of the two cycles
        pop_stack.valid <= '1';
        if enddo_polling = '0' then
            -- first cycle: only restore the LF from the stack
            modified_sr(15) <= register_file.current_ssl(15);
            modify_sr <= '1';
            stall_do  <= '1';  -- stall one clock cycle
        else
            -- second cycle: restore loop counter and loop address
            modified_lc <= unsigned(register_file.current_ssl);
            modify_lc   <= '1';
            modified_la <= unsigned(register_file.current_ssh);
            modify_la   <= '1';
        end if;
    end if;

    -------------------
    -- REP instruction
    -------------------
    if activate_exec_loop = '1' and instr_array = INSTR_REP then
        -- only do something when there is more than 1 iteration,
        -- the first execution is already on the way
        if loop_iterations /= 1 then
            stall_rep   <= '1';  -- stall the fetch and decode stages
            modify_lc   <= '1';  -- store the loop counter
            modified_lc <= loop_iterations - 1;
        end if;
    end if;

    -- keep processing the single instruction
    if rep_loop_polling = '1' then
        stall_rep <= '1';
        -- while the repeated instruction was held by a memory stall in the
        -- previous cycle the loop counter must stay untouched
        if memory_stall_t = '0' then
            if rf_lc_eq_1 = '0' then
                decrement_lc <= '1';
            else
                -- finish the REP instruction by restoring the LC that was
                -- saved when the REP started
                stall_rep   <= '0';
                modify_lc   <= '1';
                modified_lc <= lc_temp;
            end if;
        end if;
    end if;
end process;
-- Sequencer state for the multi-cycle loop instructions.
-- rep_loop_polling stays set while a REP is repeating its instruction, even
-- though the REP itself has already left the pipeline; lc_temp keeps the loop
-- counter value that was in the register file when the REP started.
-- do_loop_polling / enddo_polling mark the second clock cycle of DO / ENDDO.
process(clk) is
begin
    if rising_edge(clk) then
        if rst = '1' then
            rep_loop_polling <= '0';
            do_loop_polling  <= '0';
            enddo_polling    <= '0';
            lc_temp          <= (others => '0');
            memory_stall_t   <= '0';
        else
            -- one cycle delayed copy of the memory stall
            memory_stall_t <= memory_stall;

            -- REP with more than one iteration: start polling and save the old
            -- loop counter (the first execution is already on the way)
            if activate_exec_loop = '1' and instr_array = INSTR_REP and loop_iterations /= 1 then
                rep_loop_polling <= '1';
                lc_temp          <= register_file.lc;
            end if;

            -- REP finished: loop counter reached 1 and no stall was pending
            if rep_loop_polling = '1' and rf_lc_eq_1 = '1' and memory_stall_t = '0' then
                rep_loop_polling <= '0';
            end if;

            -- DO takes two clock cycles:
            -- in the first one loop address and loop counter go onto the stack,
            -- in the second one program counter and status register follow.
            -- The flag is therefore cleared again right after it was set.
            if do_loop_polling = '1' then
                do_loop_polling <= '0';
            elsif activate_exec_loop = '1' and instr_array = INSTR_DO then
                do_loop_polling <= '1';
            end if;

            -- ENDDO (instruction or loop end from the fetch stage) takes two
            -- clock cycles as well
            if enddo_polling = '1' then
                enddo_polling <= '0';
            elsif (activate_exec_loop = '1' and instr_array = INSTR_ENDDO) or fetch_perform_enddo = '1' then
                enddo_polling <= '1';
            end if;
        end if;
    end if;
end process;
end architecture;

View File

@@ -0,0 +1,67 @@
------------------------------------------------------------------------------
--! @file
--! @author Matthias Alles
--! @date 01/2009
--! @brief Fetching from program memory
--!
------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.parameter_pkg.all;
use work.types_pkg.all;
-- Next-PC calculation of the fetch stage, including the hardware loop
-- wrap-around at the loop address LA.
entity fetch_stage is port(
    pc_old        : in  unsigned(BW_ADDRESS-1 downto 0);
    pc_new        : out unsigned(BW_ADDRESS-1 downto 0);
    modify_pc     : in  std_logic;
    modified_pc   : in  unsigned(BW_ADDRESS-1 downto 0);
    register_file : in  register_file_type;
    decrement_lc  : out std_logic;
    perform_enddo : out std_logic
);
end fetch_stage;

architecture rtl of fetch_stage is
begin

    pc_calculation: process(pc_old, modify_pc, modified_pc, register_file) is
    begin
        decrement_lc  <= '0';
        perform_enddo <= '0';

        -- sequential fetch unless another stage supplies a new PC
        if modify_pc = '1' then
            pc_new <= modified_pc;
        else
            pc_new <= pc_old + 1;
        end if;

        -- Loop flag (SR bit 15) set and the last fetched address was LA?
        -- This check has priority over modify_pc.
        if register_file.sr(15) = '1' and register_file.la = pc_old then
            if register_file.lc /= 1 then
                -- loop not finished yet: continue fetching at the address held
                -- in the current SSH entry and count one iteration down
                pc_new       <= unsigned(register_file.current_ssh(BW_ADDRESS-1 downto 0));
                decrement_lc <= '1';
            else
                -- last iteration done: ask the loop controller in the exec
                -- stage to perform the enddo operation
                -- (without flushing of the pipeline!)
                perform_enddo <= '1';
            end if;
        end if;
    end process pc_calculation;

end architecture rtl;

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,223 @@
------------------------------------------------------------------------------
--! @file
--! @author Matthias Alles
--! @date 01/2009
--! @brief Memory controller
--!
--! @details This entity contains the internal memories. These are:
--! - pmem
--! - xmem
--! - ymem
--! - ROM tables
--! - Bootup code
--! All memory requests are collected here. Only when they are all finished
--! the memory_stall-flag is released. External memory accesses are given to
--! the external interface.
--!
------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.parameter_pkg.all;
use work.types_pkg.all;
use work.constants_pkg.all;
-- Memory controller top level: wraps one mem_control instance each for
-- program, X and Y memory and generates the memory stall for the pipeline.
entity memory_management is port (
clk, rst : in std_logic;
-- only bit ST_FE2_DEC is evaluated (buffering of the fetched program word)
stall_flags : in std_logic_vector(PIPELINE_DEPTH-1 downto 0);
-- '1' while an X or Y memory read has been issued and its data is not yet valid
memory_stall : out std_logic;
-- NOTE(review): not referenced by architecture rtl
data_rom_enable: in std_logic;
pmem_ctrl_in : in mem_ctrl_type_in;
pmem_ctrl_out : out mem_ctrl_type_out;
-- NOTE(review): pmem2_ctrl_in is not read and pmem2_ctrl_out is never driven by architecture rtl
pmem2_ctrl_in : in mem_ctrl_type_in;
pmem2_ctrl_out : out mem_ctrl_type_out;
xmem_ctrl_in : in mem_ctrl_type_in;
xmem_ctrl_out : out mem_ctrl_type_out;
ymem_ctrl_in : in mem_ctrl_type_in;
ymem_ctrl_out : out mem_ctrl_type_out
);
end memory_management;
architecture rtl of memory_management is
-- Single memory block; instantiated three times below with mem_type set to
-- P_MEM, X_MEM and Y_MEM.
component mem_control is
generic(
mem_type : memory_type
);
port(
clk, rst : in std_logic;
rd_addr : in unsigned(BW_ADDRESS-1 downto 0);
rd_en : in std_logic;
data_out : out std_logic_vector(23 downto 0);
data_out_valid : out std_logic;
wr_addr : in unsigned(BW_ADDRESS-1 downto 0);
wr_en : in std_logic;
-- left unconnected by all three instances in this architecture
wr_accomplished : out std_logic;
data_in : in std_logic_vector(23 downto 0)
);
end component mem_control;
-- raw outputs of the program memory instance
signal pmem_data_out : std_logic_vector(23 downto 0);
signal pmem_data_out_valid : std_logic;
-- NOTE(review): pmem_rd_addr and pmem_rd_en are declared but never used
signal pmem_rd_addr : unsigned(BW_ADDRESS-1 downto 0);
signal pmem_rd_en : std_logic;
-- X memory: gated read request, raw outputs, and "read outstanding" flag
signal xmem_rd_en : std_logic;
signal xmem_data_out : std_logic_vector(23 downto 0);
signal xmem_data_out_valid : std_logic;
signal xmem_rd_polling : std_logic;
-- Y memory: gated read request, raw outputs, and "read outstanding" flag
signal ymem_rd_en : std_logic;
signal ymem_data_out : std_logic_vector(23 downto 0);
signal ymem_data_out_valid : std_logic;
signal ymem_rd_polling : std_logic;
-- program word captured when the pipeline enters a stall, and its valid flag
signal pmem_stall_buffer : std_logic_vector(23 downto 0);
signal pmem_stall_buffer_valid : std_logic;
-- NOTE(review): xmem_stall_buffer and ymem_stall_buffer are only reset, never loaded or read
signal xmem_stall_buffer : std_logic_vector(23 downto 0);
signal ymem_stall_buffer : std_logic_vector(23 downto 0);
-- stall_flags delayed by one clock, used for edge detection
signal stall_flags_d : std_logic_vector(PIPELINE_DEPTH-1 downto 0);
begin
-- The output of the program memory has to be stored when the pipeline enters a
-- stall; when the pipeline wakes up again this temporary result is inserted
-- into the pipeline instead of the live memory output.
-- (The xmem/ymem buffers are reset here but not loaded.)
stall_buffer: process(clk) is
    variable stalled_now  : std_logic;
    variable stalled_prev : std_logic;
begin
    if rising_edge(clk) then
        if rst = '1' then
            pmem_stall_buffer       <= (others => '0');
            pmem_stall_buffer_valid <= '0';
            xmem_stall_buffer       <= (others => '0');
            ymem_stall_buffer       <= (others => '0');
            stall_flags_d           <= (others => '0');
        else
            stalled_now  := stall_flags(ST_FE2_DEC);
            stalled_prev := stall_flags_d(ST_FE2_DEC);

            -- remember the stall flags for the edge detection in the next cycle
            stall_flags_d <= stall_flags;

            if stalled_now = '1' and stalled_prev = '0' then
                -- stall begins: capture the program word if one is available
                if pmem_data_out_valid = '1' then
                    pmem_stall_buffer       <= pmem_data_out;
                    pmem_stall_buffer_valid <= '1';
                end if;
            elsif stalled_now = '0' and stalled_prev = '1' then
                -- stall ends: the buffered word is consumed in this cycle
                pmem_stall_buffer_valid <= '0';
            end if;
        end if;
    end if;
end process stall_buffer;
-- The pipeline is stalled in the cycle an X/Y read is issued (rd_en) and for as
-- long as that read is outstanding (polling) without valid data.
-- Program memory reads do not contribute to memory_stall.
memory_stall <= '1' when ( xmem_rd_en = '1' or (xmem_rd_polling = '1' and xmem_data_out_valid = '0') ) or
( ymem_rd_en = '1' or (ymem_rd_polling = '1' and ymem_data_out_valid = '0') ) else
'0';
-------------------------------
-- PMEM CONTROLLER
-------------------------------
-- read and write requests are passed through unmodified; wr_accomplished is left open
inst_pmem_ctrl : mem_control
generic map(
mem_type => P_MEM
)
port map(
clk => clk,
rst => rst,
rd_addr => pmem_ctrl_in.rd_addr,
rd_en => pmem_ctrl_in.rd_en,
data_out => pmem_data_out,
data_out_valid => pmem_data_out_valid,
wr_addr => pmem_ctrl_in.wr_addr,
wr_en => pmem_ctrl_in.wr_en,
data_in => pmem_ctrl_in.data_in
);
-- In case we wake up from a stall use the buffered value
-- (first cycle after the ST_FE2_DEC stall flag dropped, and only if a word was captured)
pmem_ctrl_out.data_out <= pmem_stall_buffer when stall_flags(ST_FE2_DEC) = '0' and
stall_flags_d(ST_FE2_DEC) = '1' and
pmem_stall_buffer_valid = '1' else
pmem_data_out;
-- valid flag: buffered flag in the wake-up cycle, forced to '0' while stalled,
-- otherwise the live flag of the memory
pmem_ctrl_out.data_out_valid <= pmem_stall_buffer_valid when stall_flags(ST_FE2_DEC) = '0' and
stall_flags_d(ST_FE2_DEC) = '1' else
'0' when stall_flags(ST_FE2_DEC) = '1' else
pmem_data_out_valid;
-------------------------------
-- XMEM CONTROLLER
-------------------------------
-- the read enable is gated (see xmem_rd_en below); writes are passed through; wr_accomplished is left open
inst_xmem_ctrl : mem_control
generic map(
mem_type => X_MEM
)
port map(
clk => clk,
rst => rst,
rd_addr => xmem_ctrl_in.rd_addr,
rd_en => xmem_rd_en,
data_out => xmem_data_out,
data_out_valid => xmem_data_out_valid,
wr_addr => xmem_ctrl_in.wr_addr,
wr_en => xmem_ctrl_in.wr_en,
data_in => xmem_ctrl_in.data_in
);
-- a read request is only forwarded while no earlier read is outstanding
xmem_rd_en <= '1' when xmem_rd_polling = '0' and xmem_ctrl_in.rd_en = '1' else '0';
xmem_ctrl_out.data_out <= xmem_data_out;
xmem_ctrl_out.data_out_valid <= xmem_data_out_valid;
-------------------------------
-- YMEM CONTROLLER
-------------------------------
-- same wiring as the X memory controller
inst_ymem_ctrl : mem_control
generic map(
mem_type => Y_MEM
)
port map(
clk => clk,
rst => rst,
rd_addr => ymem_ctrl_in.rd_addr,
rd_en => ymem_rd_en,
data_out => ymem_data_out,
data_out_valid => ymem_data_out_valid,
wr_addr => ymem_ctrl_in.wr_addr,
wr_en => ymem_ctrl_in.wr_en,
data_in => ymem_ctrl_in.data_in
);
-- a read request is only forwarded while no earlier read is outstanding
ymem_rd_en <= '1' when ymem_rd_polling = '0' and ymem_ctrl_in.rd_en = '1' else '0';
ymem_ctrl_out.data_out <= ymem_data_out;
ymem_ctrl_out.data_out_valid <= ymem_data_out_valid;
-- Tracks outstanding X/Y memory reads.
-- A polling flag is raised when a read is issued and dropped as soon as the
-- memory reports valid data; the valid flag has priority over a new request.
mem_stall_control: process(clk) is
begin
    if rising_edge(clk) then
        if rst = '1' then
            xmem_rd_polling <= '0';
            ymem_rd_polling <= '0';
        else
            -- X memory
            if xmem_data_out_valid = '1' then
                xmem_rd_polling <= '0';
            elsif xmem_rd_en = '1' then
                xmem_rd_polling <= '1';
            end if;

            -- Y memory
            if ymem_data_out_valid = '1' then
                ymem_rd_polling <= '0';
            elsif ymem_rd_en = '1' then
                ymem_rd_polling <= '1';
            end if;
        end if;
    end if;
end process;
end architecture;

View File

@@ -0,0 +1,18 @@
------------------------------------------------------------------------------
--! @file
--! @author Matthias Alles
--! @date 01/2009
--! @brief Global parameters
--!
------------------------------------------------------------------------------
-- Project-wide sizing constants.
package parameter_pkg is
-- width in bits of the address vectors (program counter, loop address, memory addresses)
constant BW_ADDRESS : natural := 16;
-- number of pipeline register stages
constant PIPELINE_DEPTH : natural := 4;
-- NOTE(review): presumably the number of activation signals produced by the decoder - confirm against pipeline.vhd
constant NUM_ACT_SIGNALS : natural := 26;
end package;

1007
vhdl/dsp56k/src/pipeline.vhd Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,686 @@
------------------------------------------------------------------------------
--! @file
--! @author Matthias Alles
--! @date 01/2009
--! @brief Global register file, including scaler and limiter
--!
------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.parameter_pkg.all;
use work.types_pkg.all;
use work.constants_pkg.all;
-- Global register file: data registers X/Y, accumulators A/B, address
-- registers R/N/M, SR/OMR, loop registers LA/LC and the system stack,
-- together with the read-side scaler/limiter for the accumulators.
entity reg_file is port(
clk, rst : in std_logic;
-- snapshot of the register contents for the other pipeline units
register_file : out register_file_type;
-- two write ports into the R address registers (port B is applied after port A)
wr_R_port_A_valid : in std_logic;
wr_R_port_A : in addr_wr_port_type;
wr_R_port_B_valid : in std_logic;
wr_R_port_B : in addr_wr_port_type;
-- 56 bit accumulator write from the ALU; alu_wr_addr '0' selects A, otherwise B
alu_wr_valid : in std_logic;
alu_wr_addr : in std_logic;
alu_wr_data : in signed(55 downto 0);
-- general write port: bits 5..3 select the register group, bits 2..0 the register
reg_wr_addr : in std_logic_vector(5 downto 0);
reg_wr_addr_valid : in std_logic;
reg_wr_data : in std_Logic_vector(23 downto 0);
-- general read port, same 6 bit addressing scheme; purely combinational
reg_rd_addr : in std_logic_vector(5 downto 0);
reg_rd_data : out std_Logic_vector(23 downto 0);
-- X bus: address 00 = X0, 01 = X1, 10 = A, others = B
X_bus_rd_addr : in std_logic_vector(1 downto 0);
X_bus_data_out : out std_logic_vector(23 downto 0);
X_bus_wr_addr : in std_logic_vector(1 downto 0);
X_bus_wr_valid : in std_logic;
X_bus_data_in : in std_logic_vector(23 downto 0);
-- Y bus: address 00 = Y0, 01 = Y1, 10 = A, others = B
Y_bus_rd_addr : in std_logic_vector(1 downto 0);
Y_bus_data_out : out std_logic_vector(23 downto 0);
Y_bus_wr_addr : in std_logic_vector(1 downto 0);
Y_bus_wr_valid : in std_logic;
Y_bus_data_in : in std_logic_vector(23 downto 0);
-- long (48 bit) moves use X_bus_* and Y_bus_* data together;
-- when L_bus_rd_valid is '1' the L bus selection overrides both bus read selections
L_bus_rd_addr : in std_logic_vector(2 downto 0);
L_bus_rd_valid : in std_logic;
L_bus_wr_addr : in std_logic_vector(2 downto 0);
L_bus_wr_valid : in std_logic;
-- system stack push / pop requests
push_stack : in push_stack_type;
pop_stack : in pop_stack_type;
-- direct overwrite of SR (MR & CCR) and OMR
set_sr : in std_logic;
new_sr : in std_logic_vector(15 downto 0);
set_omr : in std_logic;
new_omr : in std_logic_vector(7 downto 0);
-- loop counter / loop address control; dec_lc is applied after set_lc
dec_lc : in std_logic;
set_lc : in std_logic;
new_lc : in unsigned(15 downto 0);
set_la : in std_logic;
new_la : in unsigned(BW_ADDRESS-1 downto 0)
);
end entity;
architecture rtl of reg_file is
signal addr_r : addr_array;
signal addr_m : addr_array;
signal addr_n : addr_array;
signal loop_address : unsigned(BW_ADDRESS-1 downto 0);
signal loop_counter : unsigned(15 downto 0);
-- condition code register
signal ccr : std_logic_vector(7 downto 0);
-- mode register
signal mr : std_logic_vector(7 downto 0);
-- status register = mode register + condition code register
signal sr : std_logic_vector(15 downto 0);
-- operation mode register
signal omr : std_logic_vector(7 downto 0);
signal stack_pointer : unsigned(5 downto 0);
signal system_stack_ssh : stack_array_type;
signal system_stack_ssl : stack_array_type;
signal x0 : signed(23 downto 0);
signal x1 : signed(23 downto 0);
signal y0 : signed(23 downto 0);
signal y1 : signed(23 downto 0);
signal a0 : signed(23 downto 0);
signal a1 : signed(23 downto 0);
signal a2 : signed(7 downto 0);
signal b0 : signed(23 downto 0);
signal b1 : signed(23 downto 0);
signal b2 : signed(7 downto 0);
signal limited_a1 : signed(23 downto 0);
signal limited_b1 : signed(23 downto 0);
signal limited_a0 : signed(23 downto 0);
signal limited_b0 : signed(23 downto 0);
signal set_limiting_flag : std_logic;
signal X_bus_rd_limited_a : std_logic;
signal X_bus_rd_limited_b : std_logic;
signal Y_bus_rd_limited_a : std_logic;
signal Y_bus_rd_limited_b : std_logic;
signal reg_rd_limited_a : std_logic;
signal reg_rd_limited_b : std_logic;
signal rd_limited_a : std_logic;
signal rd_limited_b : std_logic;
begin
-- status register = mode register (upper byte) & condition code register (lower byte)
sr <= mr & ccr;
-- export of the internal registers through the register_file record
register_file.addr_r <= addr_r;
register_file.addr_n <= addr_n;
register_file.addr_m <= addr_m;
register_file.lc <= loop_counter;
register_file.la <= loop_address;
register_file.ccr <= ccr;
register_file.mr <= mr;
register_file.sr <= sr;
register_file.omr <= omr;
register_file.stack_pointer <= stack_pointer;
-- stack entry addressed by the lower four bits of the stack pointer
-- (bits 5 and 4 are the underflow and stack error flags)
register_file.current_ssh <= system_stack_ssh(to_integer(stack_pointer(3 downto 0)));
register_file.current_ssl <= system_stack_ssl(to_integer(stack_pointer(3 downto 0)));
-- unscaled, unlimited 56 bit accumulators
register_file.a <= a2 & a1 & a0;
register_file.b <= b2 & b1 & b0;
register_file.x0 <= x0;
register_file.x1 <= x1;
register_file.y0 <= y0;
register_file.y1 <= y1;
-- Clocked storage of all architectural registers.
-- Every write source is handled in its own section. The sections are evaluated
-- in textual order, therefore a later section overrides an earlier one when
-- both target the same register in the same clock cycle (e.g. the ALU
-- accumulator write wins over the bus writes, dec_lc wins over set_lc).
--
-- Stack addressing: only stack_pointer(3 downto 0) addresses the stack arrays;
-- bit 4 is the stack error flag (SE) and bit 5 the underflow flag (UF).
global_register_file: process(clk) is
    variable stack_pointer_plus_1 : unsigned(3 downto 0);
    variable reg_addr : integer range 0 to 7;
begin
    if rising_edge(clk) then
        if rst = '1' then
            addr_r <= (others => (others => '0'));
            addr_n <= (others => (others => '0'));
            -- modifier registers come up as all ones
            addr_m <= (others => (others => '1'));
            ccr <= (others => '0');
            mr  <= (others => '0');
            omr <= (others => '0');
            system_stack_ssl <= (others => (others => '0'));
            system_stack_ssh <= (others => (others => '0'));
            stack_pointer <= (others => '0');
            loop_counter  <= (others => '0');
            loop_address  <= (others => '0');
            x0 <= (others => '0');
            x1 <= (others => '0');
            y0 <= (others => '0');
            y1 <= (others => '0');
            a0 <= (others => '0');
            a1 <= (others => '0');
            a2 <= (others => '0');
            b0 <= (others => '0');
            b1 <= (others => '0');
            b2 <= (others => '0');
        else
            reg_addr := to_integer(unsigned(reg_wr_addr(2 downto 0)));
            -- The incremented stack index is needed by the SSH register write
            -- below as well as by the stack controller at the end, so it has to
            -- be computed before either of them. (Computing it only in the
            -- stack controller section would make the SSH write use the value
            -- left over from the previous clock cycle.)
            stack_pointer_plus_1 := stack_pointer(3 downto 0) + 1;

            -----------------------------------------------------------------------
            -- General write port to register file using 6 bit addressing scheme
            -----------------------------------------------------------------------
            if reg_wr_addr_valid = '1' then
                case reg_wr_addr(5 downto 3) is
                    -- X0, X1, Y0, Y1
                    when "000" =>
                        case reg_wr_addr(2 downto 0) is
                            when "100" =>
                                x0 <= signed(reg_wr_data);
                            when "101" =>
                                x1 <= signed(reg_wr_data);
                            when "110" =>
                                y0 <= signed(reg_wr_data);
                            when "111" =>
                                y1 <= signed(reg_wr_data);
                            when others =>
                        end case;
                    -- A0, B0, A2, B2, A1, B1, A, B
                    when "001" =>
                        case reg_wr_addr(2 downto 0) is
                            when "000" =>
                                a0 <= signed(reg_wr_data);
                            when "001" =>
                                b0 <= signed(reg_wr_data);
                            when "010" =>
                                a2 <= signed(reg_wr_data(7 downto 0));
                            when "011" =>
                                b2 <= signed(reg_wr_data(7 downto 0));
                            when "100" =>
                                a1 <= signed(reg_wr_data);
                            when "101" =>
                                b1 <= signed(reg_wr_data);
                            -- write to the full accumulator: sign extend into
                            -- the extension byte and clear the low word
                            when "110" =>
                                a2 <= (others => reg_wr_data(23));
                                a1 <= signed(reg_wr_data);
                                a0 <= (others => '0');
                            when "111" =>
                                b2 <= (others => reg_wr_data(23));
                                b1 <= signed(reg_wr_data);
                                b0 <= (others => '0');
                            when others =>
                        end case;
                    -- R0-R7
                    when "010" =>
                        addr_r(reg_addr) <= unsigned(reg_wr_data(BW_ADDRESS-1 downto 0));
                    -- N0-N7
                    when "011" =>
                        addr_n(reg_addr) <= unsigned(reg_wr_data(BW_ADDRESS-1 downto 0));
                    -- M0-M7
                    when "100" =>
                        addr_m(reg_addr) <= unsigned(reg_wr_data(BW_ADDRESS-1 downto 0));
                    -- SR, OMR, SP, SSH, SSL, LA, LC
                    when "111" =>
                        case reg_wr_addr(2 downto 0) is
                            -- SR
                            when "001" =>
                                mr  <= reg_wr_data(15 downto 8);
                                ccr <= reg_wr_data( 7 downto 0);
                            -- OMR
                            when "010" =>
                                omr <= reg_wr_data(7 downto 0);
                            -- SP
                            when "011" =>
                                stack_pointer <= unsigned(reg_wr_data(5 downto 0));
                            -- SSH: writing pushes a new entry onto the stack
                            when "100" =>
                                system_stack_ssh(to_integer(stack_pointer_plus_1)) <= reg_wr_data(BW_ADDRESS-1 downto 0);
                                -- increase stack after writing
                                stack_pointer(3 downto 0) <= stack_pointer_plus_1;
                                -- test whether stack is full, if so set the stack error flag (SE)
                                if stack_pointer(3 downto 0) = "1111" then
                                    stack_pointer(4) <= '1';
                                end if;
                            -- SSL: overwrite the current entry. Only the four
                            -- index bits of the stack pointer are used, exactly
                            -- as for every other stack access; the UF/SE flag
                            -- bits must not take part in the addressing.
                            when "101" =>
                                system_stack_ssl(to_integer(stack_pointer(3 downto 0))) <= reg_wr_data(BW_ADDRESS-1 downto 0);
                            -- LA
                            when "110" =>
                                loop_address <= unsigned(reg_wr_data(BW_ADDRESS-1 downto 0));
                            -- LC
                            when "111" =>
                                loop_counter <= unsigned(reg_wr_data(15 downto 0));
                            when others =>
                        end case;
                    when others =>
                end case;
            end if;

            ----------------
            -- X BUS Write
            ----------------
            if X_bus_wr_valid = '1' then
                case X_bus_wr_addr is
                    when "00" =>
                        x0 <= signed(X_bus_data_in);
                    when "01" =>
                        x1 <= signed(X_bus_data_in);
                    when "10" =>
                        a2 <= (others => X_bus_data_in(23));
                        a1 <= signed(X_bus_data_in);
                        a0 <= (others => '0');
                    when others =>
                        b2 <= (others => X_bus_data_in(23));
                        b1 <= signed(X_bus_data_in);
                        b0 <= (others => '0');
                end case;
            end if;

            ----------------
            -- Y BUS Write
            ----------------
            if Y_bus_wr_valid = '1' then
                case Y_bus_wr_addr is
                    when "00" =>
                        y0 <= signed(Y_bus_data_in);
                    when "01" =>
                        y1 <= signed(Y_bus_data_in);
                    when "10" =>
                        a2 <= (others => Y_bus_data_in(23));
                        a1 <= signed(Y_bus_data_in);
                        a0 <= (others => '0');
                    when others =>
                        b2 <= (others => Y_bus_data_in(23));
                        b1 <= signed(Y_bus_data_in);
                        b0 <= (others => '0');
                end case;
            end if;

            ------------------
            -- L BUS Write
            ------------------
            -- long moves: the X bus carries the upper word, the Y bus the lower one
            if L_bus_wr_valid = '1' then
                case L_bus_wr_addr is
                    -- A10
                    when "000" =>
                        a1 <= signed(X_bus_data_in);
                        a0 <= signed(Y_bus_data_in);
                    -- B10
                    when "001" =>
                        b1 <= signed(X_bus_data_in);
                        b0 <= signed(Y_bus_data_in);
                    -- X
                    when "010" =>
                        x1 <= signed(X_bus_data_in);
                        x0 <= signed(Y_bus_data_in);
                    -- Y
                    when "011" =>
                        y1 <= signed(X_bus_data_in);
                        y0 <= signed(Y_bus_data_in);
                    -- A
                    when "100" =>
                        a2 <= (others => X_bus_data_in(23));
                        a1 <= signed(X_bus_data_in);
                        a0 <= signed(Y_bus_data_in);
                    -- B
                    when "101" =>
                        b2 <= (others => X_bus_data_in(23));
                        b1 <= signed(X_bus_data_in);
                        b0 <= signed(Y_bus_data_in);
                    -- AB
                    when "110" =>
                        a2 <= (others => X_bus_data_in(23));
                        a1 <= signed(X_bus_data_in);
                        a0 <= (others => '0');
                        b2 <= (others => Y_bus_data_in(23));
                        b1 <= signed(Y_bus_data_in);
                        b0 <= (others => '0');
                    -- BA
                    when others =>
                        a2 <= (others => Y_bus_data_in(23));
                        a1 <= signed(Y_bus_data_in);
                        a0 <= (others => '0');
                        b2 <= (others => X_bus_data_in(23));
                        b1 <= signed(X_bus_data_in);
                        b0 <= (others => '0');
                end case;
            end if;

            ---------------------
            -- STATUS REGISTERS
            ---------------------
            if set_sr = '1' then
                ccr <= new_sr( 7 downto 0);
                mr  <= new_sr(15 downto 8);
            end if;
            if set_omr = '1' then
                omr <= new_omr;
            end if;
            -- data limiter active?
            -- listing this statement after the set_sr test results
            -- in the correct behaviour for ALU operations with parallel move
            if set_limiting_flag = '1' then
                ccr(6) <= '1';
            end if;

            --------------------
            -- LOOP REGISTERS
            --------------------
            if set_la = '1' then
                loop_address <= new_la;
            end if;
            if set_lc = '1' then
                loop_counter <= new_lc;
            end if;
            if dec_lc = '1' then
                loop_counter <= loop_counter - 1;
            end if;

            ---------------------
            -- ADDRESS REGISTER
            ---------------------
            if wr_R_port_A_valid = '1' then
                addr_r(to_integer(wr_R_port_A.reg_number)) <= wr_R_port_A.reg_value;
            end if;
            if wr_R_port_B_valid = '1' then
                addr_r(to_integer(wr_R_port_B.reg_number)) <= wr_R_port_B.reg_value;
            end if;

            -------------------------
            -- ALU ACCUMULATOR WRITE
            -------------------------
            if alu_wr_valid = '1' then
                if alu_wr_addr = '0' then
                    a2 <= alu_wr_data(55 downto 48);
                    a1 <= alu_wr_data(47 downto 24);
                    a0 <= alu_wr_data(23 downto 0);
                else
                    b2 <= alu_wr_data(55 downto 48);
                    b1 <= alu_wr_data(47 downto 24);
                    b0 <= alu_wr_data(23 downto 0);
                end if;
            end if;

            ---------------------
            -- STACK CONTROLLER
            ---------------------
            if push_stack.valid = '1' then
                -- increase stack after writing
                stack_pointer(3 downto 0) <= stack_pointer_plus_1;
                -- test whether stack is full, if so set the stack error flag (SE)
                if stack_pointer(3 downto 0) = "1111" then
                    stack_pointer(4) <= '1';
                end if;
                case push_stack.content is
                    when PC =>
                        system_stack_ssh(to_integer(stack_pointer_plus_1)) <= std_logic_vector(push_stack.pc);
                    when PC_AND_SR =>
                        system_stack_ssh(to_integer(stack_pointer_plus_1)) <= std_logic_vector(push_stack.pc);
                        system_stack_ssl(to_integer(stack_pointer_plus_1)) <= sr;
                    when LA_AND_LC =>
                        system_stack_ssh(to_integer(stack_pointer_plus_1)) <= std_logic_vector(loop_address);
                        system_stack_ssl(to_integer(stack_pointer_plus_1)) <= std_logic_vector(loop_counter);
                end case;
            end if;
            -- decrease stack pointer
            -- (listed after the push, so a pop wins if both are requested)
            if pop_stack.valid = '1' then
                stack_pointer(3 downto 0) <= stack_pointer(3 downto 0) - 1;
                -- if stack is empty set the underflow flag (bit 5, UF) and the stack error flag (bit 4, SE)
                if stack_pointer(3 downto 0) = "0000" then
                    stack_pointer(5) <= '1';
                    stack_pointer(4) <= '1';
                end if;
            end if;
        end if;
    end if;
end process;
-- Read multiplexer for the X data bus.
-- The short (24 bit) selection is evaluated first; an active long move
-- (L_bus_rd_valid) then replaces the data with the upper word of the selected
-- long register. Whenever a limited accumulator value is selected the
-- corresponding rd_limited flag is raised for the limiter.
x_bus_rd_port: process(X_bus_rd_addr,x0,x1,a1,b1,limited_a1,limited_b1,
                       L_bus_rd_addr,L_bus_rd_valid,y1) is
    variable bus_word     : signed(23 downto 0);
    variable uses_limit_a : std_logic;
    variable uses_limit_b : std_logic;
begin
    uses_limit_a := '0';
    uses_limit_b := '0';

    -- 24 bit move: X0, X1, A (limited), B (limited)
    case X_bus_rd_addr is
        when "00" =>
            bus_word := x0;
        when "01" =>
            bus_word := x1;
        when "10" =>
            bus_word := limited_a1;
            uses_limit_a := '1';
        when others =>
            bus_word := limited_b1;
            uses_limit_b := '1';
    end case;

    -- long move: A10, B10, X, Y, A, B, AB, BA
    -- NOTE(review): a limit flag raised by the short selection above stays
    -- set even when the long move replaces the data - confirm this is intended
    if L_bus_rd_valid = '1' then
        case L_bus_rd_addr is
            when "000" =>
                bus_word := a1;
            when "001" =>
                bus_word := b1;
            when "010" =>
                bus_word := x1;
            when "011" =>
                bus_word := y1;
            when "100" | "110" =>
                bus_word := limited_a1;
                uses_limit_a := '1';
            when others =>
                bus_word := limited_b1;
                uses_limit_b := '1';
        end case;
    end if;

    X_bus_data_out     <= std_logic_vector(bus_word);
    X_bus_rd_limited_a <= uses_limit_a;
    X_bus_rd_limited_b <= uses_limit_b;
end process x_bus_rd_port;
-- Read multiplexer for the Y data bus.
-- The short (24 bit) selection is evaluated first; an active long move
-- (L_bus_rd_valid) then replaces the data with the lower word of the selected
-- long register (for AB / BA: the second accumulator of the pair).
y_bus_rd_port: process(Y_bus_rd_addr,y0,y1,a1,b1,limited_a1,limited_b1,
                       L_bus_rd_addr,L_bus_rd_valid,a0,b0,x0,limited_a0,limited_b0) is
    variable bus_word     : signed(23 downto 0);
    variable uses_limit_a : std_logic;
    variable uses_limit_b : std_logic;
begin
    uses_limit_a := '0';
    uses_limit_b := '0';

    -- 24 bit move: Y0, Y1, A (limited), B (limited)
    case Y_bus_rd_addr is
        when "00" =>
            bus_word := y0;
        when "01" =>
            bus_word := y1;
        when "10" =>
            bus_word := limited_a1;
            uses_limit_a := '1';
        when others =>
            bus_word := limited_b1;
            uses_limit_b := '1';
    end case;

    -- long move: A10, B10, X, Y, A, B, AB, BA
    -- NOTE(review): a limit flag raised by the short selection above stays
    -- set even when the long move replaces the data - confirm this is intended
    if L_bus_rd_valid = '1' then
        case L_bus_rd_addr is
            when "000" =>
                bus_word := a0;
            when "001" =>
                bus_word := b0;
            when "010" =>
                bus_word := x0;
            when "011" =>
                bus_word := y0;
            when "100" =>
                bus_word := limited_a0;
                uses_limit_a := '1';
            when "101" =>
                bus_word := limited_b0;
                uses_limit_b := '1';
            when "110" =>
                bus_word := limited_b1;
                uses_limit_b := '1';
            when others =>
                bus_word := limited_a1;
                uses_limit_a := '1';
        end case;
    end if;

    Y_bus_data_out     <= std_logic_vector(bus_word);
    Y_bus_rd_limited_a <= uses_limit_a;
    Y_bus_rd_limited_b <= uses_limit_b;
end process y_bus_rd_port;
-- General read port of the register file (6 bit addressing scheme).
-- reg_rd_addr(5 downto 3) selects the register group, reg_rd_addr(2 downto 0)
-- the register inside the group. Unused encodings read as zero.
-- Reading the full accumulators A / B returns the limited value and raises
-- reg_rd_limited_a / reg_rd_limited_b for the limiter.
--
-- The process is purely combinational, so the sensitivity list contains every
-- signal that is read, including limited_a1 / limited_b1.
reg_rd_port: process(reg_rd_addr, x0,x1,y0,y1,a0,a1,a2,b0,b1,b2,
                     limited_a1,limited_b1,
                     omr,ccr,mr,addr_r,addr_n,addr_m,stack_pointer,
                     loop_address,loop_counter,system_stack_ssl,system_stack_ssh) is
    variable reg_addr : integer range 0 to 7;
begin
    reg_addr := to_integer(unsigned(reg_rd_addr(2 downto 0)));
    -- default: read as zero, no limited value requested
    reg_rd_data <= (others => '0');
    reg_rd_limited_a <= '0';
    reg_rd_limited_b <= '0';
    case reg_rd_addr(5 downto 3) is
        -- X0, X1, Y0, Y1
        when "000" =>
            case reg_rd_addr(2 downto 0) is
                when "100" =>
                    reg_rd_data <= std_logic_vector(x0);
                when "101" =>
                    reg_rd_data <= std_logic_vector(x1);
                when "110" =>
                    reg_rd_data <= std_logic_vector(y0);
                when "111" =>
                    reg_rd_data <= std_logic_vector(y1);
                when others =>
            end case;
        -- A0, B0, A2, B2, A1, B1, A, B
        when "001" =>
            case reg_rd_addr(2 downto 0) is
                when "000" =>
                    reg_rd_data <= std_logic_vector(a0);
                when "001" =>
                    reg_rd_data <= std_logic_vector(b0);
                when "010" =>
                    -- MSBs are read as zero!
                    reg_rd_data(23 downto 8) <= (others => '0');
                    reg_rd_data(7 downto 0) <= std_logic_vector(a2);
                when "011" =>
                    -- MSBs are read as zero!
                    reg_rd_data(23 downto 8) <= (others => '0');
                    reg_rd_data(7 downto 0) <= std_logic_vector(b2);
                when "100" =>
                    reg_rd_data <= std_logic_vector(a1);
                when "101" =>
                    reg_rd_data <= std_logic_vector(b1);
                when "110" =>
                    reg_rd_data <= std_logic_vector(limited_a1);
                    reg_rd_limited_a <= '1';
                when "111" =>
                    reg_rd_data <= std_logic_vector(limited_b1);
                    reg_rd_limited_b <= '1';
                when others =>
            end case;
        -- R0-R7
        when "010" =>
            reg_rd_data <= std_logic_vector(resize(addr_r(reg_addr), 24));
        -- N0-N7
        when "011" =>
            reg_rd_data <= std_logic_vector(resize(addr_n(reg_addr), 24));
        -- M0-M7
        when "100" =>
            reg_rd_data <= std_logic_vector(resize(addr_m(reg_addr), 24));
        -- SR, OMR, SP, SSH, SSL, LA, LC
        when "111" =>
            -- the register is selected by the READ address (decoding the write
            -- address here would return an unrelated register)
            case reg_rd_addr(2 downto 0) is
                -- SR
                when "001" =>
                    reg_rd_data(23 downto 16) <= (others => '0');
                    reg_rd_data(15 downto 0) <= mr & ccr;
                -- OMR
                when "010" =>
                    reg_rd_data(23 downto 8) <= (others => '0');
                    reg_rd_data( 7 downto 0) <= omr;
                -- SP
                when "011" =>
                    reg_rd_data(23 downto 6) <= (others => '0');
                    reg_rd_data(5 downto 0) <= std_logic_vector(stack_pointer);
                -- SSH
                when "100" =>
                    -- TODO: reading SSH is not implemented yet; the port
                    -- returns zero for this address.
                    null;
                -- SSL: current stack entry, addressed by the four index bits of
                -- the stack pointer only (bits 5/4 are the UF/SE flags)
                when "101" =>
                    reg_rd_data <= (others => '0');
                    reg_rd_data(BW_ADDRESS-1 downto 0) <= std_logic_vector(system_stack_ssl(to_integer(stack_pointer(3 downto 0))));
                -- LA
                when "110" =>
                    reg_rd_data <= (others => '0');
                    reg_rd_data(BW_ADDRESS-1 downto 0) <= std_logic_vector(loop_address);
                -- LC
                when "111" =>
                    reg_rd_data <= (others => '0');
                    reg_rd_data(15 downto 0) <= std_logic_vector(loop_counter);
                when others =>
            end case;
        when others =>
    end case;
end process;
-- A limited accumulator value counts as "read" as soon as any of the three
-- read ports selects it.
rd_limited_a <= '1' when reg_rd_limited_a = '1'
                      or X_bus_rd_limited_a = '1'
                      or Y_bus_rd_limited_a = '1' else '0';
rd_limited_b <= '1' when reg_rd_limited_b = '1'
                      or X_bus_rd_limited_b = '1'
                      or Y_bus_rd_limited_b = '1' else '0';

-- Scaler and limiter on the read side of the accumulators.
-- Both accumulators are first scaled according to SR bits 11..10 and then
-- saturated to 48 bit if the scaled value does not fit. The limiting flag is
-- only raised when the saturated value is actually read.
data_shifter_limiter: process(a2,a1,a0,b2,b1,b0,sr,rd_limited_a,rd_limited_b) is
    variable acc_a          : signed(55 downto 0);
    variable acc_b          : signed(55 downto 0);
    variable a_out_of_range : boolean;
    variable b_out_of_range : boolean;
begin
    set_limiting_flag <= '0';

    -----------------
    -- DATA SCALING
    -----------------
    -- test against scaling bits S1, S0
    if sr(11 downto 10) = "01" then
        -- scale down (arithmetic right shift by one)
        acc_a := a2(7) & a2 & a1 & a0(23 downto 1);
        acc_b := b2(7) & b2 & b1 & b0(23 downto 1);
    elsif sr(11 downto 10) = "10" then
        -- scale up (arithmetic left shift by one)
        acc_a := a2(6 downto 0) & a1 & a0 & '0';
        acc_b := b2(6 downto 0) & b1 & b0 & '0';
    else
        -- no scaling
        acc_a := a2 & a1 & a0;
        acc_b := b2 & b1 & b0;
    end if;

    ------------------
    -- DATA LIMITING
    ------------------
    -- the value fits into 48 bit when the extension byte and the MSB of the
    -- upper word are a plain sign extension (all ones or all zeros)
    a_out_of_range := not (acc_a(55 downto 47) = "111111111" or acc_a(55 downto 47) = "000000000");
    b_out_of_range := not (acc_b(55 downto 47) = "111111111" or acc_b(55 downto 47) = "000000000");

    -- accumulator A
    if a_out_of_range then
        if acc_a(55) = '0' then
            -- positive overflow: largest positive value
            limited_a1 <= X"7FFFFF";
            limited_a0 <= X"FFFFFF";
        else
            -- negative overflow: most negative value
            limited_a1 <= X"800000";
            limited_a0 <= X"000000";
        end if;
        -- set the limit flag in the status register
        if rd_limited_a = '1' then
            set_limiting_flag <= '1';
        end if;
    else
        limited_a1 <= acc_a(47 downto 24);
        limited_a0 <= acc_a(23 downto 0);
    end if;

    -- accumulator B
    if b_out_of_range then
        if acc_b(55) = '0' then
            -- positive overflow: largest positive value
            limited_b1 <= X"7FFFFF";
            limited_b0 <= X"FFFFFF";
        else
            -- negative overflow: most negative value
            limited_b1 <= X"800000";
            limited_b0 <= X"000000";
        end if;
        -- set the limit flag in the status register
        if rd_limited_b = '1' then
            set_limiting_flag <= '1';
        end if;
    else
        limited_b1 <= acc_b(47 downto 24);
        limited_b0 <= acc_b(23 downto 0);
    end if;
end process;
end architecture rtl;

View File

@@ -0,0 +1,182 @@
------------------------------------------------------------------------------
--! @file
--! @author Matthias Alles
--! @date 01/2009
--! @brief Global types
--!
------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.parameter_pkg.all;
package types_pkg is

    ---------------------
    -- ADDRESSING MODES
    ---------------------
    -- the different addressing modes
    type ADGen_mode_type is (
        NOP,
        POST_MIN_N, POST_PLUS_N,
        POST_MIN_1, POST_PLUS_1,
        INDEXED_N,
        PRE_MIN_1,
        ABSOLUTE,
        IMMEDIATE
    );

    ------------------------
    -- Decoded instructions
    ------------------------
    -- NOTE: the order of the literals is kept exactly as declared originally.
    type instructions_type is (
        INSTR_NOP,   INSTR_RTI,   INSTR_ILLEGAL, INSTR_SWI,
        INSTR_RTS,   INSTR_RESET, INSTR_WAIT,    INSTR_STOP,
        INSTR_ENDDO, INSTR_ANDI,  INSTR_ORI,     INSTR_DIV,
        INSTR_NORM,  INSTR_LUA,   INSTR_MOVEC,   INSTR_REP,
        INSTR_DO,    INSTR_MOVEM, INSTR_MOVEP,   INSTR_PM_MOVEM,
        INSTR_BCLR,  INSTR_BSET,  INSTR_JCLR,    INSTR_JSET,
        INSTR_JMP,   INSTR_JCC,   INSTR_BCHG,    INSTR_BTST,
        INSTR_JSCLR, INSTR_JSSET, INSTR_JSR,     INSTR_JSCC
    );

    -- eight address values, one per index 0 to 7
    type addr_array is array(0 to 7) of unsigned(BW_ADDRESS-1 downto 0);

    --------------
    -- ALU TYPES
    --------------
    type alu_shift_mode is (NO_SHIFT, SHIFT_LEFT, SHIFT_RIGHT, ZEROS);

    -- treatment of a single condition code flag, and of all eight of them
    type alu_ccr_flag is (DONT_TOUCH, CLEAR, MODIFY, SET);
    type alu_ccr_flag_array is array(7 downto 0) of alu_ccr_flag;

    -- control signals for the ALU
    type alu_ctrl_type is record
        -- multiplier operands: x0,x1,y0,y1
        mul_op1         : std_logic_vector(1 downto 0);
        mul_op2         : std_logic_vector(1 downto 0);
        -- shifter source: a,b
        shift_src       : std_logic;
        -- 00: pos, 01: neg, 10: sign dependent, 11: reserved
        shift_src_sign  : std_logic_vector(1 downto 0);
        shift_mode      : alu_shift_mode;
        -- 0: logical shift, 1: rotate shift
        rotate          : std_logic;
        -- x0,x1,y0,y1,x,y,a,b
        add_src_stage_1 : std_logic_vector(2 downto 0);
        -- 00: 0 , 01: add_src_1, 10: mul_result, 11: reserved
        add_src_stage_2 : std_logic_vector(1 downto 0);
        -- 00: pos, 01: neg, 10: sign dependent, 11: reserved
        add_src_sign    : std_logic_vector(1 downto 0);
        -- 000: none, 001: and, 010: or, 011: eor, 100: not
        logic_function  : std_logic_vector(2 downto 0);
        -- only accumulator bits 47 downto 24 affected?
        word_24_update  : std_logic;
        -- 00: no rounding, 01: rounding, 10: add carry, 11: subtract carry
        rounding_used   : std_logic_vector(1 downto 0);
        -- 0: do not update accumulator, 1: update accumulator
        store_result    : std_logic;
        -- 0: a, 1: b
        dst_accu        : std_logic;
        -- DIV instruction? Special ALU operations needed!
        div_instr       : std_logic;
        -- NORM instruction? Special ALU operations needed!
        norm_instr      : std_logic;
        ccr_flags_ctrl  : alu_ccr_flag_array;
    end record;

    -------------------
    -- PIPELINE TYPES
    -------------------
    -- signals carried by one pipeline stage
    type pipeline_signals is record
        instr_word      : std_logic_vector(23 downto 0);
        pc              : unsigned(BW_ADDRESS-1 downto 0);
        dble_word_instr : std_logic;
        instr_array     : instructions_type;
        act_array       : std_logic_vector(NUM_ACT_SIGNALS-1 downto 0);
        dec_activate    : std_logic;
        adgen_mode_a, adgen_mode_b
                        : adgen_mode_type;
        reg_wr_addr, reg_rd_addr
                        : std_logic_vector(5 downto 0);
        x_bus_rd_addr, x_bus_wr_addr, y_bus_rd_addr, y_bus_wr_addr
                        : std_logic_vector(1 downto 0);
        l_bus_addr      : std_logic_vector(2 downto 0);
        adgen_address_x, adgen_address_y
                        : unsigned(BW_ADDRESS-1 downto 0);
        RAM_out_x, RAM_out_y
                        : std_logic_vector(23 downto 0);
        alu_ctrl        : alu_ctrl_type;
    end record;

    -- one pipeline_signals record per pipeline stage
    type pipeline_type is array(0 to PIPELINE_DEPTH-1) of pipeline_signals;

    ------------------
    -- REGISTER FILE
    ------------------
    type register_file_type is record
        a, b           : signed(55 downto 0);
        x0, x1, y0, y1 : signed(23 downto 0);
        la             : unsigned(BW_ADDRESS-1 downto 0);
        lc             : unsigned(15 downto 0);
        addr_r, addr_n, addr_m
                       : addr_array;
        ccr, mr        : std_logic_vector(7 downto 0);
        sr             : std_logic_vector(15 downto 0);
        omr            : std_logic_vector(7 downto 0);
        stack_pointer  : unsigned(5 downto 0);
        -- disabled elements, kept for reference:
        -- system_stack_ssh : stack_array_type;
        -- system_stack_ssl : stack_array_type;
        current_ssh, current_ssl
                       : std_logic_vector(BW_ADDRESS-1 downto 0);
    end record;

    -- write port for one address register: register number and new value
    type addr_wr_port_type is record
        -- disabled element, kept for reference:
        -- write_valid : std_logic;
        reg_number : unsigned(2 downto 0);
        reg_value  : unsigned(15 downto 0);
    end record;

    -----------------
    -- MEMORY TYPES
    -----------------
    -- request towards a memory: read address/enable, write address/enable, write data
    type mem_ctrl_type_in is record
        rd_addr : unsigned(BW_ADDRESS-1 downto 0);
        rd_en   : std_logic;
        wr_addr : unsigned(BW_ADDRESS-1 downto 0);
        wr_en   : std_logic;
        data_in : std_logic_vector(23 downto 0);
    end record;

    -- response from a memory: read data together with its valid flag
    type mem_ctrl_type_out is record
        data_out       : std_logic_vector(23 downto 0);
        data_out_valid : std_logic;
    end record;

    type memory_type is (X_MEM, Y_MEM, P_MEM);

    ---------------
    -- STACK TYPES
    ---------------
    type stack_array_type is array(0 to 15) of std_logic_vector(BW_ADDRESS-1 downto 0);

    -- what a push request places on the stack
    type push_stack_content_type is (PC, PC_AND_SR, LA_AND_LC);

    type push_stack_type is record
        valid   : std_logic;
        pc      : unsigned(BW_ADDRESS-1 downto 0);
        content : push_stack_content_type;
    end record;

    -- disabled alternative, kept for reference:
    -- type pop_stack_type is std_logic;
    type pop_stack_type is record
        valid : std_logic;
        -- content : pop_stack_content_type;
    end record;

end package types_pkg;

View File

@@ -0,0 +1,49 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.parameter_pkg.all;
use work.types_pkg.all;
-- Testbench entity for the pipeline. It has no ports; the only parameter is
-- the generic clk_period (default 10 ns), which the architecture uses to
-- time the generated clock and reset.
entity tb_pipeline is
    generic (
        clk_period : time := 10 ns
    );
end entity tb_pipeline;
-- Testbench architecture: instantiates the pipeline component and drives its
-- clock and reset inputs. The clock process never terminates, so the
-- simulation runs until it is stopped externally.
architecture uut of tb_pipeline is

    -- time for which rst is held at '1' after simulation start
    constant RESET_DURATION : time := 10 * clk_period;

    signal clk : std_logic := '0';
    signal rst : std_logic;

    component pipeline is
        port (
            clk : in std_logic;
            rst : in std_logic
        );
    end component pipeline;

begin

    -- unit under test
    uut: pipeline
        port map (
            clk => clk,
            rst => rst
        );

    -- free running clock: toggles every half clock period, starting from '0'
    clk_gen: process is
    begin
        loop
            wait for clk_period/2;
            clk <= not clk;
        end loop;
    end process clk_gen;

    -- reset pulse: '1' from simulation start, released after RESET_DURATION,
    -- then the process suspends forever
    rst_gen: process is
    begin
        rst <= '1';
        wait for RESET_DURATION;
        rst <= '0';
        wait;
    end process rst_gen;

end architecture uut;