Files
FireBee_Setup/tools/vision/LDV/SRC/bhole/bhole.s
2022-11-14 10:05:42 +01:00

636 lines
22 KiB
ArmAsm

.IMPORT pre_cosin ; array of 2*ITRIGO_ARRAY_SIZE longs
.IMPORT pre_loffset ; long*
.EXPORT bh_xd, bh_yd, bh_rd2 ; long
.EXPORT bh_xs, bh_ys, bh_ref ; long
.EXPORT bh_xc, bh_yc ; long
.EXPORT bh_out_w, bh_out_h ; short
.EXPORT bh_in_w, bh_in_h ; short
.EXPORT bh_out_x, bh_out_y ; short
.EXPORT BHL32_68030,BHL16_68030,BHLInd_68030
.EXPORT BHL32_F68030,BHL16_F68030,BHLInd_F68030
.EXPORT lcosin_fpu
ITRIGO_ARRAY_SIZE EQU (1 << 14)
ITRIGO_ANGLE_MAX EQU (ITRIGO_ARRAY_SIZE-1)
QUARTER EQU (ITRIGO_ARRAY_SIZE/4)
; The black hole transformation consists of:
; Rotating the destination pixel around the black hole center.
; The closer we are to the black hole, the bigger the rotation angle is.
; The angle computation for the black hole transformation has 2 (CPU) or 3 (FPU) parts.
; The maximum possible angle (depending on the LDV parameter "Rotation angle") is divided by:
; CPU: the square of the distance to the black hole center
; FPU: the distance to the black hole center; here we need the 68881 instruction FSQRT
; So there are up to 3 parts in the macro calculations:
; 1. Before dividing by the distance or its square
; 1a. (FPU only) Compute the distance
; 2. Divide by either the distance or its square and state whether the resulting pixel is in the source image
; Dividing by the distance instead of its square gives better visual results
; BH_COMP_1: first part of the per-pixel computation (used by both CPU and FPU paths).
; In:  A5 = running bh_rd2, A6 = current bh_xd
; Out: A5 = bh_rd2 + 2*bh_xd + 1 (updated running value)
;      D1 = updated bh_rd2 + 1 (divisor, never 0 as long as bh_rd2 >= 0)
;      D2 = bh_ref (dividend)
; Clobbers: D0
; NOTE(review): the update looks like the incremental form (x+1)^2 = x^2 + 2x + 1
; of a squared distance - confirm against the C caller that sets bh_rd2.
MACRO BH_COMP_1
MOVE.L A6,D0 ; Remember A6=bh_xd
ADD.L D0,D0 ; D0=2*bh_xd
ADDQ.L #1,D0 ; D0=2*bh_xd+1
MOVE.L A5,D1 ; Remember A5=bh_rd2
ADD.L D0,D1 ; bh_rd2(D1) += 1+2*bh_xd(D0)
MOVE.L bh_ref(PC),D2 ; D2=bh_ref, dividend for the DIVU.L in BH_COMP_2
MOVE.L D1,A5 ; Save bh_rd2, free D1
ADDQ.L #1,D1 ; We need bh_rd2+1 for divide
ENDM
; BH_COMP_2: second part of the per-pixel computation.
; In:  D1 = divisor (from BH_COMP_1, or its square root on the FPU path)
;      D2 = bh_ref, A2 -> pre_cosin table, A6 = bh_xd
; Out: pixel in range    : falls through with D6 = 0, A3 = xs, D4 = ys
;                          (both already offset by bh_xc / bh_yc)
;      pixel out of range: branches to bail_out_addr with D6 = 16; A3/D4 do not
;                          hold a complete (xs,ys) pair in that case
; Clobbers: D0-D6, A3
; bail_out_addr is reached through short branches (BLT.S/BGE.S), so it has to be
; within short-branch distance of the expansion.
; The table is read as 8-byte entries: cosine long at +0, sine long at +4.
MACRO BH_COMP_2 bail_out_addr
DIVU.L D1,D2 ; alpha(D2)=bh_ref/D1 (D1=1+bh_rd2, or its square root on the FPU path)
ANDI.L #ITRIGO_ANGLE_MAX,D2 ; alpha &= ITRIGO_ANGLE_MAX
LSL.L #3,D2 ; D2=offset for pre_cosin array
MOVE.L A6,D0 ; D0=bh_xd
MOVE.L bh_yd(PC),D1 ; D1=bh_yd
MOVE.L 4(A2,D2.L),D3 ; A2->pre_cosin, D3.L=SIN(alpha)
MOVE.L (A2,D2.L),D2 ; A2->pre_cosin, D2.L=COS(alpha)
MOVEQ.L #16,D6 ; Prepare shifts/assume not in range
MOVE.L D0,D4
MULS.L D2,D4 ; D4=bh_xd * pre_cos[alpha]
MOVE.L D1,D5
MULS.L D3,D5 ; D5=bh_yd * pre_sin[alpha]
SUB.L D5,D4 ; D4=bh_xd * pre_cos[alpha] - bh_yd * pre_sin[alpha]
ASR.L D6,D4 ; D6=16: drop the 16 fractional bits of the table values
ADD.L bh_xc(PC),D4 ; D4.L=bh_xs (D6 is still 16: pixel assumed not in range)
BLT.S bail_out_addr ; xs < 0: not in range
CMP.W bh_in_w(PC),D4 ; word compare: only the low word of xs is checked
BGE.S bail_out_addr ; xs >= bh_in_w: not in range
MOVE.L D4,A3 ; A3=bh_xs (already shifted right by 16 and offset by bh_xc)
MOVE.L D0,D4
MULS.L D3,D4 ; D4=bh_xd * pre_sin[alpha]
MOVE.L D1,D5
MULS.L D2,D5 ; D5=bh_yd * pre_cos[alpha]
ADD.L D5,D4 ; D4=bh_xd * pre_sin[alpha] + bh_yd * pre_cos[alpha]
ASR.L D6,D4 ; D6=16: drop the 16 fractional bits of the table values
ADD.L bh_yc(PC),D4 ; D4.L=bh_ys
BLT.S bail_out_addr ; ys < 0: not in range
CMP.W bh_in_h(PC),D4 ; word compare: only the low word of ys is checked
BGE.S bail_out_addr ; ys >= bh_in_h: not in range
MOVEQ.L #0,D6 ; pixel in range
ENDM
; Macro for CPU 68030 (integer only)
; Expands BH_COMP_1 followed by BH_COMP_2, so the angle is bh_ref/(bh_rd2+1).
; bail_out_addr: label branched to (with D6=16) when the source pixel is out of range;
; on fall-through D6=0, A3=xs, D4=ys.
MACRO BH_COMPUTE_CPU bail_out_addr
BH_COMP_1
BH_COMP_2 bail_out_addr
ENDM
; Macro for using FPU
; What we do with the FPU is to compute the square root of D1 (bh_rd2+1 from BH_COMP_1),
; which is then the distance to the black hole center instead of its square
; bail_out_addr: label branched to (with D6=16) when the source pixel is out of range;
; on fall-through D6=0, A3=xs, D4=ys.
; Clobbers FP0 in addition to the registers used by BH_COMP_1/BH_COMP_2.
MACRO BH_COMPUTE_FPU bail_out_addr
BH_COMP_1
FMOVE.L D1,FP0 ; FP0=bh_rd2+1 (long integer converted to extended)
FSQRT FP0 ; FP0=sqrt(bh_rd2+1)
FMOVE.L FP0,D1 ; D1=square root converted back to a long integer, used as divisor
BH_COMP_2 bail_out_addr
ENDM
; Init parameters valid for the entire call (non symmetric routines)
; Out: A2 = long stored at pre_cosin (used as table base by BH_COMP_2)
;      A4 = long stored at pre_loffset (line offset table base)
;      A5 = bh_rd2, A6 = bh_xd
;      D7.W = bh_out_w-1, loop counter for DBF
; NOTE(review): pre_cosin is read as a pointer variable here (its content, not its
; address, ends up in A2) - confirm it is declared as a pointer on the C side.
MACRO INIT_BHL
MOVE.L pre_cosin,A2
MOVE.L pre_loffset,A4 ; A4-->preoffset
MOVE.L bh_rd2(PC),A5 ; A5=bh_rd2
MOVE.L bh_xd(PC),A6 ; A6=bh_xd
MOVE.W bh_out_w(PC),D7
SUBQ.W #1,D7 ; For DBF
ENDM
; Init parameters valid for the entire call (symmetric routines)
; Same register setup as INIT_BHL for A2, A4, A5 and A6, but instead of a DBF
; counter it stores the last bh_xd value to process in last_xd:
; last_xd = (bh_out_w >> 1) + low word of bh_xd
; Clobbers: D7
MACRO INIT_BHL_SYM
MOVE.L pre_cosin,A2
MOVE.L pre_loffset,A4 ; A4-->preoffset
MOVE.L bh_rd2(PC),A5 ; A5=bh_rd2
MOVE.L bh_xd(PC),A6 ; A6=bh_xd
MOVE.W bh_out_w(PC),D7
LSR.W #1,D7 ; loop to bh_out_w/2 counts
ADD.W A6,D7 ; starting from bh_xd
MOVE.W D7,last_xd ; the symmetric loops compare A6 against this value
ENDM
; Get pixel TC32 -->dest (Dx)
; A4-->pre_loffset
; p: address register pointing to the image, x: column, y: line (used as index register)
; dest = long at p[(pre_loffset[y] + x) * 4]
; dest is also used to hold the intermediate pixel index.
MACRO GET_TCPIXEL32 p,x,y,dest
MOVE.L (A4,y.L*4),dest ; dest=pre_loffset[y]
ADD.L x,dest ; dest=pre_loffset[y]+x, index of the pixel
MOVE.L (p,dest.L*4),dest ; dest=32bit pixel at that index
ENDM
; Set pixel TC32 val(Dx) -->dest
; A4-->pre_loffset
; scratch_Dx scratch variable
; p: address register pointing to the image, x: column, y: line (used as index register)
; Stores the long val at p[(pre_loffset[y] + x) * 4]; scratch_Dx ends up holding the pixel index.
MACRO SET_TCPIXEL32 p,x,y,val,scratch_Dx
MOVE.L (A4,y.L*4),scratch_Dx ; scratch=pre_loffset[y]
ADD.L x,scratch_Dx ; scratch=pre_loffset[y]+x, index of the pixel
MOVE.L val,(p,scratch_Dx.L*4) ; write the 32bit pixel
ENDM
; Get pixel TC16 -->dest (Dx)
; A4-->pre_loffset
; p: address register pointing to the image, x: column, y: line (used as index register)
; dest.W = word at p[(pre_loffset[y] + x) * 2]
; Only the low word of dest receives the pixel; with a data register the upper
; word still holds the upper word of the pixel index.
MACRO GET_TCPIXEL16 p,x,y,dest
MOVE.L (A4,y.L*4),dest ; dest=pre_loffset[y]
ADD.L x,dest ; dest=pre_loffset[y]+x, index of the pixel
MOVE.W (p,dest.L*2),dest ; dest.W=16bit pixel at that index
ENDM
; Set pixel TC16 val(Dx) -->dest
; A4-->pre_loffset
; scratch_Dx scratch variable
; p: address register pointing to the image, x: column, y: line (used as index register)
; Stores the low word of val at p[(pre_loffset[y] + x) * 2]; scratch_Dx ends up holding the pixel index.
MACRO SET_TCPIXEL16 p,x,y,val,scratch_Dx
MOVE.L (A4,y.L*4),scratch_Dx ; scratch=pre_loffset[y]
ADD.L x,scratch_Dx ; scratch=pre_loffset[y]+x, index of the pixel
MOVE.W val,(p,scratch_Dx.L*2) ; write the 16bit pixel
ENDM
; Rotate 90 degrees: (x,y) becomes (-y,x)
; x,y,scratch_Dx : registers, y has to be Dx (NEG.L is applied to it)
; #define R90(x,y) { long _x=x; x=-y; y=_x; }
; scratch_Dx is left holding the original x.
MACRO R90 x,y,scratch_Dx
MOVE.L x,scratch_Dx ; _x=x
NEG.L y ; y=-y
MOVE.L y,x ; x=-y (original y)
MOVE.L scratch_Dx,y ; y=_x
ENDM
; Check that pixel (x,y) lies inside an image of size w x h
; Branches (short) to bailout when x<0, y<0, x>=w or y>=h (signed tests);
; falls through when the pixel is inside.
; All tests and compares are written with word size (.W).
; Does not modify x or y; only condition codes are affected.
MACRO PIXINIMG x,y,w,h,bailout
TST.W x
BLT.S bailout ; x < 0
TST.W y
BLT.S bailout ; y < 0
CMP.W w,x
BGE.S bailout ; x >= w
CMP.W h,y
BGE.S bailout ; y >= h
ENDM
; On sample file 00_1480ms.jpg, Aranym 32bit:
; BHL32_68000 takes about 1450ms
; BHL32_68030 takes about 740ms
; 68030 routine for True Color 32bit
; BHL32_68030(short use_sym, void* in, void* out)
; D0, A0 A1
; use_sym != 0: continues in BHL32_68030_sym.
; use_sym == 0: writes bh_out_w consecutive 32bit pixels starting at &out[bh_out_x];
; a destination pixel whose source is out of range is written as $FFFFFFFF.
; D0-D7/A2-A6 are saved on entry and restored before RTS.
BHL32_68030:
MOVEM.L D0-D7/A2-A6,-(SP) ; Save context
TST.W D0
BNE BHL32_68030_sym ; use_sym set: symmetric variant (context stays on the stack)
MOVEQ.L #0,D7
MOVE.W bh_out_x(PC),D7
LSL.L #2,D7 ; 4 bytes per pixel
ADD.L D7,A1 ; A1-->&out[bh_out_x]
; A2/A4/A5/A6 and the DBF counter D7 are set up by INIT_BHL
INIT_BHL
bhl32_l030:
BH_COMPUTE_CPU bhl32_tpir_030 ; -->D6=0:pixel in range
; The bail-out label is the instruction right after the macro: both outcomes
; continue here and are told apart by D6
bhl32_tpir_030:
MOVEQ.L #-1,D0 ; Assume out of range
TST.W D6
BNE.S bhl32_setpix_030 ; D6<>0: keep the out of range value
MOVE.L (A4,D4.L*4),D0 ; D4=bh_ys, D0=pre_loffset[bh_ys]
ADD.L A3,D0 ; D0=pre_loffset[bh_ys] + bh_xs(A3)
MOVE.L (A0,D0.L*4),D0 ; D0=in_ptr[pre_loffset[bh_ys] + bh_xs]
bhl32_setpix_030:
MOVE.L D0,(A1)+ ; write destination pixel, advance to the next one
ADD.L #1,A6 ; bh_xd++
DBF D7,bhl32_l030
MOVEM.L (SP)+,D0-D7/A2-A6 ; Restore context
RTS
; Symmetric variant of BHL32_68030, entered from BHL32_68030 when use_sym (D0.W) is not 0.
; The context (D0-D7/A2-A6) is already on the stack at this point.
; For every bh_xd from its initial value up to and including last_xd, the source pixel
; is computed once and the result is plotted at (xd,yd), then at the 3 successive
; 90 degree rotations of (xd,yd) around (bh_xc,bh_yc), using the equally rotated source.
; Pixels are addressed through pre_loffset, so A0/A1 are used as image base addresses.
; NOTE(review): when BH_COMPUTE_CPU bails out early, A3/D4 do not hold a complete
; (xs,ys) pair, yet the loop at deduc32_030 still rotates and bounds-checks them -
; confirm this matches the C reference implementation.
BHL32_68030_sym:
INIT_BHL_SYM
bhl32_l030_sym:
BH_COMPUTE_CPU bhl32_tpir_030_sym ; -->D6=0:pixel in range
bhl32_tpir_030_sym:
MOVEQ.L #-1,D0 ; Assume out of range
TST.W D6
BNE.S bhl32_setpix_030_sym
GET_TCPIXEL32 A0,A3,D4,D0 ; A0-->in_ptr, A3=bh_xs+bh_xc, D4=bh_ys+bh_yc
; D0=GET_TCPIXEL(in_ptr, bh_xs+bh_xc, bh_ys+bh_yc)
bhl32_setpix_030_sym:
MOVE.L A6,-(SP) ; Save bh_xd
MOVE.L bh_xc(PC),D2 ; D2=bh_xc
MOVE.L bh_yc(PC),D3 ; D3=bh_yc
ADD.L D2,A6 ; A6=bh_xd+bh_xc
MOVE.L bh_yd(PC),D7
ADD.L D3,D7 ; D7=bh_yd+bh_yc
SET_TCPIXEL32 A1,A6,D7,D0,D5 ; SET_TCPIXEL(out_ptr, bh_xd+bh_xc, bh_yd+bh_yc, GET_TCPIXEL(in_ptr, bh_xs+bh_xc, bh_ys+bh_yc))
MOVEQ.L #2,D1 ; 3 loops, -1 for DBF
deduc32_030:
SUB.L D2,A3 ; We need now bh_xs(A3) and bh_ys(D4) relative to
SUB.L D3,D4 ; Black Hole center to use R90
SUB.L D2,A6 ; We need now bh_xd(A6) and bh_yd(D7) relative to
SUB.L D3,D7 ; Black Hole center to use R90
R90 A6,D7,D0 ; Rotate (xd,yd) by 90 degrees (PI/2)
R90 A3,D4,D0 ; Rotate (xs,ys) by 90 degrees (PI/2)
ADD.L D2,A3 ; Now we need coords in image raster
ADD.L D3,D4
ADD.L D2,A6
ADD.L D3,D7
PIXINIMG A6,D7,bh_in_w(PC),bh_in_h(PC),dnotinimg32_030 ; Check if pixel in dest is within bounds
MOVEQ.L #-1,D0 ; Assume out of range
PIXINIMG A3,D4,bh_in_w(PC),bh_in_h(PC),plotdest32_030 ; Check if pixel in src is within bounds
GET_TCPIXEL32 A0,A3,D4,D0 ; A0-->in_ptr, A3=bh_xs+bh_xc, D4=bh_ys+bh_yc
; D0=GET_TCPIXEL(in_ptr, bh_xs+bh_xc, bh_ys+bh_yc)
plotdest32_030:
SET_TCPIXEL32 A1,A6,D7,D0,D5 ; SET_TCPIXEL(out_ptr, bh_xd+bh_xc, bh_yd+bh_yc, GET_TCPIXEL(in_ptr, bh_xs+bh_xc, bh_ys+bh_yc))
dnotinimg32_030:
DBF D1,deduc32_030
MOVE.L (SP)+,A6 ; Restore bh_xd
ADD.L #1,A6 ; bh_xd++
CMP.W last_xd(PC),A6 ; last_xd was stored by INIT_BHL_SYM
BLE bhl32_l030_sym ; loop while bh_xd <= last_xd
MOVEM.L (SP)+,D0-D7/A2-A6 ; Restore context
RTS
; 68030 routine for True Color 16bit
; BHL16_68030(short use_sym, void* in, void* out)
; D0, A0 A1
; use_sym != 0: continues in BHL16_68030_sym.
; use_sym == 0: writes bh_out_w consecutive 16bit pixels starting at &out[bh_out_x];
; a destination pixel whose source is out of range is written as $FFFF.
; D0-D7/A2-A6 are saved on entry and restored before RTS.
BHL16_68030:
MOVEM.L D0-D7/A2-A6,-(SP) ; Save context
TST.W D0
BNE BHL16_68030_sym ; use_sym set: symmetric variant (context stays on the stack)
MOVEQ.L #0,D7
MOVE.W bh_out_x(PC),D7
ADD.L D7,D7 ; 2 bytes per pixel
ADD.L D7,A1 ; A1-->&out[bh_out_x]
; A2/A4/A5/A6 and the DBF counter D7 are set up by INIT_BHL
INIT_BHL
bhl16_l030:
BH_COMPUTE_CPU bhl16_tpir_030 ; -->D6=0:pixel in range
; The bail-out label is the instruction right after the macro: both outcomes
; continue here and are told apart by D6
bhl16_tpir_030:
MOVEQ.L #-1,D0 ; Assume out of range
TST.W D6
BNE.S bhl16_setpix_030 ; D6<>0: keep the out of range value
MOVE.L (A4,D4.L*4),D0 ; D4=bh_ys, D0=pre_loffset[bh_ys]
ADD.L A3,D0 ; D0=pre_loffset[bh_ys] + bh_xs(A3)
MOVE.W (A0,D0.L*2),D0 ; D0=in_ptr[pre_loffset[bh_ys] + bh_xs]
bhl16_setpix_030:
MOVE.W D0,(A1)+ ; write destination pixel, advance to the next one
ADD.L #1,A6 ; bh_xd++
DBF D7,bhl16_l030
MOVEM.L (SP)+,D0-D7/A2-A6 ; Restore context
RTS
; Symmetric variant of BHL16_68030, entered from BHL16_68030 when use_sym (D0.W) is not 0.
; The context (D0-D7/A2-A6) is already on the stack at this point.
; For every bh_xd from its initial value up to and including last_xd, the source pixel
; is computed once and the result is plotted at (xd,yd), then at the 3 successive
; 90 degree rotations of (xd,yd) around (bh_xc,bh_yc), using the equally rotated source.
; Pixels are addressed through pre_loffset, so A0/A1 are used as image base addresses.
; NOTE(review): when BH_COMPUTE_CPU bails out early, A3/D4 do not hold a complete
; (xs,ys) pair, yet the loop at deduc16_030 still rotates and bounds-checks them -
; confirm this matches the C reference implementation.
BHL16_68030_sym:
INIT_BHL_SYM
bhl16_l030_sym:
BH_COMPUTE_CPU bhl16_tpir_030_sym ; -->D6=0:pixel in range
bhl16_tpir_030_sym:
MOVEQ.L #-1,D0 ; Assume out of range
TST.W D6
BNE.S bhl16_setpix_030_sym
GET_TCPIXEL16 A0,A3,D4,D0 ; A0-->in_ptr, A3=bh_xs+bh_xc, D4=bh_ys+bh_yc
; D0=GET_TCPIXEL(in_ptr, bh_xs+bh_xc, bh_ys+bh_yc)
bhl16_setpix_030_sym:
MOVE.L A6,-(SP) ; Save bh_xd
MOVE.L bh_xc(PC),D2 ; D2=bh_xc
MOVE.L bh_yc(PC),D3 ; D3=bh_yc
ADD.L D2,A6 ; A6=bh_xd+bh_xc
MOVE.L bh_yd(PC),D7
ADD.L D3,D7 ; D7=bh_yd+bh_yc
SET_TCPIXEL16 A1,A6,D7,D0,D5 ; SET_TCPIXEL(out_ptr, bh_xd+bh_xc, bh_yd+bh_yc, GET_TCPIXEL(in_ptr, bh_xs+bh_xc, bh_ys+bh_yc))
MOVEQ.L #2,D1 ; 3 loops, -1 for DBF
deduc16_030:
SUB.L D2,A3 ; We need now bh_xs(A3) and bh_ys(D4) relative to
SUB.L D3,D4 ; Black Hole center to use R90
SUB.L D2,A6 ; We need now bh_xd(A6) and bh_yd(D7) relative to
SUB.L D3,D7 ; Black Hole center to use R90
R90 A6,D7,D0 ; Rotate (xd,yd) by 90 degrees (PI/2)
R90 A3,D4,D0 ; Rotate (xs,ys) by 90 degrees (PI/2)
ADD.L D2,A3 ; Now we need coords in image raster
ADD.L D3,D4
ADD.L D2,A6
ADD.L D3,D7
PIXINIMG A6,D7,bh_in_w(PC),bh_in_h(PC),dnotinimg16_030 ; Check if pixel in dest is within bounds
MOVEQ.L #-1,D0 ; Assume out of range
PIXINIMG A3,D4,bh_in_w(PC),bh_in_h(PC),plotdest16_030 ; Check if pixel in src is within bounds
GET_TCPIXEL16 A0,A3,D4,D0 ; A0-->in_ptr, A3=bh_xs+bh_xc, D4=bh_ys+bh_yc
; D0=GET_TCPIXEL(in_ptr, bh_xs+bh_xc, bh_ys+bh_yc)
plotdest16_030:
SET_TCPIXEL16 A1,A6,D7,D0,D5 ; SET_TCPIXEL(out_ptr, bh_xd+bh_xc, bh_yd+bh_yc, GET_TCPIXEL(in_ptr, bh_xs+bh_xc, bh_ys+bh_yc))
dnotinimg16_030:
DBF D1,deduc16_030
MOVE.L (SP)+,A6 ; Restore bh_xd
ADD.L #1,A6 ; bh_xd++
CMP.W last_xd(PC),A6 ; last_xd was stored by INIT_BHL_SYM
BLE bhl16_l030_sym ; loop while bh_xd <= last_xd
MOVEM.L (SP)+,D0-D7/A2-A6 ; Restore context
RTS
; 68030 routine for bitplane images, converted to indexes (one byte per pixel)
; BHLInd_68030(short use_sym, void* in, void* out)
; D0, A0 A1
; Writes bh_out_w consecutive bytes starting at out (A1);
; a destination pixel whose source is out of range is written as $FF.
; Unlike the True Color routines, use_sym (D0) is not examined and bh_out_x
; is not added to A1 here.
; D0-D7/A2-A6 are saved on entry and restored before RTS.
BHLInd_68030:
MOVEM.L D0-D7/A2-A6,-(SP) ; Save context
; A2/A4/A5/A6 and the DBF counter D7 are set up by INIT_BHL
INIT_BHL
bhlind_l030:
BH_COMPUTE_CPU bhlind_tpir_030 ; -->D6=0:pixel in range
; The bail-out label is the instruction right after the macro: both outcomes
; continue here and are told apart by D6
bhlind_tpir_030:
MOVEQ.L #-1,D0 ; Assume out of range
TST.W D6
BNE.S bhlind_setpix_030 ; D6<>0: keep the out of range value
MOVE.L (A4,D4.L*4),D0 ; D4=bh_ys, D0=pre_loffset[bh_ys]
ADD.L A3,D0 ; D0=pre_loffset[bh_ys] + bh_xs(A3)
MOVE.B (A0,D0.L),D0 ; D0=in_ptr[pre_loffset[bh_ys] + bh_xs]
bhlind_setpix_030:
MOVE.B D0,(A1)+ ; write destination index, advance to the next one
ADD.L #1,A6 ; bh_xd++
DBF D7,bhlind_l030
MOVEM.L (SP)+,D0-D7/A2-A6 ; Restore context
RTS
; Parameter block shared with the caller (exported symbols, see .EXPORT list).
; The routines read these with PC-relative addressing, so the block lives next
; to the code. Longs first, then shorts.
.EVEN
; long: current x of the destination pixel (loaded into A6 by INIT_BHL / INIT_BHL_SYM)
bh_xd:
DC.L 0
; long: y of the destination pixel (read by BH_COMP_2)
bh_yd:
DC.L 0
; long: running value updated by BH_COMP_1 (loaded into A5 by INIT_BHL / INIT_BHL_SYM)
bh_rd2:
DC.L 0
; long: exported, not referenced by the routines shown in this file
bh_xs:
DC.L 0
; long: exported, not referenced by the routines shown in this file
bh_ys:
DC.L 0
; long: dividend of the angle computation (read by BH_COMP_1)
bh_ref:
DC.L 0
; long: x offset added to the rotated x (black hole center)
bh_xc:
DC.L 0
; long: y offset added to the rotated y (black hole center)
bh_yc:
DC.L 0
; short: number of pixels to produce (loop count source)
bh_out_w:
DC.W 0
; short: exported, not referenced by the routines shown in this file
bh_out_h:
DC.W 0
; short: source width used for range checks
bh_in_w:
DC.W 0
; short: source height used for range checks
bh_in_h:
DC.W 0
; short: first destination pixel index (True Color non symmetric routines)
bh_out_x:
DC.W 0
; short: exported, not referenced by the routines shown in this file
bh_out_y:
DC.W 0
; short, internal (not exported): last bh_xd to process, written by INIT_BHL_SYM
last_xd:
DC.W 0
; 68030/FPU routine for True Color 32bit
; BHL32_F68030(short use_sym, void* in, void* out)
; D0, A0 A1
;BHL32_F68030:
; MOVEM.L D0-D7/A2-A6,-(SP) ; Save context
; FMOVE.X FP0,-(SP)
;
; INIT_BHL
;bhl32_lf030:
; BH_COMPUTE_FPU bhl32_tpir_f030 ; -->D6=0:pixel in range
;bhl32_tpir_f030:
; MOVEQ.L #-1,D0 ; Assume out of range
; TST.W D6
; BNE.S bhl32_setpix_f030
; MOVE.L (A4,D4.L*4),D0 ; D4=bh_ys, D0=pre_loffset[bh_ys]
; ADD.L A3,D0 ; D0=pre_loffset[bh_ys] + bh_xs(A3)
; MOVE.L (A0,D0.L*4),D0 ; D0=in_ptr[pre_loffset[bh_ys] + bh_xs]
;bhl32_setpix_f030:
; MOVE.L D0,(A1)+
; ADD.L #1,A6 ; bh_xd++
; DBF D7,bhl32_lf030
;
; FMOVE.X (SP)+,FP0
; MOVEM.L (SP)+,D0-D7/A2-A6 ; Restore context
; RTS
; 68030/FPU routine for True Color 32bit
; BHL32_F68030(short use_sym, void* in, void* out)
; D0, A0 A1
; Same as BHL32_68030 but uses BH_COMPUTE_FPU (FSQRT) for the angle divisor.
; use_sym != 0: continues in BHL32_F68030_sym.
; use_sym == 0: writes bh_out_w consecutive 32bit pixels starting at &out[bh_out_x];
; a destination pixel whose source is out of range is written as $FFFFFFFF.
; D0-D7/A2-A6 and FP0 are saved on entry and restored before RTS.
BHL32_F68030:
MOVEM.L D0-D7/A2-A6,-(SP) ; Save context
FMOVE.X FP0,-(SP) ; FP0 is used by BH_COMPUTE_FPU
TST.W D0
BNE BHL32_F68030_sym ; use_sym set: symmetric variant (context stays on the stack)
MOVEQ.L #0,D7
MOVE.W bh_out_x(PC),D7
LSL.L #2,D7 ; 4 bytes per pixel
ADD.L D7,A1 ; A1-->&out[bh_out_x]
; A2/A4/A5/A6 and the DBF counter D7 are set up by INIT_BHL
INIT_BHL
bhl32_lf030:
BH_COMPUTE_FPU bhl32_tpir_f030 ; -->D6=0:pixel in range
; The bail-out label is the instruction right after the macro: both outcomes
; continue here and are told apart by D6
bhl32_tpir_f030:
MOVEQ.L #-1,D0 ; Assume out of range
TST.W D6
BNE.S bhl32_setpix_f030 ; D6<>0: keep the out of range value
MOVE.L (A4,D4.L*4),D0 ; D4=bh_ys, D0=pre_loffset[bh_ys]
ADD.L A3,D0 ; D0=pre_loffset[bh_ys] + bh_xs(A3)
MOVE.L (A0,D0.L*4),D0 ; D0=in_ptr[pre_loffset[bh_ys] + bh_xs]
bhl32_setpix_f030:
MOVE.L D0,(A1)+ ; write destination pixel, advance to the next one
ADD.L #1,A6 ; bh_xd++
DBF D7,bhl32_lf030
FMOVE.X (SP)+,FP0 ; restore in reverse order of the saves
MOVEM.L (SP)+,D0-D7/A2-A6 ; Restore context
RTS
; Symmetric variant of BHL32_F68030, entered from BHL32_F68030 when use_sym (D0.W) is not 0.
; The context (D0-D7/A2-A6, then FP0) is already on the stack at this point.
; For every bh_xd from its initial value up to and including last_xd, the source pixel
; is computed once and the result is plotted at (xd,yd), then at the 3 successive
; 90 degree rotations of (xd,yd) around (bh_xc,bh_yc), using the equally rotated source.
; Pixels are addressed through pre_loffset, so A0/A1 are used as image base addresses.
; NOTE(review): when BH_COMPUTE_FPU bails out early, A3/D4 do not hold a complete
; (xs,ys) pair, yet the loop at deduc32_f030 still rotates and bounds-checks them -
; confirm this matches the C reference implementation.
BHL32_F68030_sym:
INIT_BHL_SYM
bhl32_lf030_sym:
BH_COMPUTE_FPU bhl32_tpir_f030_sym ; -->D6=0:pixel in range
bhl32_tpir_f030_sym:
MOVEQ.L #-1,D0 ; Assume out of range
TST.W D6
BNE.S bhl32_setpix_f030_sym
GET_TCPIXEL32 A0,A3,D4,D0 ; A0-->in_ptr, A3=bh_xs+bh_xc, D4=bh_ys+bh_yc
; D0=GET_TCPIXEL(in_ptr, bh_xs+bh_xc, bh_ys+bh_yc)
bhl32_setpix_f030_sym:
MOVE.L A6,-(SP) ; Save bh_xd
MOVE.L bh_xc(PC),D2 ; D2=bh_xc
MOVE.L bh_yc(PC),D3 ; D3=bh_yc
ADD.L D2,A6 ; A6=bh_xd+bh_xc
MOVE.L bh_yd(PC),D7
ADD.L D3,D7 ; D7=bh_yd+bh_yc
SET_TCPIXEL32 A1,A6,D7,D0,D5 ; SET_TCPIXEL(out_ptr, bh_xd+bh_xc, bh_yd+bh_yc, GET_TCPIXEL(in_ptr, bh_xs+bh_xc, bh_ys+bh_yc))
MOVEQ.L #2,D1 ; 3 loops, -1 for DBF
deduc32_f030:
SUB.L D2,A3 ; We need now bh_xs(A3) and bh_ys(D4) relative to
SUB.L D3,D4 ; Black Hole center to use R90
SUB.L D2,A6 ; We need now bh_xd(A6) and bh_yd(D7) relative to
SUB.L D3,D7 ; Black Hole center to use R90
R90 A6,D7,D0 ; Rotate (xd,yd) by 90 degrees (PI/2)
R90 A3,D4,D0 ; Rotate (xs,ys) by 90 degrees (PI/2)
ADD.L D2,A3 ; Now we need coords in image raster
ADD.L D3,D4
ADD.L D2,A6
ADD.L D3,D7
PIXINIMG A6,D7,bh_in_w(PC),bh_in_h(PC),dnotinimg32_f030 ; Check if pixel in dest is within bounds
MOVEQ.L #-1,D0 ; Assume out of range
PIXINIMG A3,D4,bh_in_w(PC),bh_in_h(PC),plotdest32_f030 ; Check if pixel in src is within bounds
GET_TCPIXEL32 A0,A3,D4,D0 ; A0-->in_ptr, A3=bh_xs+bh_xc, D4=bh_ys+bh_yc
; D0=GET_TCPIXEL(in_ptr, bh_xs+bh_xc, bh_ys+bh_yc)
plotdest32_f030:
SET_TCPIXEL32 A1,A6,D7,D0,D5 ; SET_TCPIXEL(out_ptr, bh_xd+bh_xc, bh_yd+bh_yc, GET_TCPIXEL(in_ptr, bh_xs+bh_xc, bh_ys+bh_yc))
dnotinimg32_f030:
DBF D1,deduc32_f030
MOVE.L (SP)+,A6 ; Restore bh_xd
ADD.L #1,A6 ; bh_xd++
CMP.W last_xd(PC),A6 ; last_xd was stored by INIT_BHL_SYM
BLE bhl32_lf030_sym ; loop while bh_xd <= last_xd
FMOVE.X (SP)+,FP0 ; restore in reverse order of the saves
MOVEM.L (SP)+,D0-D7/A2-A6 ; Restore context
RTS
; 68030/FPU routine for True Color 16bit
; BHL16_F68030(short use_sym, void* in, void* out)
; D0, A0 A1
; Same as BHL16_68030 but uses BH_COMPUTE_FPU (FSQRT) for the angle divisor.
; use_sym != 0: continues in BHL16_f68030_sym.
; use_sym == 0: writes bh_out_w consecutive 16bit pixels starting at &out[bh_out_x];
; a destination pixel whose source is out of range is written as $FFFF.
; D0-D7/A2-A6 and FP0 are saved on entry and restored before RTS.
BHL16_F68030:
MOVEM.L D0-D7/A2-A6,-(SP) ; Save context
FMOVE.X FP0,-(SP) ; FP0 is used by BH_COMPUTE_FPU
TST.W D0
BNE BHL16_f68030_sym ; use_sym set: symmetric variant (context stays on the stack)
MOVEQ.L #0,D7
MOVE.W bh_out_x(PC),D7
ADD.L D7,D7 ; 2 bytes per pixel
ADD.L D7,A1 ; A1-->&out[bh_out_x]
; A2/A4/A5/A6 and the DBF counter D7 are set up by INIT_BHL
INIT_BHL
bhl16_lf030:
BH_COMPUTE_FPU bhl16_tpir_f030 ; -->D6=0:pixel in range
; The bail-out label is the instruction right after the macro: both outcomes
; continue here and are told apart by D6
bhl16_tpir_f030:
MOVEQ.L #-1,D0 ; Assume out of range
TST.W D6
BNE.S bhl16_setpix_f030 ; D6<>0: keep the out of range value
MOVE.L (A4,D4.L*4),D0 ; D4=bh_ys, D0=pre_loffset[bh_ys]
ADD.L A3,D0 ; D0=pre_loffset[bh_ys] + bh_xs(A3)
MOVE.W (A0,D0.L*2),D0 ; D0=in_ptr[pre_loffset[bh_ys] + bh_xs]
bhl16_setpix_f030:
MOVE.W D0,(A1)+ ; write destination pixel, advance to the next one
ADD.L #1,A6 ; bh_xd++
DBF D7,bhl16_lf030
FMOVE.X (SP)+,FP0 ; restore in reverse order of the saves
MOVEM.L (SP)+,D0-D7/A2-A6 ; Restore context
RTS
; Symmetric variant of BHL16_F68030, entered from BHL16_F68030 when use_sym (D0.W) is not 0.
; The context (D0-D7/A2-A6, then FP0) is already on the stack at this point.
; For every bh_xd from its initial value up to and including last_xd, the source pixel
; is computed once and the result is plotted at (xd,yd), then at the 3 successive
; 90 degree rotations of (xd,yd) around (bh_xc,bh_yc), using the equally rotated source.
; Pixels are addressed through pre_loffset, so A0/A1 are used as image base addresses.
; NOTE(review): when BH_COMPUTE_FPU bails out early, A3/D4 do not hold a complete
; (xs,ys) pair, yet the loop at deduc16_f030 still rotates and bounds-checks them -
; confirm this matches the C reference implementation.
BHL16_f68030_sym:
INIT_BHL_SYM
bhl16_lf030_sym:
BH_COMPUTE_FPU bhl16_tpir_f030_sym ; -->D6=0:pixel in range
bhl16_tpir_f030_sym:
MOVEQ.L #-1,D0 ; Assume out of range
TST.W D6
BNE.S bhl16_setpix_f030_sym
GET_TCPIXEL16 A0,A3,D4,D0 ; A0-->in_ptr, A3=bh_xs+bh_xc, D4=bh_ys+bh_yc
; D0=GET_TCPIXEL(in_ptr, bh_xs+bh_xc, bh_ys+bh_yc)
bhl16_setpix_f030_sym:
MOVE.L A6,-(SP) ; Save bh_xd
MOVE.L bh_xc(PC),D2 ; D2=bh_xc
MOVE.L bh_yc(PC),D3 ; D3=bh_yc
ADD.L D2,A6 ; A6=bh_xd+bh_xc
MOVE.L bh_yd(PC),D7
ADD.L D3,D7 ; D7=bh_yd+bh_yc
SET_TCPIXEL16 A1,A6,D7,D0,D5 ; SET_TCPIXEL(out_ptr, bh_xd+bh_xc, bh_yd+bh_yc, GET_TCPIXEL(in_ptr, bh_xs+bh_xc, bh_ys+bh_yc))
MOVEQ.L #2,D1 ; 3 loops, -1 for DBF
deduc16_f030:
SUB.L D2,A3 ; We need now bh_xs(A3) and bh_ys(D4) relative to
SUB.L D3,D4 ; Black Hole center to use R90
SUB.L D2,A6 ; We need now bh_xd(A6) and bh_yd(D7) relative to
SUB.L D3,D7 ; Black Hole center to use R90
R90 A6,D7,D0 ; Rotate (xd,yd) by 90 degrees (PI/2)
R90 A3,D4,D0 ; Rotate (xs,ys) by 90 degrees (PI/2)
ADD.L D2,A3 ; Now we need coords in image raster
ADD.L D3,D4
ADD.L D2,A6
ADD.L D3,D7
PIXINIMG A6,D7,bh_in_w(PC),bh_in_h(PC),dnotinimg16_f030 ; Check if pixel in dest is within bounds
MOVEQ.L #-1,D0 ; Assume out of range
PIXINIMG A3,D4,bh_in_w(PC),bh_in_h(PC),plotdest16_f030 ; Check if pixel in src is within bounds
GET_TCPIXEL16 A0,A3,D4,D0 ; A0-->in_ptr, A3=bh_xs+bh_xc, D4=bh_ys+bh_yc
; D0=GET_TCPIXEL(in_ptr, bh_xs+bh_xc, bh_ys+bh_yc)
plotdest16_f030:
SET_TCPIXEL16 A1,A6,D7,D0,D5 ; SET_TCPIXEL(out_ptr, bh_xd+bh_xc, bh_yd+bh_yc, GET_TCPIXEL(in_ptr, bh_xs+bh_xc, bh_ys+bh_yc))
dnotinimg16_f030:
DBF D1,deduc16_f030
MOVE.L (SP)+,A6 ; Restore bh_xd
ADD.L #1,A6 ; bh_xd++
CMP.W last_xd(PC),A6 ; last_xd was stored by INIT_BHL_SYM
BLE bhl16_lf030_sym ; loop while bh_xd <= last_xd
FMOVE.X (SP)+,FP0 ; restore in reverse order of the saves
MOVEM.L (SP)+,D0-D7/A2-A6 ; Restore context
RTS
; 68030/FPU routine for bitplane images, converted to indexes (one byte per pixel)
; BHLInd_F68030(short use_sym, void* in, void* out)
; D0, A0 A1
; Same as BHLInd_68030 but uses BH_COMPUTE_FPU (FSQRT) for the angle divisor.
; Writes bh_out_w consecutive bytes starting at out (A1);
; a destination pixel whose source is out of range is written as $FF.
; use_sym (D0) is not examined and bh_out_x is not added to A1 here.
; D0-D7/A2-A6 and FP0 are saved on entry and restored before RTS.
BHLInd_F68030:
MOVEM.L D0-D7/A2-A6,-(SP) ; Save context
FMOVE.X FP0,-(SP) ; FP0 is used by BH_COMPUTE_FPU
; A2/A4/A5/A6 and the DBF counter D7 are set up by INIT_BHL
INIT_BHL
bhlInd_lf030:
BH_COMPUTE_FPU bhlInd_tpir_f030 ; -->D6=0:pixel in range
; The bail-out label is the instruction right after the macro: both outcomes
; continue here and are told apart by D6
bhlInd_tpir_f030:
MOVEQ.L #-1,D0 ; Assume out of range
TST.W D6
BNE.S bhlInd_setpix_f030 ; D6<>0: keep the out of range value
MOVE.L (A4,D4.L*4),D0 ; D4=bh_ys, D0=pre_loffset[bh_ys]
ADD.L A3,D0 ; D0=pre_loffset[bh_ys] + bh_xs
MOVE.B (A0,D0.L),D0 ; D0=in_ptr[pre_loffset[bh_ys] + bh_xs]
bhlInd_setpix_f030:
MOVE.B D0,(A1)+ ; write destination index, advance to the next one
ADD.L #1,A6 ; bh_xd++
DBF D7,bhlInd_lf030
FMOVE.X (SP)+,FP0 ; restore in reverse order of the saves
MOVEM.L (SP)+,D0-D7/A2-A6 ; Restore context
RTS
; Pre-calculation of one cosine/sine table entry using the FPU
; void lcosin_fpu(long i, long* pcs)
; D0 A0
; double angle = i*ANGLE_ADD ;
; pcs[0] = 65536.0 * cos( angle ) ;
; pcs[1] = 65536.0 * sin( angle ) ;
; FP0-FP2 are saved and restored; A0 is left pointing to pcs[1].
lcosin_fpu:
FMOVEM.X FP0-FP2,-(SP)
; i is passed as a long: state the integer source format explicitly instead of
; relying on the assembler's default size for an FPU move from a data register
FMOVE.L D0,FP0 ; FP0=(extended)i
; 3.8349519697141030742952189737299e-4=M_PI/2.0/(double)QUARTER
; For some reason, I did not manage to use EQU with floating point constants
FMUL.X #3.8349519697141030742952189737299e-4,FP0 ; FP0=angle
FSINCOS.X FP0,FP1:FP2 ; FP1=cos(angle), FP2=sin(angle)
FMUL.X #65536.0,FP1 ; scale to 16.16 fixed point
FMUL.X #65536.0,FP2
FMOVE.L FP1,(A0)+ ; pcs[0]=65536*cos(angle), converted to long
FMOVE.L FP2,(A0) ; pcs[1]=65536*sin(angle), converted to long
FMOVEM.X (SP)+,FP0-FP2
RTS