| (listing metadata, not part of the source: 353 lines, 7.8 KiB, labelled ArmAsm)
|
|
| new version of bcopy, memcpy and memmove
|
|
| handles overlap, odd/even alignment
|
|
| uses movem to copy 256 bytes blocks faster.
|
|
| Alexander Lehmann alexlehm@iti.informatik.th-darmstadt.de
|
|
| sort of inspired by jrbs bcopy
|
|
|
|
.text
|
|
.even
|
|
.globl ___bcopy
|
|
.globl __bcopy
|
|
.globl _bcopy
|
|
.globl _memcpy
|
|
.globl _memmove
|
|
|
|
| void *memcpy( void *dest, const void *src, size_t len );
|
|
| void *memmove( void *dest, const void *src, size_t len );
|
|
| returns dest
|
|
| functions are aliased
|
|
|
|
| memcpy / memmove entry points (aliases of each other).
| Arguments arrive in the order dest, src, length, i.e. with the two
| pointers swapped relative to bcopy.  After loading them the code joins
| the shared copy routine with a0 = src and a1 = dest.
_memcpy:
_memmove:
	move.l	4(sp),a1		| a1 = dest
	move.l	8(sp),a0		| a0 = src
#ifndef __SOZOBON__
	jra	common			| common loads the 32 bit length itself
#else
| Sozobon build: the length argument is read as a 16 bit word here and
| zero-extended to the 32 bit count that the shared code works with.
	clr.l	d0			| upper word of the count = 0
	move.w	12(sp),d0		| length; also sets Z for the test at common2
	jra	common2

| bcopy entry for the Sozobon build: src first, then dest, 16 bit length.
_bcopy:
	move.l	4(sp),a0		| a0 = src
	move.l	8(sp),a1		| a1 = dest
	clr.l	d0			| upper word of the count = 0
	move.w	12(sp),d0		| length; also sets Z for the test at common2
	jra	common2
#endif
|
|
|
|
| void bcopy( const void *src, void *dest, size_t length );
|
|
| void _bcopy( const void *src, void *dest, unsigned long length );
|
|
| return value not used (returns src)
|
|
| functions are aliased (except for HSC -- sb)
|
|
|
|
| Shared entry of the copy routine.
|   stack on entry:  4(sp) = src, 8(sp) = dest, 12(sp) = length (32 bits)
|   common:   entered with a0/a1 already loaded, fetches the length
|   common2:  entered with a0 = src, a1 = dest, d0.l = length and the
|             condition codes describing the length just loaded
| Leaves with d2 pushed on the stack and either falls through into the
| ascending copy or branches to top_down (descending copy) or exit.
#ifndef __SOZOBON__
_bcopy:
___bcopy:
#endif
__bcopy:
	move.l	4(sp),a0	| src
	move.l	8(sp),a1	| dest
common:	move.l	12(sp),d0	| length
common2:
	jeq	exit		| length == 0: nothing to copy

|	a0 src, a1 dest, d0.l length
	move.l	d2,-(sp)	| d2 is used as scratch, restored at exit_d2

| Overlap handling: when dest lies above src the copy has to run from
| the end of the buffers downwards.  Addresses are unsigned values, so
| the branch must be the unsigned jhi.  The signed jgt used previously
| chose the ascending copy whenever dest was at or above 0x80000000 and
| src below it, which corrupts an overlapping move across that boundary.
	cmp.l	a0,a1
	jhi	top_down
|
|
|
|
| Ascending copy.  Entered by falling through from the overlap test with
| a0 = src, a1 = dest, d0.l = length (non-zero) and d2 pushed on the stack.
| Register use below: d1 = alignment test, then length mod 256 (the tail);
| d2 = scratch; d0 = block counter.
#ifdef __mcoldfire__
move.l a0,d1 | d1 = src ^ dest, bit 0 set when the alignments differ
move.l a1,d2
eor.l d2,d1
#else
move.w a0,d1 | d1 = src ^ dest, bit 0 set when the alignments differ
move.w a1,d2
eor.w d2,d1
#endif
btst #0,d1 | one odd one even ?
jne slow_copy | yes: use the byte-by-byte loop
btst #0,d2 | both even ?
jeq both_even
move.b (a0)+,(a1)+ | both odd: copy one byte, now we are both even
subq.l #1,d0
both_even:
moveq #0,d1 | d1 = length mod 256, copied after the block loop
move.b d0,d1
lsr.l #8,d0 | d0 = number of 256 byte blocks
jeq less256 | no full block: nothing was saved, skip the restore too
#ifdef __mcoldfire__
lea -10 * 4(sp),sp | make room for the ten registers saved below
movem.l d1/d3-d7/a2/a3/a5/a6,(sp) | d2 is already saved
| exclude a4 because of -mbaserel
copy256:
movem.l 0(a0),d1-d7/a2/a3/a5/a6 | copy 5*44+36=256 bytes, 11 registers per chunk
movem.l d1-d7/a2/a3/a5/a6,a1@
movem.l 44(a0),d1-d7/a2/a3/a5/a6
movem.l d1-d7/a2/a3/a5/a6,44(a1)
movem.l 88(a0),d1-d7/a2/a3/a5/a6
movem.l d1-d7/a2/a3/a5/a6,88(a1)
movem.l 132(a0),d1-d7/a2/a3/a5/a6
movem.l d1-d7/a2/a3/a5/a6,132(a1)
movem.l 176(a0),d1-d7/a2/a3/a5/a6
movem.l d1-d7/a2/a3/a5/a6,176(a1)
movem.l 220(a0),d1-d7/a2-a3 | last chunk: 9 registers, 36 bytes
movem.l d1-d7/a2-a3,220(a1)
lea 256(a0),a0 | advance src, the displacement loads above leave a0 unchanged
#else
movem.l d1/d3-d7/a2/a3/a5/a6,-(sp) | d2 is already saved
| exclude a4 because of -mbaserel
copy256:
movem.l (a0)+,d1-d7/a2/a3/a5/a6 | copy 5*44+36=256 bytes, 11 registers per chunk
movem.l d1-d7/a2/a3/a5/a6,(a1)
movem.l (a0)+,d1-d7/a2/a3/a5/a6
movem.l d1-d7/a2/a3/a5/a6,44(a1)
movem.l (a0)+,d1-d7/a2/a3/a5/a6
movem.l d1-d7/a2/a3/a5/a6,88(a1)
movem.l (a0)+,d1-d7/a2/a3/a5/a6
movem.l d1-d7/a2/a3/a5/a6,132(a1)
movem.l (a0)+,d1-d7/a2/a3/a5/a6
movem.l d1-d7/a2/a3/a5/a6,176(a1)
movem.l (a0)+,d1-d7/a2-a3 | last chunk: 9 registers, 36 bytes
movem.l d1-d7/a2-a3,220(a1)
#endif
lea a1@(256),a1 | advance dest, src has already been advanced
subql #1,d0
jne copy256 | next block
#ifdef __mcoldfire__
movml sp@,d1/d3-d7/a2/a3/a5/a6 | restore registers, d1 is the tail length again
lea sp@(40),sp
less256: | copy the tail (d1 = 0..255 bytes) in 16 byte blocks
movl d1,d0
lsrl #2,d0 | n = number of 4 byte moves
jeq less4 | less than 4 bytes left
movl d0,d2
negl d2
andil #3,d2 | d2 = (-n)&3 = moves to skip on the first pass through copy16
subql #1,d0
lsrl #2,d0 | d0 = (n-1)/4 = passes through copy16, minus 1
addl d2,d2 | offset in code (each movl below is two bytes)
jmp pc@(2,d2:l) | jump into the unrolled loop at copy16 + d2
#else
movml sp@+,d1/d3-d7/a2/a3/a5/a6 | restore registers, d1 is the tail length again
less256: | copy the tail (d1 = 0..255 bytes) in 16 byte blocks
movw d1,d0
lsrw #2,d0 | n = number of 4 byte moves
jeq less4 | less than 4 bytes left
movw d0,d2
negw d2
andiw #3,d2 | d2 = (-n)&3 = moves to skip on the first pass through copy16
subqw #1,d0
lsrw #2,d0 | d0 = (n-1)/4 = passes through copy16, minus 1
addw d2,d2 | offset in code (each movl below is two bytes)
jmp pc@(2,d2:w) | jump into the unrolled loop at copy16 + d2
#endif
| The computed jump above lands on one of the four moves below, so these
| must stay four two-byte instructions placed directly after the jmp.
copy16:
movl a0@+,a1@+
movl a0@+,a1@+
movl a0@+,a1@+
movl a0@+,a1@+
#ifdef __mcoldfire__
subql #1,d0 | loop until d0 goes negative
bpl copy16
#else
dbra d0,copy16 | loop until d0.w reaches -1
#endif
less4:
btst #1,d1 | bit 1 of the tail length: one word left ?
jeq less2
movw a0@+,a1@+
less2:
btst #0,d1 | bit 0 of the tail length: one byte left ?
jeq none
movb a0@,a1@
none:
exit_d2: | common exit for every path that pushed d2
movl sp@+,d2
exit:
movl sp@(4),d0 | return the first argument: dest for memcpy/memmove, src for bcopy
rts
|
|
|
|
| Ascending byte-by-byte copy, used when src and dest differ in alignment.
|   in:  a0 = src, a1 = dest, d0.l = length (non-zero), d2 pushed on the stack
| The eight byte moves at scopy form an unrolled loop that is entered part
| way through:  d1 = 2 * ((-length) & 7) is the byte offset of the first
| move to execute and d0 = (length + 7) / 8 is the number of passes.
slow_copy:
#ifdef __mcoldfire__
	move.l	d0,d1
	neg.l	d1
	andi.l	#7,d1		| moves to skip on the first pass
	add.l	d1,d1		| times two: each move.b below is two bytes
#else
	move.w	d0,d1
	neg.w	d1
	andi.w	#7,d1		| moves to skip on the first pass
	add.w	d1,d1		| times two: each move.b below is two bytes
#endif
	addq.l	#7,d0
	lsr.l	#3,d0		| number of passes, rounded up
| The jump must stay directly in front of scopy: its target is scopy + d1.
#ifdef __mcoldfire__
	jmp	pc@(2,d1:l)
#else
	jmp	pc@(2,d1:w)
#endif
scopy:
	move.b	(a0)+,(a1)+
	move.b	(a0)+,(a1)+
	move.b	(a0)+,(a1)+
	move.b	(a0)+,(a1)+
	move.b	(a0)+,(a1)+
	move.b	(a0)+,(a1)+
	move.b	(a0)+,(a1)+
	move.b	(a0)+,(a1)+
	subq.l	#1,d0
	jne	scopy
	jra	exit_d2		| restore d2 and return
|
|
|
|
| Descending copy, taken when dest lies above src.  Entered with a0 = src,
| a1 = dest, d0.l = length (non-zero) and d2 pushed on the stack.  Same
| structure as the ascending copy, but working from the end of both
| buffers towards the start.
top_down:
addl d0,a0 | a0 = byte after end of src
addl d0,a1 | a1 = byte after end of dest

#ifdef __mcoldfire__
movl a0,d1 | d1 = src end ^ dest end, bit 0 set when the alignments differ
movl a1,d2
eorl d2,d1
#else
movw a0,d1 | d1 = src end ^ dest end, bit 0 set when the alignments differ
movw a1,d2
eorw d2,d1
#endif
btst #0,d1 | one odd one even ?
jne slow_copy_d | yes: use the descending byte-by-byte loop

btst #0,d2 | both even ?
jeq both_even_d
movb a0@-,a1@- | both odd: copy one byte, now both are even
subql #1,d0
both_even_d:
movq #0,d1 | d1 = length mod 256, copied after the block loop
movb d0,d1
lsrl #8,d0 | d0 = number of 256 byte blocks
jeq less256_d | no full block: nothing was saved, skip the restore too
#ifdef __mcoldfire__
lea sp@(-40),sp | make room for the ten registers saved below
movml d1/d3-d7/a2/a3/a5/a6,sp@
copy256_d:
movml a0@(-44),d1-d7/a2/a3/a5/a6 | 5 chunks of 11 registers (44 bytes) ...
movml d1-d7/a2/a3/a5/a6,a1@(-44)
movml a0@(-88),d1-d7/a2/a3/a5/a6
movml d1-d7/a2/a3/a5/a6,a1@(-88)
movml a0@(-132),d1-d7/a2/a3/a5/a6
movml d1-d7/a2/a3/a5/a6,a1@(-132)
movml a0@(-176),d1-d7/a2/a3/a5/a6
movml d1-d7/a2/a3/a5/a6,a1@(-176)
movml a0@(-220),d1-d7/a2/a3/a5/a6
movml d1-d7/a2/a3/a5/a6,a1@(-220)
movml a0@(-256),d1-d7/a2-a3 | ... plus 9 registers (36 bytes) = 256 bytes
movml d1-d7/a2-a3,a1@(-256)
lea a1@(-256),a1 | step dest back, the displacement stores leave a1 unchanged
#else
movml d1/d3-d7/a2/a3/a5/a6,sp@-
copy256_d:
movml a0@(-44),d1-d7/a2/a3/a5/a6 | 5 chunks of 11 registers (44 bytes) ...
movml d1-d7/a2/a3/a5/a6,a1@- | the predecrement store steps a1 back by 44
movml a0@(-88),d1-d7/a2/a3/a5/a6
movml d1-d7/a2/a3/a5/a6,a1@-
movml a0@(-132),d1-d7/a2/a3/a5/a6
movml d1-d7/a2/a3/a5/a6,a1@-
movml a0@(-176),d1-d7/a2/a3/a5/a6
movml d1-d7/a2/a3/a5/a6,a1@-
movml a0@(-220),d1-d7/a2/a3/a5/a6
movml d1-d7/a2/a3/a5/a6,a1@-
movml a0@(-256),d1-d7/a2-a3 | ... plus 9 registers (36 bytes) = 256 bytes
movml d1-d7/a2-a3,a1@-
#endif
lea a0@(-256),a0 | step src back, the displacement loads leave a0 unchanged
subql #1,d0
jne copy256_d | next block
#ifdef __mcoldfire__
movml sp@,d1/d3-d7/a2/a3/a5/a6 | restore registers, d1 is the tail length again
lea sp@(40),sp
less256_d: | copy the tail (d1 = 0..255 bytes) in 16 byte blocks
movl d1,d0
lsrl #2,d0 | n = number of 4 byte moves
jeq less4_d | less than 4 bytes left
movl d0,d2
negl d2
andil #3,d2 | d2 = (-n)&3 = moves to skip on the first pass through copy16_d
subql #1,d0
lsrl #2,d0 | d0 = (n-1)/4 = passes through copy16_d, minus 1
addl d2,d2 | offset in code (each movl below is two bytes)
jmp pc@(2,d2:l) | jump into the unrolled loop at copy16_d + d2
#else
movml sp@+,d1/d3-d7/a2/a3/a5/a6 | restore registers, d1 is the tail length again
less256_d: | copy the tail (d1 = 0..255 bytes) in 16 byte blocks
movw d1,d0
lsrw #2,d0 | n = number of 4 byte moves
jeq less4_d | less than 4 bytes left
movw d0,d2
negw d2
andiw #3,d2 | d2 = (-n)&3 = moves to skip on the first pass through copy16_d
subqw #1,d0
lsrw #2,d0 | d0 = (n-1)/4 = passes through copy16_d, minus 1
addw d2,d2 | offset in code (each movl below is two bytes)
jmp pc@(2,d2:w) | jump into the unrolled loop at copy16_d + d2
#endif
| The computed jump above lands on one of the four moves below, so these
| must stay four two-byte instructions placed directly after the jmp.
copy16_d:
movl a0@-,a1@-
movl a0@-,a1@-
movl a0@-,a1@-
movl a0@-,a1@-
#ifdef __mcoldfire__
subql #1,d0 | loop until d0 goes negative
bpl copy16_d
#else
dbra d0,copy16_d | loop until d0.w reaches -1
#endif
less4_d:
btst #1,d1 | bit 1 of the tail length: one word left ?
jeq less2_d
movw a0@-,a1@-
less2_d:
btst #0,d1 | bit 0 of the tail length: one byte left ?
jeq exit_d2
movb a0@-,a1@-
jra exit_d2 | restore d2 and return
|
|
| Descending byte-by-byte copy, used by the top_down path when src and
| dest differ in alignment.
|   in:  a0 = byte after end of src, a1 = byte after end of dest,
|        d0.l = length (non-zero), d2 pushed on the stack
| The eight byte moves at scopy_d form an unrolled loop that is entered
| part way through:  d1 = 2 * ((-length) & 7) is the byte offset of the
| first move to execute and d0 = (length + 7) / 8 is the number of passes.
slow_copy_d:
#ifdef __mcoldfire__
	move.l	d0,d1
	neg.l	d1
	andi.l	#7,d1		| moves to skip on the first pass
	add.l	d1,d1		| times two: each move.b below is two bytes
#else
	move.w	d0,d1
	neg.w	d1
	andi.w	#7,d1		| moves to skip on the first pass
	add.w	d1,d1		| times two: each move.b below is two bytes
#endif
	addq.l	#7,d0
	lsr.l	#3,d0		| number of passes, rounded up
| The jump must stay directly in front of scopy_d: its target is scopy_d + d1.
#ifdef __mcoldfire__
	jmp	pc@(2,d1:l)
#else
	jmp	pc@(2,d1:w)
#endif
scopy_d:
	move.b	-(a0),-(a1)
	move.b	-(a0),-(a1)
	move.b	-(a0),-(a1)
	move.b	-(a0),-(a1)
	move.b	-(a0),-(a1)
	move.b	-(a0),-(a1)
	move.b	-(a0),-(a1)
	move.b	-(a0),-(a1)
	subq.l	#1,d0
	jne	scopy_d
	jra	exit_d2		| restore d2 and return
|
|
|