413 lines
10 KiB
ArmAsm
413 lines
10 KiB
ArmAsm
/* Copyright (C) 2005, 2007 Free Software Foundation, Inc.
|
|
Contributed by Sunnorth
|
|
|
|
This file is part of GCC.
|
|
|
|
GCC is free software; you can redistribute it and/or modify it
|
|
under the terms of the GNU General Public License as published
|
|
by the Free Software Foundation; either version 3, or (at your
|
|
option) any later version.
|
|
|
|
GCC is distributed in the hope that it will be useful, but WITHOUT
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
|
|
License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with GCC; see the file COPYING3. If not see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
/* Symbolic names for the machine registers used by the routines in
   this file.  */

/* a0/a1 carry the two operands of the arithmetic helpers; a2/a3 are
   defined alongside them but are not referenced by the visible code.  */
#define a0 r4
#define a1 r5
#define a2 r6
#define a3 r7

/* Temporaries used by the helpers.  */
#define t0 r8
#define t1 r9
#define t2 r10
#define t3 r11
#define t4 r22

/* ra is the register the helpers return through (`br ra`).  v0 is only
   mentioned in the header comments; no instruction in the visible code
   references it.  */
#define ra r3
#define v0 r23
|
|
|
|
#ifndef __pic__
|
|
#if !defined(L_mulsi3) && !defined(L_divsi3)
        .text
        .global _flush_cache
#ifdef __score3__
/* With __score3__ defined the routine is a no-op: it returns at once.  */
_flush_cache:
        br      r3
#else
/* _flush_cache

   Registers as read by the code:
     r4   first address handed to the cache operations
     r5   shifted right by 4 to form the loop count; presumably a byte
          length, given the 16-byte address step -- confirm with callers
     r3   return address
   Scratch: r8, r9, r10 and special register sr0.

   Sequence:
     1. cache op 0xe at each 16-byte step (data cache write back and
        invalidate, according to the original comment);
     2. read cr4; if bit 3 is set issue cache op 0xc (LDM write back),
        if bit 2 is set issue cache op 0x4 (LIM refill);
     3. cache op 0x2 at each 16-byte step (instruction cache
        invalidate, according to the original comment).

   NOTE(review): the count is r5 >> 4, so any remainder below 16 is
   dropped -- confirm that callers pass suitably sized ranges.  */
_flush_cache:
        srli    r9, r5, 4               /* r9 = r5 >> 4, the loop count */
        mv      r8, r4                  /* r8 = running address */
        mtsr    r9, sr0                 /* counter consumed by bcnz */
1:
        cache   0xe, [r8, 0]            /* write back invalid dcache */
        addi    r8, 16
        bcnz    1b

        mfcr    r8, cr4
        /* r10 is the address operand of both cache ops below.  Clear it
           before either test so the LIM refill never sees a stale
           register when the LDM branch is skipped.  */
        ldi     r10, 0
        bittst! r8, 0x3                 /* if LDM is enabled, write back LDM */
        beq!    6f
        cache   0xc, [r10, 0]
6:
        bittst! r8, 0x2                 /* if LIM is enabled, refill it */
        beq!    7f
        cache   0x4, [r10, 0]
7:
        /* Five nop! instructions were disabled here by the original
           author.  */
        mv      r8, r4                  /* restart from the first address */
        mtsr    r9, sr0                 /* reload the loop count */
2:
        cache   0x2, [r8, 0]            /* invalid unlock icache */
        /* Five nop! instructions were disabled here by the original
           author.  */
        addi    r8, 16
        bcnz    2b
        br      r3
#endif
#endif
|
|
|
|
/* FUNCTION
     (U) INT32 v0 = __mulsi3 ((U) INT32 a0, (U) INT32 a1);
     (the code below places the result in r4, i.e. a0)
   REGISTERS:
     use     t0   current low bit of the multiplier
             t1   running product
     modify  a0
             a1 -> becomes 0
   NOTE:
     simply shifting and adding seems to give better performance.  */
|
|
|
|
#ifdef L_mulsi3
        .text
        .global __umulsi3
        .global __mulsi3

/* 32x32 -> 32 multiplication by shift-and-add.  The signed and unsigned
   entry points share one body.

   In:   a0 = multiplicand, a1 = multiplier
   Out:  a0 (r4) = product
   Uses: t0, t1; a1 is shifted right until it reaches zero.  */
        .ent    __mulsi3
__umulsi3:
__mulsi3:
        li      t1, 0                   /* t1 = running product */
1:
        andri.c t0, a1, 1               /* t0 = multiplier[0], sets flags */
        srli    a1, a1, 1               /* drop the bit just examined */
        beq     2f                      /* bit was 0: nothing to add */
        add     t1, t1, a0              /* add the shifted multiplicand */
2:
        slli    a0, a0, 1               /* multiplicand *= 2 */
        cmpi.c  a1, 0
        bne     1b                      /* loop while multiplier bits remain */
        mv      a0, t1                  /* result goes back in r4 */
        br      ra
        .end    __mulsi3
#endif /* L_mulsi3 */
|
|
|
|
/* FUNCTION
     UINT32 (v0) = __udivsi3 (UINT32 (a0), UINT32 (a1));
     INT32 (v0) = __divsi3 (INT32 (a0), INT32 (a1));
     UINT32 (v0) = __umodsi3 (UINT32 (a0), UINT32 (a1));
     INT32 (v0) = __modsi3 (INT32 (a0), INT32 (a1));
   DESCRIPTION
     performs 32-bit division/modulo.
   REGISTERS
     used    t0   bit-index
             t1
     modify  a0   becomes remainder  */
|
|
#ifdef L_divsi3
|
|
/* Division helpers live in .text; the four public entry points are
   exported here.  */
        .text
        .global __udivsi3
        .global __umodsi3
        .global __divsi3
        .global __modsi3

/* Unsigned 32-bit division by shift-and-subtract.

   In:   a0 = dividend, a1 = divisor
   Out:  a0 (r4) = quotient, a1 = remainder
   Uses: t0 (current quotient bit), t4 (quotient being built)

   A zero divisor skips the loops entirely: the quotient is 0 and the
   dividend is handed back unchanged as the remainder.  */
        .ent    __udivsi3
__udivsi3:
        li      t4, 0                   /* quotient = 0 */
        cmpi.c  a1, 0
        beq     9f                      /* divisor == 0: leave early */
        li      t0, 1                   /* quotient bit starts at bit 0 */
        blt     2f                      /* top bit of divisor already set */
1:
        /* Normalize: shift divisor and quotient bit left together until
           the compare below leaves carry clear or the divisor's top bit
           becomes set.  */
        cmp.c   a0, a1
        bcc     2f
        slli    a1, a1, 1
        slli    t0, t0, 1
        cmpi.c  a1, 0
        bge     1b
2:
        /* One quotient bit per pass, from the highest position down.  */
        cmp.c   a0, a1
        bcc     3f                      /* divisor does not fit: bit stays 0 */
        sub     a0, a0, a1              /* take the divisor out */
        or      t4, t4, t0              /* and record the quotient bit */
3:
        srli    t0, t0, 1
        srli    a1, a1, 1
        cmpi.c  t0, 0
        bne     2b                      /* until the bit shifts out */
9:
        mv      a1, a0                  /* remainder for the mod wrappers */
        mv      a0, t4                  /* quotient in r4 */
        br      ra
        .end    __udivsi3
|
|
|
|
/* Unsigned 32-bit modulus.

   Calls __udivsi3 and returns the remainder that routine leaves in a1.
   In:   a0 = dividend, a1 = divisor
   Out:  a0 (r4) = remainder
   Uses: t3 to keep the caller's return address across the call.  */
        .ent    __umodsi3
__umodsi3:
        mv      t3, ra                  /* jl overwrites ra */
        jl      __udivsi3
        mv      a0, a1                  /* remainder becomes the result */
        br      t3
        .end    __umodsi3
|
|
|
|
/* Internal helper for the signed routines: negate each operand that
   compares below zero, then continue in __udivsi3.  Control does not
   come back here; __udivsi3 returns through ra to whoever called
   __orgsi3.  */
        .ent    __orgsi3
__orgsi3:
        cmpi.c  a0, 0
        bge     1f                      /* dividend not negative: keep */
        neg     a0, a0
1:
        cmpi.c  a1, 0
        bge     __udivsi3               /* divisor not negative: divide now */
        neg     a1, a1
        b       __udivsi3               /* goto udivsi3 */
        .end    __orgsi3
|
|
|
|
/* Signed 32-bit division.

   In:   a0 = dividend, a1 = divisor
   Out:  a0 (r4) = quotient
   Uses: t2 = a0 ^ a1, tested below zero to decide whether the result is
              negated
         t3 = caller's return address

   __divsi3_adjust is also the tail of __modsi3, which arrives with its
   own t2 and with the remainder already in r4.  */
        .ent    __divsi3
__divsi3:
        mv      t3, ra                  /* jl overwrites ra */
        xor     t2, a0, a1              /* operand signs differ => t2 < 0 */
        jl      __orgsi3                /* divide the magnitudes */
__divsi3_adjust:
        cmpi.c  t2, 0
        bge     1f                      /* keep the result as is */
        neg     a0, a0                  /* otherwise flip its sign */
1:
        br      t3
        .end    __divsi3
|
|
|
|
/* Signed 32-bit modulus.

   In:   a0 = dividend, a1 = divisor
   Out:  a0 (r4) = remainder, negated when the dividend was below zero
   Uses: t2 = copy of the original dividend (its sign drives the final
              adjustment)
         t3 = caller's return address

   The sign fix-up and the return are shared with __divsi3 through
   __divsi3_adjust.  */
        .ent    __modsi3
__modsi3:
        mv      t3, ra                  /* jl overwrites ra */
        mv      t2, a0                  /* remember the dividend's sign */
        jl      __orgsi3                /* divide the magnitudes */
        mv      a0, a1                  /* remainder left in a1 by __udivsi3 */
        b       __divsi3_adjust
        .end    __modsi3
|
|
|
|
#endif /* L_divsi3 */
|
|
#else /* -fPIC */
|
|
#if !defined(L_mulsi3) && !defined(L_divsi3)
        .set    pic
        .text
        .global _flush_cache
#ifdef __score3__
/* With __score3__ defined the routine is a no-op: it returns at once.  */
_flush_cache:
        br      r3
#else
/* _flush_cache (position-independent build)

   Registers as read by the code:
     r4   first address handed to the cache operations
     r5   shifted right by 4 to form the loop count; presumably a byte
          length, given the 16-byte address step -- confirm with callers
     r3   return address
   Scratch: r8, r9, r10 and special register sr0.
   The stack pointer r0 is lowered by 8 on entry and restored before
   returning.

   Sequence:
     1. cache op 0xe at each 16-byte step (data cache write back and
        invalidate, according to the original comment);
     2. read cr4; if bit 3 is set issue cache op 0xc (LDM write back),
        if bit 2 is set issue cache op 0x4 (LIM refill);
     3. cache op 0x2 at each 16-byte step (instruction cache
        invalidate, according to the original comment).

   NOTE(review): the count is r5 >> 4, so any remainder below 16 is
   dropped -- confirm that callers pass suitably sized ranges.  */
_flush_cache:
        addi    r0, -8                  /* pic used */
        .cpload r29                     /* pic used */
        srli    r9, r5, 4               /* r9 = r5 >> 4, the loop count */
        mv      r8, r4                  /* r8 = running address */
        mtsr    r9, sr0                 /* counter consumed by bcnz */
1:
        cache   0xe, [r8, 0]            /* write back invalid dcache */
        addi    r8, 16
        bcnz    1b

        mfcr    r8, cr4
        /* r10 is the address operand of both cache ops below.  Clear it
           before either test so the LIM refill never sees a stale
           register when the LDM branch is skipped.  */
        ldi     r10, 0
        bittst! r8, 0x3                 /* if LDM is enabled, write back LDM */
        beq!    6f
        cache   0xc, [r10, 0]
6:
        bittst! r8, 0x2                 /* if LIM is enabled, refill it */
        beq!    7f
        cache   0x4, [r10, 0]
7:
        /* Five nop! instructions were disabled here by the original
           author.  */
        mv      r8, r4                  /* restart from the first address */
        mtsr    r9, sr0                 /* reload the loop count */
2:
        cache   0x2, [r8, 0]            /* invalid unlock icache */
        /* Five nop! instructions were disabled here by the original
           author.  */
        addi    r8, 16
        bcnz    2b
        .cprestore r0, 12               /* pic used */
        addi    r0, 8                   /* pic used */
        br      r3
#endif
#endif
|
|
|
|
/* FUNCTION
     (U) INT32 v0 = __mulsi3 ((U) INT32 a0, (U) INT32 a1);
     (the code below places the result in r4, i.e. a0)
   REGISTERS:
     use     t0   current low bit of the multiplier
             t1   running product
     modify  a0
             a1 -> becomes 0
   NOTE:
     simply shifting and adding seems to give better performance.  */
|
|
|
|
#ifdef L_mulsi3
        .set    pic
        .text
        .global __umulsi3
        .global __mulsi3

/* 32x32 -> 32 multiplication by shift-and-add (position-independent
   build).  The signed and unsigned entry points share one body.

   In:   a0 = multiplicand, a1 = multiplier
   Out:  a0 (r4) = product
   Uses: t0, t1; a1 is shifted right until it reaches zero.
   The stack pointer r0 is lowered by 8 on entry and restored before
   returning.  */
        .ent    __mulsi3
__umulsi3:
__mulsi3:
        addi    r0, -8                  /* pic used */
        .cpload r29                     /* pic used */
        li      t1, 0                   /* t1 = running product */
1:
        andri.c t0, a1, 1               /* t0 = multiplier[0], sets flags */
        srli    a1, a1, 1               /* drop the bit just examined */
        beq     2f                      /* bit was 0: nothing to add */
        add     t1, t1, a0              /* add the shifted multiplicand */
2:
        slli    a0, a0, 1               /* multiplicand *= 2 */
        cmpi.c  a1, 0
        bne     1b                      /* loop while multiplier bits remain */
        mv      a0, t1                  /* result goes back in r4 */
        .cprestore r0, 12               /* pic used */
        addi    r0, 8                   /* pic used */
        br      ra
        .end    __mulsi3
#endif /* L_mulsi3 */
|
|
|
|
/* FUNCTION
     UINT32 (v0) = __udivsi3 (UINT32 (a0), UINT32 (a1));
     INT32 (v0) = __divsi3 (INT32 (a0), INT32 (a1));
     UINT32 (v0) = __umodsi3 (UINT32 (a0), UINT32 (a1));
     INT32 (v0) = __modsi3 (INT32 (a0), INT32 (a1));
   DESCRIPTION
     performs 32-bit division/modulo.
   REGISTERS
     used    t0   bit-index
             t1
     modify  a0   becomes remainder  */
|
|
#ifdef L_divsi3
|
|
/* Position-independent division helpers live in .text; the four public
   entry points are exported here.  */
        .set    pic
        .text
        .global __udivsi3
        .global __umodsi3
        .global __divsi3
        .global __modsi3

/* Unsigned 32-bit division by shift-and-subtract.

   In:   a0 = dividend, a1 = divisor
   Out:  a0 (r4) = quotient, a1 = remainder
   Uses: t0 (current quotient bit), t4 (quotient being built)
   The stack pointer r0 is lowered by 8 on entry and restored before
   returning.

   A zero divisor skips the loops entirely: the quotient is 0 and the
   dividend is handed back unchanged as the remainder.  */
        .ent    __udivsi3
__udivsi3:
        addi    r0, -8                  /* pic used */
        .cpload r29                     /* pic used */
        li      t4, 0                   /* quotient = 0 */
        cmpi.c  a1, 0
        beq     9f                      /* divisor == 0: leave early */
        li      t0, 1                   /* quotient bit starts at bit 0 */
        blt     2f                      /* top bit of divisor already set */
1:
        /* Normalize: shift divisor and quotient bit left together until
           the compare below leaves carry clear or the divisor's top bit
           becomes set.  */
        cmp.c   a0, a1
        bcc     2f
        slli    a1, a1, 1
        slli    t0, t0, 1
        cmpi.c  a1, 0
        bge     1b
2:
        /* One quotient bit per pass, from the highest position down.  */
        cmp.c   a0, a1
        bcc     3f                      /* divisor does not fit: bit stays 0 */
        sub     a0, a0, a1              /* take the divisor out */
        or      t4, t4, t0              /* and record the quotient bit */
3:
        srli    t0, t0, 1
        srli    a1, a1, 1
        cmpi.c  t0, 0
        bne     2b                      /* until the bit shifts out */
9:
        mv      a1, a0                  /* remainder for the mod wrappers */
        mv      a0, t4                  /* quotient in r4 */
        .cprestore r0, 12               /* pic used */
        addi    r0, 8                   /* pic used */
        br      ra
        .end    __udivsi3
|
|
|
|
/* Unsigned 32-bit modulus (position-independent build).

   Calls __udivsi3 through r29 and returns the remainder that routine
   leaves in a1.
   In:   a0 = dividend, a1 = divisor
   Out:  a0 (r4) = remainder
   Uses: t3 to keep the caller's return address across the call; r29 for
         the call target.
   The stack pointer r0 is lowered by 8 on entry and restored before
   returning.

   No other temporary is written: neither this routine nor __udivsi3
   reads t1, so it is left untouched, matching the non-PIC version.  */
        .ent    __umodsi3
__umodsi3:
        addi    r0, -8                  /* pic used */
        .cpload r29                     /* pic used */
        mv      t3, ra                  /* brl overwrites ra */
        la      r29, __udivsi3
        brl     r29
        mv      a0, a1                  /* remainder becomes the result */
        .cprestore r0, 12               /* pic used */
        addi    r0, 8                   /* pic used */
        br      t3
        .end    __umodsi3
|
|
|
|
/* Internal helper for the signed routines: negate each operand that
   compares below zero, then continue in __udivsi3.  Control does not
   come back here; __udivsi3 returns through ra to whoever called
   __orgsi3.

   NOTE(review): __udivsi3 starts with `.cpload r29`, but on this path
   r29 was loaded with the address of __orgsi3 by __divsi3/__modsi3.
   Presumably the value derived from it is never relied upon -- confirm
   against the assembler's definition of .cpload.  */
        .ent    __orgsi3
__orgsi3:
        cmpi.c  a0, 0
        bge     1f                      /* dividend not negative: keep */
        neg     a0, a0
1:
        cmpi.c  a1, 0
        bge     __udivsi3               /* divisor not negative: divide now */
        neg     a1, a1
        b       __udivsi3               /* goto udivsi3 */
        .end    __orgsi3
|
|
|
|
/* Signed 32-bit division (position-independent build).

   In:   a0 = dividend, a1 = divisor
   Out:  a0 (r4) = quotient
   Uses: t2 = a0 ^ a1, tested below zero to decide whether the result is
              negated
         t3 = caller's return address
         r29 = call target
   The stack pointer r0 is lowered by 8 on entry and restored on exit.

   __divsi3_adjust is also the tail of __modsi3, which arrives with its
   own t2, the remainder already in r4 and the same 8-byte stack
   adjustment still pending.  */
        .ent    __divsi3
__divsi3:
        addi    r0, -8                  /* pic used */
        .cpload r29                     /* pic used */
        mv      t3, ra                  /* brl overwrites ra */
        xor     t2, a0, a1              /* operand signs differ => t2 < 0 */
        la      r29, __orgsi3
        brl     r29                     /* divide the magnitudes */
__divsi3_adjust:
        cmpi.c  t2, 0
        bge     1f                      /* keep the result as is */
        neg     a0, a0                  /* otherwise flip its sign */
1:
        .cprestore r0, 12               /* pic used */
        addi    r0, 8                   /* pic used */
        br      t3
        .end    __divsi3
|
|
|
|
/* Signed 32-bit modulus (position-independent build).

   In:   a0 = dividend, a1 = divisor
   Out:  a0 (r4) = remainder, negated when the dividend was below zero
   Uses: t2 = copy of the original dividend (its sign drives the final
              adjustment)
         t3 = caller's return address
         r29 = call target

   The stack pointer r0 is lowered by 8 here; the matching restore, the
   sign fix-up and the return all happen in the shared tail at
   __divsi3_adjust.  */
        .ent    __modsi3
__modsi3:
        addi    r0, -8                  /* pic used */
        .cpload r29                     /* pic used */
        mv      t3, ra                  /* brl overwrites ra */
        mv      t2, a0                  /* remember the dividend's sign */
        la      r29, __orgsi3
        brl     r29                     /* divide the magnitudes */
        mv      a0, a1                  /* remainder left in a1 by __udivsi3 */
        b       __divsi3_adjust
        .end    __modsi3
|
|
|
|
#endif /*L_divsi3 */
|
|
#endif
|