From 2c3c1048746a4622d8c89a29670120dc8fab93c4 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Sun, 7 Apr 2024 20:49:45 +0200
Subject: Adding upstream version 6.1.76.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 arch/m68k/fpsp040/x_unfl.S | 268 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 268 insertions(+)
 create mode 100644 arch/m68k/fpsp040/x_unfl.S

(limited to 'arch/m68k/fpsp040/x_unfl.S')

diff --git a/arch/m68k/fpsp040/x_unfl.S b/arch/m68k/fpsp040/x_unfl.S
new file mode 100644
index 000000000..eb772ff3b
--- /dev/null
+++ b/arch/m68k/fpsp040/x_unfl.S
@@ -0,0 +1,268 @@
+|
+|	x_unfl.sa 3.4 7/1/91
+|
+|	fpsp_unfl --- FPSP handler for underflow exception
+|
+| Trap disabled results
+|	For 881/2 compatibility, sw must denormalize the intermediate
+| result, then store the result.  Denormalization is accomplished
+| by taking the intermediate result (which is always normalized) and
+| shifting the mantissa right while incrementing the exponent until
+| it is equal to the denormalized exponent for the destination
+| format.  After denormalization, the result is rounded to the
+| destination format.
+|
+| Trap enabled results
+|	All trap disabled code applies.	In addition the exceptional
+| operand needs to made available to the user with a bias of $6000
+| added to the exponent.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|       For details on the license for this file, please see the
+|       file, README, in this same directory.
+
+X_UNFL:	|idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+	|xref	denorm
+	|xref	round
+	|xref	store
+	|xref	g_rndpr
+	|xref	g_opcls
+	|xref	g_dfmtou
+	|xref	real_unfl
+	|xref	real_inex
+	|xref	fpsp_done
+	|xref	b1238_fix
+
+	.global	fpsp_unfl
+fpsp_unfl:
+	link		%a6,#-LOCAL_SIZE
+	fsave		-(%a7)
+	moveml		%d0-%d1/%a0-%a1,USER_DA(%a6)
+	fmovemx	%fp0-%fp3,USER_FP0(%a6)
+	fmoveml	%fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
+
+|
+	bsrl		unf_res	|denormalize, round & store interm op
+|
+| If underflow exceptions are not enabled, check for inexact
+| exception
+|
+	btstb		#unfl_bit,FPCR_ENABLE(%a6)
+	beqs		ck_inex
+
+	btstb		#E3,E_BYTE(%a6)
+	beqs		no_e3_1
+|
+| Clear dirty bit on dest resister in the frame before branching
+| to b1238_fix.
+|
+	bfextu		CMDREG3B(%a6){#6:#3},%d0	|get dest reg no
+	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
+	bsrl		b1238_fix		|test for bug1238 case
+	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)
+	orl		#sx_mask,E_BYTE(%a6)
+no_e3_1:
+	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
+	fmovemx	USER_FP0(%a6),%fp0-%fp3
+	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	frestore	(%a7)+
+	unlk		%a6
+	bral		real_unfl
+|
+| It is possible to have either inex2 or inex1 exceptions with the
+| unfl.  If the inex enable bit is set in the FPCR, and either
+| inex2 or inex1 occurred, we must clean up and branch to the
+| real inex handler.
+|
+ck_inex:
+	moveb		FPCR_ENABLE(%a6),%d0
+	andb		FPSR_EXCEPT(%a6),%d0
+	andib		#0x3,%d0
+	beqs		unfl_done
+
+|
+| Inexact enabled and reported, and we must take an inexact exception
+|
+take_inex:
+	btstb		#E3,E_BYTE(%a6)
+	beqs		no_e3_2
+|
+| Clear dirty bit on dest resister in the frame before branching
+| to b1238_fix.
+|
+	bfextu		CMDREG3B(%a6){#6:#3},%d0	|get dest reg no
+	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
+	bsrl		b1238_fix		|test for bug1238 case
+	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)
+	orl		#sx_mask,E_BYTE(%a6)
+no_e3_2:
+	moveb		#INEX_VEC,EXC_VEC+1(%a6)
+	moveml         USER_DA(%a6),%d0-%d1/%a0-%a1
+	fmovemx        USER_FP0(%a6),%fp0-%fp3
+	fmoveml        USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	frestore        (%a7)+
+	unlk            %a6
+	bral		real_inex
+
+unfl_done:
+	bclrb		#E3,E_BYTE(%a6)
+	beqs		e1_set		|if set then branch
+|
+| Clear dirty bit on dest resister in the frame before branching
+| to b1238_fix.
+|
+	bfextu		CMDREG3B(%a6){#6:#3},%d0		|get dest reg no
+	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
+	bsrl		b1238_fix		|test for bug1238 case
+	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)
+	orl		#sx_mask,E_BYTE(%a6)
+	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
+	fmovemx	USER_FP0(%a6),%fp0-%fp3
+	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	frestore	(%a7)+
+	unlk		%a6
+	bral		fpsp_done
+e1_set:
+	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
+	fmovemx	USER_FP0(%a6),%fp0-%fp3
+	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	unlk		%a6
+	bral		fpsp_done
+|
+|	unf_res --- underflow result calculation
+|
+unf_res:
+	bsrl		g_rndpr		|returns RND_PREC in d0 0=ext,
+|					;1=sgl, 2=dbl
+|					;we need the RND_PREC in the
+|					;upper word for round
+	movew		#0,-(%a7)
+	movew		%d0,-(%a7)	|copy RND_PREC to stack
+|
+|
+| If the exception bit set is E3, the exceptional operand from the
+| fpu is in WBTEMP; else it is in FPTEMP.
+|
+	btstb		#E3,E_BYTE(%a6)
+	beqs		unf_E1
+unf_E3:
+	lea		WBTEMP(%a6),%a0	|a0 now points to operand
+|
+| Test for fsgldiv and fsglmul.  If the inst was one of these, then
+| force the precision to extended for the denorm routine.  Use
+| the user's precision for the round routine.
+|
+	movew		CMDREG3B(%a6),%d1	|check for fsgldiv or fsglmul
+	andiw		#0x7f,%d1
+	cmpiw		#0x30,%d1		|check for sgldiv
+	beqs		unf_sgl
+	cmpiw		#0x33,%d1		|check for sglmul
+	bnes		unf_cont	|if not, use fpcr prec in round
+unf_sgl:
+	clrl		%d0
+	movew		#0x1,(%a7)	|override g_rndpr precision
+|					;force single
+	bras		unf_cont
+unf_E1:
+	lea		FPTEMP(%a6),%a0	|a0 now points to operand
+unf_cont:
+	bclrb		#sign_bit,LOCAL_EX(%a0)	|clear sign bit
+	sne		LOCAL_SGN(%a0)		|store sign
+
+	bsrl		denorm		|returns denorm, a0 points to it
+|
+| WARNING:
+|				;d0 has guard,round sticky bit
+|				;make sure that it is not corrupted
+|				;before it reaches the round subroutine
+|				;also ensure that a0 isn't corrupted
+
+|
+| Set up d1 for round subroutine d1 contains the PREC/MODE
+| information respectively on upper/lower register halves.
+|
+	bfextu		FPCR_MODE(%a6){#2:#2},%d1	|get mode from FPCR
+|						;mode in lower d1
+	addl		(%a7)+,%d1		|merge PREC/MODE
+|
+| WARNING: a0 and d0 are assumed to be intact between the denorm and
+| round subroutines. All code between these two subroutines
+| must not corrupt a0 and d0.
+|
+|
+| Perform Round
+|	Input:		a0 points to input operand
+|			d0{31:29} has guard, round, sticky
+|			d1{01:00} has rounding mode
+|			d1{17:16} has rounding precision
+|	Output:		a0 points to rounded operand
+|
+
+	bsrl		round		|returns rounded denorm at (a0)
+|
+| Differentiate between store to memory vs. store to register
+|
+unf_store:
+	bsrl		g_opcls		|returns opclass in d0{2:0}
+	cmpib		#0x3,%d0
+	bnes		not_opc011
+|
+| At this point, a store to memory is pending
+|
+opc011:
+	bsrl		g_dfmtou
+	tstb		%d0
+	beqs		ext_opc011	|If extended, do not subtract
+|				;If destination format is sgl/dbl,
+	tstb		LOCAL_HI(%a0)	|If rounded result is normal,don't
+|					;subtract
+	bmis		ext_opc011
+	subqw		#1,LOCAL_EX(%a0)	|account for denorm bias vs.
+|				;normalized bias
+|				;          normalized   denormalized
+|				;single       $7f           $7e
+|				;double       $3ff          $3fe
+|
+ext_opc011:
+	bsrl		store		|stores to memory
+	bras		unf_done	|finish up
+
+|
+| At this point, a store to a float register is pending
+|
+not_opc011:
+	bsrl		store	|stores to float register
+|				;a0 is not corrupted on a store to a
+|				;float register.
+|
+| Set the condition codes according to result
+|
+	tstl		LOCAL_HI(%a0)	|check upper mantissa
+	bnes		ck_sgn
+	tstl		LOCAL_LO(%a0)	|check lower mantissa
+	bnes		ck_sgn
+	bsetb		#z_bit,FPSR_CC(%a6) |set condition codes if zero
+ck_sgn:
+	btstb		#sign_bit,LOCAL_EX(%a0)	|check the sign bit
+	beqs		unf_done
+	bsetb		#neg_bit,FPSR_CC(%a6)
+
+|
+| Finish.
+|
+unf_done:
+	btstb		#inex2_bit,FPSR_EXCEPT(%a6)
+	beqs		no_aunfl
+	bsetb		#aunfl_bit,FPSR_AEXCEPT(%a6)
+no_aunfl:
+	rts
+
+	|end
-- 
cgit v1.2.3