arch/alpha/math-emu/math.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <asm/ptrace.h>

#include <linux/uaccess.h>

#include "sfp-util.h"
#include <math-emu/soft-fp.h>
#include <math-emu/single.h>
#include <math-emu/double.h>

#define	OPC_PAL		0x00
#define OPC_INTA	0x10
#define OPC_INTL	0x11
#define OPC_INTS	0x12
#define OPC_INTM	0x13
#define OPC_FLTC	0x14
#define OPC_FLTV	0x15
#define OPC_FLTI	0x16
#define OPC_FLTL	0x17
#define OPC_MISC	0x18
#define	OPC_JSR		0x1a

#define FOP_SRC_S	0
#define FOP_SRC_T	2
#define FOP_SRC_Q	3

#define FOP_FNC_ADDx	0
#define FOP_FNC_CVTQL	0
#define FOP_FNC_SUBx	1
#define FOP_FNC_MULx	2
#define FOP_FNC_DIVx	3
#define FOP_FNC_CMPxUN	4
#define FOP_FNC_CMPxEQ	5
#define FOP_FNC_CMPxLT	6
#define FOP_FNC_CMPxLE	7
#define FOP_FNC_SQRTx	11
#define FOP_FNC_CVTxS	12
#define FOP_FNC_CVTxT	14
#define FOP_FNC_CVTxQ	15

#define MISC_TRAPB	0x0000
#define MISC_EXCB	0x0400

extern unsigned long alpha_read_fp_reg (unsigned long reg);
extern void alpha_write_fp_reg (unsigned long reg, unsigned long val);
extern unsigned long alpha_read_fp_reg_s (unsigned long reg);
extern void alpha_write_fp_reg_s (unsigned long reg, unsigned long val);


#ifdef MODULE

MODULE_DESCRIPTION("FP Software completion module");
MODULE_LICENSE("GPL v2");

extern long (*alpha_fp_emul_imprecise)(struct pt_regs *, unsigned long);
extern long (*alpha_fp_emul) (unsigned long pc);

static long (*save_emul_imprecise)(struct pt_regs *, unsigned long);
static long (*save_emul) (unsigned long pc);

long do_alpha_fp_emul_imprecise(struct pt_regs *, unsigned long);
long do_alpha_fp_emul(unsigned long);

int init_module(void)
{
	save_emul_imprecise = alpha_fp_emul_imprecise;
	save_emul = alpha_fp_emul;
	alpha_fp_emul_imprecise = do_alpha_fp_emul_imprecise;
	alpha_fp_emul = do_alpha_fp_emul;
	return 0;
}

void cleanup_module(void)
{
	alpha_fp_emul_imprecise = save_emul_imprecise;
	alpha_fp_emul = save_emul;
}

#undef  alpha_fp_emul_imprecise
#define alpha_fp_emul_imprecise		do_alpha_fp_emul_imprecise
#undef  alpha_fp_emul
#define alpha_fp_emul			do_alpha_fp_emul

#endif /* MODULE */


/*
 * Emulate the floating point instruction at address PC.  Returns -1 if the
 * instruction to be emulated is illegal (such as with the opDEC trap), else
 * the SI_CODE for a SIGFPE signal, else 0 if everything's ok.
 *
 * Notice that the kernel does not and cannot use FP regs.  This is good
 * because it means that instead of saving/restoring all fp regs, we simply
 * stick the result of the operation into the appropriate register.
 */
long
alpha_fp_emul (unsigned long pc)
{
	FP_DECL_EX;
	FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
	FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);

	unsigned long fa, fb, fc, func, mode, src;
	unsigned long res, va, vb, vc, swcr, fpcr;
	__u32 insn;
	long si_code;

	get_user(insn, (__u32 __user *)pc);
	fc     = (insn >>  0) & 0x1f;	/* destination register */
	fb     = (insn >> 16) & 0x1f;
	fa     = (insn >> 21) & 0x1f;
	func   = (insn >>  5) & 0xf;
	src    = (insn >>  9) & 0x3;
	mode   = (insn >> 11) & 0x3;
	
	fpcr = rdfpcr();
	swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr);

	if (mode == 3) {
		/* Dynamic -- get rounding mode from fpcr.  */
		mode = (fpcr >> FPCR_DYN_SHIFT) & 3;
	}

	switch (src) {
	case FOP_SRC_S:
		va = alpha_read_fp_reg_s(fa);
		vb = alpha_read_fp_reg_s(fb);
		
		FP_UNPACK_SP(SA, &va);
		FP_UNPACK_SP(SB, &vb);

		switch (func) {
		case FOP_FNC_SUBx:
			FP_SUB_S(SR, SA, SB);
			goto pack_s;

		case FOP_FNC_ADDx:
			FP_ADD_S(SR, SA, SB);
			goto pack_s;

		case FOP_FNC_MULx:
			FP_MUL_S(SR, SA, SB);
			goto pack_s;

		case FOP_FNC_DIVx:
			FP_DIV_S(SR, SA, SB);
			goto pack_s;

		case FOP_FNC_SQRTx:
			FP_SQRT_S(SR, SB);
			goto pack_s;
		}
		goto bad_insn;

	case FOP_SRC_T:
		va = alpha_read_fp_reg(fa);
		vb = alpha_read_fp_reg(fb);

		if ((func & ~3) == FOP_FNC_CMPxUN) {
			FP_UNPACK_RAW_DP(DA, &va);
			FP_UNPACK_RAW_DP(DB, &vb);
			if (!DA_e && !_FP_FRAC_ZEROP_1(DA)) {
				FP_SET_EXCEPTION(FP_EX_DENORM);
				if (FP_DENORM_ZERO)
					_FP_FRAC_SET_1(DA, _FP_ZEROFRAC_1);
			}
			if (!DB_e && !_FP_FRAC_ZEROP_1(DB)) {
				FP_SET_EXCEPTION(FP_EX_DENORM);
				if (FP_DENORM_ZERO)
					_FP_FRAC_SET_1(DB, _FP_ZEROFRAC_1);
			}
			FP_CMP_D(res, DA, DB, 3);
			vc = 0x4000000000000000UL;
			/* CMPTEQ, CMPTUN don't trap on QNaN,
			   while CMPTLT and CMPTLE do */
			if (res == 3
			    && ((func & 3) >= 2
				|| FP_ISSIGNAN_D(DA)
				|| FP_ISSIGNAN_D(DB))) {
				FP_SET_EXCEPTION(FP_EX_INVALID);
			}
			switch (func) {
			case FOP_FNC_CMPxUN: if (res != 3) vc = 0; break;
			case FOP_FNC_CMPxEQ: if (res) vc = 0; break;
			case FOP_FNC_CMPxLT: if (res != -1) vc = 0; break;
			case FOP_FNC_CMPxLE: if ((long)res > 0) vc = 0; break;
			}
			goto done_d;
		}

		FP_UNPACK_DP(DA, &va);
		FP_UNPACK_DP(DB, &vb);

		switch (func) {
		case FOP_FNC_SUBx:
			FP_SUB_D(DR, DA, DB);
			goto pack_d;

		case FOP_FNC_ADDx:
			FP_ADD_D(DR, DA, DB);
			goto pack_d;

		case FOP_FNC_MULx:
			FP_MUL_D(DR, DA, DB);
			goto pack_d;

		case FOP_FNC_DIVx:
			FP_DIV_D(DR, DA, DB);
			goto pack_d;

		case FOP_FNC_SQRTx:
			FP_SQRT_D(DR, DB);
			goto pack_d;

		case FOP_FNC_CVTxS:
			/* It is irritating that DEC encoded CVTST with
			   SRC == T_floating.  It is also interesting that
			   the bit used to tell the two apart is /U... */
			if (insn & 0x2000) {
				FP_CONV(S,D,1,1,SR,DB);
				goto pack_s;
			} else {
				vb = alpha_read_fp_reg_s(fb);
				FP_UNPACK_SP(SB, &vb);
				DR_c = DB_c;
				DR_s = DB_s;
				DR_e = DB_e + (1024 - 128);
				DR_f = SB_f << (52 - 23);
				goto pack_d;
			}

		case FOP_FNC_CVTxQ:
			if (DB_c == FP_CLS_NAN
			    && (_FP_FRAC_HIGH_RAW_D(DB) & _FP_QNANBIT_D)) {
			  /* AAHB Table B-2 says QNaN should not trigger INV */
				vc = 0;
			} else
				FP_TO_INT_ROUND_D(vc, DB, 64, 2);
			goto done_d;
		}
		goto bad_insn;

	case FOP_SRC_Q:
		vb = alpha_read_fp_reg(fb);

		switch (func) {
		case FOP_FNC_CVTQL:
			/* Notice: We can get here only due to an integer
			   overflow.  Such overflows are reported as invalid
			   ops.  We return the result the hw would have
			   computed.  */
			vc = ((vb & 0xc0000000) << 32 |	/* sign and msb */
			      (vb & 0x3fffffff) << 29);	/* rest of the int */
			FP_SET_EXCEPTION (FP_EX_INVALID);
			goto done_d;

		case FOP_FNC_CVTxS:
			FP_FROM_INT_S(SR, ((long)vb), 64, long);
			goto pack_s;

		case FOP_FNC_CVTxT:
			FP_FROM_INT_D(DR, ((long)vb), 64, long);
			goto pack_d;
		}
		goto bad_insn;
	}
	goto bad_insn;

pack_s:
	FP_PACK_SP(&vc, SR);
	if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ))
		vc = 0;
	alpha_write_fp_reg_s(fc, vc);
	goto done;

pack_d:
	FP_PACK_DP(&vc, DR);
	if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ))
		vc = 0;
done_d:
	alpha_write_fp_reg(fc, vc);
	goto done;

	/*
	 * Take the appropriate action for each possible
	 * floating-point result:
	 *
	 *	- Set the appropriate bits in the FPCR
	 *	- If the specified exception is enabled in the FPCR,
	 *	  return.  The caller (entArith) will dispatch
	 *	  the appropriate signal to the translated program.
	 *
	 * In addition, properly track the exception state in software
	 * as described in the Alpha Architecture Handbook section 4.7.7.3.
	 */
done:
	if (_fex) {
		/* Record exceptions in software control word.  */
		swcr |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT);
		current_thread_info()->ieee_state
		  |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT);

		/* Update hardware control register.  */
		fpcr &= (~FPCR_MASK | FPCR_DYN_MASK);
		fpcr |= ieee_swcr_to_fpcr(swcr);
		wrfpcr(fpcr);

		/* Do we generate a signal?  */
		_fex = _fex & swcr & IEEE_TRAP_ENABLE_MASK;
		si_code = 0;
		if (_fex) {
			if (_fex & IEEE_TRAP_ENABLE_DNO) si_code = FPE_FLTUND;
			if (_fex & IEEE_TRAP_ENABLE_INE) si_code = FPE_FLTRES;
			if (_fex & IEEE_TRAP_ENABLE_UNF) si_code = FPE_FLTUND;
			if (_fex & IEEE_TRAP_ENABLE_OVF) si_code = FPE_FLTOVF;
			if (_fex & IEEE_TRAP_ENABLE_DZE) si_code = FPE_FLTDIV;
			if (_fex & IEEE_TRAP_ENABLE_INV) si_code = FPE_FLTINV;
		}

		return si_code;
	}

	/* We used to write the destination register here, but DEC FORTRAN
	   requires that the result *always* be written... so we do the write
	   immediately after the operations above.  */

	return 0;

bad_insn:
	printk(KERN_ERR "alpha_fp_emul: Invalid FP insn %#x at %#lx\n",
	       insn, pc);
	return -1;
}

long
alpha_fp_emul_imprecise (struct pt_regs *regs, unsigned long write_mask)
{
	unsigned long trigger_pc = regs->pc - 4;
	unsigned long insn, opcode, rc, si_code = 0;

	/*
	 * Turn off the bits corresponding to registers that are the
	 * target of instructions that set bits in the exception
	 * summary register.  We have some slack doing this because a
	 * register that is the target of a trapping instruction can
	 * be written at most once in the trap shadow.
	 *
	 * Branches, jumps, TRAPBs, EXCBs and calls to PALcode all
	 * bound the trap shadow, so we need not look any further than
	 * up to the first occurrence of such an instruction.
	 */
	while (write_mask) {
		get_user(insn, (__u32 __user *)(trigger_pc));
		opcode = insn >> 26;
		rc = insn & 0x1f;

		switch (opcode) {
		      case OPC_PAL:
		      case OPC_JSR:
		      case 0x30 ... 0x3f:	/* branches */
			goto egress;

		      case OPC_MISC:
			switch (insn & 0xffff) {
			      case MISC_TRAPB:
			      case MISC_EXCB:
				goto egress;

			      default:
				break;
			}
			break;

		      case OPC_INTA:
		      case OPC_INTL:
		      case OPC_INTS:
		      case OPC_INTM:
			write_mask &= ~(1UL << rc);
			break;

		      case OPC_FLTC:
		      case OPC_FLTV:
		      case OPC_FLTI:
		      case OPC_FLTL:
			write_mask &= ~(1UL << (rc + 32));
			break;
		}
		if (!write_mask) {
			/* Re-execute insns in the trap-shadow.  */
			regs->pc = trigger_pc + 4;
			si_code = alpha_fp_emul(trigger_pc);
			goto egress;
		}
		trigger_pc -= 4;
	}

egress:
	return si_code;
}