@@ -18,7+18,7 @@ export-objs := mca.o mtrr.o msr.o cpuid.o microcode.o i386_ksyms.o
obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \
- pci-dma.o i386_ksyms.o
+ pci-dma.o i386_ksyms.o i387.o
ifdef CONFIG_PCI
@@ -323,6+323,11 @@ ENTRY(coprocessor_error) pushl $ SYMBOL_NAME(do_coprocessor_error)
jmp error_code
+/*
+ * Entry stub for the SIMD coprocessor error trap: pushes a literal 0
+ * (presumably filling the error-code slot - confirm against error_code),
+ * then the address of the C handler do_simd_coprocessor_error, and
+ * jumps to the shared error_code path.
+ */
+ENTRY(simd_coprocessor_error)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_simd_coprocessor_error)
+ jmp error_code
+
ENTRY(device_not_available)
pushl $-1 # mark this as an int
SAVE_ALL
@@ -414,11+419,6 @@ ENTRY(spurious_interrupt_bug) pushl $ SYMBOL_NAME(do_spurious_interrupt_bug)
jmp error_code
-ENTRY(xmm_fault)
- pushl $0
- pushl $ SYMBOL_NAME(do_xmm_fault)
- jmp error_code
-
.data
ENTRY(sys_call_table)
.long SYMBOL_NAME(sys_ni_syscall) /* 0 - old "setup()" system call*/
#include <asm/semaphore.h>
#include <asm/processor.h>
+#include <asm/i387.h>
#include <asm/uaccess.h>
#include <asm/checksum.h>
#include <asm/io.h>
#include <asm/desc.h>
extern void dump_thread(struct pt_regs *, struct user *);
-extern int dump_fpu(elf_fpregset_t *);
extern spinlock_t rtc_lock;
#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
@@ -51,6+51,7 @@ EXPORT_SYMBOL(MCA_bus); EXPORT_SYMBOL(__verify_write);
EXPORT_SYMBOL(dump_thread);
EXPORT_SYMBOL(dump_fpu);
+EXPORT_SYMBOL(dump_extended_fpu);
EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(__io_virt_debug);
--- /dev/null
+/*
+ * linux/arch/i386/kernel/i387.c
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/math_emu.h>
+#include <asm/sigcontext.h>
+#include <asm/user.h>
+#include <asm/ptrace.h>
+#include <asm/uaccess.h>
+
+/*
+ * HAVE_FXSR selects between the fxsave and fsave views of the thread
+ * FPU state throughout this file: constant 1 with CONFIG_X86_FXSR,
+ * the cpu_has_fxsr feature test with CONFIG_X86_RUNTIME_FXSR, and
+ * constant 0 otherwise.
+ */
+#if defined(CONFIG_X86_FXSR)
+#define HAVE_FXSR 1
+#elif defined(CONFIG_X86_RUNTIME_FXSR)
+#define HAVE_FXSR (cpu_has_fxsr)
+#else
+#define HAVE_FXSR 0
+#endif
+
+/*
+ * HAVE_HWFP selects between the hardware and the soft (emulated) FPU
+ * state paths: boot_cpu_data.hard_math when CONFIG_MATH_EMULATION is
+ * set, constant 1 otherwise.
+ */
+#ifdef CONFIG_MATH_EMULATION
+#define HAVE_HWFP (boot_cpu_data.hard_math)
+#else
+#define HAVE_HWFP 1
+#endif
+
+/*
+ * FPU lazy state save handling.
+ */
+
+/*
+ * Store the FPU registers into tsk's thread state (fxsave image when
+ * HAVE_FXSR, fnsave image otherwise), then clear PF_USEDFPU on the
+ * task and call stts().
+ */
+void save_fpu( struct task_struct *tsk )
+{
+	if ( !HAVE_FXSR ) {
+		asm volatile( "fnsave %0 ; fwait"
+			      : "=m" (tsk->thread.i387.fsave) );
+	} else {
+		asm volatile( "fxsave %0 ; fwait"
+			      : "=m" (tsk->thread.i387.fxsave) );
+	}
+
+	tsk->flags &= ~PF_USEDFPU;
+	stts();
+}
+
+/*
+ * Like save_fpu(), except that the fxsave path is followed by fnclex
+ * instead of fwait.  Afterwards PF_USEDFPU is cleared on the task and
+ * stts() is called.
+ */
+void save_init_fpu( struct task_struct *tsk )
+{
+	if ( !HAVE_FXSR ) {
+		asm volatile( "fnsave %0 ; fwait"
+			      : "=m" (tsk->thread.i387.fsave) );
+	} else {
+		asm volatile( "fxsave %0 ; fnclex"
+			      : "=m" (tsk->thread.i387.fxsave) );
+	}
+
+	tsk->flags &= ~PF_USEDFPU;
+	stts();
+}
+
+/*
+ * Reload the FPU registers from tsk's saved thread state, with fxrstor
+ * when HAVE_FXSR and frstor otherwise.
+ */
+void restore_fpu( struct task_struct *tsk )
+{
+	if ( !HAVE_FXSR ) {
+		asm volatile( "frstor %0"
+			      : : "m" (tsk->thread.i387.fsave) );
+		return;
+	}
+
+	asm volatile( "fxrstor %0"
+		      : : "m" (tsk->thread.i387.fxsave) );
+}
+
+/*
+ * FPU tag word conversions.
+ */
+
+/*
+ * Collapse a tag word made of eight 2-bit fields into one bit per
+ * field: bit N of the result is set unless field N equals 3.
+ */
+static inline unsigned short twd_i387_to_fxsr( unsigned short twd )
+{
+	unsigned short abridged = 0;
+	int reg;
+
+	for ( reg = 0 ; reg < 8 ; reg++, twd >>= 2 ) {
+		if ( (twd & 0x3) == 0x3 )
+			continue;	/* field is 3: leave the bit clear */
+		abridged |= (1 << reg);
+	}
+	return abridged;
+}
+
+/*
+ * Rebuild a tag word with eight 2-bit fields from the one-bit-per-register
+ * tag kept in an fxsave image.
+ *
+ * A register whose bit is clear gets tag 3 (Empty).  Otherwise the tag
+ * is derived from the register contents stored in st_space:
+ *   exponent 0xffff                      -> 2 (Special)
+ *   exponent 0 with an all-zero significand -> 1 (Zero)
+ *   exponent 0 otherwise                 -> 2 (Special)
+ *   any other exponent                   -> 0 (Valid) when bit 15 of
+ *                                           significand[3] is set,
+ *                                           else 2 (Special)
+ *
+ * Returns the tag word in the low 16 bits with the high 16 bits all set.
+ */
+static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave )
+{
+	struct _fpxreg *st = NULL;
+	unsigned long twd = (unsigned long) fxsave->twd;
+	unsigned long tag;
+	unsigned long ret = 0xffff0000;
+	int i;
+
+/* Registers are stored at a 16-byte stride inside st_space.  The macro
+ * must not carry its own trailing semicolon, so it stays usable inside
+ * larger expressions. */
+#define FPREG_ADDR(f, n)	((char *)&((f)->st_space) + (n) * 16)
+
+	for ( i = 0 ; i < 8 ; i++ ) {
+		if ( twd & 0x1 ) {
+			st = (struct _fpxreg *) FPREG_ADDR( fxsave, i );
+
+			switch ( st->exponent ) {
+			case 0xffff:
+				tag = 2;		/* Special */
+				break;
+			case 0x0000:
+				if ( !st->significand[0] &&
+				     !st->significand[1] &&
+				     !st->significand[2] &&
+				     !st->significand[3] ) {
+					tag = 1;	/* Zero */
+				} else {
+					tag = 2;	/* Special */
+				}
+				break;
+			default:
+				if ( st->significand[3] & 0x8000 ) {
+					tag = 0;	/* Valid */
+				} else {
+					tag = 2;	/* Special */
+				}
+				break;
+			}
+		} else {
+			tag = 3;			/* Empty */
+		}
+		ret |= (tag << (2 * i));
+		twd = twd >> 1;
+	}
+	return ret;
+}
+
+/*
+ * FPU state interaction.
+ */
+
+/* Return the control word from tsk's saved FPU state. */
+unsigned short get_fpu_cwd( struct task_struct *tsk )
+{
+	if ( !HAVE_FXSR )
+		return (unsigned short)tsk->thread.i387.fsave.cwd;
+
+	return tsk->thread.i387.fxsave.cwd;
+}
+
+/* Return the status word from tsk's saved FPU state. */
+unsigned short get_fpu_swd( struct task_struct *tsk )
+{
+	if ( !HAVE_FXSR )
+		return (unsigned short)tsk->thread.i387.fsave.swd;
+
+	return tsk->thread.i387.fxsave.swd;
+}
+
+/*
+ * Return the tag word from tsk's saved FPU state.
+ *
+ * NOTE(review): with HAVE_FXSR the stored fxsave.twd is returned as-is,
+ * whereas set_fpu_twd() converts its argument with twd_i387_to_fxsr()
+ * before storing - confirm callers expect the unconverted value here.
+ */
+unsigned short get_fpu_twd( struct task_struct *tsk )
+{
+	if ( !HAVE_FXSR )
+		return (unsigned short)tsk->thread.i387.fsave.twd;
+
+	return tsk->thread.i387.fxsave.twd;
+}
+
+/*
+ * Return the saved MXCSR value for tsk, or the constant 0x1f80 when
+ * the fxsave state format is not in use.
+ */
+unsigned short get_fpu_mxcsr( struct task_struct *tsk )
+{
+	if ( !HAVE_FXSR )
+		return 0x1f80;
+
+	return tsk->thread.i387.fxsave.mxcsr;
+}
+
+/*
+ * Store a control word into tsk's saved FPU state.  The fsave form is
+ * written with its upper 16 bits set.
+ */
+void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd )
+{
+	if ( HAVE_FXSR ) {
+		tsk->thread.i387.fxsave.cwd = cwd;
+		return;
+	}
+	tsk->thread.i387.fsave.cwd = ((long)cwd | 0xffff0000);
+}
+
+/*
+ * Store a status word into tsk's saved FPU state.  The fsave form is
+ * written with its upper 16 bits set.
+ */
+void set_fpu_swd( struct task_struct *tsk, unsigned short swd )
+{
+	if ( HAVE_FXSR ) {
+		tsk->thread.i387.fxsave.swd = swd;
+		return;
+	}
+	tsk->thread.i387.fsave.swd = ((long)swd | 0xffff0000);
+}
+
+/*
+ * Store a tag word (given as eight 2-bit fields) into tsk's saved FPU
+ * state.  For the fxsave form it is first converted with
+ * twd_i387_to_fxsr(); the fsave form is written with its upper 16 bits
+ * set.
+ */
+void set_fpu_twd( struct task_struct *tsk, unsigned short twd )
+{
+	if ( HAVE_FXSR ) {
+		tsk->thread.i387.fxsave.twd = twd_i387_to_fxsr(twd);
+		return;
+	}
+	tsk->thread.i387.fsave.twd = ((long)twd | 0xffff0000);
+}
+
+/*
+ * Store an MXCSR value into tsk's saved fxsave state.  Does nothing
+ * when the fxsave state format is not in use.
+ */
+void set_fpu_mxcsr( struct task_struct *tsk, unsigned short mxcsr )
+{
+	if ( !HAVE_FXSR )
+		return;
+
+	tsk->thread.i387.fxsave.mxcsr = mxcsr;
+}
+
+/*
+ * FXSR floating point environment conversions.
+ */
+
+/*
+ * Write an fxsave image to user space in the 7-long environment plus
+ * eight-register layout that starts a struct _fpstate.
+ *
+ * The control and status words are widened with their upper 16 bits
+ * set, the tag word is expanded by twd_fxsr_to_i387(), and fop is
+ * packed into the upper half of the word holding fcs.
+ *
+ * Returns 0 on success, 1 if any user-space copy fails.
+ */
+static inline int convert_fxsr_to_user( struct _fpstate *buf,
+					struct i387_fxsave_struct *fxsave )
+{
+	unsigned long env[7];
+	struct _fpreg *dst = &buf->_st[0];
+	struct _fpxreg *src = (struct _fpxreg *) &fxsave->st_space[0];
+	int reg;
+
+	env[0] = (unsigned long)fxsave->cwd | 0xffff0000;
+	env[1] = (unsigned long)fxsave->swd | 0xffff0000;
+	env[2] = twd_fxsr_to_i387(fxsave);
+	env[3] = fxsave->fip;
+	env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16);
+	env[5] = fxsave->foo;
+	env[6] = fxsave->fos;
+
+	if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) )
+		return 1;
+
+	/* Only sizeof(struct _fpreg) bytes of each source slot are copied. */
+	for ( reg = 0 ; reg < 8 ; reg++ ) {
+		if ( __copy_to_user( &dst[reg], &src[reg], sizeof(*dst) ) )
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Read the 7-long environment and eight registers at the start of a
+ * user struct _fpstate and store them into an fxsave image.
+ *
+ * The low 16 bits of the first three words become cwd, swd and (via
+ * twd_i387_to_fxsr()) twd; the fifth word is split into fcs (low half)
+ * and fop (high half).
+ *
+ * Returns 0 on success, 1 if any user-space copy fails.
+ */
+static inline int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave,
+					  struct _fpstate *buf )
+{
+	unsigned long env[7];
+	struct _fpxreg *dst;
+	struct _fpreg *src;
+	int reg;
+
+	if ( __copy_from_user( env, buf, 7 * sizeof(long) ) )
+		return 1;
+
+	fxsave->cwd = (unsigned short)(env[0] & 0xffff);
+	fxsave->swd = (unsigned short)(env[1] & 0xffff);
+	fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff));
+	fxsave->fip = env[3];
+	fxsave->fop = (unsigned short)((env[4] & 0xffff0000) >> 16);
+	fxsave->fcs = (env[4] & 0xffff);
+	fxsave->foo = env[5];
+	fxsave->fos = env[6];
+
+	dst = (struct _fpxreg *) &fxsave->st_space[0];
+	src = &buf->_st[0];
+	/* Only sizeof(struct _fpreg) bytes are written into each slot. */
+	for ( reg = 0 ; reg < 8 ; reg++ ) {
+		if ( __copy_from_user( &dst[reg], &src[reg], sizeof(*src) ) )
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Signal frame handlers.
+ */
+
+/*
+ * Signal-frame save for the fsave format: flush the current task's FPU
+ * state to memory if it is live, mirror swd into the status field, and
+ * copy the whole fsave image to user space.
+ *
+ * Returns 1 on success, -1 if the user-space copy fails.
+ */
+static inline int save_i387_fsave( struct _fpstate *buf )
+{
+	struct task_struct *tsk = current;
+	int failed;
+
+	unlazy_fpu( tsk );
+	tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd;
+
+	failed = __copy_to_user( buf, &tsk->thread.i387.fsave,
+				 sizeof(struct i387_fsave_struct) ) != 0;
+	return failed ? -1 : 1;
+}
+
+/*
+ * Signal-frame save for the fxsave format.  After flushing live FPU
+ * state to memory it writes, in order: the converted environment and
+ * registers, the status word and X86_FXSR_MAGIC marker, and finally the
+ * raw fxsave image into _fxsr_env.
+ *
+ * Returns 1 on success, -1 as soon as any user-space write fails.
+ */
+static inline int save_i387_fxsave( struct _fpstate *buf )
+{
+	struct task_struct *tsk = current;
+	int faults = 0;
+
+	unlazy_fpu( tsk );
+
+	if ( convert_fxsr_to_user( buf, &tsk->thread.i387.fxsave ) )
+		return -1;
+
+	faults |= __put_user( tsk->thread.i387.fxsave.swd, &buf->status );
+	faults |= __put_user( X86_FXSR_MAGIC, &buf->magic );
+	if ( faults )
+		return -1;
+
+	if ( __copy_to_user( &buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
+			     sizeof(struct i387_fxsave_struct) ) )
+		return -1;
+
+	return 1;
+}
+
+/*
+ * Save the current task's FPU state into a signal frame.
+ *
+ * Returns 0 without touching buf when the task has not used the FPU;
+ * otherwise returns the result of the format-specific helper
+ * (save_i387_fxsave, save_i387_fsave or save_i387_soft).
+ */
+int save_i387( struct _fpstate *buf )
+{
+	if ( !current->used_math )
+		return 0;
+
+	/* This will cause a "finit" to be triggered by the next
+	 * attempted FPU operation by the 'current' process.
+	 */
+	current->used_math = 0;
+
+	if ( HAVE_HWFP ) {
+		if ( HAVE_FXSR ) {
+			return save_i387_fxsave( buf );
+		} else {
+			return save_i387_fsave( buf );
+		}
+	} else {
+		/* Address of the soft-FPU state of the current task. */
+		return save_i387_soft( &current->thread.i387.soft, buf );
+	}
+}
+
+/*
+ * Signal-frame restore for the fsave format: drop any live FPU state
+ * of the current task, then overwrite its saved fsave image from user
+ * space.  Returns the __copy_from_user() result.
+ */
+static inline int restore_i387_fsave( struct _fpstate *buf )
+{
+	struct task_struct *tsk = current;
+	struct i387_fsave_struct *image = &tsk->thread.i387.fsave;
+
+	clear_fpu( tsk );
+	return __copy_from_user( image, buf,
+				 sizeof(struct i387_fsave_struct) );
+}
+
+/*
+ * Signal-frame restore for the fxsave format: drop any live FPU state
+ * of the current task, load the raw image from buf->_fxsr_env, then
+ * overlay the environment and registers found at the start of buf.
+ *
+ * Returns 1 if the raw copy fails, otherwise the result of
+ * convert_fxsr_from_user().
+ *
+ * NOTE(review): the user-supplied image, MXCSR included, is stored
+ * without validation - confirm whether reserved bits need masking
+ * before the state is reloaded.
+ */
+static inline int restore_i387_fxsave( struct _fpstate *buf )
+{
+	struct task_struct *tsk = current;
+	struct i387_fxsave_struct *image = &tsk->thread.i387.fxsave;
+
+	clear_fpu( tsk );
+	if ( __copy_from_user( image, &buf->_fxsr_env[0],
+			       sizeof(struct i387_fxsave_struct) ) )
+		return 1;
+
+	return convert_fxsr_from_user( image, buf );
+}
+
+/*
+ * Restore the current task's FPU state from a signal frame, picking
+ * the fxsave, fsave or soft-FPU helper.  The task is marked as having
+ * used the FPU regardless of the outcome.
+ *
+ * Returns the helper's result (non-zero on failure).
+ */
+int restore_i387( struct _fpstate *buf )
+{
+	int err;
+
+	if ( HAVE_HWFP ) {
+		if ( HAVE_FXSR ) {
+			err = restore_i387_fxsave( buf );
+		} else {
+			err = restore_i387_fsave( buf );
+		}
+	} else {
+		/* Address of the soft-FPU state of the current task. */
+		err = restore_i387_soft( &current->thread.i387.soft, buf );
+	}
+	current->used_math = 1;
+	return err;
+}
+
+/*
+ * ptrace request handlers.
+ */
+
+/*
+ * ptrace read, fsave format: copy the leading
+ * sizeof(struct user_i387_struct) bytes of tsk's fsave image to user
+ * space.  Returns the __copy_to_user() result.
+ */
+static inline int get_fpregs_fsave( struct user_i387_struct *buf,
+				    struct task_struct *tsk )
+{
+	struct i387_fsave_struct *image = &tsk->thread.i387.fsave;
+
+	return __copy_to_user( buf, image, sizeof(struct user_i387_struct) );
+}
+
+/*
+ * ptrace read, fxsave format: convert tsk's fxsave image with
+ * convert_fxsr_to_user(), treating buf as the start of a struct
+ * _fpstate.  Returns that helper's result.
+ */
+static inline int get_fpregs_fxsave( struct user_i387_struct *buf,
+				     struct task_struct *tsk )
+{
+	struct _fpstate *frame = (struct _fpstate *)buf;
+
+	return convert_fxsr_to_user( frame, &tsk->thread.i387.fxsave );
+}
+
+/*
+ * ptrace read of tsk's FPU registers into the user buffer buf.
+ * Dispatches to the soft-FPU, fxsave or fsave helper and returns its
+ * result.
+ */
+int get_fpregs( struct user_i387_struct *buf, struct task_struct *tsk )
+{
+	if ( !HAVE_HWFP ) {
+		return save_i387_soft( &tsk->thread.i387.soft,
+				       (struct _fpstate *)buf );
+	}
+
+	if ( HAVE_FXSR )
+		return get_fpregs_fxsave( buf, tsk );
+
+	return get_fpregs_fsave( buf, tsk );
+}
+
+/*
+ * ptrace write, fsave format: overwrite the leading
+ * sizeof(struct user_i387_struct) bytes of tsk's fsave image from user
+ * space.  Returns the __copy_from_user() result.
+ */
+static inline int set_fpregs_fsave( struct task_struct *tsk,
+				    struct user_i387_struct *buf )
+{
+	struct i387_fsave_struct *image = &tsk->thread.i387.fsave;
+
+	return __copy_from_user( image, buf, sizeof(struct user_i387_struct) );
+}
+
+/*
+ * ptrace write, fxsave format: load tsk's fxsave image with
+ * convert_fxsr_from_user(), treating buf as the start of a struct
+ * _fpstate.  Returns that helper's result.
+ */
+static inline int set_fpregs_fxsave( struct task_struct *tsk,
+				     struct user_i387_struct *buf )
+{
+	struct _fpstate *frame = (struct _fpstate *)buf;
+
+	return convert_fxsr_from_user( &tsk->thread.i387.fxsave, frame );
+}
+
+/*
+ * ptrace write of tsk's FPU registers from the user buffer buf.
+ * Dispatches to the soft-FPU, fxsave or fsave helper and returns its
+ * result.
+ */
+int set_fpregs( struct task_struct *tsk, struct user_i387_struct *buf )
+{
+	if ( !HAVE_HWFP ) {
+		return restore_i387_soft( &tsk->thread.i387.soft,
+					  (struct _fpstate *)buf );
+	}
+
+	if ( HAVE_FXSR )
+		return set_fpregs_fxsave( tsk, buf );
+
+	return set_fpregs_fsave( tsk, buf );
+}
+
+/*
+ * ptrace read of tsk's extended (fxsave) FPU state into the user
+ * buffer buf.
+ *
+ * Returns 0 on success, -EIO when the fxsave state format is not in
+ * use, and -EFAULT when the copy to user space fails (previously a
+ * failed copy was silently reported as success).
+ */
+int get_fpxregs( struct user_fxsr_struct *buf, struct task_struct *tsk )
+{
+	if ( !HAVE_FXSR )
+		return -EIO;
+
+	if ( __copy_to_user( (void *)buf, &tsk->thread.i387.fxsave,
+			     sizeof(struct user_fxsr_struct) ) )
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * ptrace write of tsk's extended (fxsave) FPU state from the user
+ * buffer buf.
+ *
+ * Returns 0 on success, -EIO when the fxsave state format is not in
+ * use, and -EFAULT when the copy from user space fails (previously a
+ * failed copy was silently reported as success).
+ *
+ * NOTE(review): the image, MXCSR included, is stored without
+ * validation - confirm whether reserved bits need masking before the
+ * state is reloaded.
+ */
+int set_fpxregs( struct task_struct *tsk, struct user_fxsr_struct *buf )
+{
+	if ( !HAVE_FXSR )
+		return -EIO;
+
+	if ( __copy_from_user( &tsk->thread.i387.fxsave, (void *)buf,
+			       sizeof(struct user_fxsr_struct) ) )
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * FPU state for core dumps.
+ */
+
+/*
+ * Core-dump helper, fsave format: copy the leading
+ * sizeof(struct user_i387_struct) bytes of tsk's fsave image into fpu.
+ */
+static inline void copy_fpu_fsave( struct task_struct *tsk,
+				   struct user_i387_struct *fpu )
+{
+	struct i387_fsave_struct *image = &tsk->thread.i387.fsave;
+
+	memcpy( fpu, image, sizeof(struct user_i387_struct) );
+}
+
+/*
+ * Core-dump helper, fxsave format: fill fpu with a 7-long environment
+ * followed by the eight registers packed at 10 bytes each.
+ *
+ * The environment is rebuilt field by field, exactly as
+ * convert_fxsr_to_user() does for ptrace and signal frames.  The head
+ * of an fxsave image holds cwd/swd/twd/fop as 16-bit fields and keeps
+ * an abridged tag word, so copying its first 7 longs verbatim (as this
+ * function used to) yields a scrambled environment in the dump.
+ */
+static inline void copy_fpu_fxsave( struct task_struct *tsk,
+				    struct user_i387_struct *fpu )
+{
+	struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave;
+	unsigned long env[7];
+	unsigned short *to;
+	unsigned short *from;
+	int i;
+
+	env[0] = (unsigned long)fxsave->cwd | 0xffff0000;
+	env[1] = (unsigned long)fxsave->swd | 0xffff0000;
+	env[2] = twd_fxsr_to_i387(fxsave);
+	env[3] = fxsave->fip;
+	env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16);
+	env[5] = fxsave->foo;
+	env[6] = fxsave->fos;
+
+	memcpy( fpu, env, 7 * sizeof(unsigned long) );
+
+	/* Each source register occupies 8 shorts; only the first 5 are kept. */
+	to = (unsigned short *)&fpu->st_space[0];
+	from = (unsigned short *)&fxsave->st_space[0];
+	for ( i = 0 ; i < 8 ; i++, to += 5, from += 8 ) {
+		memcpy( to, from, 5 * sizeof(unsigned short) );
+	}
+}
+
+/*
+ * Fill fpu with the current task's FPU state for a core dump.
+ *
+ * Returns the task's used_math value; fpu is written only when that
+ * value is non-zero.  regs is not used.
+ */
+int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu )
+{
+	struct task_struct *tsk = current;
+	int fpvalid = tsk->used_math;
+
+	if ( !fpvalid )
+		return fpvalid;
+
+	/* Make sure the in-memory copy is up to date before reading it. */
+	unlazy_fpu( tsk );
+	if ( HAVE_FXSR )
+		copy_fpu_fxsave( tsk, fpu );
+	else
+		copy_fpu_fsave( tsk, fpu );
+
+	return fpvalid;
+}
+
+/*
+ * Fill fpu with the current task's fxsave image for a core dump.
+ *
+ * Returns non-zero (and writes fpu) only when the task has used the
+ * FPU and the fxsave state format is in use.  regs is not used.
+ */
+int dump_extended_fpu( struct pt_regs *regs, struct user_fxsr_struct *fpu )
+{
+	struct task_struct *tsk = current;
+	int fpvalid = tsk->used_math && HAVE_FXSR;
+
+	if ( !fpvalid )
+		return fpvalid;
+
+	/* Make sure the in-memory copy is up to date before reading it. */
+	unlazy_fpu( tsk );
+	memcpy( fpu, &tsk->thread.i387.fxsave,
+		sizeof(struct user_fxsr_struct) );
+
+	return fpvalid;
+}
* linux/arch/i386/kernel/process.c
*
* Copyright (C) 1995 Linus Torvalds
- * Pentium III code by Ingo Molnar with changes and support for
- * OS exception support by Goutham Rao
+ *
+ * Pentium III FXSR, SSE support
+ * Gareth Hughes <gareth@valinux.com>, May 2000
*/
/*
#include <asm/io.h>
#include <asm/ldt.h>
#include <asm/processor.h>
+#include <asm/i387.h>
#include <asm/desc.h>
#include <asm/mmu_context.h>
#ifdef CONFIG_MATH_EMULATION
@@ -471,94+473,6 @@ void copy_segments(struct task_struct *p, struct mm_struct *new_mm) return;
}
-#ifdef CONFIG_X86_FXSR
-
-int i387_hard_to_user ( struct _fpstate * user,
- struct i387_hard_struct * hard)
-{
- int i, err = 0;
- short *tmp, *tmp2;
- long *ltmp1, *ltmp2;
-
- err |= put_user(hard->cwd, &user->cw);
- err |= put_user(hard->swd, &user->sw);
- err |= put_user(fputag_KNIto387(hard->twd), &user->tag);
- err |= put_user(hard->fip, &user->ipoff);
- err |= put_user(hard->fcs, &user->cssel);
- err |= put_user(hard->fdp, &user->dataoff);
- err |= put_user(hard->fds, &user->datasel);
- err |= put_user(hard->mxcsr, &user->mxcsr);
-
- tmp = (short *)&user->_st;
- tmp2 = (short *)&hard->st_space;
-
- /*
- * Transform the two layouts:
- * (we do not mix 32-bit access with 16-bit access because
- * thats suboptimal on PPros)
- */
- for (i = 0; i < 8; i++)
- {
- err |= put_user(*tmp2, tmp); tmp++; tmp2++;
- err |= put_user(*tmp2, tmp); tmp++; tmp2++;
- err |= put_user(*tmp2, tmp); tmp++; tmp2++;
- err |= put_user(*tmp2, tmp); tmp++; tmp2++;
- err |= put_user(*tmp2, tmp); tmp++; tmp2 += 3;
- }
-
- ltmp1 = (unsigned long *)&(user->_xmm[0]);
- ltmp2 = (unsigned long *)&(hard->xmm_space[0]);
- for(i = 0; i < 88; i++)
- {
- err |= put_user(*ltmp2, ltmp1);
- ltmp1++; ltmp2++;
- }
-
- return err;
-}
-
-int i387_user_to_hard (struct i387_hard_struct * hard,
- struct _fpstate * user)
-{
- int i, err = 0;
- short *tmp, *tmp2;
- long *ltmp1, *ltmp2;
-
- err |= get_user(hard->cwd, &user->cw);
- err |= get_user(hard->swd, &user->sw);
- err |= get_user(hard->twd, &user->tag);
- hard->twd = fputag_387toKNI(hard->twd);
- err |= get_user(hard->fip, &user->ipoff);
- err |= get_user(hard->fcs, &user->cssel);
- err |= get_user(hard->fdp, &user->dataoff);
- err |= get_user(hard->fds, &user->datasel);
- err |= get_user(hard->mxcsr, &user->mxcsr);
-
- tmp2 = (short *)&hard->st_space;
- tmp = (short *)&user->_st;
-
- for (i = 0; i < 8; i++)
- {
- err |= get_user(*tmp2, tmp); tmp++; tmp2++;
- err |= get_user(*tmp2, tmp); tmp++; tmp2++;
- err |= get_user(*tmp2, tmp); tmp++; tmp2++;
- err |= get_user(*tmp2, tmp); tmp++; tmp2++;
- err |= get_user(*tmp2, tmp); tmp++; tmp2 += 3;
- }
-
- ltmp1 = (unsigned long *)(&user->_xmm[0]);
- ltmp2 = (unsigned long *)(&hard->xmm_space[0]);
- for(i = 0; i < (88); i++)
- {
- err |= get_user(*ltmp2, ltmp1);
- ltmp2++; ltmp1++;
- }
-
- return err;
-}
-
-#endif
-
/*
* Save a segment.
*/
@@ -590,23+504,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, }
/*
- * fill in the FPU structure for a core dump.
- */
-int dump_fpu (struct pt_regs * regs, struct user_i387_struct* fpu)
-{
- int fpvalid;
- struct task_struct *tsk = current;
-
- fpvalid = tsk->used_math;
- if (fpvalid) {
- unlazy_fpu(tsk);
- memcpy(fpu,&tsk->thread.i387.hard,sizeof(*fpu));
- }
-
- return fpvalid;
-}
-
-/*
* fill in the user structure for a core dump..
*/
void dump_thread(struct pt_regs * regs, struct user * dump)
/* ptrace.c */
/* By Ross Biro 1/23/92 */
-/* FXSAVE/FXRSTOR support by Ingo Molnar and modifications by Goutham Rao */
-/* edited by Linus Torvalds */
+/*
+ * Pentium III FXSR, SSE support
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
#include <linux/config.h> /* for CONFIG_MATH_EMULATION */
#include <linux/kernel.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
+#include <asm/i387.h>
#include <asm/debugreg.h>
/*
@@ -392,48+395,62 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) }
case PTRACE_GETFPREGS: { /* Get the child FPU state. */
- if (!access_ok(VERIFY_WRITE, (unsigned *)data, sizeof(struct user_i387_struct))) {
+ if (!access_ok(VERIFY_WRITE, (unsigned *)data,
+ sizeof(struct user_i387_struct))) {
ret = -EIO;
break;
}
ret = 0;
if ( !child->used_math ) {
/* Simulate an empty FPU. */
- i387_set_cwd(child->thread.i387.hard, 0x037f);
- i387_set_swd(child->thread.i387.hard, 0x0000);
- i387_set_twd(child->thread.i387.hard, 0xffff);
- }
-#ifdef CONFIG_MATH_EMULATION
- if ( boot_cpu_data.hard_math ) {
-#endif
- i387_hard_to_user((struct _fpstate *)data, &child->thread.i387.hard);
-#ifdef CONFIG_MATH_EMULATION
- } else {
- save_i387_soft(&child->thread.i387.soft, (struct _fpstate *)data);
+ set_fpu_cwd(child, 0x037f);
+ set_fpu_swd(child, 0x0000);
+ set_fpu_twd(child, 0xffff);
}
-#endif
+ get_fpregs((struct user_i387_struct *)data, child);
break;
}
case PTRACE_SETFPREGS: { /* Set the child FPU state. */
- if (!access_ok(VERIFY_READ, (unsigned *)data, sizeof(struct user_i387_struct))) {
+ if (!access_ok(VERIFY_READ, (unsigned *)data,
+ sizeof(struct user_i387_struct))) {
ret = -EIO;
break;
}
child->used_math = 1;
-#ifdef CONFIG_MATH_EMULATION
- if ( boot_cpu_data.hard_math ) {
-#endif
- i387_user_to_hard(&child->thread.i387.hard,(struct _fpstate *)data);
-#ifdef CONFIG_MATH_EMULATION
- } else {
- restore_i387_soft(&child->thread.i387.soft, (struct _fpstate *)data);
- }
-#endif
+ set_fpregs(child, (struct user_i387_struct *)data);
ret = 0;
break;
}
+ case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. */
+ if (!access_ok(VERIFY_WRITE, (unsigned *)data,
+ sizeof(struct user_fxsr_struct))) {
+ ret = -EIO;
+ break;
+ }
+ if ( !child->used_math ) {
+ /* Simulate an empty FPU. */
+ set_fpu_cwd(child, 0x037f);
+ set_fpu_swd(child, 0x0000);
+ set_fpu_twd(child, 0xffff);
+ set_fpu_mxcsr(child, 0x1f80);
+ }
+ ret = get_fpxregs((struct user_fxsr_struct *)data, child);
+ break;
+ }
+
+ case PTRACE_SETFPXREGS: { /* Set the child extended FPU state. */
+ if (!access_ok(VERIFY_READ, (unsigned *)data,
+ sizeof(struct user_fxsr_struct))) {
+ ret = -EIO;
+ break;
+ }
+ child->used_math = 1;
+ ret = set_fpxregs(child, (struct user_fxsr_struct *)data);
+ break;
+ }
+
default:
ret = -EIO;
break;
* Detection for Celeron coppermine, identify_cpu() overhauled,
* and a few other clean ups.
* Dave Jones <dave@powertweak.com>, April 2000
- * Pentium-III code by Ingo Molnar and modifications by Goutham Rao
*
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ * Gareth Hughes <gareth@valinux.com>, May 2000
*/
/*
@@ -784,20+786,6 @@ void __init setup_arch(char **cmdline_p) conswitchp = &dummy_con;
#endif
#endif
-#ifdef CONFIG_X86_FXSR
- if (boot_cpu_data.x86_capability & X86_FEATURE_FXSR)
- {
- printk("Enabling extended fast FPU save and restore ... ");
- set_in_cr4(X86_CR4_OSFXSR);
- printk("done.\n");
- }
- if (boot_cpu_data.x86_capability & X86_FEATURE_XMM)
- {
- printk("Enabling KNI unmasked exception support ... ");
- set_in_cr4(X86_CR4_OSXMMEXCPT);
- printk("done.\n");
- }
-#endif
}
static int __init get_model_name(struct cpuinfo_x86 *c)
* Copyright (C) 1991, 1992 Linus Torvalds
*
* 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
- * Pentium III support by Ingo Molnar, modifications and OS Exception support
- * by Goutham Rao
+ * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
*/
#include <linux/config.h>
#include <linux/stddef.h>
#include <asm/ucontext.h>
#include <asm/uaccess.h>
+#include <asm/i387.h>
#define DEBUG_SIG 0
@@ -187,29+187,6 @@ struct rt_sigframe char retcode[8];
};
-
-static inline int restore_i387_hard(struct _fpstate *buf)
-{
- struct task_struct *tsk = current;
- clear_fpu(tsk);
- return i387_user_to_hard(&tsk->thread.i387.hard, buf);
-}
-
-static inline int restore_i387(struct _fpstate *buf)
-{
- int err;
-#ifndef CONFIG_MATH_EMULATION
- err = restore_i387_hard(buf);
-#else
- if (boot_cpu_data.hard_math)
- err = restore_i387_hard(buf);
- else
- err = restore_i387_soft(&current->thread.i387.soft, buf);
-#endif
- current->used_math = 1;
- return err;
-}
-
static int
restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc, int *peax)
{
@@ -340,35+317,6 @@ badframe: * Set up a signal frame.
*/
-static inline int save_i387_hard(struct _fpstate * buf)
-{
- struct task_struct *tsk = current;
-
- unlazy_fpu(tsk);
- tsk->thread.i387.hard.status = tsk->thread.i387.hard.swd;
- if (i387_hard_to_user(buf, &tsk->thread.i387.hard))
- return -1;
- return 1;
-}
-
-static int save_i387(struct _fpstate *buf)
-{
- if (!current->used_math)
- return 0;
-
- /* This will cause a "finit" to be triggered by the next
- attempted FPU operation by the 'current' process.
- */
- current->used_math = 0;
-
-#ifndef CONFIG_MATH_EMULATION
- return save_i387_hard(buf);
-#else
- return boot_cpu_data.hard_math ? save_i387_hard(buf)
- : save_i387_soft(&current->thread.i387.soft, buf);
-#endif
-}
-
static int
setup_sigcontext(struct sigcontext *sc, struct _fpstate *fpstate,
struct pt_regs *regs, unsigned long mask)
* linux/arch/i386/traps.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
- * FXSAVE/FXRSTOR support by Ingo Molnar, OS exception support by Goutham Rao
+ *
+ * Pentium III FXSR, SSE support
+ * Gareth Hughes <gareth@valinux.com>, May 2000
*/
/*
#include <asm/atomic.h>
#include <asm/debugreg.h>
#include <asm/desc.h>
+#include <asm/i387.h>
#include <asm/smp.h>
#include <asm/pgalloc.h>
@@ -152,10+155,10 @@ asmlinkage void stack_segment(void); asmlinkage void general_protection(void);
asmlinkage void page_fault(void);
asmlinkage void coprocessor_error(void);
+asmlinkage void simd_coprocessor_error(void);
asmlinkage void reserved(void);
asmlinkage void alignment_check(void);
asmlinkage void spurious_interrupt_bug(void);
-asmlinkage void xmm_fault(void);
int kstack_depth_to_print = 24;
@@ -318,7+321,6 @@ DO_ERROR(11, SIGBUS, "segment not present", segment_not_present, current) DO_ERROR(12, SIGBUS, "stack segment", stack_segment, current)
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, current, BUS_ADRALN, get_cr2())
DO_ERROR(18, SIGSEGV, "reserved", reserved, current)
-DO_VM86_ERROR(19, SIGFPE, "XMM fault", xmm_fault, current)
asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
{
@@ -584,13+586,13 @@ void math_error(void *eip) {
struct task_struct * task;
siginfo_t info;
+ unsigned short cwd, swd;
/*
- * Save the info for the exception handler
- * (this will also clear the error)
+ * Save the info for the exception handler and clear the error.
*/
task = current;
- save_fpu(task);
+ save_init_fpu(task);
task->thread.trap_no = 16;
task->thread.error_code = 0;
info.si_signo = SIGFPE;
@@ -607,9+609,9 @@ void math_error(void *eip) * and it will suffer the consequences since we won't be able to
* fully reproduce the context of the exception
*/
- switch(((~task->thread.i387.hard.cwd) &
- task->thread.i387.hard.swd & 0x3f) |
- (task->thread.i387.hard.swd & 0x240)) {
+ cwd = get_fpu_cwd(task);
+ swd = get_fpu_swd(task);
+ switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) {
case 0x000:
default:
break;
@@ -641,6+643,79 @@ asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code) math_error((void *)regs->eip);
}
+/*
+ * Deliver SIGFPE to the current task for a SIMD floating-point
+ * exception.
+ *
+ * eip is reported as the faulting address in siginfo.  The saved MXCSR
+ * is decoded to choose si_code; when no single unmasked exception bit
+ * matches, si_code stays __SI_FAULT.
+ *
+ * NOTE(review): save_init_fpu() finishes with stts(), and load_mxcsr()
+ * executes ldmxcsr right after it - confirm that running that
+ * instruction at this point is intended.
+ */
+void simd_math_error(void *eip)
+{
+ struct task_struct * task;
+ siginfo_t info;
+ unsigned short mxcsr;
+
+ /*
+ * Save the info for the exception handler and clear the error.
+ */
+ task = current;
+ save_init_fpu(task);
+ load_mxcsr(0x1f80);
+ task->thread.trap_no = 19;
+ task->thread.error_code = 0;
+ info.si_signo = SIGFPE;
+ info.si_errno = 0;
+ info.si_code = __SI_FAULT;
+ info.si_addr = eip;
+ /*
+ * The SIMD FPU exceptions are handled a little differently, as there
+ * is only a single status/control register. Thus, to determine which
+ * unmasked exception was caught we must mask the exception mask bits
+ * at 0x1f80, and then use these to mask the exception bits at 0x3f.
+ */
+ mxcsr = get_fpu_mxcsr(task);
+ /* Mask bits sit 7 positions above their matching flag bits. */
+ switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
+ case 0x000:
+ default:
+ break;
+ case 0x001: /* Invalid Op */
+ info.si_code = FPE_FLTINV;
+ break;
+ case 0x002: /* Denormalize */
+ case 0x010: /* Underflow */
+ info.si_code = FPE_FLTUND;
+ break;
+ case 0x004: /* Zero Divide */
+ info.si_code = FPE_FLTDIV;
+ break;
+ case 0x008: /* Overflow */
+ info.si_code = FPE_FLTOVF;
+ break;
+ case 0x020: /* Precision */
+ info.si_code = FPE_FLTRES;
+ break;
+ }
+ force_sig_info(SIGFPE, &info, task);
+}
+
+/*
+ * C handler reached from the simd_coprocessor_error entry stub.
+ *
+ * When the CPU reports the XMM feature the trap is treated as a SIMD
+ * FPU exception and passed to simd_math_error().  Otherwise it is
+ * forwarded to the vm86 fault handler for vm86 tasks, or answered
+ * with die_if_kernel() followed by SIGSEGV.
+ */
+asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs,
+					  long error_code)
+{
+	if (cpu_has_xmm) {
+		/* Handle SIMD FPU exceptions on PIII+ processors. */
+		ignore_irq13 = 1;
+		simd_math_error((void *)regs->eip);
+		return;
+	}
+
+	/*
+	 * Handle strange cache flush from user space exception
+	 * in all other cases.  This is undocumented behaviour.
+	 */
+	if (regs->eflags & VM_MASK) {
+		handle_vm86_fault((struct kernel_vm86_regs *)regs,
+				  error_code);
+		return;
+	}
+
+	die_if_kernel("cache flush denied", regs, error_code);
+	current->thread.trap_no = 19;
+	current->thread.error_code = error_code;
+	force_sig(SIGSEGV, current);
+}
+
asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs,
long error_code)
{
@@ -661,17+736,16 @@ asmlinkage void math_state_restore(struct pt_regs regs) {
__asm__ __volatile__("clts"); /* Allow maths ops (or we recurse) */
- if(current->used_math)
- i387_restore_hard(current->thread.i387);
- else
- {
+ if (current->used_math) {
+ restore_fpu(current);
+ } else {
/*
* Our first FPU usage, clean the chip.
*/
__asm__("fninit");
current->used_math = 1;
}
- current->flags|=PF_USEDFPU; /* So we fnsave on switch_to() */
+ current->flags |= PF_USEDFPU; /* So we fnsave on switch_to() */
}
#ifndef CONFIG_MATH_EMULATION
@@ -905,7+979,7 @@ void __init trap_init(void) set_trap_gate(15,&spurious_interrupt_bug);
set_trap_gate(16,&coprocessor_error);
set_trap_gate(17,&alignment_check);
- set_trap_gate(19,&xmm_fault);
+ set_trap_gate(19,&simd_coprocessor_error);
set_system_gate(SYSCALL_VECTOR,&system_call);
* <rreilova@ececs.uc.edu>
* - Channing Corn (tests & fixes),
* - Andrew D. Balsa (code cleanup).
+ *
+ * Pentium III FXSR, SSE support
+ * Gareth Hughes <gareth@valinux.com>, May 2000
*/
/*
#include <linux/config.h>
#include <asm/processor.h>
+#include <asm/i387.h>
#include <asm/msr.h>
static int __init no_halt(char *s)
@@ -62,6+66,9 @@ static void __init copro_timeout(void) static double __initdata x = 4195835.0;
static double __initdata y = 3145727.0;
+/*
+ * Operands for the SIMD exception check in check_fpu(): one[] is
+ * loaded into %xmm1 and divided by zero[] in %xmm0 with divps.
+ */
+static float __initdata zero[4] = { 0.0, 0.0, 0.0, 0.0 };
+static float __initdata one[4] = { 1.0, 1.0, 1.0, 1.0 };
+
static void __init check_fpu(void)
{
unsigned short control_word;
@@ -139,6+146,37 @@ static void __init check_fpu(void) printk("OK, FPU using exception 16 error reporting.\n");
else
printk("Hmm, FPU using exception 16 error reporting with FDIV bug.\n");
+
+#if defined(CONFIG_X86_FXSR) || defined(CONFIG_X86_RUNTIME_FXSR)
+ /*
+ * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
+ */
+ if (offsetof(struct task_struct, thread.i387.fxsave) & 15)
+ panic("Kernel compiled for PII/PIII+ with FXSR, data not 16-byte aligned!");
+
+ if (cpu_has_fxsr) {
+ printk(KERN_INFO "Enabling fast FPU save and restore... ");
+ set_in_cr4(X86_CR4_OSFXSR);
+ printk("done.\n");
+ }
+#endif
+#ifdef CONFIG_X86_XMM
+ if (cpu_has_xmm) {
+ printk(KERN_INFO "Enabling unmasked SIMD FPU exception support... ");
+ set_in_cr4(X86_CR4_OSXMMEXCPT);
+ printk("done.\n");
+
+ /* Check if exception 19 works okay. */
+ load_mxcsr(0x0000);
+ printk(KERN_INFO "Checking SIMD FPU exceptions... ");
+ __asm__("movups %0,%%xmm0\n\t"
+ "movups %1,%%xmm1\n\t"
+ "divps %%xmm0,%%xmm1\n\t"
+ : : "m" (*&zero), "m" (*&one));
+ printk("OK, SIMD FPU using exception 19 error reporting.\n");
+ load_mxcsr(0x1f80);
+ }
+#endif
}
static void __init check_hlt(void)
@@ -424,6+462,14 @@ static void __init check_config(void) && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11))
panic("Kernel compiled for PPro+, assumes a local APIC without the read-before-write bug!");
#endif
+
+/*
+ * If we configured ourselves for FXSR, we'd better have it.
+ */
+#ifdef CONFIG_X86_FXSR
+ if (!cpu_has_fxsr)
+ panic("Kernel compiled for PII/PIII+, requires FXSR feature!");
+#endif
}
static void __init check_bugs(void)
@@ -14,6+14,7 @@ typedef unsigned long elf_greg_t; typedef elf_greg_t elf_gregset_t[ELF_NGREG];
typedef struct user_i387_struct elf_fpregset_t;
+typedef struct user_fxsr_struct elf_fpxregset_t;
/*
* This is used to ensure we don't load something for the wrong architecture.
--- /dev/null
+/*
+ * include/asm-i386/i387.h
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#ifndef __ASM_I386_I387_H
+#define __ASM_I386_I387_H
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <asm/processor.h>
+#include <asm/sigcontext.h>
+#include <asm/user.h>
+
+/*
+ * FPU lazy state save handling...
+ */
+extern void save_fpu( struct task_struct *tsk );
+extern void save_init_fpu( struct task_struct *tsk );
+extern void restore_fpu( struct task_struct *tsk );
+
+/*
+ * Save tsk's FPU state with save_fpu() if PF_USEDFPU is set on it.
+ * The argument is parenthesised so any pointer expression works; it is
+ * evaluated more than once, so it must have no side effects.
+ */
+#define unlazy_fpu( tsk ) do { \
+	if ( (tsk)->flags & PF_USEDFPU ) \
+		save_fpu( tsk ); \
+} while (0)
+
+/*
+ * Discard tsk's live FPU state: if PF_USEDFPU is set, clear it and
+ * call stts() without saving anything.  The argument is parenthesised
+ * so any pointer expression works; it is evaluated more than once, so
+ * it must have no side effects.
+ */
+#define clear_fpu( tsk ) do { \
+	if ( (tsk)->flags & PF_USEDFPU ) { \
+		(tsk)->flags &= ~PF_USEDFPU; \
+		stts(); \
+	} \
+} while (0)
+
+/*
+ * FPU state interaction...
+ */
+extern unsigned short get_fpu_cwd( struct task_struct *tsk );
+extern unsigned short get_fpu_swd( struct task_struct *tsk );
+extern unsigned short get_fpu_twd( struct task_struct *tsk );
+extern unsigned short get_fpu_mxcsr( struct task_struct *tsk );
+
+extern void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd );
+extern void set_fpu_swd( struct task_struct *tsk, unsigned short swd );
+extern void set_fpu_twd( struct task_struct *tsk, unsigned short twd );
+extern void set_fpu_mxcsr( struct task_struct *tsk, unsigned short mxcsr );
+
+/*
+ * Load the low 16 bits of val into MXCSR with ldmxcsr.  Does nothing
+ * when cpu_has_xmm is zero.
+ */
+#define load_mxcsr( val ) do { \
+ if ( cpu_has_xmm ) { \
+ unsigned long __mxcsr = ((unsigned long)(val) & 0xffff); \
+ asm volatile( "ldmxcsr %0" : : "m" (__mxcsr) ); \
+ } \
+} while (0)
+
+/*
+ * Signal frame handlers...
+ */
+extern int save_i387( struct _fpstate *buf );
+extern int restore_i387( struct _fpstate *buf );
+
+/*
+ * ptrace request handlers...
+ */
+extern int get_fpregs( struct user_i387_struct *buf,
+ struct task_struct *tsk );
+extern int set_fpregs( struct task_struct *tsk,
+ struct user_i387_struct *buf );
+
+extern int get_fpxregs( struct user_fxsr_struct *buf,
+ struct task_struct *tsk );
+extern int set_fpxregs( struct task_struct *tsk,
+ struct user_fxsr_struct *buf );
+
+/*
+ * FPU state for core dumps...
+ */
+extern int dump_fpu( struct pt_regs *regs,
+ struct user_i387_struct *fpu );
+extern int dump_extended_fpu( struct pt_regs *regs,
+ struct user_fxsr_struct *fpu );
+
+#endif /* __ASM_I386_I387_H */
@@ -83,24+83,22 @@ struct cpuinfo_x86 { #define X86_FEATURE_PGE 0x00002000 /* Page Global Enable */
#define X86_FEATURE_MCA 0x00004000 /* Machine Check Architecture */
#define X86_FEATURE_CMOV 0x00008000 /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */
-#define X86_FEATURE_PAT 0x00010000 /* Page Attribute Table */
+#define X86_FEATURE_PAT 0x00010000 /* Page Attribute Table */
#define X86_FEATURE_PSE36 0x00020000 /* 36-bit PSEs */
#define X86_FEATURE_18 0x00040000
#define X86_FEATURE_19 0x00080000
#define X86_FEATURE_20 0x00100000
#define X86_FEATURE_21 0x00200000
#define X86_FEATURE_22 0x00400000
-#define X86_FEATURE_MMX 0x00800000 /* multimedia extensions */
+#define X86_FEATURE_MMX 0x00800000 /* Multimedia Extensions */
#define X86_FEATURE_FXSR 0x01000000 /* FXSAVE and FXRSTOR instructions (fast save and restore of FPU context), and CR4.OSFXSR (OS uses these instructions) available */
-#define X86_FEATURE_XMM 0x02000000 /* Intel MMX2 instruction set */
+#define X86_FEATURE_XMM 0x02000000 /* Streaming SIMD Extensions */
#define X86_FEATURE_26 0x04000000
#define X86_FEATURE_27 0x08000000
#define X86_FEATURE_28 0x10000000
#define X86_FEATURE_29 0x20000000
#define X86_FEATURE_30 0x40000000
#define X86_FEATURE_AMD3D 0x80000000
-#define X86_CR4_OSFXSR 0x0200 /* fast FPU save/restore */
-#define X86_CR4_OSXMMEXCPT 0x0400 /* KNI (MMX2) unmasked exception 16 */
extern struct cpuinfo_x86 boot_cpu_data;
extern struct tss_struct init_tss[NR_CPUS];
@@ -125,6+123,10 @@ extern struct cpuinfo_x86 cpu_data[]; (boot_cpu_data.x86_capability & X86_FEATURE_DE)
#define cpu_has_vme \
(boot_cpu_data.x86_capability & X86_FEATURE_VME)
+#define cpu_has_fxsr \
+ (boot_cpu_data.x86_capability & X86_FEATURE_FXSR)
+#define cpu_has_xmm \
+ (boot_cpu_data.x86_capability & X86_FEATURE_XMM)
extern char ignore_irq13;
@@ -150,15+152,17 @@ extern inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) /*
* Intel CPU features in CR4
*/
-#define X86_CR4_VME 0x0001 /* enable vm86 extensions */
-#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */
-#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */
-#define X86_CR4_DE 0x0008 /* enable debugging extensions */
-#define X86_CR4_PSE 0x0010 /* enable page size extensions */
-#define X86_CR4_PAE 0x0020 /* enable physical address extensions */
-#define X86_CR4_MCE 0x0040 /* Machine check enable */
-#define X86_CR4_PGE 0x0080 /* enable global pages */
-#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
+#define X86_CR4_VME 0x0001 /* enable vm86 extensions */
+#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */
+#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */
+#define X86_CR4_DE 0x0008 /* enable debugging extensions */
+#define X86_CR4_PSE 0x0010 /* enable page size extensions */
+#define X86_CR4_PAE 0x0020 /* enable physical address extensions */
+#define X86_CR4_MCE 0x0040 /* Machine check enable */
+#define X86_CR4_PGE 0x0080 /* enable global pages */
+#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
+#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */
+#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */
/*
* Save the cr4 feature set we're using (ie
@@ -244,23+248,7 @@ extern unsigned int mca_pentium_flag; #define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
#define INVALID_IO_BITMAP_OFFSET 0x8000
-#ifndef CONFIG_X86_FXSR
-
-#define i387_save_hard(x) \
- __asm__("fnsave %0\n\tfwait": :"m" (x))
-#define i387_restore_hard(x) \
- __asm__("frstor %0": :"m" (x))
-
-#define i387_hard_to_user(uaddr, x) \
- __copy_to_user((uaddr), (x), sizeof(struct i387_hard_struct))
-#define i387_user_to_hard(x, uaddr) \
- __copy_from_user((x), (uaddr), sizeof(struct i387_hard_struct))
-
-#define i387_set_cwd(x,v) do { (x).cwd = 0xffff0000 | (v); } while (0)
-#define i387_set_swd(x,v) do { (x).swd = 0xffff0000 | (v); } while (0)
-#define i387_set_twd(x,v) do { (x).twd = 0xffff0000 | (v); } while (0)
-
-struct i387_hard_struct {
+struct i387_fsave_struct {
long cwd;
long swd;
long twd;
@@ -272,69+260,22 @@ struct i387_hard_struct { long status; /* software status information */
};
-#else
-
-/*
- * has to be 128-bit aligned
- */
-struct i387_hard_struct {
+struct i387_fxsave_struct {
unsigned short cwd;
unsigned short swd;
unsigned short twd;
- unsigned short fopcode;
- unsigned int fip;
- unsigned short fcs;
- unsigned short __reserved_01;
- unsigned int fdp;
- unsigned short fds;
- unsigned short __reserved_02;
- unsigned int mxcsr;
- unsigned int __reserved_03;
- unsigned int st_space[32]; /* 8*16 bytes for each FP/MMX-reg = 128 bytes */
- unsigned int xmm_space[22*4]; /* 22 cachelines for MMX2 registers */
- unsigned long status;
+ unsigned short fop;
+ long fip;
+ long fcs;
+ long foo;
+ long fos;
+ long mxcsr;
+ long reserved;
+ long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
+ long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */
+ long padding[56];
} __attribute__ ((aligned (16)));
-/*
- * tag word conversion (thanks to Gabriel Paubert for noticing the
- * subtle format difference and implementing these functions)
- *
- * there are several erratas wrt. the tag word in the i387, thus
- * any software relying on it's value is questionable, but we
- * definitely want to be as close as possible.
- */
-static inline unsigned short fputag_KNIto387(unsigned char tb) {
- unsigned short tw = tb;
- tw = ((tw<<4) | tw) &0x0f0f; /* zzzz7654zzzz3210 */
- tw = ((tw<<2) | tw) &0x3333; /* zz76zz54zz32zz10 */
- tw = ((tw<<1) | tw) &0x5555; /* z7z6z5z4z3z2z1z0 */
- return ~(tw*3);
-}
-
-static inline unsigned char fputag_387toKNI(unsigned short tw) {
- tw = ~tw;
- tw = (tw | (tw>>1)) & 0x5555; /* z7z6z5z4z3z2z1z0 */
- tw = (tw | (tw>>1)) & 0x3333; /* zz76zz54zz32zz10 */
- tw = (tw | (tw>>3)) & 0x0f0f; /* zzzz7654zzzz3210 */
- return (tw|(tw>>4)) & 0x00ff; /* zzzzzzzz76543210 */
-}
-
-#define i387_set_cwd(x,v) do { (x).cwd = (short)(v); } while (0)
-#define i387_set_swd(x,v) do { (x).swd = (short)(v); } while (0)
-#define i387_set_twd(x,v) do { (x).twd = fputag_387toKNI(v); } while (0)
-
-#define i387_save_hard(x) \
- { __asm__ __volatile__(".byte 0x0f, 0xae, 0x06": :"S" (&(x))); } while (0)
-
-#define i387_restore_hard(x) \
-do { __asm__ __volatile__(".byte 0x0f, 0xae, 0x4f, 0x00": :"D" (&(x))); } while(0)
-
-extern int i387_hard_to_user ( struct _fpstate * user,
- struct i387_hard_struct * hard);
-extern int i387_user_to_hard (struct i387_hard_struct * hard,
- struct _fpstate * user);
-#endif
-
struct i387_soft_struct {
long cwd;
long swd;
@@ -350,7+291,8 @@ struct i387_soft_struct { };
union i387_union {
- struct i387_hard_struct hard;
+ struct i387_fsave_struct fsave;
+ struct i387_fxsave_struct fxsave;
struct i387_soft_struct soft;
};
@@ -467,27+409,6 @@ extern void release_segments(struct mm_struct * mm); extern void forget_segments(void);
/*
- * FPU lazy state save handling..
- */
-#define save_fpu(tsk) do { \
- i387_save_hard(tsk->thread.i387); \
- tsk->flags &= ~PF_USEDFPU; \
- stts(); \
-} while (0)
-
-#define unlazy_fpu(tsk) do { \
- if (tsk->flags & PF_USEDFPU) \
- save_fpu(tsk); \
-} while (0)
-
-#define clear_fpu(tsk) do { \
- if (tsk->flags & PF_USEDFPU) { \
- tsk->flags &= ~PF_USEDFPU; \
- stts(); \
- } \
-} while (0)
-
-/*
* Return saved PC of a blocked thread.
*/
extern inline unsigned long thread_saved_pc(struct thread_struct *t)
@@ -46,6+46,8 @@ struct pt_regs { #define PTRACE_SETREGS 13
#define PTRACE_GETFPREGS 14
#define PTRACE_SETFPREGS 15
+#define PTRACE_GETFPXREGS 18
+#define PTRACE_SETFPXREGS 19
#ifdef __KERNEL__
#define user_mode(regs) ((VM_MASK & (regs)->eflags) || (3 & (regs)->xcs))
#ifndef _ASMi386_SIGCONTEXT_H
#define _ASMi386_SIGCONTEXT_H
-#include <linux/config.h>
/*
* As documented in the iBCS2 standard..
*
- * The first part of "struct _fpstate" is just the
- * normal i387 hardware setup, the extra "status"
- * word is used to save the coprocessor status word
- * before entering the handler.
+ * The first part of "struct _fpstate" is just the normal i387
+ * hardware setup, the extra "status" word is used to save the
+ * coprocessor status word before entering the handler.
+ *
+ * Pentium III FXSR, SSE support
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ *
+ * The FPU state data structure has had to grow to accommodate the
+ * extended FPU state required by the Streaming SIMD Extensions.
+ * There is no documented standard to accomplish this at the moment.
*/
struct _fpreg {
unsigned short significand[4];
unsigned short exponent;
};
+struct _fpxreg {
+ unsigned short significand[4];
+ unsigned short exponent;
+ unsigned short padding[3];
+};
+
+struct _xmmreg {
+ unsigned long element[4];
+};
+
struct _fpstate {
- unsigned long cw,
- sw,
- tag,
- ipoff,
- cssel,
- dataoff,
- datasel;
+ /* Regular FPU environment */
+ unsigned long cw;
+ unsigned long sw;
+ unsigned long tag;
+ unsigned long ipoff;
+ unsigned long cssel;
+ unsigned long dataoff;
+ unsigned long datasel;
struct _fpreg _st[8];
- unsigned long status;
-#ifdef CONFIG_X86_FXSR
+ unsigned short status;
+ unsigned short magic; /* 0xffff = regular FPU data only */
+
+ /* FXSR FPU environment */
+ unsigned long _fxsr_env[6]; /* FXSR FPU env is ignored */
unsigned long mxcsr;
- unsigned long _xmm[4*22];
-#endif
+ unsigned long reserved;
+ struct _fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */
+ struct _xmmreg _xmm[8];
+ unsigned long padding[56];
};
+#define X86_FXSR_MAGIC 0x0000
+
struct sigcontext {
unsigned short gs, __gsh;
unsigned short fs, __fsh;
The minimum core file size is 3 pages, or 12288 bytes.
*/
+/*
+ * Pentium III FXSR, SSE support
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ *
+ * Provide support for the GDB 5.0+ PTRACE_{GET|SET}FPXREGS requests for
+ * interacting with the FXSR-format floating point environment. Floating
+ * point data can be accessed in the regular format in the usual manner,
+ * and both the standard and SIMD floating point data can be accessed via
+ * the new ptrace requests. In either case, changes to the FPU environment
+ * will be reflected in the task's state as expected.
+ */
+
struct user_i387_struct {
long cwd;
long swd;
@@ -41,6+53,22 @@ struct user_i387_struct { long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
};
+struct user_fxsr_struct {
+ unsigned short cwd;
+ unsigned short swd;
+ unsigned short twd;
+ unsigned short fop;
+ long fip;
+ long fcs;
+ long foo;
+ long fos;
+ long mxcsr;
+ long reserved;
+ long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
+ long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */
+ long padding[56];
+};
+
/*
* This is the old layout of "struct pt_regs", and
* is still the layout used by user mode (the new
@@ -565,6+565,7 @@ typedef struct elf64_shdr { #define NT_PRFPREG 2
#define NT_PRPSINFO 3
#define NT_TASKSTRUCT 4
+#define NT_PRFPXREG 20
/* Note header in a PT_NOTE section */
typedef struct elf32_note {
@@ -20,6+20,7 @@ struct elf_siginfo typedef elf_greg_t greg_t;
typedef elf_gregset_t gregset_t;
typedef elf_fpregset_t fpregset_t;
+typedef elf_fpxregset_t fpxregset_t;
#define NGREG ELF_NGREG
#endif