8d6ee8dc3fd0ecceb9f5bafc93c507b4e036da81
4 * Copyright (C) 1991, 1992 Linus Torvalds 8 * #!-checking implemented by tytso. 11 * Demand-loading implemented 01.12.91 - no need to read anything but 12 * the header into memory. The inode of the executable is put into 13 * "current->executable", and page faults do the actual loading. Clean. 15 * Once more I can proudly say that linux stood up to being changed: it 16 * was less than 2 hours work to get demand-loading completely implemented. 18 * Demand loading changed July 1993 by Eric Youngdale. Use mmap instead, 19 * current->executable is only used by the procfs. This allows a dispatch 20 * table to check for several different types of binary formats. We keep 21 * trying until we recognize the file or we run out of supported binary 25 #include <linux/config.h> 26 #include <linux/slab.h> 27 #include <linux/file.h> 28 #include <linux/mman.h> 29 #include <linux/a.out.h> 30 #include <linux/stat.h> 31 #include <linux/fcntl.h> 32 #include <linux/smp_lock.h> 33 #include <linux/init.h> 35 #include <asm/uaccess.h> 36 #include <asm/pgtable.h> 37 #include <asm/mmu_context.h> 40 #include <linux/kmod.h> 44 * Here are the actual binaries that will be accepted: 45 * add more with "register_binfmt()" if using modules... 47 * These are defined again for the 'real' modules if you are using a 48 * module definition for these routines. 51 static struct linux_binfmt
*formats
= (struct linux_binfmt
*) NULL
; 53 void __init
binfmt_setup(void) 55 #ifdef CONFIG_BINFMT_MISC 59 #ifdef CONFIG_BINFMT_ELF 63 #ifdef CONFIG_BINFMT_ELF32 67 #ifdef CONFIG_BINFMT_AOUT 71 #ifdef CONFIG_BINFMT_AOUT32 75 #ifdef CONFIG_BINFMT_EM86 79 /* This cannot be configured out of the kernel */ 83 intregister_binfmt(struct linux_binfmt
* fmt
) 85 struct linux_binfmt
** tmp
= &formats
; 101 #ifdef CONFIG_MODULES 102 intunregister_binfmt(struct linux_binfmt
* fmt
) 104 struct linux_binfmt
** tmp
= &formats
; 115 #endif/* CONFIG_MODULES */ 117 /* N.B. Error returns must be < 0 */ 118 intopen_dentry(struct dentry
* dentry
,int mode
) 120 struct inode
* inode
= dentry
->d_inode
; 122 struct list_head
* l
= NULL
; 126 l
= &inode
->i_sb
->s_files
; 129 if(!inode
->i_op
|| !inode
->i_op
->default_file_ops
) 138 f
->f_mode
= (mode
+1) & O_ACCMODE
; 139 f
->f_dentry
= dentry
; 142 f
->f_op
= inode
->i_op
->default_file_ops
; 144 error
= f
->f_op
->open(inode
,f
); 165 * Note that a shared library must be both readable and executable due to 168 * Also note that we take the address to load from from the file itself. 170 asmlinkage
intsys_uselib(const char* library
) 174 struct linux_binfmt
* fmt
; 177 fd
=sys_open(library
,0,0); 183 if(file
&& file
->f_dentry
&& file
->f_op
&& file
->f_op
->read
) { 184 for(fmt
= formats
; fmt
; fmt
= fmt
->next
) { 185 int(*fn
)(int) = fmt
->load_shlib
; 188 /* N.B. Should use file instead of fd */ 190 if(retval
!= -ENOEXEC
) 202 * count() counts the number of arguments/envelopes 204 static intcount(char** argv
) 213 error
=get_user(p
,argv
); 226 * 'copy_strings()' copies argument/envelope strings from user 227 * memory to free pages in kernel mem. These are in a format ready 228 * to be put directly into the top of new user memory. 230 intcopy_strings(int argc
,char** argv
,struct linux_binprm
*bprm
) 237 if(get_user(str
, argv
+argc
) || !str
|| !(len
=strlen_user(str
))) 243 /* XXX: add architecture specific overflow check here. */ 248 int offset
, bytes_to_copy
; 250 offset
= pos
% PAGE_SIZE
; 251 if(!(pag
= (char*) bprm
->page
[pos
/PAGE_SIZE
]) && 252 !(pag
= (char*) bprm
->page
[pos
/PAGE_SIZE
] = 253 (unsigned long*)get_free_page(GFP_USER
))) 256 bytes_to_copy
= PAGE_SIZE
- offset
; 257 if(bytes_to_copy
> len
) 259 if(copy_from_user(pag
+ offset
, str
, bytes_to_copy
)) 262 pos
+= bytes_to_copy
; 263 str
+= bytes_to_copy
; 264 len
-= bytes_to_copy
; 271 * Like copy_strings, but get argv and its values from kernel memory. 273 intcopy_strings_kernel(int argc
,char** argv
,struct linux_binprm
*bprm
) 276 mm_segment_t oldfs
=get_fs(); 278 r
=copy_strings(argc
, argv
, bprm
); 283 intsetup_arg_pages(struct linux_binprm
*bprm
) 285 unsigned long stack_base
; 286 struct vm_area_struct
*mpnt
; 289 stack_base
= STACK_TOP
- MAX_ARG_PAGES
*PAGE_SIZE
; 291 bprm
->p
+= stack_base
; 293 bprm
->loader
+= stack_base
; 294 bprm
->exec
+= stack_base
; 296 mpnt
=kmem_cache_alloc(vm_area_cachep
, SLAB_KERNEL
); 301 mpnt
->vm_mm
= current
->mm
; 302 mpnt
->vm_start
= PAGE_MASK
& (unsigned long) bprm
->p
; 303 mpnt
->vm_end
= STACK_TOP
; 304 mpnt
->vm_page_prot
= PAGE_COPY
; 305 mpnt
->vm_flags
= VM_STACK_FLAGS
; 308 mpnt
->vm_file
= NULL
; 310 insert_vm_struct(current
->mm
, mpnt
); 311 current
->mm
->total_vm
= (mpnt
->vm_end
- mpnt
->vm_start
) >> PAGE_SHIFT
; 314 for(i
=0; i
< MAX_ARG_PAGES
; i
++) { 317 put_dirty_page(current
,bprm
->page
[i
],stack_base
); 319 stack_base
+= PAGE_SIZE
; 326 * Read in the complete executable. This is used for "-N" files 327 * that aren't on a block boundary, and for files on filesystems 328 * without get_block support. 330 intread_exec(struct dentry
*dentry
,unsigned long offset
, 331 char* addr
,unsigned long count
,int to_kmem
) 334 struct inode
* inode
= dentry
->d_inode
; 335 int result
= -ENOEXEC
; 337 if(!inode
->i_op
|| !inode
->i_op
->default_file_ops
) 339 if(init_private_file(&file
, dentry
,1)) 343 if(file
.f_op
->llseek
) { 344 if(file
.f_op
->llseek(&file
,offset
,0) != offset
) 349 mm_segment_t old_fs
=get_fs(); 351 result
= file
.f_op
->read(&file
, addr
, count
, &file
.f_pos
); 354 result
=verify_area(VERIFY_WRITE
, addr
, count
); 357 result
= file
.f_op
->read(&file
, addr
, count
, &file
.f_pos
); 360 if(file
.f_op
->release
) 361 file
.f_op
->release(inode
,&file
); 366 static intexec_mmap(void) 368 struct mm_struct
* mm
, * old_mm
; 370 old_mm
= current
->mm
; 371 if(old_mm
&&atomic_read(&old_mm
->mm_users
) ==1) { 372 flush_cache_mm(old_mm
); 374 release_segments(old_mm
); 376 flush_tlb_mm(old_mm
); 382 struct mm_struct
*active_mm
= current
->active_mm
; 384 mm
->cpu_vm_mask
= (1UL<<smp_processor_id()); 386 current
->active_mm
= mm
; 387 switch_mm(active_mm
, mm
); 390 if(active_mm
!= old_mm
)BUG(); 401 * This function makes sure the current process has its own signal table, 402 * so that flush_signal_handlers can later reset the handlers without 403 * disturbing other processes. (Other processes might share the signal 404 * table via the CLONE_SIGHAND option to clone().) 407 staticinlineintmake_private_signals(void) 409 struct signal_struct
* newsig
; 411 if(atomic_read(¤t
->sig
->count
) <=1) 413 newsig
=kmalloc(sizeof(*newsig
), GFP_KERNEL
); 416 spin_lock_init(&newsig
->siglock
); 417 atomic_set(&newsig
->count
,1); 418 memcpy(newsig
->action
, current
->sig
->action
,sizeof(newsig
->action
)); 419 current
->sig
= newsig
; 424 * If make_private_signals() made a copy of the signal table, decrement the 425 * refcount of the original table, and free it if necessary. 426 * We don't do that in make_private_signals() so that we can back off 427 * in flush_old_exec() if an error occurs after calling make_private_signals(). 430 staticinlinevoidrelease_old_signals(struct signal_struct
* oldsig
) 432 if(current
->sig
== oldsig
) 434 if(atomic_dec_and_test(&oldsig
->count
)) 439 * These functions flushes out all traces of the currently running executable 440 * so that a new one can be started 443 staticinlinevoidflush_old_files(struct files_struct
* files
) 449 unsigned long set
, i
; 452 if(i
>= files
->max_fds
|| i
>= files
->max_fdset
) 454 set
=xchg(&files
->close_on_exec
->fds_bits
[j
],0); 456 for( ; set
; i
++,set
>>=1) { 463 intflush_old_exec(struct linux_binprm
* bprm
) 467 struct signal_struct
* oldsig
; 470 * Make sure we have a private signal table 472 oldsig
= current
->sig
; 473 retval
=make_private_signals(); 474 if(retval
)goto flush_failed
; 477 * Release all of the old mmap stuff 480 if(retval
)goto mmap_failed
; 482 /* This is the point of no return */ 483 release_old_signals(oldsig
); 485 if(current
->euid
== current
->uid
&& current
->egid
== current
->gid
) 486 current
->dumpable
=1; 487 name
= bprm
->filename
; 488 for(i
=0; (ch
= *(name
++)) !='\0';) { 493 current
->comm
[i
++] = ch
; 495 current
->comm
[i
] ='\0'; 499 if(bprm
->e_uid
!= current
->euid
|| bprm
->e_gid
!= current
->egid
|| 500 permission(bprm
->dentry
->d_inode
,MAY_READ
)) 501 current
->dumpable
=0; 503 flush_signal_handlers(current
); 504 flush_old_files(current
->files
); 509 if(current
->sig
!= oldsig
) 512 current
->sig
= oldsig
; 517 * We mustn't allow tracing of suid binaries, unless 518 * the tracer has the capability to trace anything.. 520 staticinlineintmust_not_trace_exec(struct task_struct
* p
) 522 return(p
->flags
& PF_PTRACED
) && !cap_raised(p
->p_pptr
->cap_effective
, CAP_SYS_PTRACE
); 526 * Fill the binprm structure from the inode. 527 * Check permissions, then read the first 512 bytes 529 intprepare_binprm(struct linux_binprm
*bprm
) 532 int retval
,id_change
,cap_raised
; 533 struct inode
* inode
= bprm
->dentry
->d_inode
; 535 mode
= inode
->i_mode
; 536 if(!S_ISREG(mode
))/* must be regular file */ 538 if(!(mode
&0111))/* with at least _one_ execute bit set */ 540 if(IS_NOEXEC(inode
))/* FS mustn't be mounted noexec */ 544 if((retval
=permission(inode
, MAY_EXEC
)) !=0) 546 /* better not execute files which are being written to */ 547 if(atomic_read(&inode
->i_writecount
) >0) 550 bprm
->e_uid
= current
->euid
; 551 bprm
->e_gid
= current
->egid
; 552 id_change
= cap_raised
=0; 556 bprm
->e_uid
= inode
->i_uid
; 557 if(bprm
->e_uid
!= current
->euid
) 563 * If setgid is set but no group execute bit then this 564 * is a candidate for mandatory locking, not a setgid 567 if((mode
& (S_ISGID
| S_IXGRP
)) == (S_ISGID
| S_IXGRP
)) { 568 bprm
->e_gid
= inode
->i_gid
; 569 if(!in_group_p(bprm
->e_gid
)) 573 /* We don't have VFS support for capabilities yet */ 574 cap_clear(bprm
->cap_inheritable
); 575 cap_clear(bprm
->cap_permitted
); 576 cap_clear(bprm
->cap_effective
); 578 /* To support inheritance of root-permissions and suid-root 579 * executables under compatibility mode, we raise the 580 * effective and inherited bitmasks of the executable file 581 * (translation: we set the executable "capability dumb" and 582 * set the allowed set to maximum). We don't set any forced 585 * If only the real uid is 0, we only raise the inheritable 586 * bitmask of the executable file (translation: we set the 587 * allowed set to maximum and the application to "capability 591 if(!issecure(SECURE_NOROOT
)) { 592 if(bprm
->e_uid
==0|| current
->uid
==0) 593 cap_set_full(bprm
->cap_inheritable
); 595 cap_set_full(bprm
->cap_effective
); 598 /* Only if pP' is _not_ a subset of pP, do we consider there 599 * has been a capability related "change of capability". In 600 * such cases, we need to check that the elevation of 601 * privilege does not go against other system constraints. 602 * The new Permitted set is defined below -- see (***). */ 604 kernel_cap_t working
= 605 cap_combine(bprm
->cap_permitted
, 606 cap_intersect(bprm
->cap_inheritable
, 607 current
->cap_inheritable
)); 608 if(!cap_issubset(working
, current
->cap_permitted
)) { 613 if(id_change
|| cap_raised
) { 614 /* We can't suid-execute if we're sharing parts of the executable */ 615 /* or if we're being traced (or if suid execs are not allowed) */ 616 /* (current->mm->mm_users > 1 is ok, as we'll get a new mm anyway) */ 618 ||must_not_trace_exec(current
) 619 || (atomic_read(¤t
->fs
->count
) >1) 620 || (atomic_read(¤t
->sig
->count
) >1) 621 || (atomic_read(¤t
->files
->count
) >1)) { 622 if(id_change
&& !capable(CAP_SETUID
)) 624 if(cap_raised
&& !capable(CAP_SETPCAP
)) 629 memset(bprm
->buf
,0,sizeof(bprm
->buf
)); 630 returnread_exec(bprm
->dentry
,0,bprm
->buf
,128,1); 634 * This function is used to produce the new IDs and capabilities 635 * from the old ones and the file's capabilities. 637 * The formula used for evolving capabilities is: 640 * (***) pP' = fP | (fI & pI) 641 * pE' = pP' & fE [NB. fE is 0 or ~0] 643 * I=Inheritable, P=Permitted, E=Effective // p=process, f=file 644 * ' indicates post-exec(). 647 voidcompute_creds(struct linux_binprm
*bprm
) 649 int new_permitted
=cap_t(bprm
->cap_permitted
) | 650 (cap_t(bprm
->cap_inheritable
) & 651 cap_t(current
->cap_inheritable
)); 653 /* For init, we want to retain the capabilities set 654 * in the init_task struct. Thus we skip the usual 655 * capability rules */ 656 if(current
->pid
!=1) { 657 cap_t(current
->cap_permitted
) = new_permitted
; 658 cap_t(current
->cap_effective
) = new_permitted
& 659 cap_t(bprm
->cap_effective
); 662 /* AUD: Audit candidate if current->cap_effective is set */ 664 current
->suid
= current
->euid
= current
->fsuid
= bprm
->e_uid
; 665 current
->sgid
= current
->egid
= current
->fsgid
= bprm
->e_gid
; 666 if(current
->euid
!= current
->uid
|| current
->egid
!= current
->gid
|| 667 !cap_issubset(new_permitted
, current
->cap_permitted
)) 668 current
->dumpable
=0; 672 voidremove_arg_zero(struct linux_binprm
*bprm
) 675 unsigned long offset
; 677 offset
= bprm
->p
% PAGE_SIZE
; 678 page
= (char*)bprm
->page
[bprm
->p
/PAGE_SIZE
]; 679 while(bprm
->p
++,*(page
+offset
++)) 680 if(offset
==PAGE_SIZE
){ 682 page
= (char*)bprm
->page
[bprm
->p
/PAGE_SIZE
]; 689 * cycle the list of binary formats handler, until one recognizes the image 691 intsearch_binary_handler(struct linux_binprm
*bprm
,struct pt_regs
*regs
) 694 struct linux_binfmt
*fmt
; 696 /* handle /sbin/loader.. */ 698 struct exec
* eh
= (struct exec
*) bprm
->buf
; 699 struct linux_binprm bprm_loader
; 701 if(!bprm
->loader
&& eh
->fh
.f_magic
==0x183&& 702 (eh
->fh
.f_flags
&0x3000) ==0x3000) 705 char* dynloader
[] = {"/sbin/loader"}; 706 struct dentry
* dentry
; 711 bprm_loader
.p
= PAGE_SIZE
*MAX_ARG_PAGES
-sizeof(void*); 712 for(i
=0; i
<MAX_ARG_PAGES
; i
++)/* clear page-table */ 713 bprm_loader
.page
[i
] =0; 715 dentry
=open_namei(dynloader
[0],0,0); 716 retval
=PTR_ERR(dentry
); 719 bprm
->dentry
= dentry
; 720 bprm
->loader
= bprm_loader
.p
; 721 retval
=prepare_binprm(bprm
); 724 /* should call search_binary_handler recursively here, 725 but it does not matter */ 729 for(try=0;try<2;try++) { 730 for(fmt
= formats
; fmt
; fmt
= fmt
->next
) { 731 int(*fn
)(struct linux_binprm
*,struct pt_regs
*) = fmt
->load_binary
; 734 retval
=fn(bprm
, regs
); 739 current
->did_exec
=1; 742 if(retval
!= -ENOEXEC
) 744 if(!bprm
->dentry
)/* We don't have the dentry anymore */ 747 if(retval
!= -ENOEXEC
) { 751 #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) 753 if(printable(bprm
->buf
[0]) && 754 printable(bprm
->buf
[1]) && 755 printable(bprm
->buf
[2]) && 756 printable(bprm
->buf
[3])) 758 sprintf(modname
,"binfmt-%04x", *(unsigned short*)(&bprm
->buf
[2])); 759 request_module(modname
); 768 * sys_execve() executes a new program. 770 intdo_execve(char* filename
,char** argv
,char** envp
,struct pt_regs
* regs
) 772 struct linux_binprm bprm
; 773 struct dentry
* dentry
; 777 bprm
.p
= PAGE_SIZE
*MAX_ARG_PAGES
-sizeof(void*); 778 memset(bprm
.page
,0, MAX_ARG_PAGES
*sizeof(bprm
.page
[0])); 780 dentry
=open_namei(filename
,0,0); 781 retval
=PTR_ERR(dentry
); 785 bprm
.dentry
= dentry
; 786 bprm
.filename
= filename
; 790 if((bprm
.argc
=count(argv
)) <0) { 795 if((bprm
.envc
=count(envp
)) <0) { 800 retval
=prepare_binprm(&bprm
); 804 retval
=copy_strings_kernel(1, &bprm
.filename
, &bprm
); 809 retval
=copy_strings(bprm
.envc
, envp
, &bprm
); 813 retval
=copy_strings(bprm
.argc
, argv
, &bprm
); 817 retval
=search_binary_handler(&bprm
,regs
); 823 /* Something went wrong, return the inode and free the argument pages*/ 827 /* Assumes that free_page() can take a NULL argument. */ 828 /* I hope this is ok for all architectures */ 829 for(i
=0; i
<MAX_ARG_PAGES
; i
++) 830 free_page(bprm
.page
[i
]); 835 intdo_coredump(long signr
,struct pt_regs
* regs
) 837 struct linux_binfmt
* binfmt
; 838 char corename
[6+sizeof(current
->comm
)]; 840 struct dentry
* dentry
; 841 struct inode
* inode
; 844 binfmt
= current
->binfmt
; 845 if(!binfmt
|| !binfmt
->core_dump
) 847 if(!current
->dumpable
||atomic_read(¤t
->mm
->mm_users
) !=1) 848 current
->dumpable
=0; 849 if(current
->rlim
[RLIMIT_CORE
].rlim_cur
< binfmt
->min_coredump
) 852 memcpy(corename
,"core.",5); 854 memcpy(corename
+5,current
->comm
,sizeof(current
->comm
)); 858 file
=filp_open(corename
, O_CREAT
|2| O_TRUNC
| O_NOFOLLOW
,0600); 861 dentry
= file
->f_dentry
; 862 inode
= dentry
->d_inode
; 863 if(inode
->i_nlink
>1) 864 goto close_fail
;/* multiple links - don't dump */ 866 if(!S_ISREG(inode
->i_mode
)) 868 if(!inode
->i_op
|| !inode
->i_op
->default_file_ops
) 870 if(!file
->f_op
->write
) 872 if(!binfmt
->core_dump(signr
, regs
, file
)) 874 filp_close(file
, NULL
); 879 filp_close(file
, NULL
);