| From 5006dd0fae6126c149868102c100cd90a20ef2e3 Mon Sep 17 00:00:00 2001 |
| From: Kyle McMartin <kyle@phobos.i.jkkm.org> |
| Date: Mon, 29 Mar 2010 23:20:18 -0400 |
| Subject: execshield |
| |
| cebbert@redhat.com: added fix for bz#220892 |
| |
| diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h |
| index 617bd56..526248d 100644 |
| |
| |
| @@ -5,6 +5,7 @@ |
| #include <asm/ldt.h> |
| #include <asm/mmu.h> |
| #include <linux/smp.h> |
| +#include <linux/mm_types.h> |
| |
| static inline void fill_ldt(struct desc_struct *desc, |
| const struct user_desc *info) |
| @@ -93,6 +94,9 @@ static inline int desc_empty(const void *ptr) |
| |
| #define load_TLS(t, cpu) native_load_tls(t, cpu) |
| #define set_ldt native_set_ldt |
| +#ifdef CONFIG_X86_32 |
| +#define load_user_cs_desc native_load_user_cs_desc |
| +#endif /*CONFIG_X86_32*/ |
| |
| #define write_ldt_entry(dt, entry, desc) \ |
| native_write_ldt_entry(dt, entry, desc) |
| @@ -392,4 +396,25 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist) |
| _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); |
| } |
| |
| +#ifdef CONFIG_X86_32 |
| +static inline void set_user_cs(struct desc_struct *desc, unsigned long limit) |
| +{ |
| + limit = (limit - 1) / PAGE_SIZE; |
| + desc->a = limit & 0xffff; |
| + desc->b = (limit & 0xf0000) | 0x00c0fb00; |
| +} |
| + |
| +static inline void native_load_user_cs_desc(int cpu, struct mm_struct *mm) |
| +{ |
| + get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS] = (mm)->context.user_cs; |
| +} |
| + |
| +#define arch_add_exec_range arch_add_exec_range |
| +#define arch_remove_exec_range arch_remove_exec_range |
| +#define arch_flush_exec_range arch_flush_exec_range |
| +extern void arch_add_exec_range(struct mm_struct *mm, unsigned long limit); |
| +extern void arch_remove_exec_range(struct mm_struct *mm, unsigned long limit); |
| +extern void arch_flush_exec_range(struct mm_struct *mm); |
| +#endif /* CONFIG_X86_32 */ |
| + |
| #endif /* _ASM_X86_DESC_H */ |
| diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h |
| index 80a1dee..8314c66 100644 |
| |
| |
| @@ -7,12 +7,19 @@ |
| /* |
| * The x86 doesn't have a mmu context, but |
| * we put the segment information here. |
| + * |
| + * exec_limit is used to track the range PROT_EXEC |
| + * mappings span. |
| */ |
| typedef struct { |
| void *ldt; |
| int size; |
| struct mutex lock; |
| void *vdso; |
| +#ifdef CONFIG_X86_32 |
| + struct desc_struct user_cs; |
| + unsigned long exec_limit; |
| +#endif |
| } mm_context_t; |
| |
| #ifdef CONFIG_SMP |
| diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h |
| index 5653f43..55dadb2 100644 |
| |
| |
| @@ -289,6 +289,12 @@ static inline void set_ldt(const void *addr, unsigned entries) |
| { |
| PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries); |
| } |
| +#ifdef CONFIG_X86_32 |
| +static inline void load_user_cs_desc(unsigned int cpu, struct mm_struct *mm) |
| +{ |
| + PVOP_VCALL2(pv_cpu_ops.load_user_cs_desc, cpu, mm); |
| +} |
| +#endif /*CONFIG_X86_32*/ |
| static inline void store_gdt(struct desc_ptr *dtr) |
| { |
| PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr); |
| diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h |
| index db9ef55..19c2793 100644 |
| |
| |
| @@ -118,6 +118,9 @@ struct pv_cpu_ops { |
| void (*store_gdt)(struct desc_ptr *); |
| void (*store_idt)(struct desc_ptr *); |
| void (*set_ldt)(const void *desc, unsigned entries); |
| +#ifdef CONFIG_X86_32 |
| + void (*load_user_cs_desc)(int cpu, struct mm_struct *mm); |
| +#endif |
| unsigned long (*store_tr)(void); |
| void (*load_tls)(struct thread_struct *t, unsigned int cpu); |
| #ifdef CONFIG_X86_64 |
| diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h |
| index b753ea5..4893156 100644 |
| |
| |
| @@ -162,6 +162,9 @@ static inline int hlt_works(int cpu) |
| |
| #define cache_line_size() (boot_cpu_data.x86_cache_alignment) |
| |
| +#define __HAVE_ARCH_ALIGN_STACK |
| +extern unsigned long arch_align_stack(unsigned long sp); |
| + |
| extern void cpu_detect(struct cpuinfo_x86 *c); |
| |
| extern struct pt_regs *idle_regs(struct pt_regs *); |
| diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c |
| index 4868e4a..6c8d2ca 100644 |
| |
| |
| @@ -802,6 +802,20 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) |
| /* Filter out anything that depends on CPUID levels we don't have */ |
| filter_cpuid_features(c, true); |
| |
| + /* |
| + * emulation of NX with segment limits unfortunately means |
| + * we have to disable the fast system calls, due to the way that |
| + * sysexit clears the segment limits on return. |
| + * If we have either disabled exec-shield on the boot command line, |
| + * or we have NX, then we don't need to do this. |
| + */ |
| + if (exec_shield != 0) { |
| +#ifdef CONFIG_X86_PAE |
| + if (!test_cpu_cap(c, X86_FEATURE_NX)) |
| +#endif |
| + clear_cpu_cap(c, X86_FEATURE_SEP); |
| + } |
| + |
| /* If the model name is still unset, do table lookup. */ |
| if (!c->x86_model_id[0]) { |
| const char *p; |
| diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c |
| index 1db183e..238b97d 100644 |
| |
| |
| @@ -345,6 +345,9 @@ struct pv_cpu_ops pv_cpu_ops = { |
| .read_tscp = native_read_tscp, |
| .load_tr_desc = native_load_tr_desc, |
| .set_ldt = native_set_ldt, |
| +#ifdef CONFIG_X86_32 |
| + .load_user_cs_desc = native_load_user_cs_desc, |
| +#endif /*CONFIG_X86_32*/ |
| .load_gdt = native_load_gdt, |
| .load_idt = native_load_idt, |
| .store_gdt = native_store_gdt, |
| diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c |
| index f6c6266..8ac2589 100644 |
| |
| |
| @@ -251,7 +251,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, |
| void |
| start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) |
| { |
| + int cpu; |
| + |
| set_user_gs(regs, 0); |
| + |
| regs->fs = 0; |
| set_fs(USER_DS); |
| regs->ds = __USER_DS; |
| @@ -260,6 +263,11 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) |
| regs->cs = __USER_CS; |
| regs->ip = new_ip; |
| regs->sp = new_sp; |
| + |
| + cpu = get_cpu(); |
| + load_user_cs_desc(cpu, current->mm); |
| + put_cpu(); |
| + |
| /* |
| * Free the old FP and other extended state |
| */ |
| @@ -319,6 +327,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) |
| if (preload_fpu) |
| prefetch(next->xstate); |
| |
| + if (next_p->mm) |
| + load_user_cs_desc(cpu, next_p->mm); |
| + |
| /* |
| * Reload esp0. |
| */ |
| @@ -412,3 +423,40 @@ unsigned long get_wchan(struct task_struct *p) |
| return 0; |
| } |
| |
| +static void modify_cs(struct mm_struct *mm, unsigned long limit) |
| +{ |
| + mm->context.exec_limit = limit; |
| + set_user_cs(&mm->context.user_cs, limit); |
| + if (mm == current->mm) { |
| + int cpu; |
| + |
| + cpu = get_cpu(); |
| + load_user_cs_desc(cpu, mm); |
| + put_cpu(); |
| + } |
| +} |
| + |
| +void arch_add_exec_range(struct mm_struct *mm, unsigned long limit) |
| +{ |
| + if (limit > mm->context.exec_limit) |
| + modify_cs(mm, limit); |
| +} |
| + |
| +void arch_remove_exec_range(struct mm_struct *mm, unsigned long old_end) |
| +{ |
| + struct vm_area_struct *vma; |
| + unsigned long limit = PAGE_SIZE; |
| + |
| + if (old_end == mm->context.exec_limit) { |
| + for (vma = mm->mmap; vma; vma = vma->vm_next) |
| + if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit)) |
| + limit = vma->vm_end; |
| + modify_cs(mm, limit); |
| + } |
| +} |
| + |
| +void arch_flush_exec_range(struct mm_struct *mm) |
| +{ |
| + mm->context.exec_limit = 0; |
| + set_user_cs(&mm->context.user_cs, 0); |
| +} |
| diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c |
| index 1168e44..c452918 100644 |
| |
| |
| @@ -115,6 +115,76 @@ die_if_kernel(const char *str, struct pt_regs *regs, long err) |
| if (!user_mode_vm(regs)) |
| die(str, regs, err); |
| } |
| + |
| +static inline int |
| +__compare_user_cs_desc(const struct desc_struct *desc1, |
| + const struct desc_struct *desc2) |
| +{ |
| + return ((desc1->limit0 != desc2->limit0) || |
| + (desc1->limit != desc2->limit) || |
| + (desc1->base0 != desc2->base0) || |
| + (desc1->base1 != desc2->base1) || |
| + (desc1->base2 != desc2->base2)); |
| +} |
| + |
| +/* |
| + * lazy-check for CS validity on exec-shield binaries: |
| + * |
| + * the original non-exec stack patch was written by |
| + * Solar Designer <solar at openwall.com>. Thanks! |
| + */ |
| +static int |
| +check_lazy_exec_limit(int cpu, struct pt_regs *regs, long error_code) |
| +{ |
| + struct desc_struct *desc1, *desc2; |
| + struct vm_area_struct *vma; |
| + unsigned long limit; |
| + |
| + if (current->mm == NULL) |
| + return 0; |
| + |
| + limit = -1UL; |
| + if (current->mm->context.exec_limit != -1UL) { |
| + limit = PAGE_SIZE; |
| + spin_lock(¤t->mm->page_table_lock); |
| + for (vma = current->mm->mmap; vma; vma = vma->vm_next) |
| + if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit)) |
| + limit = vma->vm_end; |
| + vma = get_gate_vma(current); |
| + if (vma && (vma->vm_flags & VM_EXEC) && (vma->vm_end > limit)) |
| + limit = vma->vm_end; |
| + spin_unlock(¤t->mm->page_table_lock); |
| + if (limit >= TASK_SIZE) |
| + limit = -1UL; |
| + current->mm->context.exec_limit = limit; |
| + } |
| + set_user_cs(¤t->mm->context.user_cs, limit); |
| + |
| + desc1 = ¤t->mm->context.user_cs; |
| + desc2 = get_cpu_gdt_table(cpu) + GDT_ENTRY_DEFAULT_USER_CS; |
| + |
| + if (__compare_user_cs_desc(desc1, desc2)) { |
| + /* |
| + * The CS was not in sync - reload it and retry the |
| + * instruction. If the instruction still faults then |
| + * we won't hit this branch next time around. |
| + */ |
| + if (print_fatal_signals >= 2) { |
| + printk(KERN_ERR "#GPF fixup (%ld[seg:%lx]) at %08lx, CPU#%d.\n", |
| + error_code, error_code/8, regs->ip, |
| + smp_processor_id()); |
| + printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x, CPU_cs: %08x/%08x.\n", |
| + current->mm->context.exec_limit, |
| + desc1->a, desc1->b, desc2->a, desc2->b); |
| + } |
| + |
| + load_user_cs_desc(cpu, current->mm); |
| + |
| + return 1; |
| + } |
| + |
| + return 0; |
| +} |
| #endif |
| |
| static void __kprobes |
| @@ -273,6 +343,29 @@ do_general_protection(struct pt_regs *regs, long error_code) |
| if (!user_mode(regs)) |
| goto gp_in_kernel; |
| |
| +#ifdef CONFIG_X86_32 |
| +{ |
| + int cpu; |
| + int ok; |
| + |
| + cpu = get_cpu(); |
| + ok = check_lazy_exec_limit(cpu, regs, error_code); |
| + put_cpu(); |
| + |
| + if (ok) |
| + return; |
| + |
| + if (print_fatal_signals) { |
| + printk(KERN_ERR "#GPF(%ld[seg:%lx]) at %08lx, CPU#%d.\n", |
| + error_code, error_code/8, regs->ip, smp_processor_id()); |
| + printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x.\n", |
| + current->mm->context.exec_limit, |
| + current->mm->context.user_cs.a, |
| + current->mm->context.user_cs.b); |
| + } |
| +} |
| +#endif /*CONFIG_X86_32*/ |
| + |
| tsk->thread.error_code = error_code; |
| tsk->thread.trap_no = 13; |
| |
| @@ -863,19 +956,37 @@ do_device_not_available(struct pt_regs *regs, long error_code) |
| } |
| |
| #ifdef CONFIG_X86_32 |
| +/* |
| + * The fixup code for errors in iret jumps to here (iret_exc). It loses |
| + * the original trap number and erorr code. The bogus trap 32 and error |
| + * code 0 are what the vanilla kernel delivers via: |
| + * DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1) |
| + * |
| + * NOTE: Because of the final "1" in the macro we need to enable interrupts. |
| + * |
| + * In case of a general protection fault in the iret instruction, we |
| + * need to check for a lazy CS update for exec-shield. |
| + */ |
| dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) |
| { |
| - siginfo_t info; |
| + int ok; |
| + int cpu; |
| + |
| local_irq_enable(); |
| |
| - info.si_signo = SIGILL; |
| - info.si_errno = 0; |
| - info.si_code = ILL_BADSTK; |
| - info.si_addr = NULL; |
| - if (notify_die(DIE_TRAP, "iret exception", |
| - regs, error_code, 32, SIGILL) == NOTIFY_STOP) |
| - return; |
| - do_trap(32, SIGILL, "iret exception", regs, error_code, &info); |
| + cpu = get_cpu(); |
| + ok = check_lazy_exec_limit(cpu, regs, error_code); |
| + put_cpu(); |
| + |
| + if (!ok && notify_die(DIE_TRAP, "iret exception", regs, |
| + error_code, 32, SIGSEGV) != NOTIFY_STOP) { |
| + siginfo_t info; |
| + info.si_signo = SIGSEGV; |
| + info.si_errno = 0; |
| + info.si_code = ILL_BADSTK; |
| + info.si_addr = 0; |
| + do_trap(32, SIGSEGV, "iret exception", regs, error_code, &info); |
| + } |
| } |
| #endif |
| |
| diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c |
| index 1dab519..360f39d 100644 |
| |
| |
| @@ -124,13 +124,16 @@ static unsigned long mmap_legacy_base(void) |
| */ |
| void arch_pick_mmap_layout(struct mm_struct *mm) |
| { |
| - if (mmap_is_legacy()) { |
| + if (!(2 & exec_shield) && mmap_is_legacy()) { |
| mm->mmap_base = mmap_legacy_base(); |
| mm->get_unmapped_area = arch_get_unmapped_area; |
| mm->unmap_area = arch_unmap_area; |
| } else { |
| mm->mmap_base = mmap_base(); |
| mm->get_unmapped_area = arch_get_unmapped_area_topdown; |
| + if (!(current->personality & READ_IMPLIES_EXEC) |
| + && mmap_is_ia32()) |
| + mm->get_unmapped_exec_area = arch_get_unmapped_exec_area; |
| mm->unmap_area = arch_unmap_area_topdown; |
| } |
| } |
| diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c |
| index a3250aa..e0d9cce 100644 |
| |
| |
| @@ -1,3 +1,4 @@ |
| +#include <linux/sched.h> |
| #include <linux/spinlock.h> |
| #include <linux/errno.h> |
| #include <linux/init.h> |
| @@ -23,6 +24,7 @@ static int __init noexec_setup(char *str) |
| disable_nx = 0; |
| } else if (!strncmp(str, "off", 3)) { |
| disable_nx = 1; |
| + exec_shield = 0; |
| } |
| x86_configure_nx(); |
| return 0; |
| @@ -40,6 +42,10 @@ void __cpuinit x86_configure_nx(void) |
| void __init x86_report_nx(void) |
| { |
| if (!cpu_has_nx) { |
| + if (exec_shield) |
| + printk(KERN_INFO "Using x86 segment limits to approximate NX protection\n"); |
| + else |
| + |
| printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " |
| "missing in CPU or disabled in BIOS!\n"); |
| } else { |
| diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c |
| index 426f3a1..e0286b1 100644 |
| |
| |
| @@ -6,6 +6,7 @@ |
| #include <linux/interrupt.h> |
| #include <linux/module.h> |
| |
| +#include <asm/desc.h> |
| #include <asm/tlbflush.h> |
| #include <asm/mmu_context.h> |
| #include <asm/cache.h> |
| @@ -131,6 +132,12 @@ void smp_invalidate_interrupt(struct pt_regs *regs) |
| union smp_flush_state *f; |
| |
| cpu = smp_processor_id(); |
| + |
| +#ifdef CONFIG_X86_32 |
| + if (current->active_mm) |
| + load_user_cs_desc(cpu, current->active_mm); |
| +#endif |
| + |
| /* |
| * orig_rax contains the negated interrupt vector. |
| * Use that to determine where the sender put the data. |
| diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c |
| index 02b442e..957bb67 100644 |
| |
| |
| @@ -331,7 +331,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) |
| if (compat) |
| addr = VDSO_HIGH_BASE; |
| else { |
| - addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); |
| + addr = get_unmapped_area_prot(NULL, 0, PAGE_SIZE, 0, 0, 1); |
| if (IS_ERR_VALUE(addr)) { |
| ret = addr; |
| goto up_fail; |
| diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c |
| index b607239..e426a3f 100644 |
| |
| |
| @@ -334,6 +334,24 @@ static void xen_set_ldt(const void *addr, unsigned entries) |
| xen_mc_issue(PARAVIRT_LAZY_CPU); |
| } |
| |
| +#ifdef CONFIG_X86_32 |
| +static void xen_load_user_cs_desc(int cpu, struct mm_struct *mm) |
| +{ |
| + void *gdt; |
| + xmaddr_t mgdt; |
| + u64 descriptor; |
| + struct desc_struct user_cs; |
| + |
| + gdt = &get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS]; |
| + mgdt = virt_to_machine(gdt); |
| + |
| + user_cs = mm->context.user_cs; |
| + descriptor = (u64) user_cs.a | ((u64) user_cs.b) << 32; |
| + |
| + HYPERVISOR_update_descriptor(mgdt.maddr, descriptor); |
| +} |
| +#endif /*CONFIG_X86_32*/ |
| + |
| static void xen_load_gdt(const struct desc_ptr *dtr) |
| { |
| unsigned long va = dtr->address; |
| @@ -960,6 +978,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { |
| |
| .load_tr_desc = paravirt_nop, |
| .set_ldt = xen_set_ldt, |
| +#ifdef CONFIG_X86_32 |
| + .load_user_cs_desc = xen_load_user_cs_desc, |
| +#endif /*CONFIG_X86_32*/ |
| .load_gdt = xen_load_gdt, |
| .load_idt = xen_load_idt, |
| .load_tls = xen_load_tls, |
| diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c |
| index 535e763..d114af6 100644 |
| |
| |
| @@ -74,7 +74,7 @@ static struct linux_binfmt elf_format = { |
| .hasvdso = 1 |
| }; |
| |
| -#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) |
| +#define BAD_ADDR(x) IS_ERR_VALUE(x) |
| |
| static int set_brk(unsigned long start, unsigned long end) |
| { |
| @@ -701,6 +701,11 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) |
| break; |
| } |
| |
| + if (current->personality == PER_LINUX && (exec_shield & 2)) { |
| + executable_stack = EXSTACK_DISABLE_X; |
| + current->flags |= PF_RANDOMIZE; |
| + } |
| + |
| /* Some simple consistency checks for the interpreter */ |
| if (elf_interpreter) { |
| retval = -ELIBBAD; |
| @@ -717,6 +722,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) |
| if (retval) |
| goto out_free_dentry; |
| |
| +#ifdef CONFIG_X86_32 |
| + /* |
| + * Turn off the CS limit completely if exec-shield disabled or |
| + * NX active: |
| + */ |
| + if (!exec_shield || executable_stack != EXSTACK_DISABLE_X || (__supported_pte_mask & _PAGE_NX)) |
| + arch_add_exec_range(current->mm, -1); |
| +#endif |
| + |
| /* OK, This is the point of no return */ |
| current->flags &= ~PF_FORKNOEXEC; |
| current->mm->def_flags = def_flags; |
| @@ -724,7 +738,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) |
| /* Do this immediately, since STACK_TOP as used in setup_arg_pages |
| may depend on the personality. */ |
| SET_PERSONALITY(loc->elf_ex); |
| - if (elf_read_implies_exec(loc->elf_ex, executable_stack)) |
| + if (!(exec_shield & 2) && |
| + elf_read_implies_exec(loc->elf_ex, executable_stack)) |
| current->personality |= READ_IMPLIES_EXEC; |
| |
| if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) |
| @@ -890,7 +905,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) |
| interpreter, |
| &interp_map_addr, |
| load_bias); |
| - if (!IS_ERR((void *)elf_entry)) { |
| + if (!BAD_ADDR(elf_entry)) { |
| /* |
| * load_elf_interp() returns relocation |
| * adjustment |
| diff --git a/include/linux/mm.h b/include/linux/mm.h |
| index e70f21b..44e6d63 100644 |
| |
| |
| @@ -1259,7 +1259,13 @@ extern int install_special_mapping(struct mm_struct *mm, |
| unsigned long addr, unsigned long len, |
| unsigned long flags, struct page **pages); |
| |
| -extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); |
| +extern unsigned long get_unmapped_area_prot(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, int); |
| + |
| +static inline unsigned long get_unmapped_area(struct file *file, unsigned long addr, |
| + unsigned long len, unsigned long pgoff, unsigned long flags) |
| +{ |
| + return get_unmapped_area_prot(file, addr, len, pgoff, flags, 0); |
| +} |
| |
| extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, |
| unsigned long len, unsigned long prot, |
| diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h |
| index b8bb9a6..f478e39 100644 |
| |
| |
| @@ -227,6 +227,9 @@ struct mm_struct { |
| unsigned long (*get_unmapped_area) (struct file *filp, |
| unsigned long addr, unsigned long len, |
| unsigned long pgoff, unsigned long flags); |
| + unsigned long (*get_unmapped_exec_area) (struct file *filp, |
| + unsigned long addr, unsigned long len, |
| + unsigned long pgoff, unsigned long flags); |
| void (*unmap_area) (struct mm_struct *mm, unsigned long addr); |
| #endif |
| unsigned long mmap_base; /* base of mmap area */ |
| diff --git a/include/linux/resource.h b/include/linux/resource.h |
| index f1e914e..d2aef9a 100644 |
| |
| |
| @@ -53,8 +53,11 @@ struct rlimit { |
| /* |
| * Limit the stack by to some sane default: root can always |
| * increase this limit if needed.. 8MB seems reasonable. |
| + * |
| + * (2MB more to cover randomization effects.) |
| */ |
| -#define _STK_LIM (8*1024*1024) |
| +#define _STK_LIM (10*1024*1024) |
| +#define EXEC_STACK_BIAS (2*1024*1024) |
| |
| /* |
| * GPG2 wants 64kB of mlocked memory, to make sure pass phrases |
| diff --git a/include/linux/sched.h b/include/linux/sched.h |
| index dad7f66..c5a3948 100644 |
| |
| |
| @@ -102,6 +102,9 @@ struct fs_struct; |
| struct bts_context; |
| struct perf_event_context; |
| |
| +extern int exec_shield; |
| +extern int print_fatal_signals; |
| + |
| /* |
| * List of flags we want to share for kernel threads, |
| * if only because they are not used by them anyway. |
| @@ -390,6 +393,10 @@ extern void arch_pick_mmap_layout(struct mm_struct *mm); |
| extern unsigned long |
| arch_get_unmapped_area(struct file *, unsigned long, unsigned long, |
| unsigned long, unsigned long); |
| + |
| +extern unsigned long |
| +arch_get_unmapped_exec_area(struct file *, unsigned long, unsigned long, |
| + unsigned long, unsigned long); |
| extern unsigned long |
| arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, |
| unsigned long len, unsigned long pgoff, |
| diff --git a/kernel/sysctl.c b/kernel/sysctl.c |
| index 8686b0f..a4fad81 100644 |
| |
| |
| @@ -99,6 +99,26 @@ extern int sysctl_nr_open_min, sysctl_nr_open_max; |
| #ifndef CONFIG_MMU |
| extern int sysctl_nr_trim_pages; |
| #endif |
| + |
| +int exec_shield = (1<<0); |
| +/* exec_shield is a bitmask: |
| + * 0: off; vdso at STACK_TOP, 1 page below TASK_SIZE |
| + * (1<<0) 1: on [also on if !=0] |
| + * (1<<1) 2: force noexecstack regardless of PT_GNU_STACK |
| + * The old settings |
| + * (1<<2) 4: vdso just below .text of main (unless too low) |
| + * (1<<3) 8: vdso just below .text of PT_INTERP (unless too low) |
| + * are ignored because the vdso is placed completely randomly |
| + */ |
| + |
| +static int __init setup_exec_shield(char *str) |
| +{ |
| + get_option(&str, &exec_shield); |
| + |
| + return 1; |
| +} |
| +__setup("exec-shield=", setup_exec_shield); |
| + |
| #ifdef CONFIG_BLOCK |
| extern int blk_iopoll_enabled; |
| #endif |
| @@ -400,6 +420,14 @@ static struct ctl_table kern_table[] = { |
| .mode = 0644, |
| .proc_handler = proc_dointvec, |
| }, |
| + { |
| + .procname = "exec-shield", |
| + .data = &exec_shield, |
| + .maxlen = sizeof(int), |
| + .mode = 0644, |
| + .proc_handler = &proc_dointvec, |
| + }, |
| + |
| #ifdef CONFIG_PROC_SYSCTL |
| { |
| .procname = "tainted", |
| diff --git a/mm/mmap.c b/mm/mmap.c |
| index 75557c6..8173284 100644 |
| |
| |
| @@ -28,6 +28,7 @@ |
| #include <linux/rmap.h> |
| #include <linux/mmu_notifier.h> |
| #include <linux/perf_event.h> |
| +#include <linux/random.h> |
| |
| #include <asm/uaccess.h> |
| #include <asm/cacheflush.h> |
| @@ -44,6 +45,18 @@ |
| #define arch_rebalance_pgtables(addr, len) (addr) |
| #endif |
| |
| +/* No sane architecture will #define these to anything else */ |
| +#ifndef arch_add_exec_range |
| +#define arch_add_exec_range(mm, limit) do { ; } while (0) |
| +#endif |
| +#ifndef arch_flush_exec_range |
| +#define arch_flush_exec_range(mm) do { ; } while (0) |
| +#endif |
| +#ifndef arch_remove_exec_range |
| +#define arch_remove_exec_range(mm, limit) do { ; } while (0) |
| +#endif |
| + |
| + |
| static void unmap_region(struct mm_struct *mm, |
| struct vm_area_struct *vma, struct vm_area_struct *prev, |
| unsigned long start, unsigned long end); |
| @@ -388,6 +401,9 @@ static inline void |
| { |
| struct vm_area_struct *next; |
| |
| + if (vma->vm_flags & VM_EXEC) |
| + arch_add_exec_range(mm, vma->vm_end); |
| + |
| vma->vm_prev = prev; |
| if (prev) { |
| next = prev->vm_next; |
| @@ -489,6 +504,8 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma, |
| rb_erase(&vma->vm_rb, &mm->mm_rb); |
| if (mm->mmap_cache == vma) |
| mm->mmap_cache = prev; |
| + if (vma->vm_flags & VM_EXEC) |
| + arch_remove_exec_range(mm, vma->vm_end); |
| } |
| |
| /* |
| @@ -798,6 +815,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, |
| } else /* cases 2, 5, 7 */ |
| err = vma_adjust(prev, prev->vm_start, |
| end, prev->vm_pgoff, NULL); |
| + if (prev->vm_flags & VM_EXEC) |
| + arch_add_exec_range(mm, prev->vm_end); |
| if (err) |
| return NULL; |
| return prev; |
| @@ -952,7 +971,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, |
| /* Obtain the address to map to. we verify (or select) it and ensure |
| * that it represents a valid section of the address space. |
| */ |
| - addr = get_unmapped_area(file, addr, len, pgoff, flags); |
| + addr = get_unmapped_area_prot(file, addr, len, pgoff, flags, |
| + prot & PROT_EXEC); |
| if (addr & ~PAGE_MASK) |
| return addr; |
| |
| @@ -1504,8 +1524,8 @@ void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr) |
| } |
| |
| unsigned long |
| -get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, |
| - unsigned long pgoff, unsigned long flags) |
| +get_unmapped_area_prot(struct file *file, unsigned long addr, unsigned long len, |
| + unsigned long pgoff, unsigned long flags, int exec) |
| { |
| unsigned long (*get_area)(struct file *, unsigned long, |
| unsigned long, unsigned long, unsigned long); |
| @@ -1518,7 +1538,11 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, |
| if (len > TASK_SIZE) |
| return -ENOMEM; |
| |
| - get_area = current->mm->get_unmapped_area; |
| + if (exec && current->mm->get_unmapped_exec_area) |
| + get_area = current->mm->get_unmapped_exec_area; |
| + else |
| + get_area = current->mm->get_unmapped_area; |
| + |
| if (file && file->f_op && file->f_op->get_unmapped_area) |
| get_area = file->f_op->get_unmapped_area; |
| addr = get_area(file, addr, len, pgoff, flags); |
| @@ -1532,8 +1556,83 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, |
| |
| return arch_rebalance_pgtables(addr, len); |
| } |
| +EXPORT_SYMBOL(get_unmapped_area_prot); |
| + |
| +static bool should_randomize(void) |
| +{ |
| + return (current->flags & PF_RANDOMIZE) && |
| + !(current->personality & ADDR_NO_RANDOMIZE); |
| +} |
| + |
| +#define SHLIB_BASE 0x00110000 |
| + |
| +unsigned long |
| +arch_get_unmapped_exec_area(struct file *filp, unsigned long addr0, |
| + unsigned long len0, unsigned long pgoff, unsigned long flags) |
| +{ |
| + unsigned long addr = addr0, len = len0; |
| + struct mm_struct *mm = current->mm; |
| + struct vm_area_struct *vma; |
| + unsigned long tmp; |
| + |
| + if (len > TASK_SIZE) |
| + return -ENOMEM; |
| + |
| + if (flags & MAP_FIXED) |
| + return addr; |
| + |
| + if (!addr) |
| + addr = !should_randomize() ? SHLIB_BASE : |
| + randomize_range(SHLIB_BASE, 0x01000000, len); |
| + |
| + if (addr) { |
| + addr = PAGE_ALIGN(addr); |
| + vma = find_vma(mm, addr); |
| + if (TASK_SIZE - len >= addr && |
| + (!vma || addr + len <= vma->vm_start)) |
| + return addr; |
| + } |
| + |
| + addr = SHLIB_BASE; |
| + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { |
| + /* At this point: (!vma || addr < vma->vm_end). */ |
| + if (TASK_SIZE - len < addr) |
| + return -ENOMEM; |
| + |
| + if (!vma || addr + len <= vma->vm_start) { |
| + /* |
| + * Must not let a PROT_EXEC mapping get into the |
| + * brk area: |
| + */ |
| + if (addr + len > mm->brk) |
| + goto failed; |
| + |
| + /* |
| + * Up until the brk area we randomize addresses |
| + * as much as possible: |
| + */ |
| + if (addr >= 0x01000000 && should_randomize()) { |
| + tmp = randomize_range(0x01000000, |
| + PAGE_ALIGN(max(mm->start_brk, |
| + (unsigned long)0x08000000)), len); |
| + vma = find_vma(mm, tmp); |
| + if (TASK_SIZE - len >= tmp && |
| + (!vma || tmp + len <= vma->vm_start)) |
| + return tmp; |
| + } |
| + /* |
| + * Ok, randomization didnt work out - return |
| + * the result of the linear search: |
| + */ |
| + return addr; |
| + } |
| + addr = vma->vm_end; |
| + } |
| + |
| +failed: |
| + return current->mm->get_unmapped_area(filp, addr0, len0, pgoff, flags); |
| +} |
| |
| -EXPORT_SYMBOL(get_unmapped_area); |
| |
| /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ |
| struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) |
| @@ -1608,6 +1707,16 @@ out: |
| return prev ? prev->vm_next : vma; |
| } |
| |
| +static int over_stack_limit(unsigned long sz) |
| +{ |
| + struct rlimit *rlim = current->signal->rlim; |
| + |
| + if (sz < EXEC_STACK_BIAS) |
| + return 0; |
| + return (sz - EXEC_STACK_BIAS) > |
| + ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur); |
| +} |
| + |
| /* |
| * Verify that the stack growth is acceptable and |
| * update accounting. This is shared with both the |
| @@ -1624,7 +1733,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns |
| return -ENOMEM; |
| |
| /* Stack limit test */ |
| - if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur)) |
| + if (over_stack_limit(size)) |
| return -ENOMEM; |
| |
| /* mlock limit tests */ |
| @@ -1936,10 +2045,14 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, |
| if (new->vm_ops && new->vm_ops->open) |
| new->vm_ops->open(new); |
| |
| - if (new_below) |
| + if (new_below) { |
| + unsigned long old_end = vma->vm_end; |
| + |
| err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff + |
| ((addr - new->vm_start) >> PAGE_SHIFT), new); |
| - else |
| + if (vma->vm_flags & VM_EXEC) |
| + arch_remove_exec_range(mm, old_end); |
| + } else |
| err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new); |
| |
| /* Success. */ |
| @@ -2223,6 +2336,7 @@ void exit_mmap(struct mm_struct *mm) |
| |
| free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0); |
| tlb_finish_mmu(tlb, 0, end); |
| + arch_flush_exec_range(mm); |
| |
| /* |
| * Walk the list again, actually closing and freeing it, |
| diff --git a/mm/mprotect.c b/mm/mprotect.c |
| index 8bc969d..3c9b4fc 100644 |
| |
| |
| @@ -26,9 +26,14 @@ |
| #include <linux/perf_event.h> |
| #include <asm/uaccess.h> |
| #include <asm/pgtable.h> |
| +#include <asm/pgalloc.h> |
| #include <asm/cacheflush.h> |
| #include <asm/tlbflush.h> |
| |
| +#ifndef arch_remove_exec_range |
| +#define arch_remove_exec_range(mm, limit) do { ; } while (0) |
| +#endif |
| + |
| #ifndef pgprot_modify |
| static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) |
| { |
| @@ -139,7 +144,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, |
| struct mm_struct *mm = vma->vm_mm; |
| unsigned long oldflags = vma->vm_flags; |
| long nrpages = (end - start) >> PAGE_SHIFT; |
| - unsigned long charged = 0; |
| + unsigned long charged = 0, old_end = vma->vm_end; |
| pgoff_t pgoff; |
| int error; |
| int dirty_accountable = 0; |
| @@ -204,6 +209,9 @@ success: |
| dirty_accountable = 1; |
| } |
| |
| + if (oldflags & VM_EXEC) |
| + arch_remove_exec_range(current->mm, old_end); |
| + |
| mmu_notifier_invalidate_range_start(mm, start, end); |
| if (is_vm_hugetlb_page(vma)) |
| hugetlb_change_protection(vma, start, end, vma->vm_page_prot); |
| diff --git a/mm/mremap.c b/mm/mremap.c |
| index e9c75ef..0a5379f 100644 |
| |
| |
| @@ -488,10 +488,10 @@ unsigned long do_mremap(unsigned long addr, |
| if (vma->vm_flags & VM_MAYSHARE) |
| map_flags |= MAP_SHARED; |
| |
| - new_addr = get_unmapped_area(vma->vm_file, 0, new_len, |
| + new_addr = get_unmapped_area_prot(vma->vm_file, 0, new_len, |
| vma->vm_pgoff + |
| ((addr - vma->vm_start) >> PAGE_SHIFT), |
| - map_flags); |
| + map_flags, vma->vm_flags & VM_EXEC); |
| if (new_addr & ~PAGE_MASK) { |
| ret = new_addr; |
| goto out; |
| -- |
| 1.7.0.1 |
| |