Tree - source-git/kernel - CentOS Git server

source-git / kernel

Blame ipc/sem.c

Blob History Raw

		f2d475	`// SPDX-License-Identifier: GPL-2.0`
		f2d475	`/*`
		f2d475	`* linux/ipc/sem.c`
		f2d475	`* Copyright (C) 1992 Krishna Balasubramanian`
		f2d475	`* Copyright (C) 1995 Eric Schenk, Bruno Haible`
		f2d475	`*`
		f2d475	`* /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>`
		f2d475	`*`
		f2d475	`* SMP-threaded, sysctl's added`
		f2d475	`* (c) 1999 Manfred Spraul <manfred@colorfullife.com>`
		f2d475	`* Enforced range limit on SEM_UNDO`
		f2d475	`* (c) 2001 Red Hat Inc`
		f2d475	`* Lockless wakeup`
		f2d475	`* (c) 2003 Manfred Spraul <manfred@colorfullife.com>`
		f2d475	`* (c) 2016 Davidlohr Bueso <dave@stgolabs.net>`
		f2d475	`* Further wakeup optimizations, documentation`
		f2d475	`* (c) 2010 Manfred Spraul <manfred@colorfullife.com>`
		f2d475	`*`
		f2d475	`* support for audit of ipc object properties and permission changes`
		f2d475	`* Dustin Kirkland <dustin.kirkland@us.ibm.com>`
		f2d475	`*`
		f2d475	`* namespaces support`
		f2d475	`* OpenVZ, SWsoft Inc.`
		f2d475	`* Pavel Emelianov <xemul@openvz.org>`
		f2d475	`*`
		f2d475	`* Implementation notes: (May 2010)`
		f2d475	`* This file implements System V semaphores.`
		f2d475	`*`
		f2d475	`* User space visible behavior:`
		f2d475	`* - FIFO ordering for semop() operations (just FIFO, not starvation`
		f2d475	`* protection)`
		f2d475	`* - multiple semaphore operations that alter the same semaphore in`
		f2d475	`* one semop() are handled.`
		f2d475	`* - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and`
		f2d475	`* SETALL calls.`
		f2d475	`* - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.`
		f2d475	`* - undo adjustments at process exit are limited to 0..SEMVMX.`
		f2d475	`* - namespaces are supported.`
		f2d475	`* - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing`
		f2d475	`* to /proc/sys/kernel/sem.`
		f2d475	`* - statistics about the usage are reported in /proc/sysvipc/sem.`
		f2d475	`*`
		f2d475	`* Internals:`
		f2d475	`* - scalability:`
		f2d475	`* - all global variables are read-mostly.`
		f2d475	`* - semop() calls and semctl(RMID) are synchronized by RCU.`
		f2d475	`* - most operations do write operations (actually: spin_lock calls) to`
		f2d475	`* the per-semaphore array structure.`
		f2d475	`* Thus: Perfect SMP scaling between independent semaphore arrays.`
		f2d475	`* If multiple semaphores in one array are used, then cache line`
		f2d475	`* thrashing on the semaphore array spinlock will limit the scaling.`
		f2d475	`* - semncnt and semzcnt are calculated on demand in count_semcnt()`
		f2d475	`* - the task that performs a successful semop() scans the list of all`
		f2d475	`* sleeping tasks and completes any pending operations that can be fulfilled.`
		f2d475	`* Semaphores are actively given to waiting tasks (necessary for FIFO).`
		f2d475	`* (see update_queue())`
		f2d475	`* - To improve the scalability, the actual wake-up calls are performed after`
		f2d475	`* dropping all locks. (see wake_up_sem_queue_prepare())`
		f2d475	`* - All work is done by the waker, the woken up task does not have to do`
		f2d475	`* anything - not even acquiring a lock or dropping a refcount.`
		f2d475	`* - A woken up task may not even touch the semaphore array anymore, it may`
		f2d475	`* have been destroyed already by a semctl(RMID).`
		f2d475	`* - UNDO values are stored in an array (one per process and per`
		f2d475	`* semaphore array, lazily allocated). For backwards compatibility, multiple`
		f2d475	`* modes for the UNDO variables are supported (per process, per thread)`
		f2d475	`* (see copy_semundo, CLONE_SYSVSEM)`
		f2d475	`* - There are two lists of the pending operations: a per-array list`
		f2d475	`* and per-semaphore list (stored in the array). This allows to achieve FIFO`
		f2d475	`* ordering without always scanning all pending operations.`
		f2d475	`* The worst-case behavior is nevertheless O(N^2) for N wakeups.`
		f2d475	`*/`
		f2d475
		f2d475	`#include <linux/compat.h>`
		f2d475	`#include <linux/slab.h>`
		f2d475	`#include <linux/spinlock.h>`
		f2d475	`#include <linux/init.h>`
		f2d475	`#include <linux/proc_fs.h>`
		f2d475	`#include <linux/time.h>`
		f2d475	`#include <linux/security.h>`
		f2d475	`#include <linux/syscalls.h>`
		f2d475	`#include <linux/audit.h>`
		f2d475	`#include <linux/capability.h>`
		f2d475	`#include <linux/seq_file.h>`
		f2d475	`#include <linux/rwsem.h>`
		f2d475	`#include <linux/nsproxy.h>`
		f2d475	`#include <linux/ipc_namespace.h>`
		f2d475	`#include <linux/sched/wake_q.h>`
		f2d475	`#include <linux/nospec.h>`
		f2d475	`#include <linux/rhashtable.h>`
		f2d475
		f2d475	`#include <linux/uaccess.h>`
		f2d475	`#include "util.h"`
		f2d475
		f2d475	`/* One semaphore structure for each semaphore in the system. */`
		f2d475	`struct sem {`
		f2d475	`int semval; /* current value */`
		f2d475	`/*`
		f2d475	`* PID of the process that last modified the semaphore. For`
		f2d475	`* Linux, specifically these are:`
		f2d475	`* - semop`
		f2d475	`* - semctl, via SETVAL and SETALL.`
		f2d475	`* - at task exit when performing undo adjustments (see exit_sem).`
		f2d475	`*/`
		f2d475	`struct pid *sempid;`
		f2d475	`spinlock_t lock; /* spinlock for fine-grained semtimedop */`
		f2d475	`struct list_head pending_alter; /* pending single-sop operations */`
		f2d475	`/* that alter the semaphore */`
		f2d475	`struct list_head pending_const; /* pending single-sop operations */`
		f2d475	`/* that do not alter the semaphore*/`
		f2d475	`time64_t sem_otime; /* candidate for sem_otime */`
		f2d475	`} ____cacheline_aligned_in_smp;`
		f2d475
		f2d475	`/* One sem_array data structure for each set of semaphores in the system. */`
		f2d475	`struct sem_array {`
		f2d475	`struct kern_ipc_perm sem_perm; /* permissions .. see ipc.h */`
		f2d475	`time64_t sem_ctime; /* create/last semctl() time */`
		f2d475	`struct list_head pending_alter; /* pending operations */`
		f2d475	`/* that alter the array */`
		f2d475	`struct list_head pending_const; /* pending complex operations */`
		f2d475	`/* that do not alter semvals */`
		f2d475	`struct list_head list_id; /* undo requests on this array */`
		f2d475	`int sem_nsems; /* no. of semaphores in array */`
		f2d475	`int complex_count; /* pending complex operations */`
		f2d475	`unsigned int use_global_lock;/* >0: global lock required */`
		f2d475
		f2d475	`struct sem sems[];`
		f2d475	`} __randomize_layout;`
		f2d475
		f2d475	`/* One queue for each sleeping process in the system. */`
		f2d475	`struct sem_queue {`
		f2d475	`struct list_head list; /* queue of pending operations */`
		f2d475	`struct task_struct sleeper; / this process */`
		f2d475	`struct sem_undo undo; / undo structure */`
		f2d475	`struct pid pid; / process id of requesting process */`
		f2d475	`int status; /* completion status of operation */`
		f2d475	`struct sembuf sops; / array of pending operations */`
		f2d475	`struct sembuf blocking; / the operation that blocked */`
		f2d475	`int nsops; /* number of operations */`
		f2d475	`bool alter; /* does sops alter the array? /`
		f2d475	`bool dupsop; /* sops on more than one sem_num */`
		f2d475	`};`
		f2d475
		f2d475	`/* Each task has a list of undo requests. They are executed automatically`
		f2d475	`* when the process exits.`
		f2d475	`*/`
		f2d475	`struct sem_undo {`
		f2d475	`struct list_head list_proc; /* per-process list: *`
		f2d475	`* all undos from one process`
		f2d475	`* rcu protected */`
		f2d475	`struct rcu_head rcu; /* rcu struct for sem_undo */`
		f2d475	`struct sem_undo_list ulp; / back ptr to sem_undo_list */`
		f2d475	`struct list_head list_id; /* per semaphore array list:`
		f2d475	`* all undos for one array */`
		f2d475	`int semid; /* semaphore set identifier */`
		f2d475	`short semadj; / array of adjustments */`
		f2d475	`/* one per semaphore */`
		f2d475	`};`
		f2d475
		f2d475	`/* sem_undo_list controls shared access to the list of sem_undo structures`
		f2d475	`* that may be shared among all a CLONE_SYSVSEM task group.`
		f2d475	`*/`
		f2d475	`struct sem_undo_list {`
		f2d475	`refcount_t refcnt;`
		f2d475	`spinlock_t lock;`
		f2d475	`struct list_head list_proc;`
		f2d475	`};`
		f2d475
		f2d475
		f2d475	`#define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS])`
		f2d475
		/* Forward declarations; all object parameters are passed by pointer. */
		static int newary(struct ipc_namespace *, struct ipc_params *);
		static void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
		#ifdef CONFIG_PROC_FS
		static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
		#endif
		f2d475
		f2d475	`#define SEMMSL_FAST 256 /* 512 bytes on stack */`
		f2d475	`#define SEMOPM_FAST 64 /* ~ 372 bytes on stack */`
		f2d475
		f2d475	`/*`
		f2d475	`* Switching from the mode suitable for simple ops`
		f2d475	`* to the mode for complex ops is costly. Therefore:`
		f2d475	`* use some hysteresis`
		f2d475	`*/`
		f2d475	`#define USE_GLOBAL_LOCK_HYSTERESIS 10`
		f2d475
		f2d475	`/*`
		f2d475	`* Locking:`
		f2d475	`* a) global sem_lock() for read/write`
		f2d475	`* sem_undo.id_next,`
		f2d475	`* sem_array.complex_count,`
		f2d475	`* sem_array.pending{_alter,_const},`
		f2d475	`* sem_array.sem_undo`
		f2d475	`*`
		f2d475	`* b) global or semaphore sem_lock() for read/write:`
		f2d475	`* sem_array.sems[i].pending_{const,alter}:`
		f2d475	`*`
		f2d475	`* c) special:`
		f2d475	`* sem_undo_list.list_proc:`
		f2d475	`* * undo_list->lock for write`
		f2d475	`* * rcu for read`
		f2d475	`* use_global_lock:`
		f2d475	`* * global sem_lock() for write`
		f2d475	`* * either local or global sem_lock() for read.`
		f2d475	`*`
		f2d475	`* Memory ordering:`
		f2d475	`* Most ordering is enforced by using spin_lock() and spin_unlock().`
		f2d475	`* The special case is use_global_lock:`
		f2d475	`* Setting it from non-zero to 0 is a RELEASE, this is ensured by`
		f2d475	`* using smp_store_release().`
		f2d475	`* Testing if it is non-zero is an ACQUIRE, this is ensured by using`
		f2d475	`* smp_load_acquire().`
		f2d475	`* Setting it from 0 to non-zero must be ordered with regards to`
		f2d475	`* this smp_load_acquire(), this is guaranteed because the smp_load_acquire()`
		f2d475	`* is inside a spin_lock() and after a write from 0 to non-zero a`
		f2d475	`* spin_lock()+spin_unlock() is done.`
		f2d475	`*/`
		f2d475
		f2d475	`#define sc_semmsl sem_ctls[0]`
		f2d475	`#define sc_semmns sem_ctls[1]`
		f2d475	`#define sc_semopm sem_ctls[2]`
		f2d475	`#define sc_semmni sem_ctls[3]`
		f2d475
		f2d475	`void sem_init_ns(struct ipc_namespace *ns)`
		f2d475	`{`
		f2d475	`ns->sc_semmsl = SEMMSL;`
		f2d475	`ns->sc_semmns = SEMMNS;`
		f2d475	`ns->sc_semopm = SEMOPM;`
		f2d475	`ns->sc_semmni = SEMMNI;`
		f2d475	`ns->used_sems = 0;`
		f2d475	`ipc_init_ids(&ns->ids[IPC_SEM_IDS]);`
		f2d475	`}`
		f2d475
		#ifdef CONFIG_IPC_NS
		/*
		 * sem_exit_ns - tear down the semaphore part of an IPC namespace
		 * @ns: namespace being destroyed
		 *
		 * Releases every remaining semaphore array through freeary(), then
		 * destroys the idr and the key hash table of the semaphore id set.
		 */
		void sem_exit_ns(struct ipc_namespace *ns)
		{
			free_ipcs(ns, &sem_ids(ns), freeary);
			idr_destroy(&sem_ids(ns).ipcs_idr);
			rhashtable_destroy(&sem_ids(ns).key_ht);
		}
		#endif
		f2d475
		/*
		 * sem_init - boot-time initialisation of System V semaphores
		 *
		 * Initialises the semaphore state of the initial IPC namespace and
		 * registers the "sysvipc/sem" proc interface with its column header.
		 */
		void __init sem_init(void)
		{
			const char *header =
				" key semid perms nsems uid gid cuid cgid otime ctime\n";

			sem_init_ns(&init_ipc_ns);
			ipc_init_proc_interface("sysvipc/sem", header,
						IPC_SEM_IDS, sysvipc_sem_proc_show);
		}
		f2d475
		f2d475	`/**`
		f2d475	`* unmerge_queues - unmerge queues, if possible.`
		f2d475	`* @sma: semaphore array`
		f2d475	`*`
		f2d475	`* The function unmerges the wait queues if complex_count is 0.`
		f2d475	`* It must be called prior to dropping the global semaphore array lock.`
		f2d475	`*/`
		f2d475	`static void unmerge_queues(struct sem_array *sma)`
		f2d475	`{`
		f2d475	`struct sem_queue q, tq;`
		f2d475
		f2d475	`/* complex operations still around? */`
		f2d475	`if (sma->complex_count)`
		f2d475	`return;`
		f2d475	`/*`
		f2d475	`* We will switch back to simple mode.`
		f2d475	`* Move all pending operation back into the per-semaphore`
		f2d475	`* queues.`
		f2d475	`*/`
		f2d475	`list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {`
		f2d475	`struct sem *curr;`
		f2d475	`curr = &sma->sems[q->sops[0].sem_num];`
		f2d475
		f2d475	`list_add_tail(&q->list, &curr->pending_alter);`
		f2d475	`}`
		f2d475	`INIT_LIST_HEAD(&sma->pending_alter);`
		f2d475	`}`
		f2d475
		f2d475	`/**`
		f2d475	`* merge_queues - merge single semop queues into global queue`
		f2d475	`* @sma: semaphore array`
		f2d475	`*`
		f2d475	`* This function merges all per-semaphore queues into the global queue.`
		f2d475	`* It is necessary to achieve FIFO ordering for the pending single-sop`
		f2d475	`* operations when a multi-semop operation must sleep.`
		f2d475	`* Only the alter operations must be moved, the const operations can stay.`
		f2d475	`*/`
		f2d475	`static void merge_queues(struct sem_array *sma)`
		f2d475	`{`
		f2d475	`int i;`
		f2d475	`for (i = 0; i < sma->sem_nsems; i++) {`
		f2d475	`struct sem *sem = &sma->sems[i];`
		f2d475
		f2d475	`list_splice_init(&sem->pending_alter, &sma->pending_alter);`
		f2d475	`}`
		f2d475	`}`
		f2d475
		f2d475	`static void sem_rcu_free(struct rcu_head *head)`
		f2d475	`{`
		f2d475	`struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);`
		f2d475	`struct sem_array *sma = container_of(p, struct sem_array, sem_perm);`
		f2d475
		f2d475	`security_sem_free(&sma->sem_perm);`
		f2d475	`kvfree(sma);`
		f2d475	`}`
		f2d475
		f2d475	`/*`
		f2d475	`* Enter the mode suitable for non-simple operations:`
		f2d475	`* Caller must own sem_perm.lock.`
		f2d475	`*/`
		f2d475	`static void complexmode_enter(struct sem_array *sma)`
		f2d475	`{`
		f2d475	`int i;`
		f2d475	`struct sem *sem;`
		f2d475
		f2d475	`if (sma->use_global_lock > 0) {`
		f2d475	`/*`
		f2d475	`* We are already in global lock mode.`
		f2d475	`* Nothing to do, just reset the`
		f2d475	`* counter until we return to simple mode.`
		f2d475	`*/`
		f2d475	`sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;`
		f2d475	`return;`
		f2d475	`}`
		f2d475	`sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;`
		f2d475
		f2d475	`for (i = 0; i < sma->sem_nsems; i++) {`
		f2d475	`sem = &sma->sems[i];`
		f2d475	`spin_lock(&sem->lock);`
		f2d475	`spin_unlock(&sem->lock);`
		f2d475	`}`
		f2d475	`}`
		f2d475
		f2d475	`/*`
		f2d475	`* Try to leave the mode that disallows simple operations:`
		f2d475	`* Caller must own sem_perm.lock.`
		f2d475	`*/`
		f2d475	`static void complexmode_tryleave(struct sem_array *sma)`
		f2d475	`{`
		f2d475	`if (sma->complex_count) {`
		f2d475	`/* Complex ops are sleeping.`
		f2d475	`* We must stay in complex mode`
		f2d475	`*/`
		f2d475	`return;`
		f2d475	`}`
		f2d475	`if (sma->use_global_lock == 1) {`
		f2d475	`/*`
		f2d475	`* Immediately after setting use_global_lock to 0,`
		f2d475	`* a simple op can start. Thus: all memory writes`
		f2d475	`* performed by the current operation must be visible`
		f2d475	`* before we set use_global_lock to 0.`
		f2d475	`*/`
		f2d475	`smp_store_release(&sma->use_global_lock, 0);`
		f2d475	`} else {`
		f2d475	`sma->use_global_lock--;`
		f2d475	`}`
		f2d475	`}`
		f2d475
		f2d475	`#define SEM_GLOBAL_LOCK (-1)`
		f2d475	`/*`
		f2d475	`* If the request contains only one semaphore operation, and there are`
		f2d475	`* no complex transactions pending, lock only the semaphore involved.`
		f2d475	`* Otherwise, lock the entire semaphore array, since we either have`
		f2d475	`* multiple semaphores in our own semops, or we need to look at`
		f2d475	`* semaphores from other pending complex operations.`
		f2d475	`*/`
		f2d475	`static inline int sem_lock(struct sem_array sma, struct sembuf sops,`
		f2d475	`int nsops)`
		f2d475	`{`
		f2d475	`struct sem *sem;`
		f2d475	`int idx;`
		f2d475
		f2d475	`if (nsops != 1) {`
		f2d475	`/* Complex operation - acquire a full lock */`
		f2d475	`ipc_lock_object(&sma->sem_perm);`
		f2d475
		f2d475	`/* Prevent parallel simple ops */`
		f2d475	`complexmode_enter(sma);`
		f2d475	`return SEM_GLOBAL_LOCK;`
		f2d475	`}`
		f2d475
		f2d475	`/*`
		f2d475	`* Only one semaphore affected - try to optimize locking.`
		f2d475	`* Optimized locking is possible if no complex operation`
		f2d475	`* is either enqueued or processed right now.`
		f2d475	`*`
		f2d475	`* Both facts are tracked by use_global_mode.`
		f2d475	`*/`
		f2d475	`idx = array_index_nospec(sops->sem_num, sma->sem_nsems);`
		f2d475	`sem = &sma->sems[idx];`
		f2d475
		f2d475	`/*`
		f2d475	`* Initial check for use_global_lock. Just an optimization,`
		f2d475	`* no locking, no memory barrier.`
		f2d475	`*/`
		f2d475	`if (!sma->use_global_lock) {`
		f2d475	`/*`
		f2d475	`* It appears that no complex operation is around.`
		f2d475	`* Acquire the per-semaphore lock.`
		f2d475	`*/`
		f2d475	`spin_lock(&sem->lock);`
		f2d475
		f2d475	`/* pairs with smp_store_release() */`
		f2d475	`if (!smp_load_acquire(&sma->use_global_lock)) {`
		f2d475	`/* fast path successful! */`
		f2d475	`return sops->sem_num;`
		f2d475	`}`
		f2d475	`spin_unlock(&sem->lock);`
		f2d475	`}`
		f2d475
		f2d475	`/* slow path: acquire the full lock */`
		f2d475	`ipc_lock_object(&sma->sem_perm);`
		f2d475
		f2d475	`if (sma->use_global_lock == 0) {`
		f2d475	`/*`
		f2d475	`* The use_global_lock mode ended while we waited for`
		f2d475	`* sma->sem_perm.lock. Thus we must switch to locking`
		f2d475	`* with sem->lock.`
		f2d475	`* Unlike in the fast path, there is no need to recheck`
		f2d475	`* sma->use_global_lock after we have acquired sem->lock:`
		f2d475	`* We own sma->sem_perm.lock, thus use_global_lock cannot`
		f2d475	`* change.`
		f2d475	`*/`
		f2d475	`spin_lock(&sem->lock);`
		f2d475
		f2d475	`ipc_unlock_object(&sma->sem_perm);`
		f2d475	`return sops->sem_num;`
		f2d475	`} else {`
		f2d475	`/*`
		f2d475	`* Not a false alarm, thus continue to use the global lock`
		f2d475	`* mode. No need for complexmode_enter(), this was done by`
		f2d475	`* the caller that has set use_global_mode to non-zero.`
		f2d475	`*/`
		f2d475	`return SEM_GLOBAL_LOCK;`
		f2d475	`}`
		f2d475	`}`
		f2d475
		f2d475	`static inline void sem_unlock(struct sem_array *sma, int locknum)`
		f2d475	`{`
		f2d475	`if (locknum == SEM_GLOBAL_LOCK) {`
		f2d475	`unmerge_queues(sma);`
		f2d475	`complexmode_tryleave(sma);`
		f2d475	`ipc_unlock_object(&sma->sem_perm);`
		f2d475	`} else {`
		f2d475	`struct sem *sem = &sma->sems[locknum];`
		f2d475	`spin_unlock(&sem->lock);`
		f2d475	`}`
		f2d475	`}`
		f2d475
		f2d475	`/*`
		f2d475	`* sem_lock_(check_) routines are called in the paths where the rwsem`
		f2d475	`* is not held.`
		f2d475	`*`
		f2d475	`* The caller holds the RCU read lock.`
		f2d475	`*/`
		f2d475	`static inline struct sem_array sem_obtain_object(struct ipc_namespace ns, int id)`
		f2d475	`{`
		f2d475	`struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id);`
		f2d475
		f2d475	`if (IS_ERR(ipcp))`
		f2d475	`return ERR_CAST(ipcp);`
		f2d475
		f2d475	`return container_of(ipcp, struct sem_array, sem_perm);`
		f2d475	`}`
		f2d475
		f2d475	`static inline struct sem_array sem_obtain_object_check(struct ipc_namespace ns,`
		f2d475	`int id)`
		f2d475	`{`
		f2d475	`struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id);`
		f2d475
		f2d475	`if (IS_ERR(ipcp))`
		f2d475	`return ERR_CAST(ipcp);`
		f2d475
		f2d475	`return container_of(ipcp, struct sem_array, sem_perm);`
		f2d475	`}`
		f2d475
		f2d475	`static inline void sem_lock_and_putref(struct sem_array *sma)`
		f2d475	`{`
		f2d475	`sem_lock(sma, NULL, -1);`
		f2d475	`ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);`
		f2d475	`}`
		f2d475
		f2d475	`static inline void sem_rmid(struct ipc_namespace ns, struct sem_array s)`
		f2d475	`{`
		f2d475	`ipc_rmid(&sem_ids(ns), &s->sem_perm);`
		f2d475	`}`
		f2d475
		/*
		 * sem_alloc - allocate a zeroed semaphore array
		 * @nsems: number of semaphores the array must hold
		 *
		 * Returns the new array, or NULL if the requested size would exceed
		 * INT_MAX bytes or the allocation fails.
		 */
		static struct sem_array *sem_alloc(size_t nsems)
		{
			struct sem_array *sma;
			size_t size;

			/* Reject sizes for which header + nsems elements exceed INT_MAX. */
			if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0]))
				return NULL;

			/*
			 * Header plus flexible array. The size must be taken from the
			 * structure (*sma), not from the pointer, so that it matches
			 * the bound checked above.
			 */
			size = sizeof(*sma) + nsems * sizeof(sma->sems[0]);
			sma = kvmalloc(size, GFP_KERNEL);
			if (unlikely(!sma))
				return NULL;

			memset(sma, 0, size);

			return sma;
		}
		f2d475
		f2d475	`/**`
		f2d475	`* newary - Create a new semaphore set`
		f2d475	`* @ns: namespace`
		f2d475	`* @params: ptr to the structure that contains key, semflg and nsems`
		f2d475	`*`
		f2d475	`* Called with sem_ids.rwsem held (as a writer)`
		f2d475	`*/`
		f2d475	`static int newary(struct ipc_namespace ns, struct ipc_params params)`
		f2d475	`{`
		f2d475	`int retval;`
		f2d475	`struct sem_array *sma;`
		f2d475	`key_t key = params->key;`
		f2d475	`int nsems = params->u.nsems;`
		f2d475	`int semflg = params->flg;`
		f2d475	`int i;`
		f2d475
		f2d475	`if (!nsems)`
		f2d475	`return -EINVAL;`
		f2d475	`if (ns->used_sems + nsems > ns->sc_semmns)`
		f2d475	`return -ENOSPC;`
		f2d475
		f2d475	`sma = sem_alloc(nsems);`
		f2d475	`if (!sma)`
		f2d475	`return -ENOMEM;`
		f2d475
		f2d475	`sma->sem_perm.mode = (semflg & S_IRWXUGO);`
		f2d475	`sma->sem_perm.key = key;`
		f2d475
		f2d475	`sma->sem_perm.security = NULL;`
		f2d475	`retval = security_sem_alloc(&sma->sem_perm);`
		f2d475	`if (retval) {`
		f2d475	`kvfree(sma);`
		f2d475	`return retval;`
		f2d475	`}`
		f2d475
		f2d475	`for (i = 0; i < nsems; i++) {`
		f2d475	`INIT_LIST_HEAD(&sma->sems[i].pending_alter);`
		f2d475	`INIT_LIST_HEAD(&sma->sems[i].pending_const);`
		f2d475	`spin_lock_init(&sma->sems[i].lock);`
		f2d475	`}`
		f2d475
		f2d475	`sma->complex_count = 0;`
		f2d475	`sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;`
		f2d475	`INIT_LIST_HEAD(&sma->pending_alter);`
		f2d475	`INIT_LIST_HEAD(&sma->pending_const);`
		f2d475	`INIT_LIST_HEAD(&sma->list_id);`
		f2d475	`sma->sem_nsems = nsems;`
		f2d475	`sma->sem_ctime = ktime_get_real_seconds();`
		f2d475
		f2d475	`/* ipc_addid() locks sma upon success. */`
		f2d475	`retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);`
		f2d475	`if (retval < 0) {`
		f2d475	`ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);`
		f2d475	`return retval;`
		f2d475	`}`
		f2d475	`ns->used_sems += nsems;`
		f2d475
		f2d475	`sem_unlock(sma, -1);`
		f2d475	`rcu_read_unlock();`
		f2d475
		f2d475	`return sma->sem_perm.id;`
		f2d475	`}`
		f2d475
		f2d475
		f2d475	`/*`
		f2d475	`* Called with sem_ids.rwsem and ipcp locked.`
		f2d475	`*/`
		f2d475	`static inline int sem_more_checks(struct kern_ipc_perm *ipcp,`
		f2d475	`struct ipc_params *params)`
		f2d475	`{`
		f2d475	`struct sem_array *sma;`
		f2d475
		f2d475	`sma = container_of(ipcp, struct sem_array, sem_perm);`
		f2d475	`if (params->u.nsems > sma->sem_nsems)`
		f2d475	`return -EINVAL;`
		f2d475
		f2d475	`return 0;`
		f2d475	`}`
		f2d475
		/*
		 * ksys_semget - in-kernel entry point for semget
		 * @key: IPC key
		 * @nsems: requested number of semaphores
		 * @semflg: flags, handed on unchanged in the request parameters
		 *
		 * Returns -EINVAL if @nsems is negative or above the namespace's
		 * sc_semmsl limit; otherwise returns whatever ipcget() returns.
		 */
		long ksys_semget(key_t key, int nsems, int semflg)
		{
			struct ipc_namespace *ns;
			static const struct ipc_ops sem_ops = {
				.getnew = newary,
				.associate = security_sem_associate,
				.more_checks = sem_more_checks,
			};
			struct ipc_params sem_params;

			ns = current->nsproxy->ipc_ns;

			if (nsems < 0 || nsems > ns->sc_semmsl)
				return -EINVAL;

			sem_params.key = key;
			sem_params.flg = semflg;
			sem_params.u.nsems = nsems;

			return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
		}
		f2d475
		/* semget system call: forwards its three arguments to ksys_semget(). */
		SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
		{
			long ret = ksys_semget(key, nsems, semflg);

			return ret;
		}
		f2d475
		f2d475	`/**`
		f2d475	`* perform_atomic_semop[_slow] - Attempt to perform semaphore`
		f2d475	`* operations on a given array.`
		f2d475	`* @sma: semaphore array`
		f2d475	`* @q: struct sem_queue that describes the operation`
		f2d475	`*`
		f2d475	`* Caller blocking are as follows, based the value`
		f2d475	`* indicated by the semaphore operation (sem_op):`
		f2d475	`*`
		f2d475	`* (1) >0 never blocks.`
		f2d475	`* (2) 0 (wait-for-zero operation): semval is non-zero.`
		f2d475	`* (3) <0 attempting to decrement semval to a value smaller than zero.`
		f2d475	`*`
		f2d475	`* Returns 0 if the operation was possible.`
		f2d475	`* Returns 1 if the operation is impossible, the caller must sleep.`
		f2d475	`* Returns <0 for error codes.`
		f2d475	`*/`
		f2d475	`static int perform_atomic_semop_slow(struct sem_array sma, struct sem_queue q)`
		f2d475	`{`
		f2d475	`int result, sem_op, nsops;`
		f2d475	`struct pid *pid;`
		f2d475	`struct sembuf *sop;`
		f2d475	`struct sem *curr;`
		f2d475	`struct sembuf *sops;`
		f2d475	`struct sem_undo *un;`
		f2d475
		f2d475	`sops = q->sops;`
		f2d475	`nsops = q->nsops;`
		f2d475	`un = q->undo;`
		f2d475
		f2d475	`for (sop = sops; sop < sops + nsops; sop++) {`
		f2d475	`int idx = array_index_nospec(sop->sem_num, sma->sem_nsems);`
		f2d475	`curr = &sma->sems[idx];`
		f2d475	`sem_op = sop->sem_op;`
		f2d475	`result = curr->semval;`
		f2d475
		f2d475	`if (!sem_op && result)`
		f2d475	`goto would_block;`
		f2d475
		f2d475	`result += sem_op;`
		f2d475	`if (result < 0)`
		f2d475	`goto would_block;`
		f2d475	`if (result > SEMVMX)`
		f2d475	`goto out_of_range;`
		f2d475
		f2d475	`if (sop->sem_flg & SEM_UNDO) {`
		f2d475	`int undo = un->semadj[sop->sem_num] - sem_op;`
		f2d475	`/* Exceeding the undo range is an error. */`
		f2d475	`if (undo < (-SEMAEM - 1) \|\| undo > SEMAEM)`
		f2d475	`goto out_of_range;`
		f2d475	`un->semadj[sop->sem_num] = undo;`
		f2d475	`}`
		f2d475
		f2d475	`curr->semval = result;`
		f2d475	`}`
		f2d475
		f2d475	`sop--;`
		f2d475	`pid = q->pid;`
		f2d475	`while (sop >= sops) {`
		f2d475	`ipc_update_pid(&sma->sems[sop->sem_num].sempid, pid);`
		f2d475	`sop--;`
		f2d475	`}`
		f2d475
		f2d475	`return 0;`
		f2d475
		f2d475	`out_of_range:`
		f2d475	`result = -ERANGE;`
		f2d475	`goto undo;`
		f2d475
		f2d475	`would_block:`
		f2d475	`q->blocking = sop;`
		f2d475
		f2d475	`if (sop->sem_flg & IPC_NOWAIT)`
		f2d475	`result = -EAGAIN;`
		f2d475	`else`
		f2d475	`result = 1;`
		f2d475
		f2d475	`undo:`
		f2d475	`sop--;`
		f2d475	`while (sop >= sops) {`
		f2d475	`sem_op = sop->sem_op;`
		f2d475	`sma->sems[sop->sem_num].semval -= sem_op;`
		f2d475	`if (sop->sem_flg & SEM_UNDO)`
		f2d475	`un->semadj[sop->sem_num] += sem_op;`
		f2d475	`sop--;`
		f2d475	`}`
		f2d475
		f2d475	`return result;`
		f2d475	`}`
		f2d475
		f2d475	`static int perform_atomic_semop(struct sem_array sma, struct sem_queue q)`
		f2d475	`{`
		f2d475	`int result, sem_op, nsops;`
		f2d475	`struct sembuf *sop;`
		f2d475	`struct sem *curr;`
		f2d475	`struct sembuf *sops;`
		f2d475	`struct sem_undo *un;`
		f2d475
		f2d475	`sops = q->sops;`
		f2d475	`nsops = q->nsops;`
		f2d475	`un = q->undo;`
		f2d475
		f2d475	`if (unlikely(q->dupsop))`
		f2d475	`return perform_atomic_semop_slow(sma, q);`
		f2d475
		f2d475	`/*`
		f2d475	`* We scan the semaphore set twice, first to ensure that the entire`
		f2d475	`* operation can succeed, therefore avoiding any pointless writes`
		f2d475	`* to shared memory and having to undo such changes in order to block`
		f2d475	`* until the operations can go through.`
		f2d475	`*/`
		f2d475	`for (sop = sops; sop < sops + nsops; sop++) {`
		f2d475	`int idx = array_index_nospec(sop->sem_num, sma->sem_nsems);`
		f2d475
		f2d475	`curr = &sma->sems[idx];`
		f2d475	`sem_op = sop->sem_op;`
		f2d475	`result = curr->semval;`
		f2d475
		f2d475	`if (!sem_op && result)`
		f2d475	`goto would_block; /* wait-for-zero */`
		f2d475
		f2d475	`result += sem_op;`
		f2d475	`if (result < 0)`
		f2d475	`goto would_block;`
		f2d475
		f2d475	`if (result > SEMVMX)`
		f2d475	`return -ERANGE;`
		f2d475
		f2d475	`if (sop->sem_flg & SEM_UNDO) {`
		f2d475	`int undo = un->semadj[sop->sem_num] - sem_op;`
		f2d475
		f2d475	`/* Exceeding the undo range is an error. */`
		f2d475	`if (undo < (-SEMAEM - 1) \|\| undo > SEMAEM)`
		f2d475	`return -ERANGE;`
		f2d475	`}`
		f2d475	`}`
		f2d475
		f2d475	`for (sop = sops; sop < sops + nsops; sop++) {`
		f2d475	`curr = &sma->sems[sop->sem_num];`
		f2d475	`sem_op = sop->sem_op;`
		f2d475	`result = curr->semval;`
		f2d475
		f2d475	`if (sop->sem_flg & SEM_UNDO) {`
		f2d475	`int undo = un->semadj[sop->sem_num] - sem_op;`
		f2d475
		f2d475	`un->semadj[sop->sem_num] = undo;`
		f2d475	`}`
		f2d475	`curr->semval += sem_op;`
		f2d475	`ipc_update_pid(&curr->sempid, q->pid);`
		f2d475	`}`
		f2d475
		f2d475	`return 0;`
		f2d475
		f2d475	`would_block:`
		f2d475	`q->blocking = sop;`
		f2d475	`return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1;`
		f2d475	`}`
		f2d475
		f2d475	`static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error,`
		f2d475	`struct wake_q_head *wake_q)`
		f2d475	`{`
		f2d475	`wake_q_add(wake_q, q->sleeper);`
		f2d475	`/*`
		f2d475	`* Rely on the above implicit barrier, such that we can`
		f2d475	`* ensure that we hold reference to the task before setting`
		f2d475	`* q->status. Otherwise we could race with do_exit if the`
		f2d475	`* task is awoken by an external event before calling`
		f2d475	`* wake_up_process().`
		f2d475	`*/`
		f2d475	`WRITE_ONCE(q->status, error);`
		f2d475	`}`
		f2d475
		f2d475	`static void unlink_queue(struct sem_array sma, struct sem_queue q)`
		f2d475	`{`
		f2d475	`list_del(&q->list);`
		f2d475	`if (q->nsops > 1)`
		f2d475	`sma->complex_count--;`
		f2d475	`}`
		f2d475
		f2d475	`/** check_restart(sma, q)`
		f2d475	`* @sma: semaphore array`
		f2d475	`* @q: the operation that just completed`
		f2d475	`*`
		f2d475	`* update_queue is O(N^2) when it restarts scanning the whole queue of`
		f2d475	`* waiting operations. Therefore this function checks if the restart is`
		f2d475	`* really necessary. It is called after a previously waiting operation`
		f2d475	`* modified the array.`
		f2d475	`* Note that wait-for-zero operations are handled without restart.`
		f2d475	`*/`
		f2d475	`static inline int check_restart(struct sem_array sma, struct sem_queue q)`
		f2d475	`{`
		f2d475	`/* pending complex alter operations are too difficult to analyse */`
		f2d475	`if (!list_empty(&sma->pending_alter))`
		f2d475	`return 1;`
		f2d475
		f2d475	`/* we were a sleeping complex operation. Too difficult */`
		f2d475	`if (q->nsops > 1)`
		f2d475	`return 1;`
		f2d475
		f2d475	`/* It is impossible that someone waits for the new value:`
		f2d475	`* - complex operations always restart.`
		f2d475	`* - wait-for-zero are handled seperately.`
		f2d475	`* - q is a previously sleeping simple operation that`
		f2d475	`* altered the array. It must be a decrement, because`
		f2d475	`* simple increments never sleep.`
		f2d475	`* - If there are older (higher priority) decrements`
		f2d475	`* in the queue, then they have observed the original`
		f2d475	`* semval value and couldn't proceed. The operation`
		f2d475	`* decremented to value - thus they won't proceed either.`
		f2d475	`*/`
		f2d475	`return 0;`
		f2d475	`}`
		f2d475
		f2d475	`/**`
		f2d475	`* wake_const_ops - wake up non-alter tasks`
		f2d475	`* @sma: semaphore array.`
		f2d475	`* @semnum: semaphore that was modified.`
		f2d475	`* @wake_q: lockless wake-queue head.`
		f2d475	`*`
		f2d475	`* wake_const_ops must be called after a semaphore in a semaphore array`
		f2d475	`* was set to 0. If complex const operations are pending, wake_const_ops must`
		f2d475	`* be called with semnum = -1, as well as with the number of each modified`
		f2d475	`* semaphore.`
		f2d475	`* The tasks that must be woken up are added to @wake_q. The return code`
		f2d475	`* is stored in q->pid.`
		f2d475	`* The function returns 1 if at least one operation was completed successfully.`
		f2d475	`*/`
		f2d475	`static int wake_const_ops(struct sem_array *sma, int semnum,`
		f2d475	`struct wake_q_head *wake_q)`
		f2d475	`{`
		f2d475	`struct sem_queue q, tmp;`
		f2d475	`struct list_head *pending_list;`
		f2d475	`int semop_completed = 0;`
		f2d475
		f2d475	`if (semnum == -1)`
		f2d475	`pending_list = &sma->pending_const;`
		f2d475	`else`
		f2d475	`pending_list = &sma->sems[semnum].pending_const;`
		f2d475
		f2d475	`list_for_each_entry_safe(q, tmp, pending_list, list) {`
		f2d475	`int error = perform_atomic_semop(sma, q);`
		f2d475
		f2d475	`if (error > 0)`
		f2d475	`continue;`
		f2d475	`/* operation completed, remove from queue & wakeup */`
		f2d475	`unlink_queue(sma, q);`
		f2d475
		f2d475	`wake_up_sem_queue_prepare(q, error, wake_q);`
		f2d475	`if (error == 0)`
		f2d475	`semop_completed = 1;`
		f2d475	`}`
		f2d475
		f2d475	`return semop_completed;`
		f2d475	`}`
		f2d475
		f2d475	`/**`
		f2d475	`* do_smart_wakeup_zero - wakeup all wait for zero tasks`
		f2d475	`* @sma: semaphore array`
		f2d475	`* @sops: operations that were performed`
		f2d475	`* @nsops: number of operations`
		f2d475	`* @wake_q: lockless wake-queue head`
		f2d475	`*`
		f2d475	`* Checks all required queue for wait-for-zero operations, based`
		f2d475	`* on the actual changes that were performed on the semaphore array.`
		f2d475	`* The function returns 1 if at least one operation was completed successfully.`
		f2d475	`*/`
		f2d475	`static int do_smart_wakeup_zero(struct sem_array sma, struct sembuf sops,`
		f2d475	`int nsops, struct wake_q_head *wake_q)`
		f2d475	`{`
		f2d475	`int i;`
		f2d475	`int semop_completed = 0;`
		f2d475	`int got_zero = 0;`
		f2d475
		f2d475	`/* first: the per-semaphore queues, if known */`
		f2d475	`if (sops) {`
		f2d475	`for (i = 0; i < nsops; i++) {`
		f2d475	`int num = sops[i].sem_num;`
		f2d475
		f2d475	`if (sma->sems[num].semval == 0) {`
		f2d475	`got_zero = 1;`
		f2d475	`semop_completed \|= wake_const_ops(sma, num, wake_q);`
		f2d475	`}`
		f2d475	`}`
		f2d475	`} else {`
		f2d475	`/*`
		f2d475	`* No sops means modified semaphores not known.`
		f2d475	`* Assume all were changed.`
		f2d475	`*/`
		f2d475	`for (i = 0; i < sma->sem_nsems; i++) {`
		f2d475	`if (sma->sems[i].semval == 0) {`
		f2d475	`got_zero = 1;`
		f2d475	`semop_completed \|= wake_const_ops(sma, i, wake_q);`
		f2d475	`}`
		f2d475	`}`
		f2d475	`}`
		f2d475	`/*`
		f2d475	`* If one of the modified semaphores got 0,`
		f2d475	`* then check the global queue, too.`
		f2d475	`*/`
		f2d475	`if (got_zero)`
		f2d475	`semop_completed \|= wake_const_ops(sma, -1, wake_q);`
		f2d475
		f2d475	`return semop_completed;`
		f2d475	`}`
		f2d475
		f2d475
		f2d475	`/**`
		f2d475	`* update_queue - look for tasks that can be completed.`
		f2d475	`* @sma: semaphore array.`
		f2d475	`* @semnum: semaphore that was modified.`
		f2d475	`* @wake_q: lockless wake-queue head.`
		f2d475	`*`
		f2d475	`* update_queue must be called after a semaphore in a semaphore array`
		f2d475	`* was modified. If multiple semaphores were modified, update_queue must`
		f2d475	`* be called with semnum = -1, as well as with the number of each modified`
		f2d475	`* semaphore.`
		f2d475	`* The tasks that must be woken up are added to @wake_q. The return code`
		f2d475	`* is stored in q->pid.`
		f2d475	`* The function internally checks if const operations can now succeed.`
		f2d475	`*`
		f2d475	`* The function return 1 if at least one semop was completed successfully.`
		f2d475	`*/`
		f2d475	`static int update_queue(struct sem_array sma, int semnum, struct wake_q_head wake_q)`
		f2d475	`{`
		f2d475	`struct sem_queue q, tmp;`
		f2d475	`struct list_head *pending_list;`
		f2d475	`int semop_completed = 0;`
		f2d475
		f2d475	`if (semnum == -1)`
		f2d475	`pending_list = &sma->pending_alter;`
		f2d475	`else`
		f2d475	`pending_list = &sma->sems[semnum].pending_alter;`
		f2d475
		f2d475	`again:`
		f2d475	`list_for_each_entry_safe(q, tmp, pending_list, list) {`
		f2d475	`int error, restart;`
		f2d475
		f2d475	`/* If we are scanning the single sop, per-semaphore list of`
		f2d475	`* one semaphore and that semaphore is 0, then it is not`
		f2d475	`* necessary to scan further: simple increments`
		f2d475	`* that affect only one entry succeed immediately and cannot`
		f2d475	`* be in the per semaphore pending queue, and decrements`
		f2d475	`* cannot be successful if the value is already 0.`
		f2d475	`*/`
		f2d475	`if (semnum != -1 && sma->sems[semnum].semval == 0)`
		f2d475	`break;`
		f2d475
		f2d475	`error = perform_atomic_semop(sma, q);`
		f2d475
		f2d475	`/* Does q->sleeper still need to sleep? */`
		f2d475	`if (error > 0)`
		f2d475	`continue;`
		f2d475
		f2d475	`unlink_queue(sma, q);`
		f2d475
		f2d475	`if (error) {`
		f2d475	`restart = 0;`
		f2d475	`} else {`
		f2d475	`semop_completed = 1;`
		f2d475	`do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q);`
		f2d475	`restart = check_restart(sma, q);`
		f2d475	`}`
		f2d475
		f2d475	`wake_up_sem_queue_prepare(q, error, wake_q);`
		f2d475	`if (restart)`
		f2d475	`goto again;`
		f2d475	`}`
		f2d475	`return semop_completed;`
		f2d475	`}`
		f2d475
		f2d475	`/**`
		f2d475	`* set_semotime - set sem_otime`
		f2d475	`* @sma: semaphore array`
		f2d475	`* @sops: operations that modified the array, may be NULL`
		f2d475	`*`
		f2d475	`* sem_otime is replicated to avoid cache line trashing.`
		f2d475	`* This function sets one instance to the current time.`
		f2d475	`*/`
		f2d475	`static void set_semotime(struct sem_array sma, struct sembuf sops)`
		f2d475	`{`
		f2d475	`if (sops == NULL) {`
		f2d475	`sma->sems[0].sem_otime = ktime_get_real_seconds();`
		f2d475	`} else {`
		f2d475	`sma->sems[sops[0].sem_num].sem_otime =`
		f2d475	`ktime_get_real_seconds();`
		f2d475	`}`
		f2d475	`}`
		f2d475
		f2d475	`/**`
		f2d475	`* do_smart_update - optimized update_queue`
		f2d475	`* @sma: semaphore array`
		f2d475	`* @sops: operations that were performed`
		f2d475	`* @nsops: number of operations`
		f2d475	`* @otime: force setting otime`
		f2d475	`* @wake_q: lockless wake-queue head`
		f2d475	`*`
		f2d475	`* do_smart_update() does the required calls to update_queue and wakeup_zero,`
		f2d475	`* based on the actual changes that were performed on the semaphore array.`
		f2d475	`* Note that the function does not do the actual wake-up: the caller is`
		f2d475	`* responsible for calling wake_up_q().`
		f2d475	`* It is safe to perform this call after dropping all locks.`
		f2d475	`*/`
		f2d475	`static void do_smart_update(struct sem_array sma, struct sembuf sops, int nsops,`
		f2d475	`int otime, struct wake_q_head *wake_q)`
		f2d475	`{`
		f2d475	`int i;`
		f2d475
		f2d475	`otime \|= do_smart_wakeup_zero(sma, sops, nsops, wake_q);`
		f2d475
		f2d475	`if (!list_empty(&sma->pending_alter)) {`
		f2d475	`/* semaphore array uses the global queue - just process it. */`
		f2d475	`otime \|= update_queue(sma, -1, wake_q);`
		f2d475	`} else {`
		f2d475	`if (!sops) {`
		f2d475	`/*`
		f2d475	`* No sops, thus the modified semaphores are not`
		f2d475	`* known. Check all.`
		f2d475	`*/`
		f2d475	`for (i = 0; i < sma->sem_nsems; i++)`
		f2d475	`otime \|= update_queue(sma, i, wake_q);`
		f2d475	`} else {`
		f2d475	`/*`
		f2d475	`* Check the semaphores that were increased:`
		f2d475	`* - No complex ops, thus all sleeping ops are`
		f2d475	`* decrease.`
		f2d475	`* - if we decreased the value, then any sleeping`
		f2d475	`* semaphore ops wont be able to run: If the`
		f2d475	`* previous value was too small, then the new`
		f2d475	`* value will be too small, too.`
		f2d475	`*/`
		f2d475	`for (i = 0; i < nsops; i++) {`
		f2d475	`if (sops[i].sem_op > 0) {`
		f2d475	`otime \|= update_queue(sma,`
		f2d475	`sops[i].sem_num, wake_q);`
		f2d475	`}`
		f2d475	`}`
		f2d475	`}`
		f2d475	`}`
		f2d475	`if (otime)`
		f2d475	`set_semotime(sma, sops);`
		f2d475	`}`
		f2d475
		f2d475	`/*`
		f2d475	`* check_qop: Test if a queued operation sleeps on the semaphore semnum`
		f2d475	`*/`
		f2d475	`static int check_qop(struct sem_array sma, int semnum, struct sem_queue q,`
		f2d475	`bool count_zero)`
		f2d475	`{`
		f2d475	`struct sembuf *sop = q->blocking;`
		f2d475
		f2d475	`/*`
		f2d475	`* Linux always (since 0.99.10) reported a task as sleeping on all`
		f2d475	`* semaphores. This violates SUS, therefore it was changed to the`
		f2d475	`* standard compliant behavior.`
		f2d475	`* Give the administrators a chance to notice that an application`
		f2d475	`* might misbehave because it relies on the Linux behavior.`
		f2d475	`*/`
		f2d475	`pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n"`
		f2d475	`"The task %s (%d) triggered the difference, watch for misbehavior.\n",`
		f2d475	`current->comm, task_pid_nr(current));`
		f2d475
		f2d475	`if (sop->sem_num != semnum)`
		f2d475	`return 0;`
		f2d475
		f2d475	`if (count_zero && sop->sem_op == 0)`
		f2d475	`return 1;`
		f2d475	`if (!count_zero && sop->sem_op < 0)`
		f2d475	`return 1;`
		f2d475
		f2d475	`return 0;`
		f2d475	`}`
		f2d475
		f2d475	`/* The following counts are associated to each semaphore:`
		f2d475	`* semncnt number of tasks waiting on semval being nonzero`
		f2d475	`* semzcnt number of tasks waiting on semval being zero`
		f2d475	`*`
		f2d475	`* Per definition, a task waits only on the semaphore of the first semop`
		f2d475	`* that cannot proceed, even if additional operation would block, too.`
		f2d475	`*/`
		f2d475	`static int count_semcnt(struct sem_array *sma, ushort semnum,`
		f2d475	`bool count_zero)`
		f2d475	`{`
		f2d475	`struct list_head *l;`
		f2d475	`struct sem_queue *q;`
		f2d475	`int semcnt;`
		f2d475
		f2d475	`semcnt = 0;`
		f2d475	`/* First: check the simple operations. They are easy to evaluate */`
		f2d475	`if (count_zero)`
		f2d475	`l = &sma->sems[semnum].pending_const;`
		f2d475	`else`
		f2d475	`l = &sma->sems[semnum].pending_alter;`
		f2d475
		f2d475	`list_for_each_entry(q, l, list) {`
		f2d475	`/* all task on a per-semaphore list sleep on exactly`
		f2d475	`* that semaphore`
		f2d475	`*/`
		f2d475	`semcnt++;`
		f2d475	`}`
		f2d475
		f2d475	`/* Then: check the complex operations. */`
		f2d475	`list_for_each_entry(q, &sma->pending_alter, list) {`
		f2d475	`semcnt += check_qop(sma, semnum, q, count_zero);`
		f2d475	`}`
		f2d475	`if (count_zero) {`
		f2d475	`list_for_each_entry(q, &sma->pending_const, list) {`
		f2d475	`semcnt += check_qop(sma, semnum, q, count_zero);`
		f2d475	`}`
		f2d475	`}`
		f2d475	`return semcnt;`
		f2d475	`}`
		f2d475
		f2d475	`/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked`
		f2d475	`* as a writer and the spinlock for this semaphore set hold. sem_ids.rwsem`
		f2d475	`* remains locked on exit.`
		f2d475	`*/`
		f2d475	`static void freeary(struct ipc_namespace ns, struct kern_ipc_perm ipcp)`
		f2d475	`{`
		f2d475	`struct sem_undo un, tu;`
		f2d475	`struct sem_queue q, tq;`
		f2d475	`struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);`
		f2d475	`int i;`
		f2d475	`DEFINE_WAKE_Q(wake_q);`
		f2d475
		f2d475	`/* Free the existing undo structures for this semaphore set. */`
		f2d475	`ipc_assert_locked_object(&sma->sem_perm);`
		f2d475	`list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {`
		f2d475	`list_del(&un->list_id);`
		f2d475	`spin_lock(&un->ulp->lock);`
		f2d475	`un->semid = -1;`
		f2d475	`list_del_rcu(&un->list_proc);`
		f2d475	`spin_unlock(&un->ulp->lock);`
		f2d475	`kfree_rcu(un, rcu);`
		f2d475	`}`
		f2d475
		f2d475	`/* Wake up all pending processes and let them fail with EIDRM. */`
		f2d475	`list_for_each_entry_safe(q, tq, &sma->pending_const, list) {`
		f2d475	`unlink_queue(sma, q);`
		f2d475	`wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);`
		f2d475	`}`
		f2d475
		f2d475	`list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {`
		f2d475	`unlink_queue(sma, q);`
		f2d475	`wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);`
		f2d475	`}`
		f2d475	`for (i = 0; i < sma->sem_nsems; i++) {`
		f2d475	`struct sem *sem = &sma->sems[i];`
		f2d475	`list_for_each_entry_safe(q, tq, &sem->pending_const, list) {`
		f2d475	`unlink_queue(sma, q);`
		f2d475	`wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);`
		f2d475	`}`
		f2d475	`list_for_each_entry_safe(q, tq, &sem->pending_alter, list) {`
		f2d475	`unlink_queue(sma, q);`
		f2d475	`wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);`
		f2d475	`}`
		f2d475	`ipc_update_pid(&sem->sempid, NULL);`
		f2d475	`}`
		f2d475
		f2d475	`/* Remove the semaphore set from the IDR */`
		f2d475	`sem_rmid(ns, sma);`
		f2d475	`sem_unlock(sma, -1);`
		f2d475	`rcu_read_unlock();`
		f2d475
		f2d475	`wake_up_q(&wake_q);`
		f2d475	`ns->used_sems -= sma->sem_nsems;`
		f2d475	`ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);`
		f2d475	`}`
		f2d475
		f2d475	`static unsigned long copy_semid_to_user(void __user buf, struct semid64_ds in, int version)`
		f2d475	`{`
		f2d475	`switch (version) {`
		f2d475	`case IPC_64:`
		f2d475	`return copy_to_user(buf, in, sizeof(*in));`
		f2d475	`case IPC_OLD:`
		f2d475	`{`
		f2d475	`struct semid_ds out;`
		f2d475
		f2d475	`memset(&out, 0, sizeof(out));`
		f2d475
		f2d475	`ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm);`
		f2d475
		f2d475	`out.sem_otime = in->sem_otime;`
		f2d475	`out.sem_ctime = in->sem_ctime;`
		f2d475	`out.sem_nsems = in->sem_nsems;`
		f2d475
		f2d475	`return copy_to_user(buf, &out, sizeof(out));`
		f2d475	`}`
		f2d475	`default:`
		f2d475	`return -EINVAL;`
		f2d475	`}`
		f2d475	`}`
		f2d475
		f2d475	`static time64_t get_semotime(struct sem_array *sma)`
		f2d475	`{`
		f2d475	`int i;`
		f2d475	`time64_t res;`
		f2d475
		f2d475	`res = sma->sems[0].sem_otime;`
		f2d475	`for (i = 1; i < sma->sem_nsems; i++) {`
		f2d475	`time64_t to = sma->sems[i].sem_otime;`
		f2d475
		f2d475	`if (to > res)`
		f2d475	`res = to;`
		f2d475	`}`
		f2d475	`return res;`
		f2d475	`}`
		f2d475
		f2d475	`static int semctl_stat(struct ipc_namespace *ns, int semid,`
		f2d475	`int cmd, struct semid64_ds *semid64)`
		f2d475	`{`
		f2d475	`struct sem_array *sma;`
		f2d475	`time64_t semotime;`
		f2d475	`int err;`
		f2d475
		f2d475	`memset(semid64, 0, sizeof(*semid64));`
		f2d475
		f2d475	`rcu_read_lock();`
		f2d475	`if (cmd == SEM_STAT \|\| cmd == SEM_STAT_ANY) {`
		f2d475	`sma = sem_obtain_object(ns, semid);`
		f2d475	`if (IS_ERR(sma)) {`
		f2d475	`err = PTR_ERR(sma);`
		f2d475	`goto out_unlock;`
		f2d475	`}`
		f2d475	`} else { /* IPC_STAT */`
		f2d475	`sma = sem_obtain_object_check(ns, semid);`
		f2d475	`if (IS_ERR(sma)) {`
		f2d475	`err = PTR_ERR(sma);`
		f2d475	`goto out_unlock;`
		f2d475	`}`
		f2d475	`}`
		f2d475
		f2d475	`/* see comment for SHM_STAT_ANY */`
		f2d475	`if (cmd == SEM_STAT_ANY)`
		f2d475	`audit_ipc_obj(&sma->sem_perm);`
		f2d475	`else {`
		f2d475	`err = -EACCES;`
		f2d475	`if (ipcperms(ns, &sma->sem_perm, S_IRUGO))`
		f2d475	`goto out_unlock;`
		f2d475	`}`
		f2d475
		f2d475	`err = security_sem_semctl(&sma->sem_perm, cmd);`
		f2d475	`if (err)`
		f2d475	`goto out_unlock;`
		f2d475
		f2d475	`ipc_lock_object(&sma->sem_perm);`
		f2d475
		f2d475	`if (!ipc_valid_object(&sma->sem_perm)) {`
		f2d475	`ipc_unlock_object(&sma->sem_perm);`
		f2d475	`err = -EIDRM;`
		f2d475	`goto out_unlock;`
		f2d475	`}`
		f2d475
		f2d475	`kernel_to_ipc64_perm(&sma->sem_perm, &semid64->sem_perm);`
		f2d475	`semotime = get_semotime(sma);`
		f2d475	`semid64->sem_otime = semotime;`
		f2d475	`semid64->sem_ctime = sma->sem_ctime;`
		f2d475	`#ifndef CONFIG_64BIT`
		f2d475	`semid64->sem_otime_high = semotime >> 32;`
		f2d475	`semid64->sem_ctime_high = sma->sem_ctime >> 32;`
		f2d475	`#endif`
		f2d475	`semid64->sem_nsems = sma->sem_nsems;`
		f2d475
		f2d475	`if (cmd == IPC_STAT) {`
		f2d475	`/*`
		f2d475	`* As defined in SUS:`
		f2d475	`* Return 0 on success`
		f2d475	`*/`
		f2d475	`err = 0;`
		f2d475	`} else {`
		f2d475	`/*`
		f2d475	`* SEM_STAT and SEM_STAT_ANY (both Linux specific)`
		f2d475	`* Return the full id, including the sequence number`
		f2d475	`*/`
		f2d475	`err = sma->sem_perm.id;`
		f2d475	`}`
		f2d475	`ipc_unlock_object(&sma->sem_perm);`
		f2d475	`out_unlock:`
		f2d475	`rcu_read_unlock();`
		f2d475	`return err;`
		f2d475	`}`
		f2d475
		/*
		 * semctl_info - implement IPC_INFO and SEM_INFO
		 *
		 * Fills a struct seminfo with the namespace limits and compile-time
		 * constants. For SEM_INFO, semusz and semaem report the current usage
		 * (sets in use, semaphores in use) instead of the SEMUSZ/SEMAEM
		 * constants. @semid is not used.
		 *
		 * Returns a negative errno on failure, otherwise the highest index in
		 * use as reported by ipc_get_maxidx(), clamped to 0.
		 */
		static int semctl_info(struct ipc_namespace *ns, int semid,
				       int cmd, void __user *p)
		{
			struct seminfo info;
			int highest;
			int rc;

			rc = security_sem_semctl(NULL, cmd);
			if (rc)
				return rc;

			memset(&info, 0, sizeof(info));

			/* per-namespace tunables */
			info.semmni = ns->sc_semmni;
			info.semmns = ns->sc_semmns;
			info.semmsl = ns->sc_semmsl;
			info.semopm = ns->sc_semopm;

			/* fixed limits */
			info.semvmx = SEMVMX;
			info.semmnu = SEMMNU;
			info.semmap = SEMMAP;
			info.semume = SEMUME;

			down_read(&sem_ids(ns).rwsem);
			if (cmd != SEM_INFO) {
				info.semusz = SEMUSZ;
				info.semaem = SEMAEM;
			} else {
				info.semusz = sem_ids(ns).in_use;
				info.semaem = ns->used_sems;
			}
			highest = ipc_get_maxidx(&sem_ids(ns));
			up_read(&sem_ids(ns).rwsem);

			if (copy_to_user(p, &info, sizeof(struct seminfo)))
				return -EFAULT;

			if (highest < 0)
				return 0;
			return highest;
		}
		f2d475
		f2d475	`static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,`
		f2d475	`int val)`
		f2d475	`{`
		f2d475	`struct sem_undo *un;`
		f2d475	`struct sem_array *sma;`
		f2d475	`struct sem *curr;`
		f2d475	`int err;`
		f2d475	`DEFINE_WAKE_Q(wake_q);`
		f2d475
		f2d475	`if (val > SEMVMX \|\| val < 0)`
		f2d475	`return -ERANGE;`
		f2d475
		f2d475	`rcu_read_lock();`
		f2d475	`sma = sem_obtain_object_check(ns, semid);`
		f2d475	`if (IS_ERR(sma)) {`
		f2d475	`rcu_read_unlock();`
		f2d475	`return PTR_ERR(sma);`
		f2d475	`}`
		f2d475
		f2d475	`if (semnum < 0 \|\| semnum >= sma->sem_nsems) {`
		f2d475	`rcu_read_unlock();`
		f2d475	`return -EINVAL;`
		f2d475	`}`
		f2d475
		f2d475
		f2d475	`if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) {`
		f2d475	`rcu_read_unlock();`
		f2d475	`return -EACCES;`
		f2d475	`}`
		f2d475
		f2d475	`err = security_sem_semctl(&sma->sem_perm, SETVAL);`
		f2d475	`if (err) {`
		f2d475	`rcu_read_unlock();`
		f2d475	`return -EACCES;`
		f2d475	`}`
		f2d475
		f2d475	`sem_lock(sma, NULL, -1);`
		f2d475
		f2d475	`if (!ipc_valid_object(&sma->sem_perm)) {`
		f2d475	`sem_unlock(sma, -1);`
		f2d475	`rcu_read_unlock();`
		f2d475	`return -EIDRM;`
		f2d475	`}`
		f2d475
		f2d475	`semnum = array_index_nospec(semnum, sma->sem_nsems);`
		f2d475	`curr = &sma->sems[semnum];`
		f2d475
		f2d475	`ipc_assert_locked_object(&sma->sem_perm);`
		f2d475	`list_for_each_entry(un, &sma->list_id, list_id)`
		f2d475	`un->semadj[semnum] = 0;`
		f2d475
		f2d475	`curr->semval = val;`
		f2d475	`ipc_update_pid(&curr->sempid, task_tgid(current));`
		f2d475	`sma->sem_ctime = ktime_get_real_seconds();`
		f2d475	`/* maybe some queued-up processes were waiting for this */`
		f2d475	`do_smart_update(sma, NULL, 0, 0, &wake_q);`
		f2d475	`sem_unlock(sma, -1);`
		f2d475	`rcu_read_unlock();`
		f2d475	`wake_up_q(&wake_q);`
		f2d475	`return 0;`
		f2d475	`}`
		f2d475
		/*
		 * semctl_main - implement GETALL, SETALL, GETVAL, GETPID, GETNCNT, GETZCNT
		 * @ns: ipc namespace
		 * @semid: id of the semaphore set
		 * @semnum: semaphore index, used by the single-semaphore commands only
		 * @cmd: one of the commands listed above
		 * @p: user buffer of ushort values for GETALL/SETALL
		 *
		 * GETALL and SETALL transfer all values of the set through a temporary
		 * buffer: the on-stack fast_sem_io for up to SEMMSL_FAST semaphores, a
		 * kvmalloc'ed array otherwise. Because the allocation and the user copy
		 * are done without the locks held, a reference on the set is taken
		 * around those windows and the set is revalidated after relocking.
		 *
		 * Returns a negative errno on failure; on success 0 for GETALL/SETALL
		 * and the requested value for the single-semaphore commands.
		 */
		static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
				       int cmd, void __user *p)
		{
			struct sem_array *sma;
			struct sem *curr;
			int err, nsems;
			ushort fast_sem_io[SEMMSL_FAST];
			ushort *sem_io = fast_sem_io;
			DEFINE_WAKE_Q(wake_q);

			rcu_read_lock();
			sma = sem_obtain_object_check(ns, semid);
			if (IS_ERR(sma)) {
				rcu_read_unlock();
				return PTR_ERR(sma);
			}

			nsems = sma->sem_nsems;

			err = -EACCES;
			if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? S_IWUGO : S_IRUGO))
				goto out_rcu_wakeup;

			err = security_sem_semctl(&sma->sem_perm, cmd);
			if (err)
				goto out_rcu_wakeup;

			err = -EACCES;
			switch (cmd) {
			case GETALL:
			{
				ushort __user *array = p;
				int i;

				sem_lock(sma, NULL, -1);
				if (!ipc_valid_object(&sma->sem_perm)) {
					err = -EIDRM;
					goto out_unlock;
				}
				if (nsems > SEMMSL_FAST) {
					/* pin the set while the locks are dropped to allocate */
					if (!ipc_rcu_getref(&sma->sem_perm)) {
						err = -EIDRM;
						goto out_unlock;
					}
					sem_unlock(sma, -1);
					rcu_read_unlock();
					sem_io = kvmalloc_array(nsems, sizeof(ushort),
								GFP_KERNEL);
					if (sem_io == NULL) {
						ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
						return -ENOMEM;
					}

					rcu_read_lock();
					sem_lock_and_putref(sma);
					if (!ipc_valid_object(&sma->sem_perm)) {
						err = -EIDRM;
						goto out_unlock;
					}
				}
				for (i = 0; i < sma->sem_nsems; i++)
					sem_io[i] = sma->sems[i].semval;
				sem_unlock(sma, -1);
				rcu_read_unlock();
				err = 0;
				if (copy_to_user(array, sem_io, nsems*sizeof(ushort)))
					err = -EFAULT;
				goto out_free;
			}
			case SETALL:
			{
				int i;
				struct sem_undo *un;

				/* pin the set: allocation and user copy run unlocked */
				if (!ipc_rcu_getref(&sma->sem_perm)) {
					err = -EIDRM;
					goto out_rcu_wakeup;
				}
				rcu_read_unlock();

				if (nsems > SEMMSL_FAST) {
					sem_io = kvmalloc_array(nsems, sizeof(ushort),
								GFP_KERNEL);
					if (sem_io == NULL) {
						ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
						return -ENOMEM;
					}
				}

				if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
					ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
					err = -EFAULT;
					goto out_free;
				}

				/* validate everything before anything is modified */
				for (i = 0; i < nsems; i++) {
					if (sem_io[i] > SEMVMX) {
						ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
						err = -ERANGE;
						goto out_free;
					}
				}
				rcu_read_lock();
				sem_lock_and_putref(sma);
				if (!ipc_valid_object(&sma->sem_perm)) {
					err = -EIDRM;
					goto out_unlock;
				}

				for (i = 0; i < nsems; i++) {
					sma->sems[i].semval = sem_io[i];
					ipc_update_pid(&sma->sems[i].sempid, task_tgid(current));
				}

				ipc_assert_locked_object(&sma->sem_perm);
				list_for_each_entry(un, &sma->list_id, list_id) {
					for (i = 0; i < nsems; i++)
						un->semadj[i] = 0;
				}
				sma->sem_ctime = ktime_get_real_seconds();
				/* maybe some queued-up processes were waiting for this */
				do_smart_update(sma, NULL, 0, 0, &wake_q);
				err = 0;
				goto out_unlock;
			}
			/* GETVAL, GETPID, GETNCNT, GETZCNT: fall-through */
			}
			err = -EINVAL;
			if (semnum < 0 || semnum >= nsems)
				goto out_rcu_wakeup;

			sem_lock(sma, NULL, -1);
			if (!ipc_valid_object(&sma->sem_perm)) {
				err = -EIDRM;
				goto out_unlock;
			}

			/* clamp the already range-checked index against speculation */
			semnum = array_index_nospec(semnum, nsems);
			curr = &sma->sems[semnum];

			switch (cmd) {
			case GETVAL:
				err = curr->semval;
				goto out_unlock;
			case GETPID:
				err = pid_vnr(curr->sempid);
				goto out_unlock;
			case GETNCNT:
				err = count_semcnt(sma, semnum, 0);
				goto out_unlock;
			case GETZCNT:
				err = count_semcnt(sma, semnum, 1);
				goto out_unlock;
			}

		out_unlock:
			sem_unlock(sma, -1);
		out_rcu_wakeup:
			rcu_read_unlock();
			wake_up_q(&wake_q);
		out_free:
			/* only a heap buffer needs freeing, never the on-stack one */
			if (sem_io != fast_sem_io)
				kvfree(sem_io);
			return err;
		}
		f2d475
		f2d475	`static inline unsigned long`
		f2d475	`copy_semid_from_user(struct semid64_ds out, void __user buf, int version)`
		f2d475	`{`
		f2d475	`switch (version) {`
		f2d475	`case IPC_64:`
		f2d475	`if (copy_from_user(out, buf, sizeof(*out)))`
		f2d475	`return -EFAULT;`
		f2d475	`return 0;`
		f2d475	`case IPC_OLD:`
		f2d475	`{`
		f2d475	`struct semid_ds tbuf_old;`
		f2d475
		f2d475	`if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))`
		f2d475	`return -EFAULT;`
		f2d475
		f2d475	`out->sem_perm.uid = tbuf_old.sem_perm.uid;`
		f2d475	`out->sem_perm.gid = tbuf_old.sem_perm.gid;`
		f2d475	`out->sem_perm.mode = tbuf_old.sem_perm.mode;`
		f2d475
		f2d475	`return 0;`
		f2d475	`}`
		f2d475	`default:`
		f2d475	`return -EINVAL;`
		f2d475	`}`
		f2d475	`}`
		f2d475
		f2d475	`/*`
		f2d475	`* This function handles some semctl commands which require the rwsem`
		f2d475	`* to be held in write mode.`
		f2d475	`* NOTE: no locks must be held, the rwsem is taken inside this function.`
		f2d475	`*/`
		f2d475	`static int semctl_down(struct ipc_namespace *ns, int semid,`
		f2d475	`int cmd, struct semid64_ds *semid64)`
		f2d475	`{`
		f2d475	`struct sem_array *sma;`
		f2d475	`int err;`
		f2d475	`struct kern_ipc_perm *ipcp;`
		f2d475
		f2d475	`down_write(&sem_ids(ns).rwsem);`
		f2d475	`rcu_read_lock();`
		f2d475
		f2d475	`ipcp = ipcctl_obtain_check(ns, &sem_ids(ns), semid, cmd,`
		f2d475	`&semid64->sem_perm, 0);`
		f2d475	`if (IS_ERR(ipcp)) {`
		f2d475	`err = PTR_ERR(ipcp);`
		f2d475	`goto out_unlock1;`
		f2d475	`}`
		f2d475
		f2d475	`sma = container_of(ipcp, struct sem_array, sem_perm);`
		f2d475
		f2d475	`err = security_sem_semctl(&sma->sem_perm, cmd);`
		f2d475	`if (err)`
		f2d475	`goto out_unlock1;`
		f2d475
		f2d475	`switch (cmd) {`
		f2d475	`case IPC_RMID:`
		f2d475	`sem_lock(sma, NULL, -1);`
		f2d475	`/* freeary unlocks the ipc object and rcu */`
		f2d475	`freeary(ns, ipcp);`
		f2d475	`goto out_up;`
		f2d475	`case IPC_SET:`
		f2d475	`sem_lock(sma, NULL, -1);`
		f2d475	`err = ipc_update_perm(&semid64->sem_perm, ipcp);`
		f2d475	`if (err)`
		f2d475	`goto out_unlock0;`
		f2d475	`sma->sem_ctime = ktime_get_real_seconds();`
		f2d475	`break;`
		f2d475	`default:`
		f2d475	`err = -EINVAL;`
		f2d475	`goto out_unlock1;`
		f2d475	`}`
		f2d475
		f2d475	`out_unlock0:`
		f2d475	`sem_unlock(sma, -1);`
		f2d475	`out_unlock1:`
		f2d475	`rcu_read_unlock();`
		f2d475	`out_up:`
		f2d475	`up_write(&sem_ids(ns).rwsem);`
		f2d475	`return err;`
		f2d475	`}`
		f2d475
		f2d475	`long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg)`
		f2d475	`{`
		f2d475	`int version;`
		f2d475	`struct ipc_namespace *ns;`
		f2d475	`void __user p = (void __user )arg;`
		f2d475	`struct semid64_ds semid64;`
		f2d475	`int err;`
		f2d475
		f2d475	`if (semid < 0)`
		f2d475	`return -EINVAL;`
		f2d475
		f2d475	`version = ipc_parse_version(&cmd);`
		f2d475	`ns = current->nsproxy->ipc_ns;`
		f2d475
		f2d475	`switch (cmd) {`
		f2d475	`case IPC_INFO:`
		f2d475	`case SEM_INFO:`
		f2d475	`return semctl_info(ns, semid, cmd, p);`
		f2d475	`case IPC_STAT:`
		f2d475	`case SEM_STAT:`
		f2d475	`case SEM_STAT_ANY:`
		f2d475	`err = semctl_stat(ns, semid, cmd, &semid64);`
		f2d475	`if (err < 0)`
		f2d475	`return err;`
		f2d475	`if (copy_semid_to_user(p, &semid64, version))`
		f2d475	`err = -EFAULT;`
		f2d475	`return err;`
		f2d475	`case GETALL:`
		f2d475	`case GETVAL:`
		f2d475	`case GETPID:`
		f2d475	`case GETNCNT:`
		f2d475	`case GETZCNT:`
		f2d475	`case SETALL:`
		f2d475	`return semctl_main(ns, semid, semnum, cmd, p);`
		f2d475	`case SETVAL: {`
		f2d475	`int val;`
		f2d475	`#if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)`
		f2d475	`/* big-endian 64bit */`
		f2d475	`val = arg >> 32;`
		f2d475	`#else`
		f2d475	`/* 32bit or little-endian 64bit */`
		f2d475	`val = arg;`
		f2d475	`#endif`
		f2d475	`return semctl_setval(ns, semid, semnum, val);`
		f2d475	`}`
		f2d475	`case IPC_SET:`
		f2d475	`if (copy_semid_from_user(&semid64, p, version))`
		f2d475	`return -EFAULT;`
		f2d475	`case IPC_RMID:`
		f2d475	`return semctl_down(ns, semid, cmd, &semid64);`
		f2d475	`default:`
		f2d475	`return -EINVAL;`
		f2d475	`}`
		f2d475	`}`
		f2d475
		/* semctl(2) system call: thin wrapper that forwards to ksys_semctl(). */
		SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
		{
			return ksys_semctl(semid, semnum, cmd, arg);
		}
		f2d475
		f2d475	`#ifdef CONFIG_COMPAT`
		f2d475
		f2d475	`struct compat_semid_ds {`
		f2d475	`struct compat_ipc_perm sem_perm;`
		f2d475	`compat_time_t sem_otime;`
		f2d475	`compat_time_t sem_ctime;`
		f2d475	`compat_uptr_t sem_base;`
		f2d475	`compat_uptr_t sem_pending;`
		f2d475	`compat_uptr_t sem_pending_last;`
		f2d475	`compat_uptr_t undo;`
		f2d475	`unsigned short sem_nsems;`
		f2d475	`};`
		f2d475
		f2d475	`static int copy_compat_semid_from_user(struct semid64_ds out, void __user buf,`
		f2d475	`int version)`
		f2d475	`{`
		f2d475	`memset(out, 0, sizeof(*out));`
		f2d475	`if (version == IPC_64) {`
		f2d475	`struct compat_semid64_ds __user *p = buf;`
		f2d475	`return get_compat_ipc64_perm(&out->sem_perm, &p->sem_perm);`
		f2d475	`} else {`
		f2d475	`struct compat_semid_ds __user *p = buf;`
		f2d475	`return get_compat_ipc_perm(&out->sem_perm, &p->sem_perm);`
		f2d475	`}`
		f2d475	`}`
		f2d475
		f2d475	`static int copy_compat_semid_to_user(void __user buf, struct semid64_ds in,`
		f2d475	`int version)`
		f2d475	`{`
		f2d475	`if (version == IPC_64) {`
		f2d475	`struct compat_semid64_ds v;`
		f2d475	`memset(&v, 0, sizeof(v));`
		f2d475	`to_compat_ipc64_perm(&v.sem_perm, &in->sem_perm);`
		f2d475	`v.sem_otime = lower_32_bits(in->sem_otime);`
		f2d475	`v.sem_otime_high = upper_32_bits(in->sem_otime);`
		f2d475	`v.sem_ctime = lower_32_bits(in->sem_ctime);`
		f2d475	`v.sem_ctime_high = upper_32_bits(in->sem_ctime);`
		f2d475	`v.sem_nsems = in->sem_nsems;`
		f2d475	`return copy_to_user(buf, &v, sizeof(v));`
		f2d475	`} else {`
		f2d475	`struct compat_semid_ds v;`
		f2d475	`memset(&v, 0, sizeof(v));`
		f2d475	`to_compat_ipc_perm(&v.sem_perm, &in->sem_perm);`
		f2d475	`v.sem_otime = in->sem_otime;`
		f2d475	`v.sem_ctime = in->sem_ctime;`
		f2d475	`v.sem_nsems = in->sem_nsems;`
		f2d475	`return copy_to_user(buf, &v, sizeof(v));`
		f2d475	`}`
		f2d475	`}`
		f2d475
		f2d475	`long compat_ksys_semctl(int semid, int semnum, int cmd, int arg)`
		f2d475	`{`
		f2d475	`void __user *p = compat_ptr(arg);`
		f2d475	`struct ipc_namespace *ns;`
		f2d475	`struct semid64_ds semid64;`
		f2d475	`int version = compat_ipc_parse_version(&cmd);`
		f2d475	`int err;`
		f2d475
		f2d475	`ns = current->nsproxy->ipc_ns;`
		f2d475
		f2d475	`if (semid < 0)`
		f2d475	`return -EINVAL;`
		f2d475
		f2d475	`switch (cmd & (~IPC_64)) {`
		f2d475	`case IPC_INFO:`
		f2d475	`case SEM_INFO:`
		f2d475	`return semctl_info(ns, semid, cmd, p);`
		f2d475	`case IPC_STAT:`
		f2d475	`case SEM_STAT:`
		f2d475	`case SEM_STAT_ANY:`
		f2d475	`err = semctl_stat(ns, semid, cmd, &semid64);`
		f2d475	`if (err < 0)`
		f2d475	`return err;`
		f2d475	`if (copy_compat_semid_to_user(p, &semid64, version))`
		f2d475	`err = -EFAULT;`
		f2d475	`return err;`
		f2d475	`case GETVAL:`
		f2d475	`case GETPID:`
		f2d475	`case GETNCNT:`
		f2d475	`case GETZCNT:`
		f2d475	`case GETALL:`
		f2d475	`case SETALL:`
		f2d475	`return semctl_main(ns, semid, semnum, cmd, p);`
		f2d475	`case SETVAL:`
		f2d475	`return semctl_setval(ns, semid, semnum, arg);`
		f2d475	`case IPC_SET:`
		f2d475	`if (copy_compat_semid_from_user(&semid64, p, version))`
		f2d475	`return -EFAULT;`
		f2d475	`/* fallthru */`
		f2d475	`case IPC_RMID:`
		f2d475	`return semctl_down(ns, semid, cmd, &semid64);`
		f2d475	`default:`
		f2d475	`return -EINVAL;`
		f2d475	`}`
		f2d475	`}`
		f2d475
		/* Compat semctl(2) system call: thin wrapper around compat_ksys_semctl(). */
		COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg)
		{
			return compat_ksys_semctl(semid, semnum, cmd, arg);
		}
		f2d475	`#endif`
		f2d475
		f2d475	`/* If the task doesn't already have a undo_list, then allocate one`
		f2d475	`* here. We guarantee there is only one thread using this undo list,`
		f2d475	`* and current is THE ONE`
		f2d475	`*`
		f2d475	`* If this allocation and assignment succeeds, but later`
		f2d475	`* portions of this code fail, there is no need to free the sem_undo_list.`
		f2d475	`* Just let it stay associated with the task, and it'll be freed later`
		f2d475	`* at exit time.`
		f2d475	`*`
		f2d475	`* This can block, so callers must hold no locks.`
		f2d475	`*`
		f2d475	`* On success the list (existing or newly allocated) is stored through`
		f2d475	`* undo_listp and 0 is returned; -ENOMEM is returned when a new list`
		f2d475	`* cannot be allocated, in which case undo_listp is left untouched.`
		f2d475	`*/`
		f2d475	`static inline int get_undo_list(struct sem_undo_list **undo_listp)`
		f2d475	`{`
		f2d475	`struct sem_undo_list *undo_list;`
		f2d475
		f2d475	`undo_list = current->sysvsem.undo_list;`
		f2d475	`if (!undo_list) {`
		f2d475	`undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);`
		f2d475	`if (undo_list == NULL)`
		f2d475	`return -ENOMEM;`
		f2d475	`spin_lock_init(&undo_list->lock);`
		f2d475	`refcount_set(&undo_list->refcnt, 1); /* a fresh list starts with a single reference */`
		f2d475	`INIT_LIST_HEAD(&undo_list->list_proc);`
		f2d475
		f2d475	`current->sysvsem.undo_list = undo_list; /* publish only after the list is fully initialised */`
		f2d475	`}`
		f2d475	`*undo_listp = undo_list;`
		f2d475	`return 0;`
		f2d475	`}`
		f2d475
		f2d475	`static struct sem_undo __lookup_undo(struct sem_undo_list ulp, int semid)`
		f2d475	`{`
		f2d475	`struct sem_undo *un;`
		f2d475
		f2d475	`list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) {`
		f2d475	`if (un->semid == semid)`
		f2d475	`return un;`
		f2d475	`}`
		f2d475	`return NULL;`
		f2d475	`}`
		f2d475
		f2d475	`static struct sem_undo lookup_undo(struct sem_undo_list ulp, int semid)`
		f2d475	`{`
		f2d475	`struct sem_undo *un;`
		f2d475
		f2d475	`assert_spin_locked(&ulp->lock);`
		f2d475
		f2d475	`un = __lookup_undo(ulp, semid);`
		f2d475	`if (un) {`
		f2d475	`list_del_rcu(&un->list_proc);`
		f2d475	`list_add_rcu(&un->list_proc, &ulp->list_proc);`
		f2d475	`}`
		f2d475	`return un;`
		f2d475	`}`
		f2d475
		f2d475	`/**`
		f2d475	`* find_alloc_undo - lookup (and if not present create) undo array`
		f2d475	`* @ns: namespace`
		f2d475	`* @semid: semaphore array id`
		f2d475	`*`
		f2d475	`* The function looks up (and if not present creates) the undo structure.`
		f2d475	`* The size of the undo structure depends on the size of the semaphore`
		f2d475	`* array, thus the alloc path is not that straightforward.`
		f2d475	`* Lifetime-rules: sem_undo is rcu-protected, on success, the function`
		f2d475	`* performs a rcu_read_lock().`
		f2d475	`*/`
		f2d475	`static struct sem_undo find_alloc_undo(struct ipc_namespace ns, int semid)`
		f2d475	`{`
		f2d475	`struct sem_array *sma;`
		f2d475	`struct sem_undo_list *ulp;`
		f2d475	`struct sem_undo un, new;`
		f2d475	`int nsems, error;`
		f2d475
		f2d475	`error = get_undo_list(&ulp;;`
		f2d475	`if (error)`
		f2d475	`return ERR_PTR(error);`
		f2d475
		f2d475	`rcu_read_lock();`
		f2d475	`spin_lock(&ulp->lock);`
		f2d475	`un = lookup_undo(ulp, semid);`
		f2d475	`spin_unlock(&ulp->lock);`
		f2d475	`if (likely(un != NULL))`
		f2d475	`goto out;`
		f2d475
		f2d475	`/* no undo structure around - allocate one. */`
		f2d475	`/* step 1: figure out the size of the semaphore array */`
		f2d475	`sma = sem_obtain_object_check(ns, semid);`
		f2d475	`if (IS_ERR(sma)) {`
		f2d475	`rcu_read_unlock();`
		f2d475	`return ERR_CAST(sma);`
		f2d475	`}`
		f2d475
		f2d475	`nsems = sma->sem_nsems;`
		f2d475	`if (!ipc_rcu_getref(&sma->sem_perm)) {`
		f2d475	`rcu_read_unlock();`
		f2d475	`un = ERR_PTR(-EIDRM);`
		f2d475	`goto out;`
		f2d475	`}`
		f2d475	`rcu_read_unlock();`
		f2d475
		f2d475	`/* step 2: allocate new undo structure */`
		f2d475	`new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);`
		f2d475	`if (!new) {`
		f2d475	`ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);`
		f2d475	`return ERR_PTR(-ENOMEM);`
		f2d475	`}`
		f2d475
		f2d475	`/* step 3: Acquire the lock on semaphore array */`
		f2d475	`rcu_read_lock();`
		f2d475	`sem_lock_and_putref(sma);`
		f2d475	`if (!ipc_valid_object(&sma->sem_perm)) {`
		f2d475	`sem_unlock(sma, -1);`
		f2d475	`rcu_read_unlock();`
		f2d475	`kfree(new);`
		f2d475	`un = ERR_PTR(-EIDRM);`
		f2d475	`goto out;`
		f2d475	`}`
		f2d475	`spin_lock(&ulp->lock);`
		f2d475
		f2d475	`/*`
		f2d475	`* step 4: check for races: did someone else allocate the undo struct?`
		f2d475	`*/`
		f2d475	`un = lookup_undo(ulp, semid);`
		f2d475	`if (un) {`
		f2d475	`kfree(new);`
		f2d475	`goto success;`
		f2d475	`}`
		f2d475	`/* step 5: initialize & link new undo structure */`
		f2d475	`new->semadj = (short *) &new[1];`
		f2d475	`new->ulp = ulp;`
		f2d475	`new->semid = semid;`
		f2d475	`assert_spin_locked(&ulp->lock);`
		f2d475	`list_add_rcu(&new->list_proc, &ulp->list_proc);`
		f2d475	`ipc_assert_locked_object(&sma->sem_perm);`
		f2d475	`list_add(&new->list_id, &sma->list_id);`
		f2d475	`un = new;`
		f2d475
		f2d475	`success:`
		f2d475	`spin_unlock(&ulp->lock);`
		f2d475	`sem_unlock(sma, -1);`
		f2d475	`out:`
		f2d475	`return un;`
		f2d475	`}`
		f2d475
		f2d475	`static long do_semtimedop(int semid, struct sembuf __user *tsops,`
		f2d475	`unsigned nsops, const struct timespec64 *timeout)`
		f2d475	`{`
		f2d475	`int error = -EINVAL;`
		f2d475	`struct sem_array *sma;`
		f2d475	`struct sembuf fast_sops[SEMOPM_FAST];`
		f2d475	`struct sembuf sops = fast_sops, sop;`
		f2d475	`struct sem_undo *un;`
		f2d475	`int max, locknum;`
		f2d475	`bool undos = false, alter = false, dupsop = false;`
		f2d475	`struct sem_queue queue;`
		f2d475	`unsigned long dup = 0, jiffies_left = 0;`
		f2d475	`struct ipc_namespace *ns;`
		f2d475
		f2d475	`ns = current->nsproxy->ipc_ns;`
		f2d475
		f2d475	`if (nsops < 1 \|\| semid < 0)`
		f2d475	`return -EINVAL;`
		f2d475	`if (nsops > ns->sc_semopm)`
		f2d475	`return -E2BIG;`
		f2d475	`if (nsops > SEMOPM_FAST) {`
		f2d475	`sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL);`
		f2d475	`if (sops == NULL)`
		f2d475	`return -ENOMEM;`
		f2d475	`}`
		f2d475
		f2d475	`if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {`
		f2d475	`error = -EFAULT;`
		f2d475	`goto out_free;`
		f2d475	`}`
		f2d475
		f2d475	`if (timeout) {`
		f2d475	`if (timeout->tv_sec < 0 \|\| timeout->tv_nsec < 0 \|\|`
		f2d475	`timeout->tv_nsec >= 1000000000L) {`
		f2d475	`error = -EINVAL;`
		f2d475	`goto out_free;`
		f2d475	`}`
		f2d475	`jiffies_left = timespec64_to_jiffies(timeout);`
		f2d475	`}`
		f2d475
		f2d475	`max = 0;`
		f2d475	`for (sop = sops; sop < sops + nsops; sop++) {`
		f2d475	`unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG);`
		f2d475
		f2d475	`if (sop->sem_num >= max)`
		f2d475	`max = sop->sem_num;`
		f2d475	`if (sop->sem_flg & SEM_UNDO)`
		f2d475	`undos = true;`
		f2d475	`if (dup & mask) {`
		f2d475	`/*`
		f2d475	`* There was a previous alter access that appears`
		f2d475	`* to have accessed the same semaphore, thus use`
		f2d475	`* the dupsop logic. "appears", because the detection`
		f2d475	`* can only check % BITS_PER_LONG.`
		f2d475	`*/`
		f2d475	`dupsop = true;`
		f2d475	`}`
		f2d475	`if (sop->sem_op != 0) {`
		f2d475	`alter = true;`
		f2d475	`dup \|= mask;`
		f2d475	`}`
		f2d475	`}`
		f2d475
		f2d475	`if (undos) {`
		f2d475	`/* On success, find_alloc_undo takes the rcu_read_lock */`
		f2d475	`un = find_alloc_undo(ns, semid);`
		f2d475	`if (IS_ERR(un)) {`
		f2d475	`error = PTR_ERR(un);`
		f2d475	`goto out_free;`
		f2d475	`}`
		f2d475	`} else {`
		f2d475	`un = NULL;`
		f2d475	`rcu_read_lock();`
		f2d475	`}`
		f2d475
		f2d475	`sma = sem_obtain_object_check(ns, semid);`
		f2d475	`if (IS_ERR(sma)) {`
		f2d475	`rcu_read_unlock();`
		f2d475	`error = PTR_ERR(sma);`
		f2d475	`goto out_free;`
		f2d475	`}`
		f2d475
		f2d475	`error = -EFBIG;`
		f2d475	`if (max >= sma->sem_nsems) {`
		f2d475	`rcu_read_unlock();`
		f2d475	`goto out_free;`
		f2d475	`}`
		f2d475
		f2d475	`error = -EACCES;`
		f2d475	`if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) {`
		f2d475	`rcu_read_unlock();`
		f2d475	`goto out_free;`
		f2d475	`}`
		f2d475
		f2d475	`error = security_sem_semop(&sma->sem_perm, sops, nsops, alter);`
		f2d475	`if (error) {`
		f2d475	`rcu_read_unlock();`
		f2d475	`goto out_free;`
		f2d475	`}`
		f2d475
		f2d475	`error = -EIDRM;`
		f2d475	`locknum = sem_lock(sma, sops, nsops);`
		f2d475	`/*`
		f2d475	`* We eventually might perform the following check in a lockless`
		f2d475	`* fashion, considering ipc_valid_object() locking constraints.`
		f2d475	`* If nsops == 1 and there is no contention for sem_perm.lock, then`
		f2d475	`* only a per-semaphore lock is held and it's OK to proceed with the`
		f2d475	`* check below. More details on the fine grained locking scheme`
		f2d475	`* entangled here and why it's RMID race safe on comments at sem_lock()`
		f2d475	`*/`
		f2d475	`if (!ipc_valid_object(&sma->sem_perm))`
		f2d475	`goto out_unlock_free;`
		f2d475	`/*`
		f2d475	`* semid identifiers are not unique - find_alloc_undo may have`
		f2d475	`* allocated an undo structure, it was invalidated by an RMID`
		f2d475	`* and now a new array with received the same id. Check and fail.`
		f2d475	`* This case can be detected checking un->semid. The existence of`
		f2d475	`* "un" itself is guaranteed by rcu.`
		f2d475	`*/`
		f2d475	`if (un && un->semid == -1)`
		f2d475	`goto out_unlock_free;`
		f2d475
		f2d475	`queue.sops = sops;`
		f2d475	`queue.nsops = nsops;`
		f2d475	`queue.undo = un;`
		f2d475	`queue.pid = task_tgid(current);`
		f2d475	`queue.alter = alter;`
		f2d475	`queue.dupsop = dupsop;`
		f2d475
		f2d475	`error = perform_atomic_semop(sma, &queue);`
		f2d475	`if (error == 0) { /* non-blocking succesfull path */`
		f2d475	`DEFINE_WAKE_Q(wake_q);`
		f2d475
		f2d475	`/*`
		f2d475	`* If the operation was successful, then do`
		f2d475	`* the required updates.`
		f2d475	`*/`
		f2d475	`if (alter)`
		f2d475	`do_smart_update(sma, sops, nsops, 1, &wake_q);`
		f2d475	`else`
		f2d475	`set_semotime(sma, sops);`
		f2d475
		f2d475	`sem_unlock(sma, locknum);`
		f2d475	`rcu_read_unlock();`
		f2d475	`wake_up_q(&wake_q);`
		f2d475
		f2d475	`goto out_free;`
		f2d475	`}`
		f2d475	`if (error < 0) /* non-blocking error path */`
		f2d475	`goto out_unlock_free;`
		f2d475
		f2d475	`/*`
		f2d475	`* We need to sleep on this operation, so we put the current`
		f2d475	`* task into the pending queue and go to sleep.`
		f2d475	`*/`
		f2d475	`if (nsops == 1) {`
		f2d475	`struct sem *curr;`
		f2d475	`int idx = array_index_nospec(sops->sem_num, sma->sem_nsems);`
		f2d475	`curr = &sma->sems[idx];`
		f2d475
		f2d475	`if (alter) {`
		f2d475	`if (sma->complex_count) {`
		f2d475	`list_add_tail(&queue.list,`
		f2d475	`&sma->pending_alter);`
		f2d475	`} else {`
		f2d475
		f2d475	`list_add_tail(&queue.list,`
		f2d475	`&curr->pending_alter);`
		f2d475	`}`
		f2d475	`} else {`
		f2d475	`list_add_tail(&queue.list, &curr->pending_const);`
		f2d475	`}`
		f2d475	`} else {`
		f2d475	`if (!sma->complex_count)`
		f2d475	`merge_queues(sma);`
		f2d475
		f2d475	`if (alter)`
		f2d475	`list_add_tail(&queue.list, &sma->pending_alter);`
		f2d475	`else`
		f2d475	`list_add_tail(&queue.list, &sma->pending_const);`
		f2d475
		f2d475	`sma->complex_count++;`
		f2d475	`}`
		f2d475
		f2d475	`do {`
		f2d475	`WRITE_ONCE(queue.status, -EINTR);`
		f2d475	`queue.sleeper = current;`
		f2d475
		f2d475	`__set_current_state(TASK_INTERRUPTIBLE);`
		f2d475	`sem_unlock(sma, locknum);`
		f2d475	`rcu_read_unlock();`
		f2d475
		f2d475	`if (timeout)`
		f2d475	`jiffies_left = schedule_timeout(jiffies_left);`
		f2d475	`else`
		f2d475	`schedule();`
		f2d475
		f2d475	`/*`
		f2d475	`* fastpath: the semop has completed, either successfully or`
		f2d475	`* not, from the syscall pov, is quite irrelevant to us at this`
		f2d475	`* point; we're done.`
		f2d475	`*`
		f2d475	`* We _do_ care, nonetheless, about being awoken by a signal or`
		f2d475	`* spuriously. The queue.status is checked again in the`
		f2d475	`* slowpath (aka after taking sem_lock), such that we can detect`
		f2d475	`* scenarios where we were awakened externally, during the`
		f2d475	`* window between wake_q_add() and wake_up_q().`
		f2d475	`*/`
		f2d475	`error = READ_ONCE(queue.status);`
		f2d475	`if (error != -EINTR) {`
		f2d475	`/*`
		f2d475	`* User space could assume that semop() is a memory`
		f2d475	`* barrier: Without the mb(), the cpu could`
		f2d475	`* speculatively read in userspace stale data that was`
		f2d475	`* overwritten by the previous owner of the semaphore.`
		f2d475	`*/`
		f2d475	`smp_mb();`
		f2d475	`goto out_free;`
		f2d475	`}`
		f2d475
		f2d475	`rcu_read_lock();`
		f2d475	`locknum = sem_lock(sma, sops, nsops);`
		f2d475
		f2d475	`if (!ipc_valid_object(&sma->sem_perm))`
		f2d475	`goto out_unlock_free;`
		f2d475
		f2d475	`error = READ_ONCE(queue.status);`
		f2d475
		f2d475	`/*`
		f2d475	`* If queue.status != -EINTR we are woken up by another process.`
		f2d475	`* Leave without unlink_queue(), but with sem_unlock().`
		f2d475	`*/`
		f2d475	`if (error != -EINTR)`
		f2d475	`goto out_unlock_free;`
		f2d475
		f2d475	`/*`
		f2d475	`* If an interrupt occurred we have to clean up the queue.`
		f2d475	`*/`
		f2d475	`if (timeout && jiffies_left == 0)`
		f2d475	`error = -EAGAIN;`
		f2d475	`} while (error == -EINTR && !signal_pending(current)); /* spurious */`
		f2d475
		f2d475	`unlink_queue(sma, &queue);`
		f2d475
		f2d475	`out_unlock_free:`
		f2d475	`sem_unlock(sma, locknum);`
		f2d475	`rcu_read_unlock();`
		f2d475	`out_free:`
		f2d475	`if (sops != fast_sops)`
		f2d475	`kvfree(sops);`
		f2d475	`return error;`
		f2d475	`}`
		f2d475
		f2d475	`long ksys_semtimedop(int semid, struct sembuf __user *tsops,`
		f2d475	`unsigned int nsops, const struct __kernel_timespec __user *timeout)`
		f2d475	`{ /* Fetches the optional user-space timeout, then hands everything to do_semtimedop(). */`
		f2d475	`if (timeout) {`
		f2d475	`struct timespec64 ts;`
		f2d475	`if (get_timespec64(&ts, timeout)) /* a non-zero result is reported as -EFAULT */`
		f2d475	`return -EFAULT;`
		f2d475	`return do_semtimedop(semid, tsops, nsops, &ts);`
		f2d475	`}`
		f2d475	`return do_semtimedop(semid, tsops, nsops, NULL); /* NULL timeout pointer: no time limit is passed on */`
		f2d475	`}`
		f2d475
		f2d475	`SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,`
		f2d475	`unsigned int, nsops, const struct __kernel_timespec __user *, timeout)`
		f2d475	`{ /* semtimedop syscall entry point */`
		f2d475	`return ksys_semtimedop(semid, tsops, nsops, timeout); /* arguments are forwarded unchanged */`
		f2d475	`}`
		f2d475
		f2d475	`#ifdef CONFIG_COMPAT_32BIT_TIME`
		f2d475	`long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,`
		f2d475	`unsigned int nsops,`
		f2d475	`const struct compat_timespec __user *timeout)`
		f2d475	`{ /* Same flow as ksys_semtimedop(), but the timeout is a struct compat_timespec. */`
		f2d475	`if (timeout) {`
		f2d475	`struct timespec64 ts;`
		f2d475	`if (compat_get_timespec64(&ts, timeout)) /* a non-zero result is reported as -EFAULT */`
		f2d475	`return -EFAULT;`
		f2d475	`return do_semtimedop(semid, tsems, nsops, &ts);`
		f2d475	`}`
		f2d475	`return do_semtimedop(semid, tsems, nsops, NULL); /* NULL timeout pointer: no time limit is passed on */`
		f2d475	`}`
		f2d475
		f2d475	`COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems,`
		f2d475	`unsigned int, nsops,`
		f2d475	`const struct compat_timespec __user *, timeout)`
		f2d475	`{ /* compat semtimedop syscall entry point */`
		f2d475	`return compat_ksys_semtimedop(semid, tsems, nsops, timeout); /* arguments are forwarded unchanged */`
		f2d475	`}`
		f2d475	`#endif`
		f2d475
		f2d475	`SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops,`
		f2d475	`unsigned, nsops)`
		f2d475	`{ /* semop syscall entry point */`
		f2d475	`return do_semtimedop(semid, tsops, nsops, NULL); /* semop takes no timeout, so NULL is passed */`
		f2d475	`}`
		f2d475
		f2d475	`/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between`
		f2d475	`* parent and child tasks.`
		f2d475	`*`
		f2d475	`* With CLONE_SYSVSEM, tsk is given current's undo list (allocated on`
		f2d475	`* demand by get_undo_list()) and the list's refcount is incremented.`
		f2d475	`* Without the flag, tsk starts with no undo list.`
		f2d475	`* Returns 0, or the error reported by get_undo_list().`
		f2d475	`*/`
		f2d475
		f2d475	`int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)`
		f2d475	`{`
		f2d475	`struct sem_undo_list *undo_list;`
		f2d475	`int error;`
		f2d475
		f2d475	`if (clone_flags & CLONE_SYSVSEM) {`
		f2d475	`error = get_undo_list(&undo_list);`
		f2d475	`if (error)`
		f2d475	`return error;`
		f2d475	`refcount_inc(&undo_list->refcnt); /* tsk becomes an additional user of the list */`
		f2d475	`tsk->sysvsem.undo_list = undo_list;`
		f2d475	`} else`
		f2d475	`tsk->sysvsem.undo_list = NULL;`
		f2d475
		f2d475	`return 0;`
		f2d475	`}`
		f2d475
		f2d475	`/*`
		f2d475	`* add semadj values to semaphores, free undo structures.`
		f2d475	`* undo structures are not freed when semaphore arrays are destroyed`
		f2d475	`* so some of them may be out of date.`
		f2d475	`* IMPLEMENTATION NOTE: There is some confusion over whether the`
		f2d475	`* set of adjustments that needs to be done should be done in an atomic`
		f2d475	`* manner or not. That is, if we are attempting to decrement the semval`
		f2d475	`* should we queue up and wait until we can do so legally?`
		f2d475	`* The original implementation attempted to do this (queue and wait).`
		f2d475	`* The current implementation does not do so. The POSIX standard`
		f2d475	`* and SVID should be consulted to determine what behavior is mandated.`
		f2d475	`*`
		f2d475	`* Flow: tsk's undo_list pointer is cleared and its reference on the list`
		f2d475	`* is dropped; only the caller that drops the last reference goes on.`
		f2d475	`* Each pass of the loop then handles the first entry of list_proc: the`
		f2d475	`* entry is unlinked from both lists, its adjustments are applied with`
		f2d475	`* the result clamped to 0..SEMVMX, waiters are updated and the entry is`
		f2d475	`* freed. Stale entries are skipped. Once the list is empty the undo`
		f2d475	`* list itself is freed.`
		f2d475	`*/`
		f2d475	`void exit_sem(struct task_struct *tsk)`
		f2d475	`{`
		f2d475	`struct sem_undo_list *ulp;`
		f2d475
		f2d475	`ulp = tsk->sysvsem.undo_list;`
		f2d475	`if (!ulp)`
		f2d475	`return;`
		f2d475	`tsk->sysvsem.undo_list = NULL;`
		f2d475
		f2d475	`if (!refcount_dec_and_test(&ulp->refcnt))`
		f2d475	`return;`
		f2d475
		f2d475	`for (;;) {`
		f2d475	`struct sem_array *sma;`
		f2d475	`struct sem_undo *un;`
		f2d475	`int semid, i;`
		f2d475	`DEFINE_WAKE_Q(wake_q);`
		f2d475
		f2d475	`cond_resched();`
		f2d475
		f2d475	`rcu_read_lock();`
		f2d475	`un = list_entry_rcu(ulp->list_proc.next,`
		f2d475	`struct sem_undo, list_proc);`
		f2d475	`if (&un->list_proc == &ulp->list_proc) {`
		f2d475	`/*`
		f2d475	`* We must wait for freeary() before freeing this ulp,`
		f2d475	`* in case we raced with last sem_undo. There is a small`
		f2d475	`* possibility where we exit while freeary() didn't`
		f2d475	`* finish unlocking sem_undo_list.`
		f2d475	`*/`
		f2d475	`spin_lock(&ulp->lock);`
		f2d475	`spin_unlock(&ulp->lock);`
		f2d475	`rcu_read_unlock();`
		f2d475	`break;`
		f2d475	`}`
		f2d475	`spin_lock(&ulp->lock);`
		f2d475	`semid = un->semid;`
		f2d475	`spin_unlock(&ulp->lock);`
		f2d475
		f2d475	`/* exit_sem raced with IPC_RMID, nothing to do */`
		f2d475	`if (semid == -1) {`
		f2d475	`rcu_read_unlock();`
		f2d475	`continue;`
		f2d475	`}`
		f2d475
		f2d475	`sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid);`
		f2d475	`/* exit_sem raced with IPC_RMID, nothing to do */`
		f2d475	`if (IS_ERR(sma)) {`
		f2d475	`rcu_read_unlock();`
		f2d475	`continue;`
		f2d475	`}`
		f2d475
		f2d475	`sem_lock(sma, NULL, -1);`
		f2d475	`/* exit_sem raced with IPC_RMID, nothing to do */`
		f2d475	`if (!ipc_valid_object(&sma->sem_perm)) {`
		f2d475	`sem_unlock(sma, -1);`
		f2d475	`rcu_read_unlock();`
		f2d475	`continue;`
		f2d475	`}`
		f2d475	`un = __lookup_undo(ulp, semid);`
		f2d475	`if (un == NULL) {`
		f2d475	`/* exit_sem raced with IPC_RMID+semget() that created`
		f2d475	`* exactly the same semid. Nothing to do.`
		f2d475	`*/`
		f2d475	`sem_unlock(sma, -1);`
		f2d475	`rcu_read_unlock();`
		f2d475	`continue;`
		f2d475	`}`
		f2d475
		f2d475	`/* remove un from the linked lists */`
		f2d475	`ipc_assert_locked_object(&sma->sem_perm);`
		f2d475	`list_del(&un->list_id);`
		f2d475
		f2d475	`spin_lock(&ulp->lock);`
		f2d475	`list_del_rcu(&un->list_proc);`
		f2d475	`spin_unlock(&ulp->lock);`
		f2d475
		f2d475	`/* perform adjustments registered in un */`
		f2d475	`for (i = 0; i < sma->sem_nsems; i++) {`
		f2d475	`struct sem *semaphore = &sma->sems[i];`
		f2d475	`if (un->semadj[i]) {`
		f2d475	`semaphore->semval += un->semadj[i];`
		f2d475	`/*`
		f2d475	`* Range checks of the new semaphore value,`
		f2d475	`* not defined by sus:`
		f2d475	`* - Some unices ignore the undo entirely`
		f2d475	`* (e.g. HP UX 11i 11.22, Tru64 V5.1)`
		f2d475	`* - some cap the value (e.g. FreeBSD caps`
		f2d475	`* at 0, but doesn't enforce SEMVMX)`
		f2d475	`*`
		f2d475	`* Linux caps the semaphore value, both at 0`
		f2d475	`* and at SEMVMX.`
		f2d475	`*`
		f2d475	`* Manfred <manfred@colorfullife.com>`
		f2d475	`*/`
		f2d475	`if (semaphore->semval < 0)`
		f2d475	`semaphore->semval = 0;`
		f2d475	`if (semaphore->semval > SEMVMX)`
		f2d475	`semaphore->semval = SEMVMX;`
		f2d475	`ipc_update_pid(&semaphore->sempid, task_tgid(current));`
		f2d475	`}`
		f2d475	`}`
		f2d475	`/* maybe some queued-up processes were waiting for this */`
		f2d475	`do_smart_update(sma, NULL, 0, 1, &wake_q);`
		f2d475	`sem_unlock(sma, -1);`
		f2d475	`rcu_read_unlock();`
		f2d475	`wake_up_q(&wake_q);`
		f2d475
		f2d475	`kfree_rcu(un, rcu); /* un was unlinked with list_del_rcu() above, so its free is rcu-deferred */`
		f2d475	`}`
		f2d475	`kfree(ulp);`
		f2d475	`}`
		f2d475
		f2d475	`#ifdef CONFIG_PROC_FS`
		f2d475	`static int sysvipc_sem_proc_show(struct seq_file s, void it)`
		f2d475	`{`
		f2d475	`struct user_namespace *user_ns = seq_user_ns(s);`
		f2d475	`struct kern_ipc_perm *ipcp = it;`
		f2d475	`struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);`
		f2d475	`time64_t sem_otime;`
		f2d475
		f2d475	`/*`
		f2d475	`* The proc interface isn't aware of sem_lock(), it calls`
		f2d475	`* ipc_lock_object() directly (in sysvipc_find_ipc).`
		f2d475	`* In order to stay compatible with sem_lock(), we must`
		f2d475	`* enter / leave complex_mode.`
		f2d475	`*/`
		f2d475	`complexmode_enter(sma);`
		f2d475
		f2d475	`sem_otime = get_semotime(sma);`
		f2d475
		f2d475	`seq_printf(s,`
		f2d475	`"%10d %10d %4o %10u %5u %5u %5u %5u %10llu %10llu\n",`
		f2d475	`sma->sem_perm.key,`
		f2d475	`sma->sem_perm.id,`
		f2d475	`sma->sem_perm.mode,`
		f2d475	`sma->sem_nsems,`
		f2d475	`from_kuid_munged(user_ns, sma->sem_perm.uid),`
		f2d475	`from_kgid_munged(user_ns, sma->sem_perm.gid),`
		f2d475	`from_kuid_munged(user_ns, sma->sem_perm.cuid),`
		f2d475	`from_kgid_munged(user_ns, sma->sem_perm.cgid),`
		f2d475	`sem_otime,`
		f2d475	`sma->sem_ctime);`
		f2d475
		f2d475	`complexmode_tryleave(sma);`
		f2d475
		f2d475	`return 0;`
		f2d475	`}`
		f2d475	`#endif`

source-git / kernel

Source Code

Blame ipc/sem.c