| 1 | /* |
|---|
| 2 | * (C) 2001 Clemson University and The University of Chicago |
|---|
| 3 | * (C) 2011 Omnibond Systems |
|---|
| 4 | * |
|---|
| 5 | * Changes by Acxiom Corporation to implement generic service_operation() |
|---|
| 6 | * function, Copyright © Acxiom Corporation, 2005. |
|---|
| 7 | * |
|---|
| 8 | * See COPYING in top-level directory. |
|---|
| 9 | */ |
|---|
| 10 | |
|---|
| 11 | /** \file |
|---|
| 12 | * \ingroup pvfs2linux |
|---|
| 13 | * |
|---|
| 14 | * In-kernel waitqueue operations. |
|---|
| 15 | */ |
|---|
| 16 | |
|---|
| 17 | #include "pvfs2-kernel.h" |
|---|
| 18 | #include "pvfs2-internal.h" |
|---|
| 19 | |
|---|
| 20 | |
|---|
| 21 | /* What we do in this function is to walk the list of operations that are present |
|---|
| 22 | * in the request queue and mark them as purged. |
|---|
| 23 | * NOTE: This is called from the device close after client-core has guaranteed that no new |
|---|
| 24 | * operations could appear on the list since the client-core is anyway going to exit. |
|---|
| 25 | */ |
|---|
| 26 | void purge_waiting_ops(void) |
|---|
| 27 | { |
|---|
| 28 | pvfs2_kernel_op_t *op; |
|---|
| 29 | spin_lock(&pvfs2_request_list_lock); |
|---|
| 30 | list_for_each_entry(op, &pvfs2_request_list, list) |
|---|
| 31 | { |
|---|
| 32 | gossip_debug(GOSSIP_WAIT_DEBUG, "pvfs2-client-core: purging op tag %lld %s\n", lld(op->tag), get_opname_string(op)); |
|---|
| 33 | spin_lock(&op->lock); |
|---|
| 34 | set_op_state_purged(op); |
|---|
| 35 | spin_unlock(&op->lock); |
|---|
| 36 | wake_up_interruptible(&op->waitq); |
|---|
| 37 | } |
|---|
| 38 | spin_unlock(&pvfs2_request_list_lock); |
|---|
| 39 | return; |
|---|
| 40 | } |
|---|
| 41 | |
|---|
| 42 | /** |
|---|
| 43 | * submits a PVFS2 operation and waits for it to complete |
|---|
| 44 | * |
|---|
| 45 | * \note op->downcall.status will contain the status of the operation (in |
|---|
| 46 | * errno format), whether provided by pvfs2-client or a result of failure to |
|---|
| 47 | * service the operation. If the caller wishes to distinguish, then |
|---|
| 48 | * op->state can be checked to see if it was serviced or not. |
|---|
| 49 | * |
|---|
| 50 | * \returns contents of op->downcall.status for convenience |
|---|
| 51 | */ |
|---|
| 52 | int service_operation( |
|---|
| 53 | pvfs2_kernel_op_t* op, /**< operation structure to process */ |
|---|
| 54 | const char* op_name, /**< string name for operation */ |
|---|
| 55 | int flags) /**< flags to modify behavior */ |
|---|
| 56 | { |
|---|
| 57 | sigset_t orig_sigset; |
|---|
| 58 | int ret = 0; |
|---|
| 59 | op->upcall.pid = current->pid; |
|---|
| 60 | #ifdef PVFS2_LINUX_KERNEL_2_4 |
|---|
| 61 | op->upcall.tgid = -1; |
|---|
| 62 | #else |
|---|
| 63 | op->upcall.tgid = current->tgid; |
|---|
| 64 | #endif |
|---|
| 65 | |
|---|
| 66 | retry_servicing: |
|---|
| 67 | op->downcall.status = 0; |
|---|
| 68 | gossip_debug(GOSSIP_WAIT_DEBUG, "pvfs2: service_operation: %s %p\n", op_name, op); |
|---|
| 69 | gossip_debug(GOSSIP_WAIT_DEBUG, "pvfs2: operation posted by process: %s, pid: %i\n", current->comm, current->pid); |
|---|
| 70 | |
|---|
| 71 | /* mask out signals if this operation is not to be interrupted */ |
|---|
| 72 | if(!(flags & PVFS2_OP_INTERRUPTIBLE)) |
|---|
| 73 | { |
|---|
| 74 | mask_blocked_signals(&orig_sigset); |
|---|
| 75 | } |
|---|
| 76 | |
|---|
| 77 | if(!(flags & PVFS2_OP_NO_SEMAPHORE)) |
|---|
| 78 | { |
|---|
| 79 | ret = down_interruptible(&request_semaphore); |
|---|
| 80 | /* check to see if we were interrupted while waiting for semaphore */ |
|---|
| 81 | if(ret < 0) |
|---|
| 82 | { |
|---|
| 83 | if(!(flags & PVFS2_OP_INTERRUPTIBLE)) |
|---|
| 84 | { |
|---|
| 85 | unmask_blocked_signals(&orig_sigset); |
|---|
| 86 | } |
|---|
| 87 | op->downcall.status = ret; |
|---|
| 88 | gossip_debug(GOSSIP_WAIT_DEBUG, "pvfs2: service_operation interrupted.\n"); |
|---|
| 89 | return(ret); |
|---|
| 90 | } |
|---|
| 91 | } |
|---|
| 92 | |
|---|
| 93 | if (is_daemon_in_service() < 0) |
|---|
| 94 | { |
|---|
| 95 | /* By incrementing the per-operation attempt counter, we directly go into the timeout logic |
|---|
| 96 | * while waiting for the matching downcall to be read |
|---|
| 97 | */ |
|---|
| 98 | op->attempts++; |
|---|
| 99 | } |
|---|
| 100 | |
|---|
| 101 | /* queue up the operation */ |
|---|
| 102 | if(flags & PVFS2_OP_PRIORITY) |
|---|
| 103 | { |
|---|
| 104 | add_priority_op_to_request_list(op); |
|---|
| 105 | } |
|---|
| 106 | else |
|---|
| 107 | { |
|---|
| 108 | add_op_to_request_list(op); |
|---|
| 109 | } |
|---|
| 110 | |
|---|
| 111 | if(!(flags & PVFS2_OP_NO_SEMAPHORE)) |
|---|
| 112 | { |
|---|
| 113 | up(&request_semaphore); |
|---|
| 114 | } |
|---|
| 115 | |
|---|
| 116 | /* If we are asked to service an asynchronous operation from VFS perspective, we are done */ |
|---|
| 117 | if (flags & PVFS2_OP_ASYNC) |
|---|
| 118 | { |
|---|
| 119 | return 0; |
|---|
| 120 | } |
|---|
| 121 | |
|---|
| 122 | if(flags & PVFS2_OP_CANCELLATION) |
|---|
| 123 | { |
|---|
| 124 | ret = wait_for_cancellation_downcall(op); |
|---|
| 125 | } |
|---|
| 126 | else |
|---|
| 127 | { |
|---|
| 128 | ret = wait_for_matching_downcall(op); |
|---|
| 129 | } |
|---|
| 130 | |
|---|
| 131 | if(ret < 0) |
|---|
| 132 | { |
|---|
| 133 | /* failed to get matching downcall */ |
|---|
| 134 | if(ret == -ETIMEDOUT) |
|---|
| 135 | { |
|---|
| 136 | gossip_err("pvfs2: %s -- wait timed out; aborting attempt.\n", |
|---|
| 137 | op_name); |
|---|
| 138 | } |
|---|
| 139 | op->downcall.status = ret; |
|---|
| 140 | } |
|---|
| 141 | else |
|---|
| 142 | { |
|---|
| 143 | /* got matching downcall; make sure status is in errno format */ |
|---|
| 144 | op->downcall.status = pvfs2_normalize_to_errno(op->downcall.status); |
|---|
| 145 | ret = op->downcall.status; |
|---|
| 146 | } |
|---|
| 147 | |
|---|
| 148 | if(!(flags & PVFS2_OP_INTERRUPTIBLE)) |
|---|
| 149 | { |
|---|
| 150 | unmask_blocked_signals(&orig_sigset); |
|---|
| 151 | } |
|---|
| 152 | |
|---|
| 153 | BUG_ON(ret != op->downcall.status); |
|---|
| 154 | /* retry if operation has not been serviced and if requested */ |
|---|
| 155 | if (!op_state_serviced(op) && op->downcall.status == -EAGAIN) |
|---|
| 156 | { |
|---|
| 157 | gossip_debug(GOSSIP_WAIT_DEBUG, "pvfs2: tag %lld (%s) -- operation to be retried (%d attempt)\n", |
|---|
| 158 | lld(op->tag), op_name, op->attempts + 1); |
|---|
| 159 | goto retry_servicing; |
|---|
| 160 | } |
|---|
| 161 | gossip_debug(GOSSIP_WAIT_DEBUG, "pvfs2: service_operation %s returning: %d for %p.\n", op_name, ret, op); |
|---|
| 162 | return(ret); |
|---|
| 163 | } |
|---|
| 164 | |
|---|
| 165 | void pvfs2_clean_up_interrupted_operation( |
|---|
| 166 | pvfs2_kernel_op_t * op) |
|---|
| 167 | { |
|---|
| 168 | /* |
|---|
| 169 | handle interrupted cases depending on what state we were in when |
|---|
| 170 | the interruption is detected. there is a coarse grained lock |
|---|
| 171 | across the operation. |
|---|
| 172 | |
|---|
| 173 | NOTE: be sure not to reverse lock ordering by locking an op lock |
|---|
| 174 | while holding the request_list lock. Here, we first lock the op |
|---|
| 175 | and then lock the appropriate list. |
|---|
| 176 | */ |
|---|
| 177 | if( !op ) |
|---|
| 178 | { |
|---|
| 179 | gossip_debug(GOSSIP_WAIT_DEBUG, "%s: op is null, ignoring\n", |
|---|
| 180 | __func__); |
|---|
| 181 | return; |
|---|
| 182 | } |
|---|
| 183 | |
|---|
| 184 | /* one more sanity check, make sure it's in one of the possible states |
|---|
| 185 | * or don't try to cancel it */ |
|---|
| 186 | if( ! (op_state_waiting(op) || op_state_in_progress(op) || |
|---|
| 187 | op_state_serviced(op) || op_state_purged(op)) ) |
|---|
| 188 | { |
|---|
| 189 | gossip_debug(GOSSIP_WAIT_DEBUG, "%s: op %p not in a valid state (%0x), " |
|---|
| 190 | "ignoring\n", __func__, op, op->op_state); |
|---|
| 191 | return; |
|---|
| 192 | } |
|---|
| 193 | |
|---|
| 194 | spin_lock(&op->lock); |
|---|
| 195 | |
|---|
| 196 | if (op_state_waiting(op)) |
|---|
| 197 | { |
|---|
| 198 | /* |
|---|
| 199 | upcall hasn't been read; remove op from upcall request |
|---|
| 200 | list. |
|---|
| 201 | */ |
|---|
| 202 | spin_unlock(&op->lock); |
|---|
| 203 | remove_op_from_request_list(op); |
|---|
| 204 | gossip_debug(GOSSIP_WAIT_DEBUG, "Interrupted: Removed op %p from request_list\n", op); |
|---|
| 205 | } |
|---|
| 206 | else if (op_state_in_progress(op)) |
|---|
| 207 | { |
|---|
| 208 | /* op must be removed from the in progress htable */ |
|---|
| 209 | spin_unlock(&op->lock); |
|---|
| 210 | remove_op_from_htable_ops_in_progress(op); |
|---|
| 211 | gossip_debug(GOSSIP_WAIT_DEBUG, "Interrupted: Removed op %p from " |
|---|
| 212 | "htable_ops_in_progress\n", op); |
|---|
| 213 | } |
|---|
| 214 | else if (!op_state_serviced(op)) |
|---|
| 215 | { |
|---|
| 216 | spin_unlock(&op->lock); |
|---|
| 217 | gossip_err("interrupted operation is in a weird state 0x%x\n", |
|---|
| 218 | op->op_state); |
|---|
| 219 | } |
|---|
| 220 | } |
|---|
| 221 | |
|---|
| 222 | /** sleeps on waitqueue waiting for matching downcall. |
|---|
| 223 | * if client-core finishes servicing, then we are good to go. |
|---|
| 224 | * else if client-core exits, we get woken up here, and retry with a timeout |
|---|
| 225 | * |
|---|
| 226 | * \post when this call returns to the caller, the specified op will no |
|---|
| 227 | * longer be on any list or htable. |
|---|
| 228 | * |
|---|
| 229 | * \returns 0 on success and -errno on failure |
|---|
| 230 | * Errors are: |
|---|
| 231 | * EAGAIN in case we want the caller to requeue and try again.. |
|---|
| 232 | * EINTR/EIO/ETIMEDOUT indicating we are done trying to service this |
|---|
| 233 | * operation since client-core seems to be exiting too often |
|---|
| 234 | * or if we were interrupted. |
|---|
| 235 | */ |
|---|
| 236 | int wait_for_matching_downcall(pvfs2_kernel_op_t * op) |
|---|
| 237 | { |
|---|
| 238 | int ret = -EINVAL; |
|---|
| 239 | DECLARE_WAITQUEUE(wait_entry, current); |
|---|
| 240 | |
|---|
| 241 | spin_lock(&op->lock); |
|---|
| 242 | add_wait_queue(&op->waitq, &wait_entry); |
|---|
| 243 | spin_unlock(&op->lock); |
|---|
| 244 | |
|---|
| 245 | while (1) |
|---|
| 246 | { |
|---|
| 247 | set_current_state(TASK_INTERRUPTIBLE); |
|---|
| 248 | |
|---|
| 249 | spin_lock(&op->lock); |
|---|
| 250 | if (op_state_serviced(op)) |
|---|
| 251 | { |
|---|
| 252 | spin_unlock(&op->lock); |
|---|
| 253 | ret = 0; |
|---|
| 254 | break; |
|---|
| 255 | } |
|---|
| 256 | spin_unlock(&op->lock); |
|---|
| 257 | |
|---|
| 258 | if (!signal_pending(current)) |
|---|
| 259 | { |
|---|
| 260 | /* if this was our first attempt and client-core has not purged our |
|---|
| 261 | * operation, we are happy to simply wait */ |
|---|
| 262 | spin_lock(&op->lock); |
|---|
| 263 | if (op->attempts == 0 && !op_state_purged(op)) |
|---|
| 264 | { |
|---|
| 265 | spin_unlock(&op->lock); |
|---|
| 266 | schedule(); |
|---|
| 267 | } |
|---|
| 268 | else { |
|---|
| 269 | spin_unlock(&op->lock); |
|---|
| 270 | /* subsequent attempts, we retry exactly once with timeouts */ |
|---|
| 271 | if (!schedule_timeout(MSECS_TO_JIFFIES(1000 * op_timeout_secs))) |
|---|
| 272 | { |
|---|
| 273 | gossip_debug(GOSSIP_WAIT_DEBUG, "*** %s: operation timed " |
|---|
| 274 | "out (tag %lld, %p, att %d)\n", __func__, |
|---|
| 275 | lld(op->tag), op, op->attempts); |
|---|
| 276 | ret = -ETIMEDOUT; |
|---|
| 277 | pvfs2_clean_up_interrupted_operation(op); |
|---|
| 278 | break; |
|---|
| 279 | } |
|---|
| 280 | } |
|---|
| 281 | spin_lock(&op->lock); |
|---|
| 282 | op->attempts++; |
|---|
| 283 | /* if the operation was purged in the meantime, it is better to |
|---|
| 284 | * requeue it afresh but ensure that we have not been purged |
|---|
| 285 | * repeatedly. This could happen if client-core crashes when an op |
|---|
| 286 | * is being serviced, so we requeue the op, client core crashes |
|---|
| 287 | * again so we requeue the op, client core starts, and so on...*/ |
|---|
| 288 | if (op_state_purged(op)) |
|---|
| 289 | { |
|---|
| 290 | ret = (op->attempts < PVFS2_PURGE_RETRY_COUNT) ? -EAGAIN : -EIO; |
|---|
| 291 | spin_unlock(&op->lock); |
|---|
| 292 | gossip_debug(GOSSIP_WAIT_DEBUG, "*** %s: operation purged " |
|---|
| 293 | "(tag %lld, %p, att %d)\n", __func__, lld(op->tag), |
|---|
| 294 | op, op->attempts); |
|---|
| 295 | pvfs2_clean_up_interrupted_operation(op); |
|---|
| 296 | break; |
|---|
| 297 | } |
|---|
| 298 | spin_unlock(&op->lock); |
|---|
| 299 | continue; |
|---|
| 300 | } |
|---|
| 301 | |
|---|
| 302 | gossip_debug(GOSSIP_WAIT_DEBUG, "*** %s: operation interrupted by a " |
|---|
| 303 | "signal (tag %lld, op %p)\n", __func__, lld(op->tag), op); |
|---|
| 304 | pvfs2_clean_up_interrupted_operation(op); |
|---|
| 305 | ret = -EINTR; |
|---|
| 306 | break; |
|---|
| 307 | } |
|---|
| 308 | |
|---|
| 309 | set_current_state(TASK_RUNNING); |
|---|
| 310 | |
|---|
| 311 | spin_lock(&op->lock); |
|---|
| 312 | remove_wait_queue(&op->waitq, &wait_entry); |
|---|
| 313 | spin_unlock(&op->lock); |
|---|
| 314 | |
|---|
| 315 | return ret; |
|---|
| 316 | } |
|---|
| 317 | |
|---|
| 318 | /** similar to wait_for_matching_downcall(), but used in the special case |
|---|
| 319 | * of I/O cancellations. |
|---|
| 320 | * |
|---|
| 321 | * \note we need a special wait function because if this is called we already |
|---|
| 322 | * know that a signal is pending in current and need to service the |
|---|
| 323 | * cancellation upcall anyway. the only way to exit this is to either |
|---|
| 324 | * timeout or have the cancellation be serviced properly. |
|---|
| 325 | */ |
|---|
| 326 | int wait_for_cancellation_downcall(pvfs2_kernel_op_t * op) |
|---|
| 327 | { |
|---|
| 328 | int ret = -EINVAL; |
|---|
| 329 | DECLARE_WAITQUEUE(wait_entry, current); |
|---|
| 330 | |
|---|
| 331 | spin_lock(&op->lock); |
|---|
| 332 | add_wait_queue(&op->waitq, &wait_entry); |
|---|
| 333 | spin_unlock(&op->lock); |
|---|
| 334 | |
|---|
| 335 | while (1) |
|---|
| 336 | { |
|---|
| 337 | set_current_state(TASK_INTERRUPTIBLE); |
|---|
| 338 | |
|---|
| 339 | spin_lock(&op->lock); |
|---|
| 340 | if (op_state_serviced(op)) |
|---|
| 341 | { |
|---|
| 342 | spin_unlock(&op->lock); |
|---|
| 343 | ret = 0; |
|---|
| 344 | break; |
|---|
| 345 | } |
|---|
| 346 | spin_unlock(&op->lock); |
|---|
| 347 | |
|---|
| 348 | if (!schedule_timeout |
|---|
| 349 | (MSECS_TO_JIFFIES(1000 * op_timeout_secs))) |
|---|
| 350 | { |
|---|
| 351 | gossip_debug(GOSSIP_WAIT_DEBUG, "*** %s: operation timed out: " |
|---|
| 352 | "(tag %lld, op %p)\n", __func__, lld(op->tag), op); |
|---|
| 353 | pvfs2_clean_up_interrupted_operation(op); |
|---|
| 354 | ret = -ETIMEDOUT; |
|---|
| 355 | break; |
|---|
| 356 | } |
|---|
| 357 | } |
|---|
| 358 | |
|---|
| 359 | set_current_state(TASK_RUNNING); |
|---|
| 360 | |
|---|
| 361 | spin_lock(&op->lock); |
|---|
| 362 | remove_wait_queue(&op->waitq, &wait_entry); |
|---|
| 363 | spin_unlock(&op->lock); |
|---|
| 364 | |
|---|
| 365 | return ret; |
|---|
| 366 | } |
|---|
| 367 | |
|---|
| 368 | |
|---|
| 369 | /* |
|---|
| 370 | * Local variables: |
|---|
| 371 | * c-indent-level: 4 |
|---|
| 372 | * c-basic-offset: 4 |
|---|
| 373 | * End: |
|---|
| 374 | * |
|---|
| 375 | * vim: ts=8 sts=4 sw=4 expandtab |
|---|
| 376 | */ |
|---|