root/branches/orange-next/src/client/sysint/sys-remove.sm @ 8935

Revision 8935, 15.3 KB (checked in by mtmoore, 23 months ago)

uuid conversion, PVFS_x_position changes, server/client side first pass done

Line 
1/*
2 * (C) 2003 Clemson University and The University of Chicago
3 *
4 * See COPYING in top-level directory.
5 */
6
7/** \file
8 *  \ingroup sysint
9 *
10 *  PVFS2 system interface routines for removing an object and its
11 *  associated directory entry.
12 */
13
14#include <string.h>
15#include <assert.h>
16
17#include "client-state-machine.h"
18#include "pvfs2-debug.h"
19#include "job.h"
20#include "gossip.h"
21#include "str-utils.h"
22#include "pint-cached-config.h"
23#include "PINT-reqproto-encode.h"
24#include "ncache.h"
25#include "pvfs2-internal.h"
26
27/*
28  PVFS_{i}sys_remove takes the following steps:
29
30  - rmdirent the entry from the parent directory
31  - getattr on the object to be removed
32  - if the object is a directory
33    - check if the dir is empty
34    - if so, continue
35    - if not, crdirent the previously rmdirent'd
36      entry and return -PVFS_ENOTEMPTY
37  - if the object is a metafile
38    - remove all associated data files
39  - remove the actual object specified
40*/
41
42extern job_context_id pint_client_sm_context;
43
/*
 * Private transition codes.  The state actions in this file store one of
 * these in js_p->error_code so that the machine definition can select a
 * named (non-default) transition.
 */
enum
{
    RMDIRENT_RETRY           = 1,
    CRDIRENT_RETRY           = 2,
    RETURN_STORED_ERROR_CODE = 3
};
50
51static int remove_rmdirent_comp_fn(
52    void *v_p, struct PVFS_server_resp *resp_p, int i);
53static int remove_crdirent_comp_fn(
54    void *v_p, struct PVFS_server_resp *resp_p, int i);
55
/*
 * Log the two-line warning this file emits when a remove step ended in a
 * way that may have left the file system inconsistent.
 */
#define PRINT_REMOVE_WARNING()                                           \
    do                                                                   \
    {                                                                    \
        gossip_err(                                                      \
            "WARNING: PVFS_sys_remove() encountered an error "           \
            "which may lead to\n  inconsistent state.\n");               \
        gossip_err(                                                      \
            "WARNING: PVFS2 fsck (if available) may be needed.\n");      \
    } while (0)
62
/*
 * State machine driving PVFS_{i}sys_remove.
 *
 * Flow, as encoded by the transitions below:
 *
 *   init
 *     -> rmdirent_setup_msgpair -> rmdirent_xfer_msgpair
 *          success: do_remove
 *          failure: rmdirent_retry_or_fail, which either loops back
 *                   through rmdirent_timer (RMDIRENT_RETRY) or gives up
 *                   and goes to cleanup
 *
 *   do_remove jumps to the nested pvfs2_client_remove_helper_sm; whatever
 *   it returns is examined by check_error_code:
 *          success or RETURN_STORED_ERROR_CODE: cleanup
 *          anything else: the crdirent_* states, which try to put the
 *                   directory entry removed earlier back in place
 *
 *   crdirent_setup_msgpair -> crdirent_xfer_msgpair
 *          success: cleanup
 *          failure: crdirent_retry_or_fail, which either loops back
 *                   through crdirent_timer (CRDIRENT_RETRY) or goes to
 *                   cleanup
 *
 *   cleanup -> terminate
 */
63%%
64
65machine pvfs2_client_remove_sm
66{
67    state init
68    {
69        run remove_init;
70        default => rmdirent_setup_msgpair;
71    }
72
73    state rmdirent_setup_msgpair
74    {
75        run remove_rmdirent_setup_msgpair;
76        success => rmdirent_xfer_msgpair;
77        default => rmdirent_retry_or_fail;
78    }
79
80    state rmdirent_xfer_msgpair
81    {
82        jump pvfs2_msgpairarray_sm;
83        success => do_remove;
84        default => rmdirent_retry_or_fail;
85    }
86
87    state rmdirent_retry_or_fail
88    {
89        run remove_rmdirent_retry_or_fail;
90        RMDIRENT_RETRY => rmdirent_timer;
91        default => cleanup;
92    }
93
94    state do_remove
95    {
96        jump pvfs2_client_remove_helper_sm;
97        default => check_error_code;
98    }
99
100    state check_error_code
101    {
102        run remove_check_error_code;
103        success => cleanup;
104        RETURN_STORED_ERROR_CODE => cleanup;
105        default => crdirent_setup_msgpair;
106    }
107
108    state crdirent_timer
109    {
110        run remove_generic_timer;
111        default => crdirent_setup_msgpair;
112    }
113
114    state rmdirent_timer
115    {
116        run remove_generic_timer;
117        default => rmdirent_setup_msgpair;
118    }
119
120    state crdirent_setup_msgpair
121    {
122        run remove_crdirent_setup_msgpair;
123        success => crdirent_xfer_msgpair;
124        default => crdirent_retry_or_fail;
125    }
126
127    state crdirent_xfer_msgpair
128    {
129        jump pvfs2_msgpairarray_sm;
130        success => cleanup;
131        default => crdirent_retry_or_fail;
132    }
133
134    state crdirent_retry_or_fail
135    {
136        run remove_crdirent_retry_or_fail;
137        CRDIRENT_RETRY => crdirent_timer;
138        default => cleanup;
139    }
140
141    state cleanup
142    {
143        run remove_cleanup;
144        default => terminate;
145    }
146}
147
148%%
149
150/** Initiate removal of an object and its directory entry.
151 */
152PVFS_error PVFS_isys_remove(
153    char *object_name,
154    PVFS_object_ref parent_ref,
155    const PVFS_credentials *credentials,
156    PVFS_sys_op_id *op_id,
157    PVFS_hint hints,
158    void *user_ptr)
159{
160    PVFS_error ret = -PVFS_EINVAL;
161    PINT_smcb *smcb = NULL;
162    PINT_client_sm *sm_p = NULL;
163
164    gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_isys_remove entered\n");
165
166    if ((PVFS_handle_is_null(parent_ref.handle)) ||
167        (parent_ref.fs_id == PVFS_FS_ID_NULL) ||
168        (object_name == NULL))
169    {
170        gossip_err("invalid (NULL) required argument\n");
171        return ret;
172    }
173
174    PINT_smcb_alloc(&smcb, PVFS_SYS_REMOVE,
175             sizeof(struct PINT_client_sm),
176             client_op_state_get_machine,
177             client_state_machine_terminate,
178             pint_client_sm_context);
179    if (smcb == NULL)
180    {
181        return -PVFS_ENOMEM;
182    }
183    sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
184
185    PINT_init_msgarray_params(sm_p, parent_ref.fs_id);
186    PINT_init_sysint_credentials(sm_p->cred_p, credentials);
187    sm_p->u.remove.object_name = object_name;
188    sm_p->parent_ref  = parent_ref;
189    sm_p->u.remove.stored_error_code = 0;
190    PVFS_hint_copy(hints, &sm_p->hints);
191    PVFS_hint_add(&sm_p->hints, PVFS_HINT_HANDLE_NAME, sizeof(PVFS_handle), &parent_ref.handle);
192
193    gossip_debug(
194        GOSSIP_CLIENT_DEBUG, "Trying to remove entry %s under %llu,%d\n",
195        object_name, llu(parent_ref.handle), parent_ref.fs_id);
196
197    /* NOTE: This state machine previously multiplied the default job timeout
198     * by five to allow for potentially long sync delays.  We instead now set
199     * the default client BMI timeout higher for all operations: if a sync can
200     * go slow then so can any other arbitrary operation queued behind it. -PHC
201     */
202   
203    return PINT_client_state_machine_post(
204        smcb,  op_id, user_ptr);
205}
206
207/** Remove an object and its directory entry.
208 */
209PVFS_error PVFS_sys_remove(
210    char *object_name,
211    PVFS_object_ref parent_ref,
212    const PVFS_credentials *credentials,
213    PVFS_hint hints)
214{
215    PVFS_error ret = -PVFS_EINVAL, error = 0;
216    PVFS_sys_op_id op_id;
217
218    gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_sys_remove entered\n");
219
220    ret = PVFS_isys_remove(object_name, parent_ref,
221                           credentials, &op_id, hints, NULL);
222    if (ret)
223    {
224        PVFS_perror_gossip("PVFS_isys_remove call", ret);
225        error = ret;
226    }
227    else
228    {
229        ret = PVFS_sys_wait(op_id, "remove", &error);
230        if (ret)
231        {
232            PVFS_perror_gossip("PVFS_sys_wait call", ret);
233            error = ret;
234        }
235    }
236
237    PINT_sys_release(op_id);
238    return error;
239}
240
241
242/****************************************************************/
243
244static PINT_sm_action remove_init(
245        struct PINT_smcb *smcb, job_status_s *js_p)
246{
247    gossip_debug(GOSSIP_CLIENT_DEBUG, "remove state: init\n");
248
249    assert(js_p->error_code == 0);
250    return SM_ACTION_COMPLETE;
251}
252
253static PINT_sm_action remove_rmdirent_setup_msgpair(
254        struct PINT_smcb *smcb, job_status_s *js_p)
255{
256    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
257    int ret = -PVFS_EINVAL;
258    PINT_sm_msgpair_state *msg_p = NULL;
259
260    /* NOTE: we remove the dirent first because this gets the object
261     * out of the system fastest.
262     */
263    gossip_debug(GOSSIP_CLIENT_DEBUG,
264                 "remove state: rmdirent_setup_msgpair\n");
265
266    js_p->error_code = 0;
267
268    PINT_msgpair_init(&sm_p->msgarray_op);
269    msg_p = &sm_p->msgarray_op.msgpair;
270
271    PINT_SERVREQ_RMDIRENT_FILL(
272        msg_p->req,
273        *sm_p->cred_p,
274        sm_p->parent_ref.fs_id,
275        sm_p->parent_ref.handle,
276        sm_p->u.remove.object_name,
277        sm_p->hints);
278
279    gossip_debug(GOSSIP_REMOVE_DEBUG, "- doing RMDIRENT on %s "
280                 "under %llu,%d\n", sm_p->u.remove.object_name,
281                 llu(sm_p->parent_ref.handle),
282                 sm_p->parent_ref.fs_id);
283
284    msg_p->fs_id = sm_p->parent_ref.fs_id;
285    PVFS_handle_copy(msg_p->handle, sm_p->parent_ref.handle);
286    msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY;
287    msg_p->comp_fn = remove_rmdirent_comp_fn;
288
289    ret = PINT_cached_config_map_to_server(
290        &msg_p->svr_addr, msg_p->handle, msg_p->fs_id);
291
292    if (ret)
293    {
294        gossip_err("Failed to map meta server address\n");
295        js_p->error_code = ret;
296    }
297
298    PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op);
299    return SM_ACTION_COMPLETE;
300}
301
302static PINT_sm_action remove_crdirent_setup_msgpair(
303        struct PINT_smcb *smcb, job_status_s *js_p)
304{
305    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
306    int ret = -PVFS_EINVAL;
307    PINT_sm_msgpair_state *msg_p = NULL;
308
309    gossip_debug(GOSSIP_CLIENT_DEBUG,
310                 "remove state: crdirent_setup_msgpair\n");
311
312    sm_p->u.remove.stored_error_code = js_p->error_code;
313
314    js_p->error_code = 0;
315
316    PINT_msgpair_init(&sm_p->msgarray_op);
317    msg_p = &sm_p->msgarray_op.msgpair;
318
319    PINT_SERVREQ_CRDIRENT_FILL(
320        msg_p->req,
321        *sm_p->cred_p,
322        sm_p->u.remove.object_name,
323        sm_p->object_ref.handle,
324        sm_p->parent_ref.handle,
325        sm_p->parent_ref.fs_id,
326        sm_p->hints);
327
328    gossip_debug(GOSSIP_REMOVE_DEBUG, "- doing CRDIRENT of %s (%llu,%d) "
329                 "under %llu,%d\n", sm_p->u.remove.object_name,
330                 llu(sm_p->object_ref.handle),
331                 sm_p->object_ref.fs_id,
332                 llu(sm_p->parent_ref.handle),
333                 sm_p->parent_ref.fs_id);
334
335    msg_p->fs_id = sm_p->parent_ref.fs_id;
336    PVFS_handle_copy(msg_p->handle, sm_p->parent_ref.handle);
337    msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY;
338    msg_p->comp_fn = remove_crdirent_comp_fn;
339
340    ret = PINT_cached_config_map_to_server(
341        &msg_p->svr_addr, msg_p->handle, msg_p->fs_id);
342
343    if (ret)
344    {
345        gossip_err("Failed to map meta server address\n");
346        js_p->error_code = ret;
347    }
348
349    PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op);
350    return SM_ACTION_COMPLETE;
351}
352
353static int remove_rmdirent_comp_fn(
354    void *v_p,
355    struct PVFS_server_resp *resp_p,
356    int index)
357{
358    PINT_smcb *smcb = v_p;
359    PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM);
360
361    assert(resp_p->op == PVFS_SERV_RMDIRENT);
362
363    if (resp_p->status == 0)
364    {
365        assert(!PVFS_handle_is_null(resp_p->u.rmdirent.entry_handle));
366        assert(sm_p->parent_ref.fs_id != PVFS_FS_ID_NULL);
367
368        /* pull handle out of response, also copy in fs_id from before */
369        PVFS_handle_copy(sm_p->object_ref.handle,
370                         resp_p->u.rmdirent.entry_handle);
371        sm_p->object_ref.fs_id  = sm_p->parent_ref.fs_id;
372
373        gossip_debug(
374            GOSSIP_CLIENT_DEBUG,
375            "  remove_rmdirent_comp_fn: metafile handle = %llu\n",
376            llu(sm_p->object_ref.handle));
377    }
378    return resp_p->status;
379}
380
381static int remove_crdirent_comp_fn(
382    void *v_p,
383    struct PVFS_server_resp *resp_p,
384    int index)
385{
386    assert(resp_p->op == PVFS_SERV_CRDIRENT);
387
388    if (resp_p->status == 0)
389    {
390        gossip_debug(GOSSIP_CLIENT_DEBUG,
391                     "  remove_crdirent_comp_fn: OK\n");
392    }
393    return resp_p->status;
394}
395
396static PINT_sm_action remove_check_error_code(
397        struct PINT_smcb *smcb, job_status_s *js_p)
398{
399    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
400    char buf[64] = {0};
401
402    PVFS_strerror_r(js_p->error_code, buf, 64);
403    gossip_debug(GOSSIP_REMOVE_DEBUG, "remove_check_error_code got %s "
404                 "(%d)\n", buf, js_p->error_code);
405    /*
406      separate error codes which are ok to return to the caller at
407      this point without trying to undo what we've done and retry the
408      removal
409    */
410    switch(js_p->error_code)
411    {
412        case -PVFS_ENOTEMPTY:
413            /* let the error pass through for the crdirent cleanup */
414            break;
415        case -PVFS_EINVAL:
416        case -PVFS_ENOMEM:
417            /* don't undo after these errors */
418            sm_p->u.remove.stored_error_code = js_p->error_code;
419            js_p->error_code = RETURN_STORED_ERROR_CODE;
420            break;
421    }
422    return SM_ACTION_COMPLETE;
423}
424
425static PINT_sm_action remove_cleanup(
426        struct PINT_smcb *smcb, job_status_s *js_p)
427{
428    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
429    gossip_debug(GOSSIP_CLIENT_DEBUG, "remove state: cleanup\n");
430
431    sm_p->error_code = (sm_p->u.remove.stored_error_code ?
432                        sm_p->u.remove.stored_error_code :
433                        js_p->error_code);
434
435    /* NOTE: acache is invalidated by remove_helper now */
436   
437    /* The ncache invalidate must be done from this function, because the
438     * remove_helper may not  have all the information needed
439     */
440    PINT_ncache_invalidate((const char*) sm_p->u.remove.object_name,
441                           (const PVFS_object_ref*) &(sm_p->parent_ref));
442
443    PINT_msgpairarray_destroy(&sm_p->msgarray_op);
444
445    PINT_SET_OP_COMPLETE;
446    return SM_ACTION_TERMINATE;
447}
448
449static PINT_sm_action remove_generic_timer(
450        struct PINT_smcb *smcb, job_status_s *js_p)
451{
452    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
453    int ret = -PVFS_EINVAL;
454    job_id_t tmp_id;
455
456    gossip_debug(GOSSIP_CLIENT_DEBUG, "remove state: generic_timer\n");
457
458    ret = job_req_sched_post_timer(
459        sm_p->msgarray_op.params.retry_delay, smcb, 0, js_p, &tmp_id,
460        pint_client_sm_context);
461
462    return ret;
463}
464
465static PINT_sm_action remove_crdirent_retry_or_fail(
466        struct PINT_smcb *smcb, job_status_s *js_p)
467{
468    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
469    gossip_debug(GOSSIP_CLIENT_DEBUG,
470                 "remove state: crdirent_retry_or_fail\n");
471
472    /* try again (up to a point) if we get a comm. failure. */
473    if ((PVFS_ERROR_CLASS(-js_p->error_code) == PVFS_ERROR_BMI) &&
474        (sm_p->u.remove.retry_count < sm_p->msgarray_op.params.retry_limit))
475    {
476        sm_p->u.remove.retry_count++;
477        js_p->error_code = CRDIRENT_RETRY;
478        return SM_ACTION_COMPLETE;
479    }
480
481    if ((js_p->error_code == -PVFS_EEXIST) &&
482        (sm_p->u.remove.retry_count > 0))
483    {
484        /* assume everything worked out ok and we got the right
485         * directory entry back.  there was just a transient network
486         * problem along the way
487         */
488        js_p->error_code = 0;
489        return SM_ACTION_COMPLETE;
490    }
491
492    sm_p->u.remove.stored_error_code = js_p->error_code;
493
494    gossip_err("Error: failed to replace directory during remove recovery: entry %s for object %llu.\n",
495        sm_p->u.remove.object_name,
496        llu(sm_p->object_ref.handle));
497    PVFS_perror_gossip("crdirent", js_p->error_code);
498
499    PRINT_REMOVE_WARNING();
500    return SM_ACTION_COMPLETE;
501}
502
503
504static PINT_sm_action remove_rmdirent_retry_or_fail(
505        struct PINT_smcb *smcb, job_status_s *js_p)
506{
507    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
508    gossip_debug(GOSSIP_CLIENT_DEBUG,
509                 "remove state: rmdirent_retry_or_fail\n");
510
511    /* try again (up to a point) if we get a comm. failure. */
512    if ((PVFS_ERROR_CLASS(-js_p->error_code) == PVFS_ERROR_BMI) &&
513        (sm_p->u.remove.retry_count < sm_p->msgarray_op.params.retry_limit))
514    {
515        sm_p->u.remove.retry_count++;
516        js_p->error_code = RMDIRENT_RETRY;
517        return SM_ACTION_COMPLETE;
518    }
519
520    if ((js_p->error_code == -PVFS_ENOENT) &&
521        (sm_p->u.remove.retry_count > 0))
522    {
523        /* this is a tricky error case.  Server reports ENOENT, but
524         * this is not the first time we attempted the rmdirent.  It
525         * may be the case that it is reporting ENOENT because one of
526         * the earlier retries possibly completed.  We will treat this
527         * as success, but put out an error message.  This could
528         * strand objects, or remove non-empty directories, for
529         * example.
530         */
531        gossip_err("Warning: Received ENOENT on retry to remove entry %s.\n",
532            sm_p->u.remove.object_name);
533
534        PRINT_REMOVE_WARNING();
535        js_p->error_code = -PVFS_ENOENT;
536        return SM_ACTION_COMPLETE;
537    }
538
539    /* other errors are preserved and passed along to the next state */
540    return SM_ACTION_COMPLETE;
541}
542
543/*
544 * Local variables:
545 *  mode: c
546 *  c-indent-level: 4
547 *  c-basic-offset: 4
548 * End:
549 *
550 * vim: ft=c ts=8 sts=4 sw=4 expandtab
551 */
Note: See TracBrowser for help on using the browser.