root/branches/Orange-Elaine-Distr-Dir-Branch/src/client/sysint/sys-remove.sm @ 8418

Revision 8418, 16.9 KB (checked in by elaine, 3 years ago)

Pass multiple dirent handles between server and client.

Line 
1/*
2 * (C) 2003 Clemson University and The University of Chicago
3 *
4 * See COPYING in top-level directory.
5 */
6
7/** \file
8 *  \ingroup sysint
9 *
10 *  PVFS2 system interface routines for removing an object and its
11 *  associated directory entry.
12 */
13
14#include <string.h>
15#include <assert.h>
16
17#include "client-state-machine.h"
18#include "pint-util.h"
19#include "pvfs2-debug.h"
20#include "job.h"
21#include "gossip.h"
22#include "str-utils.h"
23#include "pint-cached-config.h"
24#include "PINT-reqproto-encode.h"
25#include "ncache.h"
26#include "pvfs2-internal.h"
27
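/*
  PVFS_{i}sys_remove takes the following steps:

  - getattr on the parent directory (its dirent handle is needed to
    address the rmdirent)
  - rmdirent the entry from the parent directory; the response carries
    the handle of the object to be removed
  - run the remove helper state machine on that object, which is
    expected to:
    - getattr on the object to be removed
    - if the object is a directory, check if the dir is empty
    - if the object is a metafile, remove all associated data files
    - remove the actual object specified
  - if the helper fails with -PVFS_EINVAL or -PVFS_ENOMEM, return that
    error without undoing the rmdirent
  - if the helper fails with any other error (e.g. -PVFS_ENOTEMPTY for
    a non-empty directory), getattr again, crdirent the previously
    rmdirent'd entry and return the error
*/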
42
43extern job_context_id pint_client_sm_context;
44
/*
 * Positive, non-error values written to js_p->error_code by the state
 * actions below in order to select a named transition in the machine.
 */
enum
{
    RMDIRENT_RETRY           = 1, /* resend the rmdirent after a delay */
    CRDIRENT_RETRY           = 2, /* resend the crdirent after a delay */
    RETURN_STORED_ERROR_CODE = 3  /* go to cleanup, report stored error */
};
51
/* msgpair completion callbacks; definitions follow the state actions */
static int remove_rmdirent_comp_fn(void *v_p,
                                   struct PVFS_server_resp *resp_p,
                                   int index);
static int remove_crdirent_comp_fn(void *v_p,
                                   struct PVFS_server_resp *resp_p,
                                   int index);
56
/*
 * Log a two-part warning that a failed remove may have left the file
 * system in an inconsistent state.  Wrapped in do/while(0) so it can be
 * used as a single statement.
 */
#define PRINT_REMOVE_WARNING()                                          \
    do                                                                  \
    {                                                                   \
        gossip_err(                                                     \
            "WARNING: PVFS_sys_remove() encountered an error "          \
            "which may lead to\n  inconsistent state.\n");              \
        gossip_err(                                                     \
            "WARNING: PVFS2 fsck (if available) may be needed.\n");     \
    } while(0)
63
/*
 * State machine driving PVFS_{i}sys_remove.
 *
 * Forward path:
 *   init -> init_getattr -> rmdirent_setup_msgpair ->
 *   rmdirent_xfer_msgpair -> do_remove -> check_error_code -> cleanup
 *
 * rmdirent failures go to rmdirent_retry_or_fail, which either loops
 * back through rmdirent_timer (RMDIRENT_RETRY) or gives up to cleanup.
 *
 * When check_error_code reports neither success nor
 * RETURN_STORED_ERROR_CODE, the rmdirent is undone:
 *   getattr_setup -> parent_getattr -> crdirent_setup_msgpair ->
 *   crdirent_xfer_msgpair -> cleanup
 * crdirent failures go to crdirent_retry_or_fail, which either loops
 * back through crdirent_timer to getattr_setup (CRDIRENT_RETRY) or
 * gives up to cleanup.
 *
 * Every path ends in cleanup, which terminates the machine.
 */
%%

machine pvfs2_client_remove_sm
{
    state init
    {
        run remove_init;
        default => init_getattr;
    }

    state init_getattr
    {
        jump pvfs2_client_getattr_sm;
        success => rmdirent_setup_msgpair;
        default => cleanup;
    }

    state rmdirent_setup_msgpair
    {
        run remove_rmdirent_setup_msgpair;
        success => rmdirent_xfer_msgpair;
        default => rmdirent_retry_or_fail;
    }

    state rmdirent_xfer_msgpair
    {
        jump pvfs2_msgpairarray_sm;
        success => do_remove;
        default => rmdirent_retry_or_fail;
    }

    state rmdirent_retry_or_fail
    {
        run remove_rmdirent_retry_or_fail;
        RMDIRENT_RETRY => rmdirent_timer;
        default => cleanup;
    }

    state do_remove
    {
        jump pvfs2_client_remove_helper_sm;
        default => check_error_code;
    }

    state check_error_code
    {
        run remove_check_error_code;
        success => cleanup;
        RETURN_STORED_ERROR_CODE => cleanup;
        default => getattr_setup;
    }

    state crdirent_timer
    {
        run remove_generic_timer;
        default => getattr_setup;
    }

    state rmdirent_timer
    {
        run remove_generic_timer;
        default => rmdirent_setup_msgpair;
    }

    state getattr_setup
    {
        run remove_getattr_setup;
        default => parent_getattr;
    }

    state parent_getattr
    {
        jump pvfs2_client_getattr_sm;
        success => crdirent_setup_msgpair;
        default => cleanup;
    }

    state crdirent_setup_msgpair
    {
        run remove_crdirent_setup_msgpair;
        success => crdirent_xfer_msgpair;
        default => crdirent_retry_or_fail;
    }

    state crdirent_xfer_msgpair
    {
        jump pvfs2_msgpairarray_sm;
        success => cleanup;
        default => crdirent_retry_or_fail;
    }

    state crdirent_retry_or_fail
    {
        run remove_crdirent_retry_or_fail;
        CRDIRENT_RETRY => crdirent_timer;
        default => cleanup;
    }

    state cleanup
    {
        run remove_cleanup;
        default => terminate;
    }
}

%%
170
171/** Initiate removal of an object and its directory entry.
172 */
173PVFS_error PVFS_isys_remove(
174    char *object_name,
175    PVFS_object_ref parent_ref,
176    const PVFS_credentials *credentials,
177    PVFS_sys_op_id *op_id,
178    PVFS_hint hints,
179    void *user_ptr)
180{
181    PVFS_error ret = -PVFS_EINVAL;
182    PINT_smcb *smcb = NULL;
183    PINT_client_sm *sm_p = NULL;
184
185    gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_isys_remove entered\n");
186
187    if ((parent_ref.handle == PVFS_HANDLE_NULL) ||
188        (parent_ref.fs_id == PVFS_FS_ID_NULL) ||
189        (object_name == NULL))
190    {
191        gossip_err("invalid (NULL) required argument\n");
192        return ret;
193    }
194
195    PINT_smcb_alloc(&smcb, PVFS_SYS_REMOVE,
196             sizeof(struct PINT_client_sm),
197             client_op_state_get_machine,
198             client_state_machine_terminate,
199             pint_client_sm_context);
200    if (smcb == NULL)
201    {
202        return -PVFS_ENOMEM;
203    }
204    sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
205
206    PINT_init_msgarray_params(sm_p, parent_ref.fs_id);
207    PINT_init_sysint_credentials(sm_p->cred_p, credentials);
208    sm_p->u.remove.object_name = object_name;
209    sm_p->parent_ref  = parent_ref;
210    sm_p->u.remove.stored_error_code = 0;
211    PVFS_hint_copy(hints, &sm_p->hints);
212    PVFS_hint_add(&sm_p->hints, PVFS_HINT_HANDLE_NAME, sizeof(PVFS_handle), &parent_ref.handle);
213
214    gossip_debug(
215        GOSSIP_CLIENT_DEBUG, "Trying to remove entry %s under %llu,%d\n",
216        object_name, llu(parent_ref.handle), parent_ref.fs_id);
217
218    /* NOTE: This state machine previously multiplied the default job timeout
219     * by five to allow for potentially long sync delays.  We instead now set
220     * the default client BMI timeout higher for all operations: if a sync can
221     * go slow then so can any other arbitrary operation queued behind it. -PHC
222     */
223   
224    return PINT_client_state_machine_post(
225        smcb,  op_id, user_ptr);
226}
227
228/** Remove an object and its directory entry.
229 */
230PVFS_error PVFS_sys_remove(
231    char *object_name,
232    PVFS_object_ref parent_ref,
233    const PVFS_credentials *credentials,
234    PVFS_hint hints)
235{
236    PVFS_error ret = -PVFS_EINVAL, error = 0;
237    PVFS_sys_op_id op_id;
238
239    gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_sys_remove entered\n");
240
241    ret = PVFS_isys_remove(object_name, parent_ref,
242                           credentials, &op_id, hints, NULL);
243    if (ret)
244    {
245        PVFS_perror_gossip("PVFS_isys_remove call", ret);
246        error = ret;
247    }
248    else
249    {
250        ret = PVFS_sys_wait(op_id, "remove", &error);
251        if (ret)
252        {
253            PVFS_perror_gossip("PVFS_sys_wait call", ret);
254            error = ret;
255        }
256    }
257
258    PINT_sys_release(op_id);
259    return error;
260}
261
262
263/****************************************************************/
264
265static PINT_sm_action remove_init(
266        struct PINT_smcb *smcb, job_status_s *js_p)
267{
268    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
269
270    gossip_debug(GOSSIP_CLIENT_DEBUG, "remove state: init\n");
271
272    assert(js_p->error_code == 0);
273
274    PINT_SM_GETATTR_STATE_FILL(
275        sm_p->getattr,
276        sm_p->parent_ref,
277        PVFS_ATTR_COMMON_ALL|PVFS_ATTR_DIR_DIRENT_FILES,
278        PVFS_TYPE_DIRECTORY,
279        0);
280
281    return SM_ACTION_COMPLETE;
282}
283
284static PINT_sm_action remove_rmdirent_setup_msgpair(
285        struct PINT_smcb *smcb, job_status_s *js_p)
286{
287    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
288    int ret = -PVFS_EINVAL;
289    PINT_sm_msgpair_state *msg_p = NULL;
290
291    /* NOTE: we remove the dirent first because this gets the object
292     * out of the system fastest.
293     */
294    gossip_debug(GOSSIP_CLIENT_DEBUG,
295                 "remove state: rmdirent_setup_msgpair\n");
296
297    js_p->error_code = 0;
298
299    PINT_msgpair_init(&sm_p->msgarray_op);
300    msg_p = &sm_p->msgarray_op.msgpair;
301
302    /* TODO: Need to find the correct dirent_handle */
303    PINT_SERVREQ_RMDIRENT_FILL(
304        msg_p->req,
305        *sm_p->cred_p,
306        sm_p->parent_ref.fs_id,
307        sm_p->parent_ref.handle,
308        sm_p->getattr.attr.u.dir.dirent_handle[0],
309        sm_p->u.remove.object_name,
310        sm_p->hints);
311
312    gossip_debug(GOSSIP_REMOVE_DEBUG, "- doing RMDIRENT on %s "
313                 "under %llu,%d\n", sm_p->u.remove.object_name,
314                 llu(sm_p->parent_ref.handle),
315                 sm_p->parent_ref.fs_id);
316
317    msg_p->fs_id = sm_p->parent_ref.fs_id;
318    msg_p->handle = sm_p->parent_ref.handle;
319    msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY;
320    msg_p->comp_fn = remove_rmdirent_comp_fn;
321
322    ret = PINT_cached_config_map_to_server(
323        &msg_p->svr_addr, msg_p->handle, msg_p->fs_id);
324
325    if (ret)
326    {
327        gossip_err("Failed to map meta server address\n");
328        js_p->error_code = ret;
329    }
330
331    PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op);
332    return SM_ACTION_COMPLETE;
333}
334
335static PINT_sm_action remove_getattr_setup(
336        struct PINT_smcb *smcb, job_status_s *js_p)
337{
338    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
339
340    gossip_debug(GOSSIP_CLIENT_DEBUG,
341                 "remove state: getattr_setup\n");
342
343    sm_p->u.remove.stored_error_code = js_p->error_code;
344    js_p->error_code = 0;
345
346    PINT_SM_GETATTR_STATE_FILL(
347        sm_p->getattr,
348        sm_p->object_ref,
349        PVFS_ATTR_DIR_DIRENT_FILES,
350        PVFS_TYPE_DIRECTORY,
351        0);
352
353    return SM_ACTION_COMPLETE;
354}
355
356static PINT_sm_action remove_crdirent_setup_msgpair(
357        struct PINT_smcb *smcb, job_status_s *js_p)
358{
359    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
360    PVFS_object_attr *attr = NULL;
361    int ret = -PVFS_EINVAL;
362    PINT_sm_msgpair_state *msg_p = NULL;
363
364    gossip_debug(GOSSIP_CLIENT_DEBUG,
365                 "remove state: crdirent_setup_msgpair\n");
366
367    sm_p->u.remove.stored_error_code = js_p->error_code;
368
369    js_p->error_code = 0;
370
371    attr = &sm_p->getattr.attr;
372    assert(attr);
373
374    /* TODO: Need to find the correct dirent_handle */
375    gossip_debug(GOSSIP_CLIENT_DEBUG, "dirent_handle: %llu\n",
376        llu(attr->u.dir.dirent_handle[0]));
377 
378    PINT_msgpair_init(&sm_p->msgarray_op);
379    msg_p = &sm_p->msgarray_op.msgpair;
380
381    PINT_SERVREQ_CRDIRENT_FILL(
382        msg_p->req,
383        *sm_p->cred_p,
384        sm_p->u.remove.object_name,
385        sm_p->object_ref.handle,
386        sm_p->parent_ref.handle,
387        attr->u.dir.dirent_handle[0],
388        sm_p->parent_ref.fs_id,
389        sm_p->hints);
390
391    gossip_debug(GOSSIP_REMOVE_DEBUG, "- doing CRDIRENT of %s (%llu,%d) "
392                 "under %llu,%d\n", sm_p->u.remove.object_name,
393                 llu(sm_p->object_ref.handle),
394                 sm_p->object_ref.fs_id,
395                 llu(sm_p->parent_ref.handle),
396                 sm_p->parent_ref.fs_id);
397
398    msg_p->fs_id = sm_p->parent_ref.fs_id;
399    msg_p->handle = sm_p->parent_ref.handle;
400    msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY;
401    msg_p->comp_fn = remove_crdirent_comp_fn;
402
403    ret = PINT_cached_config_map_to_server(
404        &msg_p->svr_addr, msg_p->handle, msg_p->fs_id);
405
406    if (ret)
407    {
408        gossip_err("Failed to map meta server address\n");
409        js_p->error_code = ret;
410    }
411
412    PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op);
413    return SM_ACTION_COMPLETE;
414}
415
416static int remove_rmdirent_comp_fn(
417    void *v_p,
418    struct PVFS_server_resp *resp_p,
419    int index)
420{
421    PINT_smcb *smcb = v_p;
422    PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM);
423
424    assert(resp_p->op == PVFS_SERV_RMDIRENT);
425
426    if (resp_p->status == 0)
427    {
428        assert(resp_p->u.rmdirent.entry_handle != PVFS_HANDLE_NULL);
429        assert(sm_p->parent_ref.fs_id != PVFS_FS_ID_NULL);
430
431        /* pull handle out of response, also copy in fs_id from before */
432        sm_p->object_ref.handle = resp_p->u.rmdirent.entry_handle;
433        sm_p->object_ref.fs_id  = sm_p->parent_ref.fs_id;
434
435        gossip_debug(
436            GOSSIP_CLIENT_DEBUG,
437            "  remove_rmdirent_comp_fn: metafile handle = %llu\n",
438            llu(sm_p->object_ref.handle));
439    }
440    return resp_p->status;
441}
442
443static int remove_crdirent_comp_fn(
444    void *v_p,
445    struct PVFS_server_resp *resp_p,
446    int index)
447{
448    assert(resp_p->op == PVFS_SERV_CRDIRENT);
449
450    if (resp_p->status == 0)
451    {
452        gossip_debug(GOSSIP_CLIENT_DEBUG,
453                     "  remove_crdirent_comp_fn: OK\n");
454    }
455    return resp_p->status;
456}
457
458static PINT_sm_action remove_check_error_code(
459        struct PINT_smcb *smcb, job_status_s *js_p)
460{
461    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
462    char buf[64] = {0};
463
464    PVFS_strerror_r(js_p->error_code, buf, 64);
465    gossip_debug(GOSSIP_REMOVE_DEBUG, "remove_check_error_code got %s "
466                 "(%d)\n", buf, js_p->error_code);
467    /*
468      separate error codes which are ok to return to the caller at
469      this point without trying to undo what we've done and retry the
470      removal
471    */
472    switch(js_p->error_code)
473    {
474        case -PVFS_ENOTEMPTY:
475            /* let the error pass through for the crdirent cleanup */
476            break;
477        case -PVFS_EINVAL:
478        case -PVFS_ENOMEM:
479            /* don't undo after these errors */
480            sm_p->u.remove.stored_error_code = js_p->error_code;
481            js_p->error_code = RETURN_STORED_ERROR_CODE;
482            break;
483    }
484    return SM_ACTION_COMPLETE;
485}
486
487static PINT_sm_action remove_cleanup(
488        struct PINT_smcb *smcb, job_status_s *js_p)
489{
490    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
491    gossip_debug(GOSSIP_CLIENT_DEBUG, "remove state: cleanup\n");
492
493    sm_p->error_code = (sm_p->u.remove.stored_error_code ?
494                        sm_p->u.remove.stored_error_code :
495                        js_p->error_code);
496
497    /* NOTE: acache is invalidated by remove_helper now */
498   
499    /* The ncache invalidate must be done from this function, because the
500     * remove_helper may not  have all the information needed
501     */
502    PINT_ncache_invalidate((const char*) sm_p->u.remove.object_name,
503                           (const PVFS_object_ref*) &(sm_p->parent_ref));
504
505    PINT_msgpairarray_destroy(&sm_p->msgarray_op);
506    PINT_SM_GETATTR_STATE_CLEAR(sm_p->getattr);
507
508    PINT_SET_OP_COMPLETE;
509    return SM_ACTION_TERMINATE;
510}
511
512static PINT_sm_action remove_generic_timer(
513        struct PINT_smcb *smcb, job_status_s *js_p)
514{
515    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
516    int ret = -PVFS_EINVAL;
517    job_id_t tmp_id;
518
519    gossip_debug(GOSSIP_CLIENT_DEBUG, "remove state: generic_timer\n");
520
521    ret = job_req_sched_post_timer(
522        sm_p->msgarray_op.params.retry_delay, smcb, 0, js_p, &tmp_id,
523        pint_client_sm_context);
524
525    return ret;
526}
527
528static PINT_sm_action remove_crdirent_retry_or_fail(
529        struct PINT_smcb *smcb, job_status_s *js_p)
530{
531    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
532    gossip_debug(GOSSIP_CLIENT_DEBUG,
533                 "remove state: crdirent_retry_or_fail\n");
534
535    /* try again (up to a point) if we get a comm. failure. */
536    if ((PVFS_ERROR_CLASS(-js_p->error_code) == PVFS_ERROR_BMI) &&
537        (sm_p->u.remove.retry_count < sm_p->msgarray_op.params.retry_limit))
538    {
539        sm_p->u.remove.retry_count++;
540        js_p->error_code = CRDIRENT_RETRY;
541        return SM_ACTION_COMPLETE;
542    }
543
544    if ((js_p->error_code == -PVFS_EEXIST) &&
545        (sm_p->u.remove.retry_count > 0))
546    {
547        /* assume everything worked out ok and we got the right
548         * directory entry back.  there was just a transient network
549         * problem along the way
550         */
551        js_p->error_code = 0;
552        return SM_ACTION_COMPLETE;
553    }
554
555    sm_p->u.remove.stored_error_code = js_p->error_code;
556
557    gossip_err("Error: failed to replace directory during remove recovery: entry %s for object %llu.\n",
558        sm_p->u.remove.object_name,
559        llu(sm_p->object_ref.handle));
560    PVFS_perror_gossip("crdirent", js_p->error_code);
561
562    PRINT_REMOVE_WARNING();
563    return SM_ACTION_COMPLETE;
564}
565
566
567static PINT_sm_action remove_rmdirent_retry_or_fail(
568        struct PINT_smcb *smcb, job_status_s *js_p)
569{
570    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
571    gossip_debug(GOSSIP_CLIENT_DEBUG,
572                 "remove state: rmdirent_retry_or_fail\n");
573
574    /* try again (up to a point) if we get a comm. failure. */
575    if ((PVFS_ERROR_CLASS(-js_p->error_code) == PVFS_ERROR_BMI) &&
576        (sm_p->u.remove.retry_count < sm_p->msgarray_op.params.retry_limit))
577    {
578        sm_p->u.remove.retry_count++;
579        js_p->error_code = RMDIRENT_RETRY;
580        return SM_ACTION_COMPLETE;
581    }
582
583    if ((js_p->error_code == -PVFS_ENOENT) &&
584        (sm_p->u.remove.retry_count > 0))
585    {
586        /* this is a tricky error case.  Server reports ENOENT, but
587         * this is not the first time we attempted the rmdirent.  It
588         * may be the case that it is reporting ENOENT because one of
589         * the earlier retries possibly completed.  We will treat this
590         * as success, but put out an error message.  This could
591         * strand objects, or remove non-empty directories, for
592         * example.
593         */
594        gossip_err("Warning: Received ENOENT on retry to remove entry %s.\n",
595            sm_p->u.remove.object_name);
596
597        PRINT_REMOVE_WARNING();
598        js_p->error_code = -PVFS_ENOENT;
599        return SM_ACTION_COMPLETE;
600    }
601
602    /* other errors are preserved and passed along to the next state */
603    return SM_ACTION_COMPLETE;
604}
605
606/*
607 * Local variables:
608 *  mode: c
609 *  c-indent-level: 4
610 *  c-basic-offset: 4
611 * End:
612 *
613 * vim: ft=c ts=8 sts=4 sw=4 expandtab
614 */
Note: See TracBrowser for help on using the browser.