root/trunk/src/client/sysint/sys-remove.sm @ 7471

Revision 7471, 15.3 KB (checked in by slang, 5 years ago)

merging hints/events code into trunk.

Line 
1/*
2 * (C) 2003 Clemson University and The University of Chicago
3 *
4 * See COPYING in top-level directory.
5 */
6
7/** \file
8 *  \ingroup sysint
9 *
10 *  PVFS2 system interface routines for removing an object and its
11 *  associated directory entry.
12 */
13
14#include <string.h>
15#include <assert.h>
16
17#include "client-state-machine.h"
18#include "pvfs2-debug.h"
19#include "job.h"
20#include "gossip.h"
21#include "str-utils.h"
22#include "pint-cached-config.h"
23#include "PINT-reqproto-encode.h"
24#include "ncache.h"
25#include "pvfs2-internal.h"
26
27/*
28  PVFS_{i}sys_remove takes the following steps:
29
30  - rmdirent the entry from the parent directory
31  - getattr on the object to be removed
32  - if the object is a directory
33    - check if the dir is empty
34    - if so, continue
35    - if not, crdirent the previously rmdirent'd
36      entry and return -PVFS_ENOTEMPTY
37  - if the object is a metafile
38    - remove all associated data files
39  - remove the actual object specified
40*/
41
/* Job context defined in another translation unit.  In this file it is
 * passed to PINT_smcb_alloc() when a remove operation is created and to
 * job_req_sched_post_timer() when a retry delay is scheduled.
 */
extern job_context_id pint_client_sm_context;
43
/* Private transition codes.  A state action stores one of these in
 * js_p->error_code to select the matching named transition in
 * pvfs2_client_remove_sm.
 */
enum
{
    RMDIRENT_RETRY           = 1, /* wait, then re-issue the rmdirent */
    CRDIRENT_RETRY           = 2, /* wait, then re-issue the recovery crdirent */
    RETURN_STORED_ERROR_CODE = 3  /* skip recovery and report stored_error_code */
};
50
/* Message-pair completion callbacks; both are defined later in this file and
 * are installed as msg_p->comp_fn by the corresponding setup_msgpair states.
 */
static int remove_rmdirent_comp_fn(
    void *v_p,
    struct PVFS_server_resp *resp_p,
    int index);
static int remove_crdirent_comp_fn(
    void *v_p,
    struct PVFS_server_resp *resp_p,
    int index);
55
/* Emit, via gossip_err(), a two-message warning that an error during remove
 * may have left inconsistent state behind.  The do { } while(0) wrapper makes
 * the expansion behave as a single statement at the call site.
 */
#define PRINT_REMOVE_WARNING()                                        \
do {                                                                  \
    gossip_err("WARNING: PVFS_sys_remove() encountered an error "     \
               "which may lead to\n  inconsistent state.\n");         \
    gossip_err("WARNING: PVFS2 fsck (if available) may be needed.\n");\
} while(0)
62
/*
 * Transition summary for pvfs2_client_remove_sm (declared below):
 *
 *   init -> rmdirent_setup_msgpair -> rmdirent_xfer_msgpair
 *     - on success, do_remove jumps into pvfs2_client_remove_helper_sm
 *     - otherwise rmdirent_retry_or_fail either loops back through
 *       rmdirent_timer (RMDIRENT_RETRY) or goes to cleanup
 *
 *   do_remove -> check_error_code
 *     - success and RETURN_STORED_ERROR_CODE go straight to cleanup
 *     - any other result starts recovery at crdirent_setup_msgpair
 *
 *   crdirent_setup_msgpair -> crdirent_xfer_msgpair
 *     - on success goes to cleanup
 *     - otherwise crdirent_retry_or_fail either loops back through
 *       crdirent_timer (CRDIRENT_RETRY) or goes to cleanup
 *
 *   cleanup -> terminate
 */
%%

machine pvfs2_client_remove_sm
{
    state init
    {
        run remove_init;
        default => rmdirent_setup_msgpair;
    }

    state rmdirent_setup_msgpair
    {
        run remove_rmdirent_setup_msgpair;
        success => rmdirent_xfer_msgpair;
        default => rmdirent_retry_or_fail;
    }

    state rmdirent_xfer_msgpair
    {
        jump pvfs2_msgpairarray_sm;
        success => do_remove;
        default => rmdirent_retry_or_fail;
    }

    state rmdirent_retry_or_fail
    {
        run remove_rmdirent_retry_or_fail;
        RMDIRENT_RETRY => rmdirent_timer;
        default => cleanup;
    }

    state do_remove
    {
        jump pvfs2_client_remove_helper_sm;
        default => check_error_code;
    }

    state check_error_code
    {
        run remove_check_error_code;
        success => cleanup;
        RETURN_STORED_ERROR_CODE => cleanup;
        default => crdirent_setup_msgpair;
    }

    state crdirent_timer
    {
        run remove_generic_timer;
        default => crdirent_setup_msgpair;
    }

    state rmdirent_timer
    {
        run remove_generic_timer;
        default => rmdirent_setup_msgpair;
    }

    state crdirent_setup_msgpair
    {
        run remove_crdirent_setup_msgpair;
        success => crdirent_xfer_msgpair;
        default => crdirent_retry_or_fail;
    }

    state crdirent_xfer_msgpair
    {
        jump pvfs2_msgpairarray_sm;
        success => cleanup;
        default => crdirent_retry_or_fail;
    }

    state crdirent_retry_or_fail
    {
        run remove_crdirent_retry_or_fail;
        CRDIRENT_RETRY => crdirent_timer;
        default => cleanup;
    }

    state cleanup
    {
        run remove_cleanup;
        default => terminate;
    }
}

%%
149
150/** Initiate removal of an object and its directory entry.
151 */
152PVFS_error PVFS_isys_remove(
153    char *object_name,
154    PVFS_object_ref parent_ref,
155    const PVFS_credentials *credentials,
156    PVFS_sys_op_id *op_id,
157    PVFS_hint hints,
158    void *user_ptr)
159{
160    PVFS_error ret = -PVFS_EINVAL;
161    PINT_smcb *smcb = NULL;
162    PINT_client_sm *sm_p = NULL;
163
164    gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_isys_remove entered\n");
165
166    if ((parent_ref.handle == PVFS_HANDLE_NULL) ||
167        (parent_ref.fs_id == PVFS_FS_ID_NULL) ||
168        (object_name == NULL))
169    {
170        gossip_err("invalid (NULL) required argument\n");
171        return ret;
172    }
173
174    PINT_smcb_alloc(&smcb, PVFS_SYS_REMOVE,
175             sizeof(struct PINT_client_sm),
176             client_op_state_get_machine,
177             client_state_machine_terminate,
178             pint_client_sm_context);
179    if (smcb == NULL)
180    {
181        return -PVFS_ENOMEM;
182    }
183    sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
184
185    PINT_init_msgarray_params(sm_p, parent_ref.fs_id);
186    PINT_init_sysint_credentials(sm_p->cred_p, credentials);
187    sm_p->u.remove.object_name = object_name;
188    sm_p->parent_ref  = parent_ref;
189    sm_p->u.remove.stored_error_code = 0;
190    PVFS_hint_copy(hints, &sm_p->hints);
191    PVFS_hint_add(&sm_p->hints, PVFS_HINT_HANDLE_NAME, sizeof(PVFS_handle), &parent_ref.handle);
192
193    gossip_debug(
194        GOSSIP_CLIENT_DEBUG, "Trying to remove entry %s under %llu,%d\n",
195        object_name, llu(parent_ref.handle), parent_ref.fs_id);
196
197    /* NOTE: This state machine previously multiplied the default job timeout
198     * by five to allow for potentially long sync delays.  We instead now set
199     * the default client BMI timeout higher for all operations: if a sync can
200     * go slow then so can any other arbitrary operation queued behind it. -PHC
201     */
202   
203    return PINT_client_state_machine_post(
204        smcb,  op_id, user_ptr);
205}
206
207/** Remove an object and its directory entry.
208 */
209PVFS_error PVFS_sys_remove(
210    char *object_name,
211    PVFS_object_ref parent_ref,
212    const PVFS_credentials *credentials,
213    PVFS_hint hints)
214{
215    PVFS_error ret = -PVFS_EINVAL, error = 0;
216    PVFS_sys_op_id op_id;
217
218    gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_sys_remove entered\n");
219
220    ret = PVFS_isys_remove(object_name, parent_ref,
221                           credentials, &op_id, hints, NULL);
222    if (ret)
223    {
224        PVFS_perror_gossip("PVFS_isys_remove call", ret);
225        error = ret;
226    }
227    else
228    {
229        ret = PVFS_sys_wait(op_id, "remove", &error);
230        if (ret)
231        {
232            PVFS_perror_gossip("PVFS_sys_wait call", ret);
233            error = ret;
234        }
235    }
236
237    PINT_sys_release(op_id);
238    return error;
239}
240
241
242/****************************************************************/
243
244static PINT_sm_action remove_init(
245        struct PINT_smcb *smcb, job_status_s *js_p)
246{
247    gossip_debug(GOSSIP_CLIENT_DEBUG, "remove state: init\n");
248
249    assert(js_p->error_code == 0);
250    return SM_ACTION_COMPLETE;
251}
252
253static PINT_sm_action remove_rmdirent_setup_msgpair(
254        struct PINT_smcb *smcb, job_status_s *js_p)
255{
256    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
257    int ret = -PVFS_EINVAL;
258    PINT_sm_msgpair_state *msg_p = NULL;
259
260    /* NOTE: we remove the dirent first because this gets the object
261     * out of the system fastest.
262     */
263    gossip_debug(GOSSIP_CLIENT_DEBUG,
264                 "remove state: rmdirent_setup_msgpair\n");
265
266    js_p->error_code = 0;
267
268    PINT_msgpair_init(&sm_p->msgarray_op);
269    msg_p = &sm_p->msgarray_op.msgpair;
270
271    PINT_SERVREQ_RMDIRENT_FILL(
272        msg_p->req,
273        *sm_p->cred_p,
274        sm_p->parent_ref.fs_id,
275        sm_p->parent_ref.handle,
276        sm_p->u.remove.object_name,
277        sm_p->hints);
278
279    gossip_debug(GOSSIP_REMOVE_DEBUG, "- doing RMDIRENT on %s "
280                 "under %llu,%d\n", sm_p->u.remove.object_name,
281                 llu(sm_p->parent_ref.handle),
282                 sm_p->parent_ref.fs_id);
283
284    msg_p->fs_id = sm_p->parent_ref.fs_id;
285    msg_p->handle = sm_p->parent_ref.handle;
286    msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY;
287    msg_p->comp_fn = remove_rmdirent_comp_fn;
288
289    ret = PINT_cached_config_map_to_server(
290        &msg_p->svr_addr, msg_p->handle, msg_p->fs_id);
291
292    if (ret)
293    {
294        gossip_err("Failed to map meta server address\n");
295        js_p->error_code = ret;
296    }
297
298    PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op);
299    return SM_ACTION_COMPLETE;
300}
301
302static PINT_sm_action remove_crdirent_setup_msgpair(
303        struct PINT_smcb *smcb, job_status_s *js_p)
304{
305    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
306    int ret = -PVFS_EINVAL;
307    PINT_sm_msgpair_state *msg_p = NULL;
308
309    gossip_debug(GOSSIP_CLIENT_DEBUG,
310                 "remove state: crdirent_setup_msgpair\n");
311
312    sm_p->u.remove.stored_error_code = js_p->error_code;
313
314    js_p->error_code = 0;
315
316    PINT_msgpair_init(&sm_p->msgarray_op);
317    msg_p = &sm_p->msgarray_op.msgpair;
318
319    PINT_SERVREQ_CRDIRENT_FILL(
320        msg_p->req,
321        *sm_p->cred_p,
322        sm_p->u.remove.object_name,
323        sm_p->object_ref.handle,
324        sm_p->parent_ref.handle,
325        sm_p->parent_ref.fs_id,
326        sm_p->hints);
327
328    gossip_debug(GOSSIP_REMOVE_DEBUG, "- doing CRDIRENT of %s (%llu,%d) "
329                 "under %llu,%d\n", sm_p->u.remove.object_name,
330                 llu(sm_p->object_ref.handle),
331                 sm_p->object_ref.fs_id,
332                 llu(sm_p->parent_ref.handle),
333                 sm_p->parent_ref.fs_id);
334
335    msg_p->fs_id = sm_p->parent_ref.fs_id;
336    msg_p->handle = sm_p->parent_ref.handle;
337    msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY;
338    msg_p->comp_fn = remove_crdirent_comp_fn;
339
340    ret = PINT_cached_config_map_to_server(
341        &msg_p->svr_addr, msg_p->handle, msg_p->fs_id);
342
343    if (ret)
344    {
345        gossip_err("Failed to map meta server address\n");
346        js_p->error_code = ret;
347    }
348
349    PINT_sm_push_frame(smcb, 0, &sm_p->msgarray_op);
350    return SM_ACTION_COMPLETE;
351}
352
353static int remove_rmdirent_comp_fn(
354    void *v_p,
355    struct PVFS_server_resp *resp_p,
356    int index)
357{
358    PINT_smcb *smcb = v_p;
359    PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_MSGPAIR_PARENT_SM);
360
361    assert(resp_p->op == PVFS_SERV_RMDIRENT);
362
363    if (resp_p->status == 0)
364    {
365        assert(resp_p->u.rmdirent.entry_handle != PVFS_HANDLE_NULL);
366        assert(sm_p->parent_ref.fs_id != PVFS_FS_ID_NULL);
367
368        /* pull handle out of response, also copy in fs_id from before */
369        sm_p->object_ref.handle = resp_p->u.rmdirent.entry_handle;
370        sm_p->object_ref.fs_id  = sm_p->parent_ref.fs_id;
371
372        gossip_debug(
373            GOSSIP_CLIENT_DEBUG,
374            "  remove_rmdirent_comp_fn: metafile handle = %llu\n",
375            llu(sm_p->object_ref.handle));
376    }
377    return resp_p->status;
378}
379
380static int remove_crdirent_comp_fn(
381    void *v_p,
382    struct PVFS_server_resp *resp_p,
383    int index)
384{
385    assert(resp_p->op == PVFS_SERV_CRDIRENT);
386
387    if (resp_p->status == 0)
388    {
389        gossip_debug(GOSSIP_CLIENT_DEBUG,
390                     "  remove_crdirent_comp_fn: OK\n");
391    }
392    return resp_p->status;
393}
394
395static PINT_sm_action remove_check_error_code(
396        struct PINT_smcb *smcb, job_status_s *js_p)
397{
398    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
399    char buf[64] = {0};
400
401    PVFS_strerror_r(js_p->error_code, buf, 64);
402    gossip_debug(GOSSIP_REMOVE_DEBUG, "remove_check_error_code got %s "
403                 "(%d)\n", buf, js_p->error_code);
404    /*
405      separate error codes which are ok to return to the caller at
406      this point without trying to undo what we've done and retry the
407      removal
408    */
409    switch(js_p->error_code)
410    {
411        case -PVFS_ENOTEMPTY:
412            /* let the error pass through for the crdirent cleanup */
413            break;
414        case -PVFS_EINVAL:
415        case -PVFS_ENOMEM:
416            /* don't undo after these errors */
417            sm_p->u.remove.stored_error_code = js_p->error_code;
418            js_p->error_code = RETURN_STORED_ERROR_CODE;
419            break;
420    }
421    return SM_ACTION_COMPLETE;
422}
423
424static PINT_sm_action remove_cleanup(
425        struct PINT_smcb *smcb, job_status_s *js_p)
426{
427    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
428    gossip_debug(GOSSIP_CLIENT_DEBUG, "remove state: cleanup\n");
429
430    sm_p->error_code = (sm_p->u.remove.stored_error_code ?
431                        sm_p->u.remove.stored_error_code :
432                        js_p->error_code);
433
434    /* NOTE: acache is invalidated by remove_helper now */
435   
436    /* The ncache invalidate must be done from this function, because the
437     * remove_helper may not  have all the information needed
438     */
439    PINT_ncache_invalidate((const char*) sm_p->u.remove.object_name,
440                           (const PVFS_object_ref*) &(sm_p->parent_ref));
441
442    PINT_msgpairarray_destroy(&sm_p->msgarray_op);
443
444    PINT_SET_OP_COMPLETE;
445    return SM_ACTION_TERMINATE;
446}
447
448static PINT_sm_action remove_generic_timer(
449        struct PINT_smcb *smcb, job_status_s *js_p)
450{
451    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
452    int ret = -PVFS_EINVAL;
453    job_id_t tmp_id;
454
455    gossip_debug(GOSSIP_CLIENT_DEBUG, "remove state: generic_timer\n");
456
457    ret = job_req_sched_post_timer(
458        sm_p->msgarray_op.params.retry_delay, smcb, 0, js_p, &tmp_id,
459        pint_client_sm_context);
460
461    return ret;
462}
463
464static PINT_sm_action remove_crdirent_retry_or_fail(
465        struct PINT_smcb *smcb, job_status_s *js_p)
466{
467    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
468    gossip_debug(GOSSIP_CLIENT_DEBUG,
469                 "remove state: crdirent_retry_or_fail\n");
470
471    /* try again (up to a point) if we get a comm. failure. */
472    if ((PVFS_ERROR_CLASS(-js_p->error_code) == PVFS_ERROR_BMI) &&
473        (sm_p->u.remove.retry_count < sm_p->msgarray_op.params.retry_limit))
474    {
475        sm_p->u.remove.retry_count++;
476        js_p->error_code = CRDIRENT_RETRY;
477        return SM_ACTION_COMPLETE;
478    }
479
480    if ((js_p->error_code == -PVFS_EEXIST) &&
481        (sm_p->u.remove.retry_count > 0))
482    {
483        /* assume everything worked out ok and we got the right
484         * directory entry back.  there was just a transient network
485         * problem along the way
486         */
487        js_p->error_code = 0;
488        return SM_ACTION_COMPLETE;
489    }
490
491    sm_p->u.remove.stored_error_code = js_p->error_code;
492
493    gossip_err("Error: failed to replace directory during remove recovery: entry %s for object %llu.\n",
494        sm_p->u.remove.object_name,
495        llu(sm_p->object_ref.handle));
496    PVFS_perror_gossip("crdirent", js_p->error_code);
497
498    PRINT_REMOVE_WARNING();
499    return SM_ACTION_COMPLETE;
500}
501
502
503static PINT_sm_action remove_rmdirent_retry_or_fail(
504        struct PINT_smcb *smcb, job_status_s *js_p)
505{
506    struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
507    gossip_debug(GOSSIP_CLIENT_DEBUG,
508                 "remove state: rmdirent_retry_or_fail\n");
509
510    /* try again (up to a point) if we get a comm. failure. */
511    if ((PVFS_ERROR_CLASS(-js_p->error_code) == PVFS_ERROR_BMI) &&
512        (sm_p->u.remove.retry_count < sm_p->msgarray_op.params.retry_limit))
513    {
514        sm_p->u.remove.retry_count++;
515        js_p->error_code = RMDIRENT_RETRY;
516        return SM_ACTION_COMPLETE;
517    }
518
519    if ((js_p->error_code == -PVFS_ENOENT) &&
520        (sm_p->u.remove.retry_count > 0))
521    {
522        /* this is a tricky error case.  Server reports ENOENT, but
523         * this is not the first time we attempted the rmdirent.  It
524         * may be the case that it is reporting ENOENT because one of
525         * the earlier retries possibly completed.  We will treat this
526         * as success, but put out an error message.  This could
527         * strand objects, or remove non-empty directories, for
528         * example.
529         */
530        gossip_err("Warning: Received ENOENT on retry to remove entry %s.\n",
531            sm_p->u.remove.object_name);
532
533        PRINT_REMOVE_WARNING();
534        js_p->error_code = -PVFS_ENOENT;
535        return SM_ACTION_COMPLETE;
536    }
537
538    /* other errors are preserved and passed along to the next state */
539    return SM_ACTION_COMPLETE;
540}
541
542/*
543 * Local variables:
544 *  mode: c
545 *  c-indent-level: 4
546 *  c-basic-offset: 4
547 * End:
548 *
549 * vim: ft=c ts=8 sts=4 sw=4 expandtab
550 */
Note: See TracBrowser for help on using the browser.