root/branches/cu-security-branch/examples/heartbeat/hardware-specific/Filesystem-qla-monitor @ 8397

Revision 8397, 23.7 KB (checked in by nlmills, 3 years ago)

initial merge with Orange-Branch. much will be broken

Line 
1#!/bin/sh
2#
3# Support:      linux-ha@lists.linux-ha.org
4# License:      GNU General Public License (GPL)
5#
6# Filesystem
7#      Description: Manages a Filesystem on a shared storage medium.
8#  Original Author: Eric Z. Ayers (eric.ayers@compgen.com)
9# Original Release: 25 Oct 2000
10#
11# Modified to support monitoring of a QLogic adapter, 2007
12# Relies on underlying scripts named fs-instance-alarm.pl and
13# fs-power-control.pl to take action in the event of monitoring failure
14#
15# usage: ./Filesystem {start|stop|status|monitor|validate-all|meta-data}
16#
17#         OCF parameters are as below:
18#               OCF_RESKEY_device
19#               OCF_RESKEY_directory
20#               OCF_RESKEY_fstype
21#               OCF_RESKEY_options
22#               OCF_RESKEY_fsname
23#               OCF_RESKEY_conf_dir
24#
25#OCF_RESKEY_device    : name of block device for the filesystem. e.g. /dev/sda1, /dev/md0
26#                       Or a -U or -L option for mount, or an NFS mount specification
27#OCF_RESKEY_directory : the mount point for the filesystem
28#OCF_RESKEY_fstype    : optional name of the filesystem type. e.g. ext2
29#OCF_RESKEY_options   : options to be given to the mount command via -o
30#OCF_RESKEY_fsname    : file system name (PVFS2)
31#OCF_RESKEY_conf_dir  : file system conf directory (PVFS2)
32#
33#
34# An example usage in /etc/ha.d/haresources:
35#       node1  10.0.0.170 Filesystem::/dev/sda1::/data1::ext2
36#  or
37#       node1  10.0.0.170 Filesystem::-Ldata1::/data1::ext2
38#  or
39#       node1  10.0.0.170 Filesystem::server:/data1::/data1::nfs::ro
40#
41# This assumes you want to manage a filesystem on a shared (scsi) bus.
42# Do not put this filesystem in /etc/fstab.  This script manages all of
43# that for you.
44#
45# If you are interested in High Availability, you will probably also want
46# some sort of external hardware RAID controller in front of the actual
47# disks.  I don't mean a RAID controller embedded in the host controller -
48# it has to be an external controller.
49#
50# It can also be an internal RAID controller if the controller supports
51# failover.  IBM's ServeRAID controller does this, and it automatically
52# prohibits concurrent access too, so it's pretty cool in this application.
53#
54# There is a script for software RAID-1 included in this directory.  Right
55# now, I wouldn't recommend using software RAID (see notes in the Raid1 script)
56#
57# NOTE: There is no locking (such as a SCSI reservation) being done here.
58#       I would if the SCSI driver could properly maintain the reservation,
59#       which it cannot, even with the 'scsi reservation' patch submitted
60#       earlier this year by James Bottomley.  The patch minimizes the
61#       bus resets caused by a RESERVATION_CONFLICT return, and helps the
62#       reservation stay when 2 nodes contend for a reservation,
63#       but it does not attempt to recover the reservation in the
64#       case of a bus reset. 
65#
66#       What all this means is that if 2 nodes mount the same file system
67#       read-write, the filesystem is going to become corrupted.
68#
69#       As a result, you should use this together with the stonith option
70#       and redundant, independent communications paths.
71#
72#       If you don't do this, don't blame us when you scramble your disk.
73#
74#       Note:  the ServeRAID controller does prohibit concurrent access
75#       In this case, you don't actually need STONITH, but redundant comm is
76#       still an excellent idea.
77#
78
79#######################################################################
80# Initialization:
81
82# newer versions of heartbeat have moved the ocf-shellfuncs  file
83if [ -f /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs ] ; then
84. /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs
85else
86. /usr/lib/heartbeat/ocf-shellfuncs
87fi
88
89#######################################################################
90
91# Utilities used by this script
92MODPROBE=/sbin/modprobe
93FSCK=/sbin/fsck
94FUSER=/sbin/fuser
95MOUNT=/bin/mount
96UMOUNT=/bin/umount
97BLOCKDEV=/sbin/blockdev
98
# Verify that the utility given as $1 exists and is executable.
# On failure an error is logged and the whole agent exits with
# $OCF_ERR_GENERIC; on success the function simply returns 0.
check_util () {
    [ -x "$1" ] && return 0

    ocf_log err "Setup problem: Couldn't find utility $1"
    exit $OCF_ERR_GENERIC
}
105
# Print the one-line synopsis of the supported actions on stdout.
#
# Written with echo instead of an indented "<<-" here-document: "<<-" strips
# only leading TAB characters, so a space-indented body would leak its
# indentation into the output and a space-indented terminator would never be
# recognised at all.
usage() {
    echo "usage: $0 {start|stop|status|monitor|validate-all|meta-data}"
}
111
# Emit the OCF resource-agent meta-data (XML) on stdout.
# The here-document delimiter is quoted because the payload is static text:
# nothing in it is meant to be expanded by the shell.
meta_data() {
    cat <<'END'
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Filesystem">
<version>1.0</version>

<longdesc lang="en">
Resource script for Filesystem. It manages a Filesystem on a shared storage medium.
</longdesc>
<shortdesc lang="en">Filesystem resource agent</shortdesc>

<parameters>
<parameter name="device" unique="0" required="1">
<longdesc lang="en">
The name of block device for the filesystem, or -U, -L options for mount, or NFS mount specification.
</longdesc>
<shortdesc lang="en">block device</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="directory" unique="0" required="1">
<longdesc lang="en">
The mount point for the filesystem.
</longdesc>
<shortdesc lang="en">mount point</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="fsname" unique="0" required="1">
<longdesc lang="en">
Name of PVFS2 file system
</longdesc>
<shortdesc lang="en">File system</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="conf_dir" unique="0" required="1">
<longdesc lang="en">
Path to file system conf directory
</longdesc>
<shortdesc lang="en">File system conf directory</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="fstype" unique="0" required="1">
<longdesc lang="en">
The optional type of filesystem to be mounted.
</longdesc>
<shortdesc lang="en">filesystem type</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="options" unique="0">
<longdesc lang="en">
Any extra options to be given as -o options to mount.
</longdesc>
<shortdesc lang="en">options</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="ocfs2_cluster" unique="0">
<longdesc lang="en">
The name (UUID) of the OCFS2 cluster this filesystem is part of,
iff this is an OCFS2 resource and there's more than one cluster. You
should not need to specify this.
</longdesc>
<shortdesc lang="en">OCFS2 cluster name/UUID</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="ocfs2_configfs" unique="0">
<longdesc lang="en">
Mountpoint of the cluster hierarchy below configfs. You should not
need to specify this.
</longdesc>
<shortdesc lang="en">OCFS2 configfs root</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>

<actions>
<action name="start" timeout="60" />
<action name="stop" timeout="60" />
<action name="notify" timeout="60" />
<action name="status" depth="0" timeout="10" interval="10" start-delay="10" />
<action name="monitor" depth="0" timeout="10" interval="10" start-delay="10" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
205
#
#       Ask the kernel to drop its buffers for the device given as $1.
#       Intended to be called after unmounting and before mounting.
#       It may not be necessary in 2.4 and later kernels, but it shouldn't
#       hurt anything either...
#
#       The flush is skipped (and 0 returned) unless $blockdevice is "yes"
#       and $BLOCKDEV names an executable; otherwise the exit status of
#       "$BLOCKDEV --flushbufs" is returned.
#
flushbufs() {
  [ "$blockdevice" = "yes" ] || return 0
  [ "$BLOCKDEV" != "" ] || return 0
  [ -x "$BLOCKDEV" ] || return 0

  $BLOCKDEV --flushbufs $1
  return $?
}
224
# Print the current mounts, one per line, as "dev mountpoint fstype".
# /proc/mounts is preferred when it is a readable regular file; otherwise
# the output of the portable mount command ($MOUNT) is reduced to the same
# three columns.
list_mounts() {
  if [ -f "/proc/mounts" ] && [ -r "/proc/mounts" ]; then
    cut -d ' ' -f 1-3 /proc/mounts
    return
  fi

  $MOUNT | cut -d ' ' -f 1,3,5
}
234
# Resolve the real device behind $MOUNTPOINT and record whether it is a
# block device.
#
# Reads:  $blockdevice, $FSTYPE, $MOUNTPOINT
# Writes: $DEVICE (replaced by what the mount table reports for $MOUNTPOINT,
#         for non-network filesystem types) and $blockdevice (set to "yes"
#         when that device is a block special file).
#
# Nothing is done when $blockdevice is already "yes" or when $FSTYPE is one
# of the network filesystems.
determine_blockdevice() {
  # "=" rather than "==": this script runs under /bin/sh, where "==" is not
  # a valid test operator.  The operand is quoted so an empty value cannot
  # turn the test into a syntax error.
  if [ "$blockdevice" = "yes" ]; then
    return
  fi

  # Get the current real device name, if possible.
  # (specified devname could be -L or -U...)
  case "$FSTYPE" in
    nfs|smbfs|cifs) ;;
    *)  DEVICE=`list_mounts | grep " $MOUNTPOINT " | cut -d' ' -f1`
        if [ -b "$DEVICE" ]; then
          blockdevice=yes
        fi
        ;;
  esac
}
251
# Print the mount points of every filesystem mounted below the path given
# as $1 (the path itself is excluded), in reverse sort order so that deeper
# mount points are listed before their parents.
list_submounts() {
  below=" $1/"
  list_mounts | grep "$below" | cut -d ' ' -f 2 | sort -r
}
257
# Remove this filesystem's membership directory ($OCFS2_FS_ROOT).
#
# We'll never see the post-stop notification: we're gone now, have
# unmounted, and thus should remove the membership ourselves.  This is done
# regardless of whether we were unmounted already, because the admin might
# have manually unmounted without clearing the membership directory.
ocfs2_cleanup() {
        if [ ! -d "$OCFS2_FS_ROOT" ]; then
                ocf_log info "$OCFS2_FS_ROOT: Filesystem membership already gone."
        else
                ocf_log info "$OCFS2_FS_ROOT: Removing membership directory."
                # Quoted: an unquoted path would be word-split and could make
                # rm -rf delete something other than the membership directory.
                rm -rf -- "$OCFS2_FS_ROOT/"
        fi
}
273
# Check and initialise the OCFS2 specific variables.
#
# Reads:  $OCF_RESKEY_CRM_meta_clone_max, $blockdevice, $DEVICE,
#         $OCF_RESKEY_ocfs2_configfs, $OCF_RESKEY_ocfs2_cluster
# Writes: $OCFS2_CONFIGFS, $OCFS2_UUID, $OCFS2_CLUSTER,
#         $OCFS2_CLUSTER_ROOT, $OCFS2_FS_ROOT
#
# Any failed check logs an error and exits the agent with $OCF_ERR_GENERIC.
# Note that the function's positional parameters are overwritten while the
# clusters are being counted.
ocfs2_init()
{
        if [ -z "$OCF_RESKEY_CRM_meta_clone_max" ]; then
                ocf_log err "ocfs2 must be run as a clone."
                exit $OCF_ERR_GENERIC
        fi

        if [ "$blockdevice" = "no" ]; then
                ocf_log err "$DEVICE: ocfs2 needs a block device instead."
                exit $OCF_ERR_GENERIC
        fi

        # First existing candidate wins; the user-supplied location is tried
        # before the two well-known configfs mount points.
        for f in "$OCF_RESKEY_ocfs2_configfs" /sys/kernel/config/cluster /configfs/cluster ; do
                if [ -n "$f" -a -d "$f" ]; then
                        OCFS2_CONFIGFS="$f"
                        break
                fi
        done
        if [ ! -d "$OCFS2_CONFIGFS" ]; then
                ocf_log err "ocfs2 needs configfs mounted."
                exit $OCF_ERR_GENERIC
        fi

        # Third column of the last output line, dashes removed, upper-cased.
        # A result of "UUID" means only the header line was printed.
        OCFS2_UUID=$(mounted.ocfs2 -d "$DEVICE"|tail -1|awk '{print $3}'|tr -d -- -|tr a-z A-Z)
        if [ -z "$OCFS2_UUID" -o "$OCFS2_UUID" = "UUID" ]; then
                ocf_log err "$DEVICE: Could not determine ocfs2 UUID."
                exit $OCF_ERR_GENERIC
        fi

        if [ -n "$OCF_RESKEY_ocfs2_cluster" ]; then
                OCFS2_CLUSTER=$(echo $OCF_RESKEY_ocfs2_cluster | tr a-z A-Z)
        else
                # One name per line: without a separator in the -printf format
                # several cluster names would run together into a single word
                # and the "several clusters" check below could never trigger.
                OCFS2_CLUSTER=$(find "$OCFS2_CONFIGFS" -maxdepth 1 -mindepth 1 -type d -printf '%f\n' 2>/dev/null)
                # Deliberately unquoted: word splitting counts the names.
                set -- $OCFS2_CLUSTER
                local n="$#"
                if [ "$n" -gt 1 ]; then
                        ocf_log err "$OCFS2_CLUSTER: several clusters found."
                        exit $OCF_ERR_GENERIC
                fi
                if [ "$n" -eq 0 ]; then
                        ocf_log err "$OCFS2_CONFIGFS: no clusters found."
                        exit $OCF_ERR_GENERIC
                fi
        fi

        OCFS2_CLUSTER_ROOT="$OCFS2_CONFIGFS/$OCFS2_CLUSTER"
        if [ ! -d "$OCFS2_CLUSTER_ROOT" ]; then
                ocf_log err "$OCFS2_CLUSTER: Cluster doesn't exist. Maybe o2cb hasn't been run?"
                exit $OCF_ERR_GENERIC
        fi

        OCFS2_FS_ROOT=$OCFS2_CLUSTER_ROOT/heartbeat/$OCFS2_UUID
}
328
#
# START: Start up the filesystem
#
# Reads $FSTYPE, $DEVICE, $MOUNTPOINT, $options and $blockdevice.
# Returns $OCF_SUCCESS when the filesystem is (or already was) mounted,
# $OCF_ERR_ARGS when $FSTYPE is not known to the kernel and
# $OCF_ERR_GENERIC when fsck or mount fails.  A missing block device or
# mount point directory makes the agent exit with $OCF_ERR_ARGS.
#
Filesystem_start()
{
        local fsck_rc

        if [ "$FSTYPE" = "ocfs2" ]; then
                # "start" now has the notification data available; that
                # we're being started means we didn't get the
                # pre-notification, because we weren't running, so
                # process the information now first.
                ocf_log info "$OCFS2_UUID: Faking pre-notification on start."
                OCF_RESKEY_CRM_meta_notify_type="pre"
                OCF_RESKEY_CRM_meta_notify_operation="start"
                Filesystem_notify
        fi

        # See if the device is already mounted.
        if Filesystem_status >/dev/null 2>&1 ; then
                ocf_log info "Filesystem $MOUNTPOINT is already mounted."
                return $OCF_SUCCESS
        fi

        # Insert SCSI module
        # TODO: This probably should go away. Why should the filesystem
        # RA magically load a kernel module?
        $MODPROBE scsi_hostadapter >/dev/null 2>&1

        # With no $FSTYPE given we rely on the system already having the
        # right filesystem support.
        if [ -n "$FSTYPE" ]; then
                # Insert Filesystem module
                $MODPROBE $FSTYPE >/dev/null 2>&1
                # Exact match on the name column: a suffix match would accept
                # e.g. "fs" because of "sysfs".
                if ! cut -f2 /proc/filesystems | grep -Fqx -- "$FSTYPE" ; then
                        ocf_log err "Couldn't find filesystem $FSTYPE in /proc/filesystems"
                        return $OCF_ERR_ARGS
                fi
        fi

        # Check the filesystem & auto repair.
        # NOTE: Some filesystem types don't need this step...  Please modify
        #       accordingly
        if [ "$blockdevice" = "yes" ]; then
                if [ ! -b "$DEVICE" ] ; then
                        ocf_log err "Couldn't find device [$DEVICE]. Expected /dev/??? to exist"
                        exit $OCF_ERR_ARGS
                fi

                case "$FSTYPE" in
                ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs|cifs|smbfs|ocfs2)
                        # No fsck for these types.
                        ;;
                *)
                        ocf_log info  "Starting filesystem check on $DEVICE"
                        if [ -z "$FSTYPE" ]; then
                                $FSCK -a "$DEVICE"
                        else
                                $FSCK -t "$FSTYPE" -a "$DEVICE"
                        fi
                        fsck_rc=$?

                        # NOTE: if any errors at all are detected, it returns non-zero
                        # if the error is >= 4 then there is a big problem
                        if [ $fsck_rc -ge 4 ]; then
                                ocf_log err "Couldn't sucessfully fsck filesystem for $DEVICE"
                                return $OCF_ERR_GENERIC
                        fi
                        ;;
                esac
        fi

        if [ ! -d "$MOUNTPOINT" ] ; then
                ocf_log err "Couldn't find directory  [$MOUNTPOINT] to use as a mount point"
                exit $OCF_ERR_ARGS
        fi

        flushbufs $DEVICE
        # Mount the filesystem.
        # $options and $DEVICE stay unquoted on purpose: $options is
        # "-o <opts>" and must split into two words, and $DEVICE may itself
        # be a mount option such as -L or -U.
        if [ -z "$FSTYPE" ]; then
                $MOUNT $options $DEVICE "$MOUNTPOINT"
        else
                $MOUNT -t "$FSTYPE" $options $DEVICE "$MOUNTPOINT"
        fi

        if [ $? -ne 0 ]; then
                ocf_log err "Couldn't mount filesystem $DEVICE on $MOUNTPOINT"
                if [ "$FSTYPE" = "ocfs2" ]; then
                        ocfs2_cleanup
                fi
                return $OCF_ERR_GENERIC
        fi

        return 0
}
# end of Filesystem_start
426
# Process clone notifications; this is the essential glue level for giving
# user-space membership events to a cluster-aware filesystem. Right now,
# only OCFS2 is supported.
#
#  pre-start:  set up all the nodes which will be active in our membership
#              for the filesystem. (For the resource to be started, this
#              happens at the time of the actual 'start' operation.)
#  post-start: nothing to do; ignored.
#  pre-stop:   meaningless; we can't remove any node yet, it first needs to
#              unmount. Ignored.
#  post-stop:  the stopped nodes are removed from the membership.
#
# Note that this expects that the base cluster is already active; ie o2cb
# has been started and populated $OCFS2_CLUSTER_ROOT/node/ already.
#
# Reads the OCF_RESKEY_CRM_meta_notify_* variables, $FSTYPE, $OCFS2_UUID,
# $OCFS2_FS_ROOT and $OCFS2_CLUSTER_ROOT.  Returns $OCF_ERR_GENERIC for a
# non-ocfs2 mount or when the local node is not on the active list,
# $OCF_SUCCESS otherwise (failures to add or remove a single membership
# entry are logged but do not change the result).
Filesystem_notify() {
        if [ "$FSTYPE" != "ocfs2" ]; then
                # One of the cases which shouldn't occur; it should have
                # been caught much earlier. Still, you know ...
                ocf_log err "$DEVICE: Notification received for non-ocfs2 mount."
                return $OCF_ERR_GENERIC
        fi

        local n_type="$OCF_RESKEY_CRM_meta_notify_type"
        local n_op="$OCF_RESKEY_CRM_meta_notify_operation"
        local n_active="$OCF_RESKEY_CRM_meta_notify_active_uname"
        local n_stop="$OCF_RESKEY_CRM_meta_notify_stop_uname"
        local n_start="$OCF_RESKEY_CRM_meta_notify_start_uname"
        # Start from an empty list so a stray value cannot leak in.
        local n_exists=""
        local n_remaining=""
        local n_node n_gone n_keep n_fs n_myself entry

        ocf_log info "$OCFS2_UUID: notify: $n_type for $n_op"
        ocf_log info "$OCFS2_UUID: notify active: $n_active"
        ocf_log info "$OCFS2_UUID: notify stop: $n_stop"
        ocf_log info "$OCFS2_UUID: notify start: $n_start"

        case "$n_type" in
        pre)
                case "$n_op" in
                stop)
                        ocf_log info "$OCFS2_UUID: ignoring pre-notify for stop."
                        return $OCF_SUCCESS
                        ;;
                start)
                        # These are about to become active; prepare to
                        # communicate with them.
                        n_active="$n_active $n_start"
                        ;;
                esac
                ;;
        post)
                case "$n_op" in
                stop)
                        # Drop every uname listed in notify_stop_uname; these
                        # have been stopped and can no longer be considered
                        # active.  Whole names are compared, so stopping
                        # "node1" leaves "node10" alone.
                        for n_node in $n_active; do
                                n_keep=yes
                                for n_gone in $n_stop; do
                                        if [ "$n_node" = "$n_gone" ]; then
                                                n_keep=no
                                                break
                                        fi
                                done
                                if [ "$n_keep" = "yes" ]; then
                                        n_remaining="$n_remaining $n_node"
                                fi
                        done
                        n_active="$n_remaining"
                        ;;
                start)
                        ocf_log info "$OCFS2_UUID: ignoring post-notify for start."
                        return $OCF_SUCCESS
                        ;;
                esac
                ;;
        esac

        ocf_log info "$OCFS2_UUID: post-processed active: $n_active"

        n_myself="${HA_CURHOST:-$(uname -n | tr A-Z a-z)}"
        ocf_log info "$OCFS2_UUID: I am node $n_myself."

        case " $n_active " in
        *" $n_myself "*) ;;
        *)      ocf_log err "$OCFS2_UUID: $n_myself (local) not on active list!"
                return $OCF_ERR_GENERIC
                ;;
        esac

        # (1) Walk the current membership: keep entries that are still
        #     active, remove the rest.
        if [ -d "$OCFS2_FS_ROOT" ]; then
                for entry in "$OCFS2_FS_ROOT"/* ; do
                        # An empty directory leaves the pattern unexpanded;
                        # that is not a node.
                        if [ ! -e "$entry" ] && [ ! -L "$entry" ]; then
                                continue
                        fi
                        n_fs="${entry##*/}"
                        case " $n_active " in
                        *" $n_fs "*)
                                # Construct a list of nodes which are present
                                # already in the membership.
                                n_exists="$n_exists $n_fs"
                                ocf_log info "$OCFS2_UUID: Keeping node: $n_fs"
                                ;;
                        *)
                                # Node is in the membership currently, but not on our
                                # active list. Must be removed.
                                if [ "$n_op" = "start" ]; then
                                        ocf_log warn "$OCFS2_UUID: Removing nodes on start"
                                fi
                                ocf_log info "$OCFS2_UUID: Removing dead node: $n_fs"
                                if ! rm -f "$entry" ; then
                                        ocf_log err "$OCFS2_UUID: Removal of $n_fs failed!"
                                fi
                                ;;
                        esac
                done
        else
                ocf_log info "$OCFS2_UUID: heartbeat directory doesn't exist yet, creating."
                mkdir -p "$OCFS2_FS_ROOT"
        fi

        ocf_log info "$OCFS2_UUID: Existing node list: $n_exists"

        # (2) Link in every active node that is not a member yet.
        for entry in $n_active ; do
                case " $n_exists " in
                *" $entry "*)
                        ocf_log info "$OCFS2_UUID: Already active: $entry"
                        ;;
                *)
                        if [ "$n_op" = "stop" ]; then
                                ocf_log warn "$OCFS2_UUID: Adding nodes on stop"
                        fi
                        ocf_log info "$OCFS2_UUID: Activating node: $entry"
                        if ! ln -s "$OCFS2_CLUSTER_ROOT/node/$entry" "$OCFS2_FS_ROOT/$entry" ; then
                                ocf_log err "$OCFS2_CLUSTER_ROOT/node/$entry: failed to link"
                        fi
                        ;;
                esac
        done

        return $OCF_SUCCESS
}
568
#
# STOP: Unmount the filesystem
#
# Unmounts every filesystem mounted below $MOUNTPOINT and then $MOUNTPOINT
# itself.  Each unmount is tried up to six times; after a failed attempt
# the processes using the mount are signalled (SIGTERM three times, then
# SIGKILL) before the next try.  Afterwards the device buffers are flushed
# and, for ocfs2, the membership directory is removed.
#
# Returns $OCF_SUCCESS when nothing was mounted, otherwise the result of
# the last unmount attempted ($MOUNTPOINT is processed last).
#
Filesystem_stop()
{
  # Only network filesystems get a forced unmount; start from an empty
  # value so nothing is inherited from the environment.
  local umount_force=""

  # See if the device is currently mounted
  Filesystem_status >/dev/null 2>&1
  if [ $? -ne $OCF_NOT_RUNNING ]; then
    # Determine the real blockdevice this is mounted on (if
    # possible) prior to unmounting.
    determine_blockdevice

    # For networked filesystems, there's merit in trying -f:
    case "$FSTYPE" in
      nfs|cifs|smbfs) umount_force="-f" ;;
    esac

    # Umount all sub-filesystems mounted under $MOUNTPOINT/ too.
    for SUB in `list_submounts $MOUNTPOINT` $MOUNTPOINT; do
      # Report the path that is actually being unmounted in this round.
      ocf_log info "Trying to unmount $SUB"
      for sig in SIGTERM SIGTERM SIGTERM SIGKILL SIGKILL SIGKILL; do
        if $UMOUNT $umount_force $SUB ; then
          rc=$OCF_SUCCESS
          ocf_log info "unmounted $SUB successfully"
          break
        else
          rc=$OCF_ERR_GENERIC
          ocf_log err "Couldn't unmount $SUB; trying cleanup with $sig"
          # fuser returns a non-zero return code if none of the
          # specified files is accessed or in case of a fatal
          # error.
          if $FUSER -$sig -m -k $SUB ; then
            ocf_log info "Some processes on $SUB were signalled"
          else
            ocf_log info "No processes on $SUB were signalled"
          fi
          sleep 1
        fi
      done

      if [ $rc -ne $OCF_SUCCESS ]; then
        ocf_log err "Couldn't unmount $SUB, giving up!"
      fi
    done
  else
    # Already unmounted, wonderful.
    rc=$OCF_SUCCESS
  fi

  flushbufs $DEVICE

  if [ "$FSTYPE" = "ocfs2" ]; then
    ocfs2_cleanup
  fi

  return $rc
}
# end of Filesystem_stop
627
#
# STATUS: is the file system mounted and the SAN path alive?
#
# When Filesystem_status does not report the filesystem as mounted, its
# result is passed straight through.  Otherwise two health checks run:
#   1. /usr/bin/qla-monitor.pl is asked about $DEVICE;
#   2. one 4 KiB block is written with O_DIRECT to
#      $MOUNTPOINT/filesystem.monitor.
# If either fails, the failure is logged, an alarm is raised through
# fs-instance-alarm.pl, fs-power-control.pl is asked to reboot this host,
# and $OCF_ERR_GENERIC is returned.
#
Filesystem_status_qla_monitor()
{
  Filesystem_status >/dev/null 2>&1
  ORIG_RET=$?
  if [ $ORIG_RET -ne $OCF_SUCCESS ]; then
    return $ORIG_RET
  fi

  # the fs is mounted.  Are the paths ok?
  /usr/bin/qla-monitor.pl --device "$DEVICE"
  if [ $? -ne 0 ]; then
    # log as much as we can; this is a failure, so it is reported at the
    # same "err" severity as the write failure below.
    ocf_log err "Error: $DEVICE is dead."
    echo "Error: $DEVICE is dead on node `hostname`, failing over" >> /var/log/pvfs2/pvfs2-failover.log;
    /usr/bin/fs-instance-alarm.pl --fs-name "$fsname" --ce `hostname` --type PVFS2_HA --msg "$DEVICE is dead, failing over."
    # shoot ourselves in the head
    /usr/bin/fs-power-control.pl --fs-name "$fsname" --conf-dir "$conf_dir" --host `hostname` --command reboot
    return $OCF_ERR_GENERIC
  fi

  dd bs=4096 count=1 if=/dev/zero oflag=direct of="${MOUNTPOINT}/filesystem.monitor"
  if [ $? -ne 0 ]; then
    ocf_log err "Error: Unable to write to $DEVICE on node `hostname`."
    /usr/bin/logger -p user.err -t PVFS2 "Error: Unable to write to $DEVICE on node `hostname`, failing over."
    /usr/bin/fs-instance-alarm.pl --fs-name "$fsname" --ce `hostname` --type PVFS2_HA --msg "Error: Unable to write to $DEVICE on node `hostname`, failing over."
    # shoot ourselves in the head
    /usr/bin/fs-power-control.pl --fs-name "$fsname" --conf-dir "$conf_dir" --host `hostname` --command reboot
    return $OCF_ERR_GENERIC
  fi

  return $ORIG_RET
}
# end of Filesystem_status_qla_monitor
660
#
# STATUS: is the filesystem mounted or not?
#
# Looks for $MOUNTPOINT in the mount table.  Sets the globals $rc and $msg
# and returns $OCF_SUCCESS when it is mounted, $OCF_NOT_RUNNING otherwise.
# The message is logged only when the requested operation ($OP) is "status".
#
Filesystem_status()
{
        # Assume "not mounted" until the mount table says otherwise.
        rc=$OCF_NOT_RUNNING
        msg="$MOUNTPOINT is unmounted (stopped)"

        if list_mounts | grep -q " $MOUNTPOINT " >/dev/null 2>&1 ; then
          rc=$OCF_SUCCESS
          msg="$MOUNTPOINT is mounted (running)"
        fi

        # TODO: For ocfs2, or other cluster filesystems, should we be
        # checking connectivity to other nodes here, or the IO path to
        # the storage?

        if [ "$OP" = "status" ]; then
          ocf_log info "$msg"
        fi
        return $rc
}
# end of Filesystem_status
686
#
# VALIDATE_ALL: Are the instance parameters valid?
#       FIXME!!  The only part that's useful is the return code.
#       This code always returns $OCF_SUCCESS (!)
#
# Logs a warning when $MOUNTPOINT is set but is not a directory, and an
# info message when $FSTYPE is neither listed in /proc/filesystems nor
# found in the running kernel's modules.dep.
#
Filesystem_validate_all()
{
        # Quoted operands and two separate tests: an empty or
        # space-containing mount point must not change how the test parses.
        if [ -n "$MOUNTPOINT" ] && [ ! -d "$MOUNTPOINT" ]; then
            ocf_log warn "Mountpoint $MOUNTPOINT does not exist"
        fi

# Check if the $FSTYPE is workable
# NOTE: Without inserting the $FSTYPE module, this step may be imprecise
# TODO: This is Linux specific crap.
        if [ -n "$FSTYPE" ]; then
            # Whole-line, literal match on the filesystem name column.
            if ! cut -f2 /proc/filesystems | grep -Fqx -- "$FSTYPE" ; then
                modpath=/lib/modules/`uname -r`
                moddep=$modpath/modules.dep
                # Do we have $FSTYPE in modules.dep?
                if ! cut -d' ' -f1 "$moddep" | grep -q "^$modpath.*$FSTYPE\.k\?o:$" ; then
                    ocf_log info "It seems we do not have $FSTYPE support"
                fi
            fi
        fi

#TODO: How to check the $options ?

        return $OCF_SUCCESS
}
718
# Exactly one argument, the name of the operation, is expected.
if [ $# -ne 1 ]; then
  usage
  exit $OCF_ERR_ARGS
fi

OP=$1

# These operations do not require instance parameters, so they are
# answered before any parameter is looked at.
case $OP in
  meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
  usage)
        usage
        exit $OCF_SUCCESS
        ;;
esac

# fsname and conf_dir are mandatory for every remaining operation.
if [ -z "$OCF_RESKEY_fsname" ]; then
  ocf_log err "Filesystem-qla-monitor must specify fsname!"
  exit $OCF_ERR_GENERIC
fi

if [ -z "$OCF_RESKEY_conf_dir" ]; then
  ocf_log err "Filesystem-qla-monitor must specify conf_dir!"
  exit $OCF_ERR_GENERIC
fi

# Copy the OCF_RESKEY_ environment variables into the names used by the
# functions of this script.
DEVICE=$OCF_RESKEY_device
FSTYPE=$OCF_RESKEY_fstype
fsname=$OCF_RESKEY_fsname
conf_dir=$OCF_RESKEY_conf_dir
if [ -n "$OCF_RESKEY_options" ]; then
        options="-o $OCF_RESKEY_options"
fi
763
# Classify $DEVICE: only a plain path is treated as a block device.
# The negated bracket expressions are spelled [!/] because that is the
# portable form in shell patterns; a leading ^ is not guaranteed to negate.
blockdevice=no
case $DEVICE in
  "")   ocf_log err "Please set OCF_RESKEY_device to the device to be managed"
        exit $OCF_ERR_ARGS
        ;;
  -*) # Oh... An option to mount instead...  Typically -U or -L
        ;;
  [!/]*:/*)     # An NFS filesystem specification...
        ;;
  //[!/]*/*)    # An SMB filesystem specification...
        ;;
  *)    # For "start" the missing device is reported by Filesystem_start.
        if [ ! -b "$DEVICE"  -a "X$OP" != Xstart ] ; then
          ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist"
        fi
        blockdevice=yes
        ;;
esac

# Only ocfs2 (after its own initialisation) and nfs may run as a clone;
# every other filesystem type is refused when clone meta-data is present.
case $FSTYPE in
  ocfs2)
        ocfs2_init
  ;;
  nfs)
        : # this is kind of safe too
  ;;
  *)
        if [ -n "$OCF_RESKEY_CRM_meta_clone_max" ]; then
                ocf_log err "DANGER! $FSTYPE on $DEVICE is NOT cluster-aware!"
                ocf_log err "DO NOT RUN IT AS A CLONE!"
                ocf_log err "Politely refusing to proceed to avoid data corruption."
                exit $OCF_ERR_GENERIC
        fi
  ;;
esac
798
# It is possible that OCF_RESKEY_directory has one or even multiple trailing "/".
# But the output of `mount` and /proc/mounts do not.
# All expansions are quoted so that an empty value or a directory name
# containing whitespace neither breaks the tests nor gets re-split.
if [ -z "$OCF_RESKEY_directory" ]; then
    if [ "X$OP" = "Xstart" ] || [ "$blockdevice" = "no" ]; then
        ocf_log err "Please specify the directory"
        exit $OCF_ERR_ARGS
    fi
else
    MOUNTPOINT=$(printf '%s\n' "$OCF_RESKEY_directory" | sed 's/\/*$//')
    : ${MOUNTPOINT:=/}
    # At this stage, $MOUNTPOINT does not contain trailing "/" unless it is "/"
    # TODO: / mounted via Filesystem sounds dangerous. On stop, we'll
    # kill the whole system. Is that a good idea?
fi

# Check to make sure the utilities are found
check_util "$MODPROBE"
check_util "$FSCK"
check_util "$FUSER"
check_util "$MOUNT"
check_util "$UMOUNT"

# monitor runs repeatedly, so it is the one operation not announced in the log.
if [ "$OP" != "monitor" ]; then
        ocf_log info "Running $OP for $DEVICE on $MOUNTPOINT"
fi

# Dispatch the requested operation; its status becomes the exit status of
# the agent.
case $OP in
  start)                Filesystem_start
                        ;;
  notify)               Filesystem_notify
                        ;;
  stop)                 Filesystem_stop
                        ;;
  status|monitor)       Filesystem_status_qla_monitor
                        ;;
  validate-all)         Filesystem_validate_all
                        ;;
  *)                    usage
                        exit $OCF_ERR_UNIMPLEMENTED
                        ;;
esac
exit $?
Note: See TracBrowser for help on using the browser.