lvmlockd: use persistent reservations for recovery with sanlock dev-dct-setlockargs-3
author David Teigland <teigland@redhat.com>
Wed, 13 Aug 2025 14:57:20 +0000 (09:57 -0500)
committer David Teigland <teigland@redhat.com>
Fri, 3 Oct 2025 14:57:10 +0000 (09:57 -0500)
The process of using persistent reservations for recovery:

host A owns a lock
host A fails
host B requests the lock
host B request fails because A owns the lock
host A enters the FAIL state in sanlock
host B retries the lock, and sees owner A is failed
host B runs lvmpersist to remove the PR key of host A
host B tells sanlock that host A is dead
host B retries the lock, which is now granted by sanlock

The new option: --setlockargs persist,notimeout
adds "persist" and "notimeout" components to the
lock_args string in the VG metadata, viewable with
vgs -o+lockargs.  The option is available in vgcreate
or vgchange.

. "persist" tells lvmlockd to remove the PR key of
  a failed host, by running lvmpersist, and set the
  sanlock host state to "dead".

. "notimeout" tells lvmlockd to configure sanlock
  leases to not time out.  sanlock does not use the
  watchdog to protect leases that do not time out.

With this combination, PR removal replaces the watchdog
for fencing hosts with expired leases that are blocking
other hosts.

The lock_args version component is "2.0.0" when
these new settings are used, otherwise remains
"1.0.0".  Previous lvm versions will not start a
VG with lockargs version 2.

Requires sanlock version 4.2.0 or later.

23 files changed:
configure.ac
daemons/lvmlockd/Makefile.in
daemons/lvmlockd/lvmlockd-client.h
daemons/lvmlockd/lvmlockd-core.c
daemons/lvmlockd/lvmlockd-dlm.c
daemons/lvmlockd/lvmlockd-helper.c [new file with mode: 0644]
daemons/lvmlockd/lvmlockd-internal.h
daemons/lvmlockd/lvmlockd-sanlock.c
lib/device/persist.c
lib/device/persist.h
lib/locking/lvmlockd.c
lib/locking/lvmlockd.h
lib/metadata/metadata.c
man/lvmlockd.8_main
tools/args.h
tools/command-lines.in
tools/lvmcmdline.c
tools/toollib.c
tools/toollib.h
tools/tools.h
tools/vgchange.c
tools/vgcreate.c
tools/vgsplit.c

index d64a407595e407352333c1fac7bb5e08ef0fabde..58fc62da715afda339eb77ebd9501b486493054a 100644 (file)
@@ -934,9 +934,12 @@ AC_MSG_RESULT([$BUILD_LOCKDSANLOCK])
 
 dnl -- Look for sanlock libraries
 AS_IF([test "$BUILD_LOCKDSANLOCK" = "yes"], [
-       LOCKDSANLOCK_SUPPORT=370
+# FIXME: forcing sanlock 4.2.0 for testing, default should be 370
+#      LOCKDSANLOCK_SUPPORT=370
        PKG_CHECK_EXISTS(libsanlock_client >= 4.0.0, [LOCKDSANLOCK_SUPPORT=400])
        PKG_CHECK_EXISTS(libsanlock_client >= 4.1.0, [LOCKDSANLOCK_SUPPORT=410])
+       PKG_CHECK_EXISTS(libsanlock_client >= 4.2.0, [LOCKDSANLOCK_SUPPORT=420])
+       LOCKDSANLOCK_SUPPORT=420
        PKG_CHECK_MODULES(LIBSANLOCKCLIENT, libsanlock_client >= 3.7.0, [BUILD_LVMLOCKD="yes"])
        AC_DEFINE_UNQUOTED([LOCKDSANLOCK_SUPPORT], [$LOCKDSANLOCK_SUPPORT], [Define version of sanlock.])
 ])
index 7ae4b3da3e94c1cf702c6b3e0e23a31f89dc1d27..65a76510ae839fffd39ecd64e4441561aff99636 100644 (file)
@@ -15,7 +15,7 @@ srcdir = @srcdir@
 top_srcdir = @top_srcdir@
 top_builddir = @top_builddir@
 
-SOURCES = lvmlockd-core.c
+SOURCES = lvmlockd-core.c lvmlockd-helper.c
 SOURCES2 = lvmlockctl.c
 
 TARGETS = lvmlockd lvmlockctl
index 9a6f3a93982b5d5fdb7c55eafcecd4cfd4842dbd..acbb225102d93e5f500cf57f0e4e297372f7509f 100644 (file)
@@ -60,4 +60,11 @@ static inline void lvmlockd_close(daemon_handle h)
 #define EIOTIMEOUT   225
 #define ELOCKREPAIR  226
 
+#define LOCKARGS_VERSION       0x00000001 /* meta only */
+#define LOCKARGS_LVMLOCK       0x00000002 /* meta only */
+#define LOCKARGS_TIMEOUT        0x00000004 /* user only */
+#define LOCKARGS_NOTIMEOUT      0x00000008 /* meta or user */
+#define LOCKARGS_PERSIST        0x00000010 /* meta or user */
+#define LOCKARGS_NOPERSIST      0x00000020 /* user only */
+
 #endif /* _LVM_LVMLOCKD_CLIENT_H */
index d0c17802bebbc12c5ed618e497760f84e210edc3..f93c68e5461eb634df1722d60388eff59f5dc63d 100644 (file)
 #include <syslog.h>
 #include <dirent.h>
 #include <time.h>
+#include <fcntl.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/utsname.h>
 #include <sys/un.h>
+#include <sys/wait.h>
 
 #ifdef SD_NOTIFY_SUPPORT
 #include <systemd/sd-daemon.h>
@@ -180,6 +182,12 @@ static int listen_fd;
 static int restart_pi;
 static int restart_fds[2];
 
+static int helper_send_fd = -1; /* main loop sends requests to helper */
+static int helper_recv_fd = -1; /* main loop receives results from helper */
+static int helper_pid = -1;
+static int helper_pi = -1;
+static uint32_t helper_msg_id = 1;
+
 /*
  * Each lockspace has its own thread to do locking.
  * The lockspace thread makes synchronous lock requests to dlm/sanlock.
@@ -252,6 +260,8 @@ static int alloc_new_structs; /* used for initializing in setup_structs */
 #define DO_FORCE 1
 #define NO_FORCE 0
 
+static int add_fence_action(struct lockspace *ls, struct owner *owner);
+static int send_helper_request(struct action *act, char *ls_name, uint32_t new_msg_id);
 static int add_lock_action(struct action *act);
 static int str_to_lm(const char *str);
 static int setup_dump_socket(void);
@@ -405,6 +415,131 @@ static int dump_log(int *dump_len)
        return 0;
 }
 
+static void split_line(char *buf, int *argc, char **argv, int max_args, char sep)
+{
+       char *p = buf;
+       int i;
+
+       argv[0] = p;
+
+       for (i = 1; i < max_args; i++) {
+               p = strchr(p, sep);
+               if (!p)
+                       break;
+               *p++ = '\0';
+
+               argv[i] = p;
+       }
+       *argc = i;
+}
+
+int lockd_lockargs_get_version(char *str, unsigned int *major, unsigned int *minor, unsigned int *patch)
+{
+       char version[16] = {0};
+       char *major_str, *minor_str, *patch_str;
+       char *n, *d1, *d2;
+
+       strncpy(version, str, 15);
+
+       n = strchr(version, ':');
+       if (n)
+               *n = '\0';
+
+       d1 = strchr(version, '.');
+       if (!d1)
+               return -1;
+
+       d2 = strchr(d1 + 1, '.');
+       if (!d2)
+               return -1;
+
+       major_str = version;
+       minor_str = d1 + 1;
+       patch_str = d2 + 1;
+
+       *d1 = '\0';
+       *d2 = '\0';
+
+       if (major)
+               *major = atoi(major_str);
+       if (minor)
+               *minor = atoi(minor_str);
+       if (patch)
+               *patch = atoi(patch_str);
+
+       return 0;
+}
+
+#define MAX_LOCKARGS 8
+
+/* parse lock_args string for values that may appear in VG metadata lock_args */
+
+static int lockd_lockargs_get_meta_flags(const char *str, uint32_t *flags)
+{
+       char buf[PATH_MAX];
+       char *argv[MAX_LOCKARGS];
+       int argc;
+       int i;
+
+       if (!str)
+               return -1;
+
+       dm_strncpy(buf, str, sizeof(buf));
+
+       split_line(buf, &argc, argv, MAX_LOCKARGS, ':');
+
+       for (i = 0; i < argc; i++) {
+               if (!i && !lockd_lockargs_get_version(argv[i], NULL, NULL, NULL))
+                       *flags |= LOCKARGS_VERSION;
+               else if ((i == 1) && !strcmp(argv[i], "lvmlock"))
+                       *flags |= LOCKARGS_LVMLOCK;
+               else if (!strcmp(argv[i], "persist"))
+                       *flags |= LOCKARGS_PERSIST;
+               else if (!strcmp(argv[i], "notimeout"))
+                       *flags |= LOCKARGS_NOTIMEOUT;
+               else {
+                       log_error("Unknown lockargs meta value: %s", argv[i]);
+                       return -1;
+               }
+       }
+       log_debug("lockd_lockargs_get_meta_flags %s = 0x%x", str, *flags);
+       return 0;
+}
+
+/* parse lock_args string for values that may appear in command line --setlockargs */
+
+int lockd_lockargs_get_user_flags(const char *str, uint32_t *flags)
+{
+       char buf[PATH_MAX];
+       char *argv[MAX_LOCKARGS];
+       int argc;
+       int i;
+
+       if (!str)
+               return -1;
+
+       dm_strncpy(buf, str, sizeof(buf));
+
+       split_line(buf, &argc, argv, MAX_LOCKARGS, ',');
+
+       for (i = 0; i < argc; i++) {
+               if (!strcmp(argv[i], "persist"))
+                       *flags |= LOCKARGS_PERSIST;
+               else if (!strcmp(argv[i], "nopersist"))
+                       *flags |= LOCKARGS_NOPERSIST;
+               else if (!strcmp(argv[i], "timeout"))
+                       *flags |= LOCKARGS_TIMEOUT;
+               else if (!strcmp(argv[i], "notimeout"))
+                       *flags |= LOCKARGS_NOTIMEOUT;
+               else {
+                       log_error("Unknown lockargs option value: %s", argv[i]);
+                       return -1;
+               }
+       }
+       log_debug("lockd_lockargs_get_user_flags %s = 0x%x", str, *flags);
+       return 0;
+}
+
 struct lockspace *alloc_lockspace(void)
 {
        struct lockspace *ls;
@@ -417,6 +552,7 @@ struct lockspace *alloc_lockspace(void)
        INIT_LIST_HEAD(&ls->actions);
        INIT_LIST_HEAD(&ls->resources);
        INIT_LIST_HEAD(&ls->dispose);
+       INIT_LIST_HEAD(&ls->fence_history);
        pthread_mutex_init(&ls->mutex, NULL);
        pthread_cond_init(&ls->cond, NULL);
        return ls;
@@ -529,6 +665,7 @@ static struct resource *alloc_resource(void)
                memset(r, 0, sizeof(struct resource) + resource_lm_data_size);
                INIT_LIST_HEAD(&r->locks);
                INIT_LIST_HEAD(&r->actions);
+               INIT_LIST_HEAD(&r->fence_wait_actions);
        } else {
                log_error("out of memory for resource");
        }
@@ -586,6 +723,17 @@ static void free_client(struct client *cl)
 
 static void free_resource(struct resource *r)
 {
+       struct action *act, *act2;
+
+       list_for_each_entry_safe(act, act2, &r->actions, list) {
+               list_del(&act->list);
+               free_action(act);
+       }
+       list_for_each_entry_safe(act, act2, &r->fence_wait_actions, list) {
+               list_del(&act->list);
+               free_action(act);
+       }
+
        pthread_mutex_lock(&unused_struct_mutex);
        if (unused_resource_count >= MAX_UNUSED_RESOURCE) {
                free(r);
@@ -808,6 +956,14 @@ static const char *op_str(int x)
                return "busy";
        case LD_OP_REFRESH_LV:
                return "refresh_lv";
+       case LD_OP_FENCE:
+               return "fence";
+       case LD_OP_FENCE_RESULT:
+               return "fence_result";
+       case LD_OP_SETLOCKARGS_BEFORE:
+               return "setlockargs_before";
+       case LD_OP_SETLOCKARGS_FINAL:
+               return "setlockargs_final";
        default:
                return "op_unknown";
        };
@@ -856,45 +1012,6 @@ int last_string_from_args(char *args_in, char *last)
        return -1;
 }
 
-int version_from_args(char *args, unsigned int *major, unsigned int *minor, unsigned int *patch)
-{
-       char version[MAX_ARGS+1];
-       char *major_str, *minor_str, *patch_str;
-       char *n, *d1, *d2;
-
-       memset(version, 0, sizeof(version));
-       strncpy(version, args, MAX_ARGS);
-       version[MAX_ARGS] = '\0';
-
-       n = strstr(version, ":");
-       if (n)
-               *n = '\0';
-
-       d1 = strstr(version, ".");
-       if (!d1)
-               return -1;
-
-       d2 = strstr(d1 + 1, ".");
-       if (!d2)
-               return -1;
-
-       major_str = version;
-       minor_str = d1 + 1;
-       patch_str = d2 + 1;
-
-       *d1 = '\0';
-       *d2 = '\0';
-
-       if (major)
-               *major = atoi(major_str);
-       if (minor)
-               *minor = atoi(minor_str);
-       if (patch)
-               *patch = atoi(patch_str);
-
-       return 0;
-}
-
 /*
  * Write new info when a command exits if that command has acquired a new LV
  * lock.  If the command has released an LV lock we don't bother updating the
@@ -1915,15 +2032,24 @@ out:
  * closed/terminated their lvmlockd connection, and whose locks should
  * be released.  Do not remove these actions from act_close_list.
  *
+ * act_fence_done: list of OP_FENCE_RESULT actions, identifying hosts that
+ * have been fenced.  LOCK actions waiting for this fencing are moved from
+ * the r->fence_wait_actions list back to the r->actions list for retrying.
+ * Do not remove the FENCE_RESULT actions from act_fence_done list since
+ * these act structs are applied to multiple resources in the lockspace
+ * (like act_close_list.)
+ *
  * retry_out: set to 1 if the lock manager said we should retry,
  * meaning we should call res_process() again in a short while to retry.
  */
 
 static void res_process(struct lockspace *ls, struct resource *r,
-                       struct list_head *act_close_list, int *retry_out)
+                       struct list_head *act_close_list,
+                       struct list_head *act_fence_done,
+                       int *retry_out)
 {
        struct owner owner = { 0 };
-       struct action *act, *safe, *act_close;
+       struct action *act, *safe, *act_close, *act_fence, *act_lock;
        struct lock *lk;
        uint32_t unlock_by_client_id = 0;
        int lm_retry;
@@ -1985,6 +2111,37 @@ static void res_process(struct lockspace *ls, struct resource *r,
                res_cancel(ls, r, act_close);
        }
 
+       if (!list_empty(&r->fence_wait_actions)) {
+               list_for_each_entry(act_fence, act_fence_done, list) {
+                       list_for_each_entry_safe(act_lock, safe, &r->fence_wait_actions, list) {
+                               /*
+                                * act_lock->owner identifies the failed host that owned the
+                                * lock which we submitted a fence request for. if a fence
+                                * result identifies that same owner, then the lock request
+                                * action can continue.
+                                */
+                               if ((act_lock->owner.host_id == act_fence->owner.host_id) &&
+                                   (act_lock->owner.generation == act_fence->owner.generation)) {
+                                       list_del(&act_lock->list);
+                                       if (act_fence->result) {
+                                               /* fencing failed, return locking error to command */
+                                               log_debug("%s:%s lock error after fence error for %u %u",
+                                                         ls->name, r->name, act_fence->owner.host_id, act_fence->owner.generation);
+                                               act_lock->result = -EAGAIN;
+                                               add_client_result(act_lock);
+                                       } else {
+                                               /* fencing done, retry lock request which should no
+                                                  longer be blocked by the failed owner */
+                                               log_debug("%s:%s lock retry after fence success for %u %u",
+                                                         ls->name, r->name, act_fence->owner.host_id, act_fence->owner.generation);
+                                               memset(&act_lock->owner, 0, sizeof(struct owner));
+                                               list_add_tail(&act_lock->list, &r->actions);
+                                       }
+                               }
+                       }
+               }
+       }
+
        /*
         * handle enable/disable
         */
@@ -2215,12 +2372,26 @@ static void res_process(struct lockspace *ls, struct resource *r,
 
                        rv = res_lock(ls, r, act, &lm_retry, &owner);
 
-                       /* TODO: if lock fails because it's owned by a failed host,
-                          and persistent reservations are enabled, then remove the
-                          pr of failed host_id, tell sanlock the host_id is now
-                          dead, and retry lock request. */
+                       /*
+                        * If lock fails because it's owned by a failed host,
+                        * and persistent reservation fencing is enabled, then
+                        * remove the pr of failed host_id, tell sanlock the
+                        * host_id is now dead, and retry lock request.
+                        */
+                       if (ls->fence_pr && (rv == -EAGAIN) &&
+                           owner.host_id && owner.generation &&
+                           !strcmp(owner.state, "FAIL")) {
+                               log_debug("%s:%s res_lock fence_pr %u:%u",
+                                         ls->name, r->name, owner.host_id, owner.generation);
+                               /* after fencing is done for owner, the act's from
+                                  r->fence_wait_actions are moved back to r->actions. */
+                               act->owner = owner;
+                               list_del(&act->list);
+                               list_add(&act->list, &r->fence_wait_actions);
+                               add_fence_action(ls, &owner);
+                               *retry_out = 1;
 
-                       if ((rv == -EAGAIN) &&
+                       } else if ((rv == -EAGAIN) &&
                            (act->retries <= act->max_retries) &&
                            (lm_retry || (r->type != LD_RT_LV))) {
                                /* leave act on list */
@@ -2257,7 +2428,25 @@ static void res_process(struct lockspace *ls, struct resource *r,
 
                        rv = res_lock(ls, r, act, &lm_retry, &owner);
 
-                       if ((rv == -EAGAIN) &&
+                       /*
+                        * If lock fails because it's owned by a failed host,
+                        * and persistent reservation fencing is enabled, then
+                        * remove the pr of failed host_id, tell sanlock the
+                        * host_id is now dead, and retry lock request.
+                        */
+                       if (ls->fence_pr && (rv == -EAGAIN) &&
+                           owner.host_id && owner.generation &&
+                           !strcmp(owner.state, "FAIL")) {
+                               log_debug("%s:%s res_lock fence_pr %u:%u",
+                                         ls->name, r->name, owner.host_id, owner.generation);
+                               /* after fencing is done for owner, the act's from
+                                  r->fence_wait_actions are moved back to r->actions. */
+                               act->owner = owner;
+                               list_del(&act->list);
+                               list_add(&act->list, &r->fence_wait_actions);
+                               add_fence_action(ls, &owner);
+                               *retry_out = 1;
+                       } else if ((rv == -EAGAIN) &&
                            (act->retries <= act->max_retries) &&
                            (lm_retry || (r->type != LD_RT_LV))) {
                                /* leave act on list */
@@ -2291,7 +2480,7 @@ static void res_process(struct lockspace *ls, struct resource *r,
         * processing the OP_CLOSE for the client.
         */
        if ((r->type == LD_RT_LV) && (r->mode == LD_LK_UN) &&
-           list_empty(&r->locks) && list_empty(&r->actions)) {
+           list_empty(&r->locks) && list_empty(&r->actions) && list_empty(&r->fence_wait_actions)) {
 
                /* An implicit unlock of a transient lock. */
                if (!unlock_by_client_id)
@@ -2573,6 +2762,7 @@ static void *lockspace_thread_main(void *arg_in)
        struct action *act_op_free = NULL;
        struct list_head tmp_act;
        struct list_head act_close;
+       struct list_head act_fence;
        char tmp_name[MAX_NAME+5];
        int fail_stop_busy;
        int free_vg = 0;
@@ -2588,6 +2778,7 @@ static void *lockspace_thread_main(void *arg_in)
        int rv;
 
        INIT_LIST_HEAD(&act_close);
+       INIT_LIST_HEAD(&act_fence);
        INIT_LIST_HEAD(&tmp_act);
 
        /* first action may be client add */
@@ -2619,8 +2810,9 @@ static void *lockspace_thread_main(void *arg_in)
                adopt_ok = 1;
        }
 
-       log_debug("S %s lm_add_lockspace %s act %d wait %d adopt_only %d adopt_ok %d repair %d",
-                 ls->name, lm_str(ls->lm_type), add_act ? 1 : 0, wait_flag, adopt_only, adopt_ok, repair);
+       log_debug("S %s lm_add_lockspace %s act %d wait %d adopt_only %d adopt_ok %d repair %d no_timeout %d key 0x%llx",
+                 ls->name, lm_str(ls->lm_type), add_act ? 1 : 0, wait_flag, adopt_only, adopt_ok, repair, ls->no_timeout,
+                 (unsigned long long)ls->ourkey);
 
        /*
         * The prepare step does not wait for anything and is quick;
@@ -2699,6 +2891,10 @@ static void *lockspace_thread_main(void *arg_in)
 
                        act = list_first_entry(&ls->actions, struct action, list);
 
+                       log_debug("S %s ls actions entry: %s", ls->name, op_str(act->op));
+
+                       act->ls_generation = ls->generation;
+
                        if (act->op == LD_OP_KILL_VG && act->rt == LD_RT_VG) {
                                /* Continue processing until DROP_VG arrives. */
                                log_debug("S %s kill_vg", ls->name);
@@ -2731,12 +2927,14 @@ static void *lockspace_thread_main(void *arg_in)
                                ls->thread_work = 0;
                                ls->thread_stop = 1;
                                drop_vg = 1;
+                               /* list_del(&act->list) is done at end of lockspace_thread function */
                                break;
                        }
 
                        if (act->op == LD_OP_STOP) {
-                               /* thread_stop is already set */
                                ls->thread_work = 0;
+                               /* ls->thread_stop = 1 is already set */
+                               /* list_del(&act->list) is done at end of lockspace_thread function */
                                break;
                        }
 
@@ -2762,6 +2960,7 @@ static void *lockspace_thread_main(void *arg_in)
                                ls->thread_work = 0;
                                ls->thread_stop = 1;
                                free_vg = 1;
+                               /* list_del(&act->list) is done at end of lockspace_thread function */
                                break;
                        }
 
@@ -2779,6 +2978,50 @@ static void *lockspace_thread_main(void *arg_in)
                                continue;
                        }
 
+                       if (act->op == LD_OP_SETLOCKARGS_BEFORE && act->rt == LD_RT_VG) {
+                               /* check if sanlock version supports the new args */
+                               if (!lm_setlockargs_supported_sanlock(ls, act)) {
+                                       list_del(&act->list);
+                                       act->result = -EPROTONOSUPPORT;
+                                       add_client_result(act);
+                                       continue;
+                               }
+
+                               /* check that no LV locks are held; a VG lock is usually held */
+                               if (for_each_lock(ls, LOCKS_EXIST_LV)) {
+                                       list_del(&act->list);
+                                       act->result = -ENOTEMPTY;
+                                       add_client_result(act);
+                                       continue;
+                               }
+
+                               /* check that we are the only lockspace user */
+                               rv = lm_hosts(ls, 1);
+                               if (rv) {
+                                       /*
+                                        * rv < 0: error (don't remove)
+                                        * rv > 0: other hosts in lockspace (cannot remove)
+                                        * rv = 0: only local host in lockspace (can remove)
+                                        * Checking for hosts here in addition to after the
+                                        * main loop allows vgremove to fail and be rerun
+                                        * after the ls is stopped on other hosts.
+                                        */
+                                       log_error("S %s setlockargs_before hosts %d", ls->name, rv);
+                                       list_del(&act->list);
+                                       act->result = (rv < 0) ? rv : -EBUSY;
+                                       add_client_result(act);
+                                       continue;
+                               }
+
+                               /* return success, allow the change */
+                               /* list_del act and add_client_result done after rem_lockspace */
+
+                               /* the lockspace needs to be stopped for setlockargs_final */
+                               ls->thread_work = 0;
+                               ls->thread_stop = 1;
+                               break;
+                       }
+
                        if (act->op == LD_OP_RENAME_BEFORE && act->rt == LD_RT_VG) {
                                /* vgrename */
                                log_debug("S %s checking for lockspace hosts", ls->name);
@@ -2792,6 +3035,7 @@ static void *lockspace_thread_main(void *arg_in)
                                }
                                ls->thread_work = 0;
                                ls->thread_stop = 1;
+                               /* list_del(&act->list) is done at end of lockspace_thread function */
                                /* Do we want to check hosts again below like vgremove? */
                                break;
                        }
@@ -2821,6 +3065,7 @@ static void *lockspace_thread_main(void *arg_in)
                        }
 
                        if (act->op == LD_OP_FREE && act->rt == LD_RT_LV) {
+                               /* lvremove */
                                list_del(&act->list);
 
                                r = find_dispose_act(ls, act); /* removes r from dispose list */
@@ -2882,6 +3127,18 @@ static void *lockspace_thread_main(void *arg_in)
                                continue;
                        }
 
+                       /*
+                        * check all resources for lock actions that are waiting
+                        * for this fence result
+                        */
+                       if (act->op == LD_OP_FENCE_RESULT) {
+                               list_del(&act->list);
+                               list_add(&act->list, &act_fence);
+                               log_debug("S %s apply fence result %d for host %u %u",
+                                         ls->name, act->result, act->owner.host_id, act->owner.generation);
+                               continue;
+                       }
+
                        /*
                         * All the other op's are for locking.
                         * Find the specific resource that the lock op is for,
@@ -2905,8 +3162,21 @@ static void *lockspace_thread_main(void *arg_in)
                        log_debug("%s:%s action %s %s", ls->name, r->name,
                                  op_str(act->op), mode_str(act->mode));
                }
+               /* end processing ls->actions */
                pthread_mutex_unlock(&ls->mutex);
 
+               /*
+                * If the fence result was a success, then tell the
+                * sanlock lockspace that the fenced host is dead
+                * so it will grant locks held by the fenced host.
+                */
+               if (ls->lm_type == LD_LM_SANLOCK) {
+                       list_for_each_entry(act, &act_fence, list) {
+                               if (!act->result)
+                                       lm_set_host_dead_sanlock(ls, &act->owner);
+                       }
+               }
+
                /*
                 * Process the lock operations that have been queued for each
                 * resource.
@@ -2915,13 +3185,18 @@ static void *lockspace_thread_main(void *arg_in)
                retry = 0;
 
                list_for_each_entry_safe(r, r2, &ls->resources, list)
-                       res_process(ls, r, &act_close, &retry);
+                       res_process(ls, r, &act_close, &act_fence, &retry);
 
                list_for_each_entry_safe(act, safe, &act_close, list) {
                        list_del(&act->list);
                        free_action(act);
                }
 
+               list_for_each_entry_safe(act, safe, &act_fence, list) {
+                       list_del(&act->list);
+                       free_action(act);
+               }
+
                if (retry) {
                        ls->thread_work = 1;
                        usleep(LOCK_RETRY_MS * 1000);
@@ -3013,12 +3288,20 @@ out_rem:
 
 out_act:
        /*
-        * Move remaining actions to results; this will usually (always?)
-        * be only the stop action.
+        * Move remaining actions to results, this will usually (always?)
+        * be the act processed above which resulted in the lockspace thread
+        * being stopped.  That act is not removed from ls->actions by
+        * the main action processing loop, but remains on ls->actions
+        * and is removed here.  (TODO: wouldn't it be nicer
+        * to always list_del every action above, and save a pointer
+        * to the act struct that caused thread_stop=1?  This seems
+        * to incorrectly return success for any/all acts, not just
+        * the one act that was processed leading to thread_stop.)
         */
        pthread_mutex_lock(&ls->mutex);
        list_for_each_entry_safe(act, safe, &ls->actions, list) {
                if (act->op == LD_OP_FREE) {
+                       /* vgremove */
                        act_op_free = act;
                        act->result = 0;
                } else if (act->op == LD_OP_STOP)
@@ -3027,6 +3310,8 @@ out_act:
                        act->result = 0;
                else if (act->op == LD_OP_RENAME_BEFORE)
                        act->result = 0;
+               else if (act->op == LD_OP_SETLOCKARGS_BEFORE)
+                       act->result = 0;
                else
                        act->result = -ENOLS;
                list_del(&act->list);
@@ -3059,8 +3344,7 @@ out_act:
 
        pthread_mutex_lock(&lockspaces_mutex);
        ls->thread_done = 1;
-       ls->free_vg = free_vg;
-       ls->drop_vg = drop_vg;
+
        if (ls->lm_type == LD_LM_DLM && !strcmp(ls->name, gl_lsname_dlm))
                global_dlm_lockspace_exists = 0;
        if (ls->lm_type == LD_LM_IDM && !strcmp(ls->name, gl_lsname_idm))
@@ -3176,14 +3460,23 @@ static int add_lockspace_thread(const char *ls_name,
        struct resource *r;
        int rv;
 
-       log_debug("add_lockspace_thread %s %s version %u",
-                 lm_str(lm_type), ls_name, act ? act->version : 0);
+       log_debug("add_lockspace_thread %s %s version %u vg_args %s",
+                 lm_str(lm_type), ls_name, act ? act->version : 0, vg_args);
 
        if (!(ls = alloc_lockspace()))
                return -ENOMEM;
 
        strncpy(ls->name, ls_name, MAX_NAME);
        ls->lm_type = lm_type;
+       ls->ourkey = act->ourkey;
+
+       if (lockd_lockargs_get_meta_flags(vg_args, &ls->lock_args_flags) < 0) {
+               log_error("add_lockspace_thread %s lock_args invalid %s", ls->name, vg_args);
+               free(ls);
+               return -EARGS;
+       }
+       ls->no_timeout = (ls->lock_args_flags & LOCKARGS_NOTIMEOUT) ? 1 : 0;
+       ls->fence_pr = (ls->lock_args_flags & LOCKARGS_PERSIST) ? 1 : 0;
 
        if (act) {
                ls->start_client_id = act->client_id;
@@ -3438,6 +3731,9 @@ static int add_lockspace(struct action *act)
  * unlock it when stopping.
  *
  * Should we attempt to stop the lockspace containing the gl last?
+ *
+ * FIXME: why is OP_STOP partly processed here rather than just being
+ * added to ls->actions and processed by the lockspace thread?
  */
 
 static int rem_lockspace(struct action *act)
@@ -3614,6 +3910,10 @@ static int for_each_lockspace(int do_stop, int do_free, int do_force)
                                        list_del(&act->list);
                                        free_action(act);
                                }
+                               list_for_each_entry_safe(act, act2, &ls->fence_history, list) {
+                                       list_del(&act->list);
+                                       free_action(act);
+                               }
                                free_ls_resources(ls);
                                free_pvs_path(&ls->pvs);
                                free(ls);
@@ -3701,7 +4001,8 @@ static int work_init_vg(struct action *act)
        }
 
        if (act->lm_type == LD_LM_SANLOCK)
-               rv = lm_init_vg_sanlock(ls_name, act->vg_name, act->flags, act->vg_args, act->align_mb);
+               rv = lm_init_vg_sanlock(ls_name, act->vg_name, act->flags, act->vg_args, act->align_mb,
+                                       act->other_args[0] ? act->other_args : NULL);
        else if (act->lm_type == LD_LM_DLM)
                rv = lm_init_vg_dlm(ls_name, act->vg_name, act->flags, act->vg_args);
        else if (act->lm_type == LD_LM_IDM)
@@ -3732,6 +4033,36 @@ static int work_rename_vg(struct action *act)
        return rv;
 }
 
+/*
+ * Complete a setlockargs operation for a sanlock VG.
+ *
+ * The lockspace was stopped in setlockargs_before, but the lockspace
+ * thread may not have been fully cleaned up yet.  There is no waiting
+ * here: if the lockspace still exists, return -EAGAIN so the request
+ * is retried later.  (The previous while(1) form returned on its
+ * first pass, so a single check is equivalent.)
+ *
+ * Returns -EINVAL for lock manager types other than sanlock.
+ */
+static int work_setlockargs_vg_final(struct action *act)
+{
+       char ls_name[MAX_NAME+1] = {0};
+       int found;
+       int rv = -EINVAL;
+
+       if (act->lm_type == LD_LM_SANLOCK) {
+               vg_ls_name(act->vg_name, ls_name);
+
+               pthread_mutex_lock(&lockspaces_mutex);
+               found = find_lockspace_name(ls_name) ? 1 : 0;
+               pthread_mutex_unlock(&lockspaces_mutex);
+
+               if (found) {
+                       log_debug("S %s work_setlockargs_vg_final ls not cleared, retry", ls_name);
+                       return -EAGAIN;
+               }
+
+               rv = lm_setlockargs_vg_sanlock(ls_name, act->vg_name, act);
+       }
+
+       return rv;
+}
+
 static void work_test_gl(void)
 {
        struct lockspace *ls;
@@ -3798,7 +4129,7 @@ static int work_init_lv(struct action *act)
        if (lm_type == LD_LM_SANLOCK) {
                /* ls is NULL if the lockspace is not started, which happens
                   for vgchange --locktype sanlock. */
-               rv = lm_init_lv_sanlock(ls, ls_name, act->vg_name, act->lv_uuid, vg_args, lv_args, act->prev_lv_args);
+               rv = lm_init_lv_sanlock(ls, ls_name, act->vg_name, act->lv_uuid, vg_args, lv_args, act->other_args);
                memcpy(act->lv_args, lv_args, MAX_ARGS);
                return rv;
 
@@ -3835,6 +4166,116 @@ static int work_vg_status(struct action *act)
        return rv;
 }
 
+/*
+ * Worker thread: process an OP_FENCE action queued by a lockspace
+ * thread (add_fence_action) after a lock request failed because the
+ * lock owner is a failed host.
+ *
+ * The new fencing act is compared against the lockspace fence_history:
+ * . matches a completed entry (OP_FENCE_RESULT): reuse this act to
+ *   deliver the previous result to the lockspace thread.
+ * . matches an in-progress entry (OP_FENCE): set *retry so the caller
+ *   requeues this act on the delayed list.
+ * . no match: keep this act on fence_history and send a fencing
+ *   request to the helper process.
+ *
+ * NOTE(review): lockspaces_mutex is held across send_helper_request(),
+ * which may sleep(1) when the helper pipe is full.
+ */
+static void work_fence(struct action *act, int *retry)
+{
+       char ls_name[MAX_NAME+1];
+       char vg_name[MAX_NAME+1];
+       struct lockspace *ls;
+       struct action *ah;
+       struct owner ah_owner = { 0 };
+       uint32_t new_msg_id = 0;
+       int ah_result = 0;
+       int found_busy = 0;
+       int found_done = 0;
+       int rv;
+
+       memset(ls_name, 0, sizeof(ls_name));
+       memcpy(vg_name, act->vg_name, sizeof(act->vg_name));
+
+       pthread_mutex_lock(&lockspaces_mutex);
+       vg_ls_name(vg_name, ls_name);
+       ls = find_lockspace_name(ls_name);
+       if (!ls) {
+               pthread_mutex_unlock(&lockspaces_mutex);
+               log_error("no lockspace for fence action %s.", ls_name);
+               return;
+       }
+
+       pthread_mutex_lock(&ls->mutex);
+       list_for_each_entry(ah, &ls->fence_history, list) {
+               if (ah->owner.host_id != act->owner.host_id)
+                       continue;
+               if (ah->owner.generation != act->owner.generation)
+                       continue;
+
+               if (ah->op == LD_OP_FENCE) {
+                       /* new act matches an in-progress fence act */
+                       found_busy = 1;
+               } else if (ah->op == LD_OP_FENCE_RESULT) {
+                       /* new act matches a completed fence act */
+                       found_done = 1;
+                       ah_result = ah->result;
+                       ah_owner = ah->owner;
+               }
+               break;
+       }
+
+       if (!found_done && !found_busy) {
+               /*
+                * send the helper a fencing request for this act.
+                * keep this new act in fence_history while the helper
+                * is working on it. when it's completed, this act will
+                * be changed from OP_FENCE to OP_FENCE_RESULT and kept
+                * in fence_history.
+                *
+                * helper_msg_id is only touched by the single worker
+                * thread, so no extra locking is needed for it.
+                */
+               list_add(&act->list, &ls->fence_history);
+               new_msg_id = helper_msg_id++;
+
+               log_debug("work_fence %s found_done %d found_busy %d send helper new_msg_id %u", vg_name, found_done, found_busy, new_msg_id);
+
+       } else if (found_done) {
+               /*
+                * A matching OP_FENCE was already completed.
+                * Reuse this act as an OP_FENCE_RESULT.
+                */
+               act->op = LD_OP_FENCE_RESULT;
+               act->result = ah_result;
+               act->owner = ah_owner;
+
+               if (!ls->thread_stop) {
+                       list_add_tail(&act->list, &ls->actions);
+                       ls->thread_work = 1;
+                       pthread_cond_signal(&ls->cond);
+               } else {
+                       free_action(act);
+               }
+
+               log_debug("work_fence %s found_done %d found_busy %d fence result %d", vg_name, found_done, found_busy, ah_result);
+
+       } else if (found_busy) {
+               /* when retried, the result will eventually be found in history above */
+               *retry = 1;
+
+               log_debug("work_fence %s found_done %d found_busy %d retry", vg_name, found_done, found_busy);
+       }
+       pthread_mutex_unlock(&ls->mutex);
+
+       if (!found_done && !found_busy) {
+               rv = send_helper_request(act, ls_name, new_msg_id);
+               if (rv < 0) {
+                       /*
+                        * change act to FENCE_RESULT error and move it to
+                        * ls->actions; set thread_work and signal the cond
+                        * so the lockspace thread actually processes the
+                        * error (and free the act if the thread is stopping.)
+                        */
+                       log_error("work_fence %s failed to send helper request %u", vg_name, new_msg_id);
+                       pthread_mutex_lock(&ls->mutex);
+                       list_del(&act->list);
+                       act->op = LD_OP_FENCE_RESULT;
+                       act->result = -ENOTCONN;
+                       if (!ls->thread_stop) {
+                               list_add_tail(&act->list, &ls->actions);
+                               ls->thread_work = 1;
+                               pthread_cond_signal(&ls->cond);
+                       } else {
+                               free_action(act);
+                       }
+                       pthread_mutex_unlock(&ls->mutex);
+               }
+       }
+       pthread_mutex_unlock(&lockspaces_mutex);
+}
+
 /*
  * When an action is queued for the worker_thread, it is processed right away.
  * After processing, some actions need to be retried again in a short while.
@@ -3947,6 +4388,11 @@ static void *worker_thread_main(void *arg_in)
                        act->result = work_rename_vg(act);
                        add_client_result(act);
 
+               } else if ((act->op == LD_OP_SETLOCKARGS_FINAL) && (act->rt == LD_RT_VG)) {
+                       log_debug("work setlockargs_vg_final %s", act->vg_name);
+                       act->result = work_setlockargs_vg_final(act);
+                       add_client_result(act);
+
                } else if (act->op == LD_OP_START_WAIT) {
                        act->result = count_lockspace_starting(0);
                        if (!act->result)
@@ -3974,6 +4420,12 @@ static void *worker_thread_main(void *arg_in)
                        } else
                                list_add(&act->list, &delayed_list);
 
+               } else if (act->op == LD_OP_FENCE) {
+                       int retry = 0;
+                       log_debug("work_fence %s %u %u", act->vg_name, act->owner.host_id, act->owner.generation);
+                       work_fence(act, &retry);
+                       if (retry)
+                               list_add(&act->list, &delayed_list);
                } else {
                        log_error("work unknown op %d", act->op);
                        act->result = -EINVAL;
@@ -4235,10 +4687,9 @@ static int client_send_result(struct client *cl, struct action *act)
        if (act->flags & LD_AF_SH_EXISTS)
                strcat(result_flags, "SH_EXISTS,");
 
-       if (act->op == LD_OP_INIT) {
+       if (act->op == LD_OP_INIT || act->op == LD_OP_SETLOCKARGS_FINAL) {
                /*
-                * init is a special case where lock args need
-                * to be passed back to the client.
+                * init and setlockargs send lock_args back to the client.
                 */
                const char *vg_args = "none";
                const char *lv_args = "none";
@@ -4386,6 +4837,7 @@ static int client_send_result(struct client *cl, struct action *act)
                                          "op_result = " FMTd64, (int64_t) act->result,
                                          "lm_result = " FMTd64, (int64_t) act->lm_rv,
                                          "result_flags = %s", result_flags[0] ? result_flags : "none",
+                                         "ls_generation = " FMTd64, (int64_t) act->ls_generation,
                                          NULL);
        }
 
@@ -4622,6 +5074,7 @@ static int str_to_op_rt(const char *req_name, int *op, int *rt)
                return 0;
        }
        if (!strcmp(req_name, "free_vg")) {
+               /* TODO: use LD_OP_REMOVE_VG */
                *op = LD_OP_FREE;
                *rt = LD_RT_VG;
                return 0;
@@ -4632,6 +5085,7 @@ static int str_to_op_rt(const char *req_name, int *op, int *rt)
                return 0;
        }
        if (!strcmp(req_name, "free_lv")) {
+               /* TODO: use LD_OP_REMOVE_LV */
                *op = LD_OP_FREE;
                *rt = LD_RT_LV;
                return 0;
@@ -4736,6 +5190,16 @@ static int str_to_op_rt(const char *req_name, int *op, int *rt)
                *rt = 0;
                return 0;
        }
+       if (!strcmp(req_name, "setlockargs_vg_before")) {
+               *op = LD_OP_SETLOCKARGS_BEFORE;
+               *rt = LD_RT_VG;
+               return 0;
+       }
+       if (!strcmp(req_name, "setlockargs_vg_final")) {
+               *op = LD_OP_SETLOCKARGS_FINAL;
+               *rt = LD_RT_VG;
+               return 0;
+       }
 out:
        return -1;
 }
@@ -4913,13 +5377,15 @@ static int print_lockspace(struct lockspace *ls, const char *prefix, int pos, in
                        "vg_args=%s "
                        "lm_type=%s "
                        "host_id=%u "
+                       "generation=%llu "
                        "create_fail=%d "
                        "create_done=%d "
                        "thread_work=%d "
                        "thread_stop=%d "
                        "thread_done=%d "
                        "kill_vg=%d "
-                       "drop_vg=%d "
+                       "fence_pr=%d "
+                       "no_timeout=%d "
                        "sanlock_gl_enabled=%d\n",
                        prefix,
                        ls->name,
@@ -4928,13 +5394,15 @@ static int print_lockspace(struct lockspace *ls, const char *prefix, int pos, in
                        ls->vg_args,
                        lm_str(ls->lm_type),
                        ls->host_id,
+                       (unsigned long long)ls->generation,
                        ls->create_fail ? 1 : 0,
                        ls->create_done ? 1 : 0,
                        ls->thread_work ? 1 : 0,
                        ls->thread_stop ? 1 : 0,
                        ls->thread_done ? 1 : 0,
                        ls->kill_vg,
-                       ls->drop_vg,
+                       ls->fence_pr,
+                       ls->no_timeout,
                        ls->sanlock_gl_enabled ? 1 : 0);
 }
 
@@ -5117,6 +5585,7 @@ static void client_recv_action(struct client *cl)
        char buf[18];   /* "path[%d]\0", %d outputs signed integer so max to 10 bytes */
        int64_t val;
        uint32_t opts = 0;
+       uint64_t ourkey;
        int result = 0;
        int cl_pid;
        int op, rt, lm, mode;
@@ -5265,7 +5734,11 @@ static void client_recv_action(struct client *cl)
 
        str = daemon_request_str(req, "prev_lv_args", NULL);
        if (str && strcmp(str, "none"))
-               strncpy(act->prev_lv_args, str, MAX_ARGS);
+               strncpy(act->other_args, str, MAX_ARGS);
+
+       str = daemon_request_str(req, "set_lock_args", NULL);
+       if (str && strcmp(str, "none"))
+               strncpy(act->other_args, str, MAX_ARGS);
 
        /* start_vg will include lvmlocal.conf local/host_id here */
        val = daemon_request_int(req, "host_id", 0);
@@ -5278,6 +5751,10 @@ static void client_recv_action(struct client *cl)
 
        act->lv_size_bytes = (uint64_t)dm_config_find_int64(req.cft->root, "lv_size_bytes", 0);
 
+       ourkey = (uint64_t)dm_config_find_int64(req.cft->root, "our_key", 0);
+       if (ourkey)
+               act->ourkey = ourkey;
+
        /* Create PV list for idm */
        if (lm == LD_LM_IDM) {
                memset(&pvs, 0x0, sizeof(pvs));
@@ -5369,6 +5846,7 @@ skip_pvs_path:
        case LD_OP_RENAME_FINAL:
        case LD_OP_RUNNING_LM:
        case LD_OP_REFRESH_LV:
+       case LD_OP_SETLOCKARGS_FINAL:
                add_work_action(act);
                rv = 0;
                break;
@@ -5383,6 +5861,7 @@ skip_pvs_path:
        case LD_OP_KILL_VG:
        case LD_OP_DROP_VG:
        case LD_OP_BUSY:
+       case LD_OP_SETLOCKARGS_BEFORE:
                rv = add_lock_action(act);
                break;
        default:
@@ -6435,6 +6914,343 @@ static void process_restart(int fd)
                log_debug("process_restart error %d", errno);
 }
 
+/*
+ * Fencing
+ *
+ * lockspace thread
+ * . res_process() lock action fails due to a failed host
+ * . add_fence_action() creates new action OP_FENCE with owner info
+ * . adds it to work actions
+ *
+ * worker thread
+ * . takes new OP_FENCE
+ * . compares it against lockspace's fence_history list
+ *   (completed fence actions for hosts)
+ * . if action for same host is complete, add OP_FENCE_RESULT to
+ *   actions for the lockspace thread
+ * . if action for same host is in progress, return and have worker
+ *   thread retry after delay
+ * . else send new fence command to helper process
+ *
+ * helper process
+ * . receives fencing command
+ * . runs fencing command:
+ *   lvmpersist remove --ourkey OURKEY --removekey REMKEY --vg VG
+ * . sends result back to main thread
+ *
+ * main thread
+ * . receive fencing result from helper process, process_helper
+ * . process_fence_result() finds original OP_FENCE act in
+ *   ls fence_history and changes it to OP_FENCE_RESULT
+ * . adds a new OP_FENCE_RESULT action to the lockspace actions list
+ *
+ * lockspace thread
+ * . applies OP_FENCE_RESULT to each resource's fence_wait_actions
+ * . moves matching fence_wait_actions entries to r->actions
+ *   to be retried
+ */
+
+/*
+ * We cannot block the main thread on this write, so the pipe is
+ * NONBLOCK, and write fails with EAGAIN when the pipe is full.
+ * With 1k msg size and 64k default pipe size, the pipe will be full
+ * if we quickly send 64 messages.
+ *
+ * By setting the pipe size to 1MB in setup_helper, we could quickly send 1024
+ * msgs before getting EAGAIN.
+ */
+
+/*
+ * Send one fencing command to the helper process over the nonblocking
+ * pipe.  Retries forever on EINTR; on EAGAIN (pipe full) sleeps one
+ * second and retries exactly once.  Returns 0 on success, -1 on any
+ * failure (no fd, non-fence op, or the write did not complete).
+ */
+static int send_helper_request(struct action *act, char *ls_name, uint32_t new_msg_id)
+{
+       struct helper_msg msg = { 0 };
+       int slept_once = 0;
+       int n;
+
+       if (helper_send_fd == -1) {
+               log_error("send_helper_request no send fd");
+               return -1;
+       }
+
+       if (act->op != LD_OP_FENCE)
+               return -1;
+
+       strncpy(msg.ls_name, ls_name, MAX_NAME);
+       msg.type = HELPER_COMMAND;
+       msg.act = LD_OP_FENCE;
+       msg.msg_id = new_msg_id;
+       act->msg_id = new_msg_id;
+       snprintf(msg.command, RUN_COMMAND_LEN-1, "/usr/sbin/lvmpersist remove --ourkey 0x%llx --removekey 0x%llx --vg %s",
+                (unsigned long long)act->ourkey,
+                (unsigned long long)act->remkey,
+                act->vg_name);
+       log_debug("send_helper_request fence msg %u %s", new_msg_id, msg.command);
+
+       for (;;) {
+               n = write(helper_send_fd, &msg, sizeof(msg));
+
+               if (n == sizeof(msg))
+                       return 0;
+
+               if (n == -1 && errno == EINTR)
+                       continue;
+
+               if (n == -1 && errno == EAGAIN) {
+                       /* pipe is full */
+                       if (!slept_once) {
+                               slept_once = 1;
+                               sleep(1);
+                               continue;
+                       }
+                       log_error("send_helper_request write EAGAIN");
+                       return -1;
+               }
+
+               /* helper exited or closed fd */
+               if (n == -1 && errno == EPIPE) {
+                       log_error("send_helper_request write EPIPE");
+                       return -1;
+               }
+
+               /* this shouldn't happen */
+               log_error("send_helper_request write error %d %d", n, errno);
+               return -1;
+       }
+}
+
+/* lockspace threads call add_fence_action() */
+
+static int add_fence_action(struct lockspace *ls, struct owner *owner)
+{
+       struct action *act;
+
+       if (!(act = alloc_action()))
+               return -1;
+
+       /*
+        * The creation of a key here for host_id X generation Y must match the
+        * logic that lvm commands use to generate keys for sanlock hosts:
+        *
+        * key 0x100000YYYYYYXXXX where XXXX are the hex digits for the host_id,
+        * and YYYYYY are the hex digits for the generation number.
+        *
+        * The generation field must be widened to 64 bits before shifting:
+        * YYYYYY spans key bits 16..39, and a 32-bit shift would truncate
+        * generation bits 16..23 for generations above 0xFFFF.
+        */
+
+       memcpy(act->vg_name, ls->vg_name, sizeof(act->vg_name));
+       memcpy(act->vg_uuid, ls->vg_uuid, sizeof(act->vg_uuid));
+       act->op = LD_OP_FENCE;
+       act->ourkey = ls->ourkey;
+       act->remkey = 0x1000000000000000ULL |
+                     ((uint64_t)(owner->generation & 0xFFFFFF) << 16) |
+                     (uint64_t)(owner->host_id & 0xFFFF);
+       memcpy(&act->owner, owner, sizeof(struct owner));
+
+       log_debug("add_fence_action vg %s for host_id %u gen %u ourkey 0x%llx remkey 0x%llx",
+                 act->vg_name, act->owner.host_id, act->owner.generation,
+                 (unsigned long long)act->ourkey, (unsigned long long)act->remkey);
+
+       add_work_action(act);
+       return 0;
+}
+
+/*
+ * Create the helper process used to run fencing commands, connected
+ * to the daemon by a pair of pipes.  All pipe ends are O_NONBLOCK
+ * (the main thread must never block) and O_CLOEXEC.  On success the
+ * parent keeps helper_send_fd/helper_recv_fd and records helper_pid;
+ * the child runs helper_main() and never returns.
+ * Returns 0 on success or -errno.
+ */
+static int setup_helper(void)
+{
+       int pfd[2];
+       int parent_write = -1; /* daemon -> helper, write end */
+       int child_read = -1;   /* daemon -> helper, read end */
+       int parent_read = -1;  /* helper -> daemon, read end */
+       int child_write = -1;  /* helper -> daemon, write end */
+       int pid;
+
+       if (pipe2(pfd, O_NONBLOCK | O_CLOEXEC))
+               return -errno;
+
+       child_read = pfd[0];
+       parent_write = pfd[1];
+
+       /* a larger pipe would allow more queued requests, e.g.
+          fcntl(parent_write, F_SETPIPE_SZ, 1024*1024); */
+
+       if (pipe2(pfd, O_NONBLOCK | O_CLOEXEC)) {
+               close(child_read);
+               close(parent_write);
+               return -errno;
+       }
+
+       parent_read = pfd[0];
+       child_write = pfd[1];
+
+       pid = fork();
+       if (pid < 0) {
+               close(child_read);
+               close(parent_write);
+               close(parent_read);
+               close(child_write);
+               return -errno;
+       }
+
+       if (!pid) {
+               /* child: close the parent's ends and run the helper loop */
+               close(parent_read);
+               close(parent_write);
+               helper_main(child_read, child_write, daemon_debug);
+               exit(0);
+       }
+
+       /* parent: close the child's ends and keep ours */
+       close(child_read);
+       close(child_write);
+       helper_send_fd = parent_write;
+       helper_recv_fd = parent_read;
+       helper_pid = pid;
+       return 0;
+}
+
+/*
+ * Tear down the daemon's side of the helper connection: close both
+ * pipe fds and remove the receive fd from the main poll loop.
+ * helper_pid is intentionally left set so helper_dead() can still
+ * waitpid() the process afterwards.
+ */
+static void close_helper(void)
+{
+       close(helper_send_fd);
+       close(helper_recv_fd);
+       helper_send_fd = -1;
+       helper_recv_fd = -1;
+       rem_pollfd(helper_pi);
+       helper_pi = -1;
+       /* don't set helper_pid = -1 until we've tried waitpid */
+}
+
+/*
+ * Called from the main poll loop when the helper's pipe reports
+ * death: close the connection, reap the helper process, and log
+ * how it terminated.
+ */
+static void helper_dead(int fd)
+{
+       int pid = helper_pid;
+       int status;
+       int rv;
+
+       close_helper();
+       helper_pid = -1;
+
+       rv = waitpid(pid, &status, WNOHANG);
+       if (rv != pid) {
+               /* should not happen */
+               log_error("helper pid %d dead wait %d", pid, rv);
+       } else if (WIFEXITED(status)) {
+               log_error("helper pid %d exit status %d", pid,
+                         WEXITSTATUS(status));
+       } else if (WIFSIGNALED(status)) {
+               log_error("helper pid %d term signal %d", pid,
+                         WTERMSIG(status));
+       } else {
+               /* should not happen */
+               log_error("helper pid %d state change", pid);
+       }
+}
+
+/*
+ * main thread runs process_helper() and process_fence_result()
+ * the result is given to each lockspace as an action to process.
+ */
+
+/*
+ * Main thread: handle a fencing result received from the helper.
+ * Finds the originating OP_FENCE act in the lockspace fence_history
+ * (matched by msg_id), converts it to a completed OP_FENCE_RESULT
+ * entry, and queues a new OP_FENCE_RESULT action for the lockspace
+ * thread so waiting lock requests can be retried (or failed, if the
+ * fencing result is an error).
+ */
+static void process_fence_result(struct helper_msg *msg)
+{
+       struct lockspace *ls;
+       struct action *ah, *act;
+       int found = 0;
+
+       log_debug("process_fence_result %s msg_id %u result %d", msg->ls_name, msg->msg_id, msg->result);
+
+       /* create a fence result act to pass the result from ah */
+       if (!(act = alloc_action()))
+               return;
+
+       /*
+        * find the OP_FENCE action that initiated the fence request,
+        * it was saved on the fence_history list.
+        */
+       pthread_mutex_lock(&lockspaces_mutex);
+       ls = find_lockspace_name(msg->ls_name);
+       if (!ls) {
+               pthread_mutex_unlock(&lockspaces_mutex);
+               log_error("No lockspace for fence result %s", msg->ls_name);
+               free_action(act);
+               return;
+       }
+
+       pthread_mutex_lock(&ls->mutex);
+       list_for_each_entry(ah, &ls->fence_history, list) {
+               if (ah->msg_id != msg->msg_id)
+                       continue;
+
+               if (ah->op != LD_OP_FENCE) {
+                       /* shouldn't happen */
+                       log_error("process_fence_result wrong history op for msg_id %u", ah->msg_id);
+               }
+
+               /*
+                * change the OP_FENCE action into an OP_FENCE_RESULT action
+                * that is saved in the fence_history.
+                *
+                * TODO: limit history, one per host_id?
+                * e.g. remove older gen results?
+                */
+               ah->op = LD_OP_FENCE_RESULT;
+               ah->result = msg->result;
+
+               /* if the result is failure, then the lock requests
+                  waiting on this fence result will return an error */
+
+               found = 1;
+               break;
+       }
+
+       if (!found) {
+               log_error("fence result does not match a fence request");
+               /* free the unused act; leaking it here was a bug */
+               free_action(act);
+               goto out;
+       }
+
+       act->op = LD_OP_FENCE_RESULT;
+       act->owner = ah->owner;
+       act->result = ah->result;
+
+       if (!ls->thread_stop) {
+               list_add_tail(&act->list, &ls->actions);
+               ls->thread_work = 1;
+               pthread_cond_signal(&ls->cond);
+       } else {
+               free_action(act);
+       }
+out:
+       pthread_mutex_unlock(&ls->mutex);
+       pthread_mutex_unlock(&lockspaces_mutex);
+}
+
+/*
+ * Main thread: read one result message from the helper pipe and
+ * dispatch it.  The pipe is nonblocking: a spurious wakeup returns
+ * silently rather than tearing down the helper connection.
+ */
+static void process_helper(int fd)
+{
+       struct helper_msg msg;
+       int rv;
+
+       memset(&msg, 0, sizeof(msg));
+
+       rv = read(fd, &msg, sizeof(msg));
+       if (!rv)
+               return;
+       if (rv < 0) {
+               /*
+                * read() returns -1 and sets errno; the old check
+                * (rv == -EAGAIN) never matched, so a harmless EAGAIN
+                * would incorrectly close the helper connection.
+                */
+               if (errno == EAGAIN || errno == EINTR)
+                       return;
+               log_error("process_helper rv %d errno %d", rv, errno);
+               goto fail;
+       }
+       if (rv != sizeof(msg)) {
+               log_error("process_helper recv size %d", rv);
+               goto fail;
+       }
+
+       if ((msg.type == HELPER_COMMAND_RESULT) && (msg.act == LD_OP_FENCE))
+               process_fence_result(&msg);
+       else
+               log_error("process_helper unknown msg %u %u %u", msg.type, msg.act, msg.msg_id);
+       return;
+
+ fail:
+       close_helper();
+}
+
 static void sigterm_handler(int sig __attribute__((unused)))
 {
        daemon_quit = 1;
@@ -6445,11 +7261,18 @@ static int main_loop(daemon_state *ds_arg)
        struct client *cl;
        int i, rv, is_recv, is_dead;
 
+       rv = setup_helper();
+       if (rv < 0) {
+               log_error("Can't setup helper process");
+               return rv;
+       }
+
        signal(SIGTERM, &sigterm_handler);
 
        rv = setup_structs();
        if (rv < 0) {
                log_error("Can't allocate memory");
+               close_helper();
                return rv;
        }
 
@@ -6467,6 +7290,8 @@ static int main_loop(daemon_state *ds_arg)
        listen_fd = ds_arg->socket_fd;
        listen_pi = add_pollfd(listen_fd);
 
+       helper_pi = add_pollfd(helper_recv_fd);
+
        setup_client_thread();
        setup_worker_thread();
        setup_restart();
@@ -6527,6 +7352,14 @@ static int main_loop(daemon_state *ds_arg)
                                continue;
                        }
 
+                       if (i == helper_pi) {
+                               if (is_recv)
+                                       process_helper(pollfd[i].fd);
+                               if (is_dead)
+                                       helper_dead(pollfd[i].fd);
+                               continue;
+                       }
+
                        /*
                        log_debug("poll pi %d fd %d revents %x",
                                  i, pollfd[i].fd, pollfd[i].revents);
index 9b94e17d16a85654ecf5b28f3a76776ed7e83b5f..cb4d7b2790a22dad5f570d4f1148e3bd486f4c2c 100644 (file)
@@ -76,7 +76,7 @@ static int check_args_version(char *vg_args)
        unsigned int major = 0;
        int rv;
 
-       rv = version_from_args(vg_args, &major, NULL, NULL);
+       rv = lockd_lockargs_get_version(vg_args, &major, NULL, NULL);
        if (rv < 0) {
                log_error("check_args_version %s error %d", vg_args, rv);
                return rv;
diff --git a/daemons/lvmlockd/lvmlockd-helper.c b/daemons/lvmlockd/lvmlockd-helper.c
new file mode 100644 (file)
index 0000000..30542d4
--- /dev/null
@@ -0,0 +1,264 @@
+/*
+ * Copyright 2025 Red Hat, Inc.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v2 or (at your option) any later version.
+ */
+
+#include <inttypes.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <poll.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <time.h>
+#include <stdarg.h>
+#include <signal.h>
+#include <ctype.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <grp.h>
+#include <syslog.h>
+
+#include "lvmlockd-internal.h"
+
+struct list_head commands; /* helper_msg_list entries */
+
+static int _log_stderr;
+
+#define log_helper(fmt, args...) \
+do { \
+       if (_log_stderr) \
+               fprintf(stderr, fmt "\n", ##args); \
+} while (0)
+
+/*
+ * Remember an in-flight command so its result can be matched to the
+ * child pid when it exits.  On allocation failure the command is
+ * dropped and the daemon never receives a result for this msg_id,
+ * so at least log it instead of failing silently.
+ */
+static void _save_command(struct helper_msg *msg)
+{
+       struct helper_msg_list *ml;
+
+       ml = malloc(sizeof(struct helper_msg_list));
+       if (!ml) {
+               log_helper("save_command no mem for msg %u", msg->msg_id);
+               return;
+       }
+
+       memcpy(&ml->msg, msg, sizeof(struct helper_msg));
+       list_add_tail(&ml->list, &commands);
+}
+
+/* Find the saved command entry for a given child pid, or NULL. */
+static struct helper_msg_list *_get_command(int pid)
+{
+       struct helper_msg_list *entry;
+
+       list_for_each_entry(entry, &commands, list)
+               if (entry->msg.pid == pid)
+                       return entry;
+
+       return NULL;
+}
+
+/*
+ * Read one fixed-size helper_msg from fd, retrying on EINTR.
+ * Returns 0 on a complete read, -1 on any short read or error.
+ */
+static int read_msg(int fd, struct helper_msg *msg)
+{
+       int got;
+
+       do {
+               got = read(fd, msg, sizeof(struct helper_msg));
+       } while (got == -1 && errno == EINTR);
+
+       return (got == sizeof(struct helper_msg)) ? 0 : -1;
+}
+
+/*
+ * Split cmd_str on whitespace into an argv and exec it in the child.
+ * A backslash escapes a following backslash or whitespace character;
+ * any other escaped or non-printable character stops parsing.
+ * Returns (to a caller that exits the child) only on error.
+ */
+static void exec_command(char *cmd_str)
+{
+       char arg[ONE_ARG_LEN];
+       char *av[MAX_AV_COUNT + 1]; /* +1 for NULL */
+       int av_count = 0;
+       int i, arg_len, cmd_len;
+
+       for (i = 0; i < MAX_AV_COUNT + 1; i++)
+               av[i] = NULL;
+
+       if (!cmd_str[0])
+               return;
+
+       /* this should already be done, but make sure */
+       cmd_str[RUN_COMMAND_LEN - 1] = '\0';
+
+       memset(&arg, 0, sizeof(arg));
+       arg_len = 0;
+       cmd_len = strlen(cmd_str);
+
+       for (i = 0; i < cmd_len; i++) {
+               if (!cmd_str[i])
+                       break;
+
+               if (av_count == MAX_AV_COUNT)
+                       break;
+
+               /* at most one char is appended per iteration; stop before
+                  overflowing arg (room must remain for the NUL) */
+               if (arg_len == ONE_ARG_LEN - 1)
+                       break;
+
+               if (cmd_str[i] == '\\') {
+                       if (i == (cmd_len - 1))
+                               break;
+                       i++;
+
+                       if (cmd_str[i] == '\\') {
+                               arg[arg_len++] = cmd_str[i];
+                               continue;
+                       }
+                       if (isspace(cmd_str[i])) {
+                               arg[arg_len++] = cmd_str[i];
+                               continue;
+                       } else {
+                               break;
+                       }
+               }
+
+               if (isalnum(cmd_str[i]) || ispunct(cmd_str[i])) {
+                       arg[arg_len++] = cmd_str[i];
+               } else if (isspace(cmd_str[i])) {
+                       if (arg_len)
+                               av[av_count++] = strdup(arg);
+
+                       memset(arg, 0, sizeof(arg));
+                       arg_len = 0;
+               } else {
+                       break;
+               }
+       }
+
+       if ((av_count < MAX_AV_COUNT) && arg_len)
+               av[av_count++] = strdup(arg);
+
+       /* nothing to exec (empty command or strdup failure on arg 0) */
+       if (!av[0])
+               return;
+
+       execvp(av[0], av);
+}
+
+/*
+ * Write one helper_msg result back to the daemon, retrying on EINTR.
+ * Returns 0 on a complete write, -1 otherwise.
+ *
+ * NOTE(review): the out pipe is O_NONBLOCK; on EAGAIN the result is
+ * dropped and the daemon relies on retrying the fence request — TODO
+ * confirm whether queuing and resending is needed here.
+ */
+static int send_result(struct helper_msg *msg, int fd)
+{
+       int rv;
+ retry:
+       rv = write(fd, msg, sizeof(struct helper_msg));
+       if (rv == -1 && errno == EINTR)
+               goto retry;
+
+       if (rv == sizeof(struct helper_msg))
+               return 0;
+       return -1;
+}
+
+#define IDLE_TIMEOUT_MS (30 * 1000)
+#define ACTIVE_TIMEOUT_MS 500
+
+/*
+ * Main loop of the helper process: receive HELPER_COMMAND messages
+ * from the daemon on in_fd, fork/exec each command, and send a
+ * HELPER_COMMAND_RESULT back on out_fd when the child exits.
+ * The poll timeout is short while children are outstanding (so their
+ * exits are collected promptly) and long when idle.  Exits when the
+ * daemon closes its end of the pipe.
+ */
+__attribute__((noreturn)) void helper_main(int in_fd, int out_fd, int log_stderr)
+{
+       struct pollfd pollfd;
+       struct helper_msg msg;
+       struct helper_msg_list *ml;
+       siginfo_t info;
+       unsigned int fork_count = 0;
+       unsigned int done_count = 0;
+       int timeout = IDLE_TIMEOUT_MS;
+       int rv, pid;
+
+       INIT_LIST_HEAD(&commands);
+
+       _log_stderr = log_stderr;
+
+       /* drop supplementary groups before running commands */
+       rv = setgroups(0, NULL);
+       if (rv < 0)
+               log_helper("error clearing helper groups errno %i", errno);
+
+       memset(&pollfd, 0, sizeof(pollfd));
+       pollfd.fd = in_fd;
+       pollfd.events = POLLIN;
+
+       openlog("lvmlockd-helper", LOG_CONS | LOG_PID, LOG_LOCAL4);
+
+       while (1) {
+               rv = poll(&pollfd, 1, timeout);
+               if (rv == -1 && errno == EINTR)
+                       continue;
+
+               if (rv < 0)
+                       exit(0);
+
+               if (pollfd.revents & POLLIN) {
+                       memset(&msg, 0, sizeof(msg));
+
+                       rv = read_msg(in_fd, &msg);
+                       if (rv)
+                               continue;
+
+                       if (msg.type == HELPER_COMMAND) {
+                               pid = fork();
+
+                               if (pid < 0) {
+                                       /* fork failed: send an error result now,
+                                          otherwise the daemon would wait forever
+                                          for this msg_id */
+                                       log_helper("helper fork error %d", errno);
+                                       msg.type = HELPER_COMMAND_RESULT;
+                                       msg.result = -1;
+                                       send_result(&msg, out_fd);
+                               } else if (!pid) {
+                                       exec_command(msg.command);
+                                       exit(1);
+                               } else {
+                                       msg.pid = pid;
+                                       _save_command(&msg);
+                                       fork_count++;
+                               }
+                       }
+               }
+
+               if (pollfd.revents & (POLLERR | POLLHUP | POLLNVAL))
+                       exit(0);
+
+               /* collect child exits until no more children exist (ECHILD)
+                  or none are ready (WNOHANG) */
+
+               while (1) {
+                       memset(&info, 0, sizeof(info));
+
+                       rv = waitid(P_ALL, 0, &info, WEXITED | WNOHANG);
+
+                       if ((rv < 0) && (errno == ECHILD)) {
+                               /* no children left; poll can sleep longer */
+                               timeout = IDLE_TIMEOUT_MS;
+                       }
+
+                       else if (!rv && !info.si_pid) {
+                               log_helper("helper no children ready fork_count %d done_count %d", fork_count, done_count);
+                               timeout = ACTIVE_TIMEOUT_MS;
+                       }
+
+                       else if (!rv && info.si_pid) {
+                               done_count++;
+
+                               if (!(ml = _get_command(info.si_pid))) {
+                                       log_helper("command for pid %d result %d not found",
+                                                 info.si_pid, info.si_status);
+                                       continue;
+                               }
+
+                               log_helper("command for pid %d result %d done", info.si_pid, info.si_status);
+
+                               ml->msg.type = HELPER_COMMAND_RESULT;
+                               ml->msg.result = info.si_status;
+
+                               send_result(&ml->msg, out_fd);
+
+                               list_del(&ml->list);
+                               free(ml);
+                               continue;
+                       }
+
+                       else {
+                               log_helper("helper waitid rv %d errno %d fork_count %d done_count %d",
+                                         rv, errno, fork_count, done_count);
+                       }
+
+                       break;
+               }
+       }
+}
index df69824b6167bf509f8a94624bef8f17b1e3eed1..8e713558ab9af3aeb73cbd3671bd7b330bc6e2d2 100644 (file)
@@ -63,6 +63,10 @@ enum {
        LD_OP_QUERY_LOCK,
        LD_OP_REFRESH_LV,
        LD_OP_VG_STATUS,
+       LD_OP_FENCE,
+       LD_OP_FENCE_RESULT,
+       LD_OP_SETLOCKARGS_BEFORE,
+       LD_OP_SETLOCKARGS_FINAL,
 };
 
 /* resource types */
@@ -119,6 +123,7 @@ struct client {
 #define LD_AF_ADOPT_ONLY           0x00200000 /* adopt orphan or fail */
 #define LD_AF_NODELAY              0x00400000
 #define LD_AF_REPAIR              0x00800000
+#define LD_AF_NO_TIMEOUT          0x01000000
 
 /*
  * Number of times to repeat a lock request after
@@ -132,6 +137,32 @@ struct pvs {
        int num;
 };
 
+#define RUN_COMMAND_LEN 1024
+#define MAX_AV_COUNT 32
+#define ONE_ARG_LEN 256
+
+/* helper_msg types */
+#define HELPER_COMMAND 0x1
+#define HELPER_COMMAND_RESULT 0x2
+
+struct helper_msg {
+       uint8_t  type;
+       uint8_t  act;
+       uint16_t unused1;
+       uint32_t msg_id;
+       int pid;
+       int result;
+       char ls_name[MAX_NAME+1];
+       uint8_t unused2;
+       uint16_t unused3;
+       char command[RUN_COMMAND_LEN];
+};
+
+struct helper_msg_list {
+       struct helper_msg msg;
+       struct list_head list;
+};
+
 #define OWNER_NAME_SIZE 64
 #define OWNER_STATE_SIZE 32
 
@@ -147,9 +178,13 @@ struct action {
        struct list_head list;
        uint32_t client_id;
        uint32_t flags;                 /* LD_AF_ */
+       uint32_t msg_id;
        uint32_t version;
        uint32_t host_id;
+       uint64_t ourkey;
+       uint64_t remkey;
        uint64_t lv_size_bytes;
+       uint64_t ls_generation;
        int8_t op;                      /* operation type LD_OP_ */
        int8_t rt;                      /* resource type LD_RT_ */
        int8_t mode;                    /* lock mode LD_LK_ */
@@ -166,7 +201,7 @@ struct action {
        char lv_uuid[MAX_NAME+1];
        char vg_args[MAX_ARGS+1];
        char lv_args[MAX_ARGS+1];
-       char prev_lv_args[MAX_ARGS+1];
+       char other_args[MAX_ARGS+1];
        struct owner owner;
        struct pvs pvs;                 /* PV list for idm */
 };
@@ -187,6 +222,7 @@ struct resource {
        unsigned int use_vb : 1;
        struct list_head locks;
        struct list_head actions;
+       struct list_head fence_wait_actions;
        char lv_args[MAX_ARGS+1];
        char lm_data[];                 /* lock manager specific data */
 };
@@ -209,8 +245,10 @@ struct lockspace {
        char vg_args[MAX_ARGS+1];       /* lock manager specific args */
        int8_t lm_type;                 /* lock manager: LM_DLM, LM_SANLOCK */
        void *lm_data;
+       uint32_t lock_args_flags;
        uint32_t host_id;
        uint64_t generation;
+       uint64_t ourkey;
        uint64_t free_lock_offset;      /* for sanlock, start search for free lock here */
        struct pvs pvs;                 /* for idm: PV list */
 
@@ -225,13 +263,14 @@ struct lockspace {
        unsigned int thread_done : 1;
        unsigned int sanlock_gl_enabled: 1;
        unsigned int sanlock_gl_dup: 1;
-       unsigned int free_vg: 1;
        unsigned int kill_vg: 1;
-       unsigned int drop_vg: 1;
+       unsigned int fence_pr: 1;
+       unsigned int no_timeout: 1;
 
        struct list_head actions;       /* new client actions */
        struct list_head resources;     /* resource/lock state for gl/vg/lv */
        struct list_head dispose;       /* resources to free */
+       struct list_head fence_history; /* internally created actions for fencing */
 };
 
 /* val_blk version */
@@ -390,7 +429,9 @@ void log_level(int level, const char *fmt, ...)  __attribute__((format(printf, 2
 struct lockspace *alloc_lockspace(void);
 int lockspaces_empty(void);
 int last_string_from_args(char *args_in, char *last);
-int version_from_args(char *args, unsigned int *major, unsigned int *minor, unsigned int *patch);
+void helper_main(int in_fd, int out_fd, int log_stderr);
+int lockd_lockargs_get_user_flags(const char *str, uint32_t *flags);
+int lockd_lockargs_get_version(char *str, unsigned int *major, unsigned int *minor, unsigned int *patch);
 
 static inline const char *mode_str(int x)
 {
@@ -559,7 +600,7 @@ static inline int lm_refresh_lv_check_dlm(struct action *act)
 
 #ifdef LOCKDSANLOCK_SUPPORT
 
-int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args, int opt_align_mb);
+int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args, int opt_align_mb, char *other_args);
 int lm_init_lv_sanlock(struct lockspace *ls, char *ls_name, char *vg_name, char *lv_name, char *vg_args, char *lv_args, char *prev_args);
 int lm_free_lv_sanlock(struct lockspace *ls, struct resource *r);
 int lm_rename_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args);
@@ -584,6 +625,9 @@ int lm_data_size_sanlock(void);
 int lm_is_running_sanlock(void);
 int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes);
 int lm_vg_status_sanlock(struct lockspace *ls, struct action *act);
+void lm_set_host_dead_sanlock(struct lockspace *ls, struct owner *owner);
+int lm_setlockargs_supported_sanlock(struct lockspace *ls, struct action *act);
+int lm_setlockargs_vg_sanlock(char *ls_name, char *vg_name, struct action *act);
 
 static inline int lm_support_sanlock(void)
 {
@@ -592,7 +636,7 @@ static inline int lm_support_sanlock(void)
 
 #else
 
-static inline int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args, int opt_align_mb)
+static inline int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args, int opt_align_mb, char *other_args)
 {
        return -1;
 }
@@ -706,6 +750,20 @@ static inline int lm_support_sanlock(void)
        return 0;
 }
 
+static inline void lm_set_host_dead_sanlock(struct lockspace *ls, struct owner *owner)
+{
+}
+
+static inline int lm_setlockargs_supported_sanlock(struct lockspace *ls, struct action *act)
+{
+       return 0;
+}
+
+static inline int lm_setlockargs_vg_sanlock(char *ls_name, char *vg_name, struct action *act)
+{
+       return -1;
+}
+
 #endif /* sanlock support */
 
 #ifdef LOCKDIDM_SUPPORT
index 732c841874dbb9dd831402029874c76abe610a88..40b8448a5dcc8cd1af9b5bc010f01b04248d2e8a 100644 (file)
@@ -28,6 +28,8 @@
 #define SANLK_ADD_NODELAY      0x00000002
 /* FIXME: copied from sanlock header until the sanlock update is more widespread */
 #define SANLK_GET_HOST_LOCAL   0x00000001
+/* FIXME: copied from sanlock header until the sanlock update is more widespread */
+#define SANLK_LSF_NO_TIMEOUT   0x00000004
 
 #include <stddef.h>
 #include <poll.h>
@@ -175,30 +177,32 @@ int lm_data_size_sanlock(void)
 }
 
 /*
- * lock_args format
- *
- * vg_lock_args format for sanlock is
- * vg_version_string:undefined:lock_lv_name
- *
- * lv_lock_args format for sanlock is
- * lv_version_string:undefined:offset
+ * If a new variant of the lock_args string cannot be
+ * handled by the previous version of lvmlockd, then the
+ * new variant should contain a larger major number.
  *
- * version_string is MAJOR.MINOR.PATCH
- * undefined may contain ":"
+ * VG_LOCK_ARGS_V1 format:
+ * 1.0.0:lvname
  *
- * If a new version of the lock_args string cannot be
- * handled by an old version of lvmlockd, then the
- * new lock_args string should contain a larger major number.
+ * VG_LOCK_ARGS_V2 format:
+ * 2.0.0:lvname:notimeout:persist
+ * 2.0.0:lvname:notimeout
+ * 2.0.0:lvname:persist
  */
 
-#define VG_LOCK_ARGS_MAJOR 1
+#define VG_LOCK_ARGS_MAJOR 2
 #define VG_LOCK_ARGS_MINOR 0
 #define VG_LOCK_ARGS_PATCH 0
 
+#define VG_LOCK_ARGS_V1 "1.0.0"
+#define VG_LOCK_ARGS_V2 "2.0.0"
+
 #define LV_LOCK_ARGS_MAJOR 1
 #define LV_LOCK_ARGS_MINOR 0
 #define LV_LOCK_ARGS_PATCH 0
 
+#define LV_LOCK_ARGS_V1 "1.0.0"
+
 /*
  * offset 0 is lockspace
  * offset align_size * 1 is unused
@@ -241,9 +245,31 @@ static void strcpy_name_len(char *buf, const char *str, size_t len)
        memccpy(buf, str, 0, len);
 }
 
-static int lock_lv_name_from_args(char *vg_args, char *lock_lv_name)
+/*
+ * copy out lvname from lock_args string:
+ * 1.0.0:lvname
+ * 2.0.0:lvname
+ * 2.0.0:lvname:other
+ */
+static int lockd_lockargs_get_locklv(char *vg_args, char *lock_lv_name)
 {
-       return last_string_from_args(vg_args, lock_lv_name);
+       char args[MAX_ARGS+1] = {0};
+       char *p, *name;
+
+       strncpy(args, vg_args, MAX_ARGS);
+
+       if (!(p = strchr(args, ':')))
+               return -1;
+
+       name = p+1;
+       if (!*name)
+               return -1;
+
+       if ((p = strchr(name, ':')))
+               *p = '\0';
+
+       strncpy(lock_lv_name, name, MAX_ARGS);
+       return 0;
 }
 
 static int lock_lv_offset_from_args(char *lv_args, uint64_t *lock_lv_offset)
@@ -269,7 +295,7 @@ static int check_args_version(char *args, unsigned int our_major)
        unsigned int major = 0;
        int rv;
 
-       rv = version_from_args(args, &major, NULL, NULL);
+       rv = lockd_lockargs_get_version(args, &major, NULL, NULL);
        if (rv < 0) {
                log_error("check_args_version %s error %d", args, rv);
                return rv;
@@ -333,13 +359,13 @@ out:
 }
 
 #if LOCKDSANLOCK_SUPPORT >= 410
-static int read_info_file(struct lockspace *ls, uint32_t *host_id, uint64_t *generation, int *sector_size, int *align_size)
+static int read_info_file(char *vg_name, uint32_t *host_id, uint64_t *generation, int *sector_size, int *align_size, int *no_timeout)
 {
        char line[MAX_LINE];
        char path[PATH_MAX] = { 0 };
        FILE *fp;
 
-       if (dm_snprintf(path, sizeof(path), "/var/lib/lvm/lvmlockd_info_%s", ls->vg_name) < 0)
+       if (dm_snprintf(path, sizeof(path), "/var/lib/lvm/lvmlockd_info_%s", vg_name) < 0)
                return -1;
 
        if (!(fp = fopen(path, "r"))) {
@@ -362,11 +388,14 @@ static int read_info_file(struct lockspace *ls, uint32_t *host_id, uint64_t *gen
                } else if (!strncmp(line, "align_size ", 11)) {
                        if (sscanf(line, "align_size %d", align_size) != 1)
                                goto fail;
+               } else if (!strncmp(line, "no_timeout ", 11)) {
+                       if (sscanf(line, "no_timeout %d", no_timeout) != 1)
+                               goto fail;
                }
        }
 
        _fclose(fp, path);
-       log_debug("info file: read %u %llu %d %d", *host_id, (unsigned long long)*generation, *sector_size, *align_size);
+       log_debug("info file: read %u %llu %d %d %d", *host_id, (unsigned long long)*generation, *sector_size, *align_size, *no_timeout);
        return 0;
 
 fail:
@@ -376,14 +405,13 @@ fail:
 }
 #endif
 
-static int write_info_file(struct lockspace *ls)
+static int write_info_file(char *vg_name, uint32_t host_id, uint64_t generation, int sector_size, int align_size, int no_timeout)
 {
-       struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data;
        char path[PATH_MAX] = { 0 };
        FILE *fp;
        time_t t = time(NULL);
 
-       if (dm_snprintf(path, sizeof(path), "/var/lib/lvm/lvmlockd_info_%s", ls->vg_name) < 0)
+       if (dm_snprintf(path, sizeof(path), "/var/lib/lvm/lvmlockd_info_%s", vg_name) < 0)
                return -1;
 
        if (!(fp = fopen(path, "w"))) {
@@ -391,17 +419,18 @@ static int write_info_file(struct lockspace *ls)
                return -1;
        }
 
-       fprintf(fp, "# vg %s %s created %s", ls->vg_name, ls->vg_uuid, ctime(&t));
-       fprintf(fp, "host_id %u\n", ls->host_id);
-       fprintf(fp, "generation %llu\n", (unsigned long long)ls->generation);
-       fprintf(fp, "sector_size %d\n", lms->sector_size);
-       fprintf(fp, "align_size %d\n", lms->align_size);
+       fprintf(fp, "# vg %s created %s", vg_name, ctime(&t));
+       fprintf(fp, "host_id %u\n", host_id);
+       fprintf(fp, "generation %llu\n", (unsigned long long)generation);
+       fprintf(fp, "sector_size %d\n", sector_size);
+       fprintf(fp, "align_size %d\n", align_size);
+       fprintf(fp, "no_timeout %d\n", no_timeout);
 
        if (fflush(fp))
                log_warn("Failed to write/flush %s", path);
        _fclose(fp, path);
 
-       log_debug("info file: wrote %u %llu %d %d", ls->host_id, (unsigned long long)ls->generation, lms->sector_size, lms->align_size);
+       log_debug("info file: wrote %u %llu %d %d %d", host_id, (unsigned long long)generation, sector_size, align_size, no_timeout);
        return 0;
 }
 
@@ -591,7 +620,7 @@ static int _lease_corrupt_error(int rv)
    sanlock encoded this in the lockspace/resource structs on disk. */
 
 static int read_lockspace_info(char *path, uint32_t host_id, int *sector_size, int *align_size, int *align_mb,
-                              uint32_t *ss_flags, uint32_t *rs_flags, struct sanlk_host *hs)
+                              uint32_t *ss_size_flags, uint32_t *rs_size_flags, int *no_timeout, struct sanlk_host *hs)
 {
        struct sanlk_lockspace ss;
        uint32_t io_timeout = 0;
@@ -623,40 +652,43 @@ static int read_lockspace_info(char *path, uint32_t host_id, int *sector_size, i
                *sector_size = 4096;
                *align_mb = 8;
                *align_size = 8 * ONE_MB;
-               *ss_flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN8M;
-               *rs_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M;
+               *ss_size_flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN8M;
+               *rs_size_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M;
 
        } else if ((ss.flags & SANLK_LSF_SECTOR4K) && (ss.flags & SANLK_LSF_ALIGN4M)) {
                *sector_size = 4096;
                *align_mb = 4;
                *align_size = 4 * ONE_MB;
-               *ss_flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN4M;
-               *rs_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN4M;
+               *ss_size_flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN4M;
+               *rs_size_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN4M;
 
        } else if ((ss.flags & SANLK_LSF_SECTOR4K) && (ss.flags & SANLK_LSF_ALIGN2M)) {
                *sector_size = 4096;
                *align_mb = 2;
                *align_size = 2 * ONE_MB;
-               *ss_flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN2M;
-               *rs_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN2M;
+               *ss_size_flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN2M;
+               *rs_size_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN2M;
 
        } else if ((ss.flags & SANLK_LSF_SECTOR4K) && (ss.flags & SANLK_LSF_ALIGN1M)) {
                *sector_size = 4096;
                *align_mb = 1;
                *align_size = ONE_MB;
-               *ss_flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN1M;
-               *rs_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN1M;
+               *ss_size_flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN1M;
+               *rs_size_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN1M;
 
        } else if ((ss.flags & SANLK_LSF_SECTOR512) && (ss.flags & SANLK_LSF_ALIGN1M)) {
                *sector_size = 512;
                *align_mb = 1;
                *align_size = ONE_MB;
-               *ss_flags = SANLK_LSF_SECTOR512 | SANLK_LSF_ALIGN1M;
-               *rs_flags = SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M;
+               *ss_size_flags = SANLK_LSF_SECTOR512 | SANLK_LSF_ALIGN1M;
+               *rs_size_flags = SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M;
        }
 
-       log_debug("read_lockspace_info %s %u found sector_size %d align_size %d",
-                 path, host_id, *sector_size, *align_size);
+       if (ss.flags & SANLK_LSF_NO_TIMEOUT)
+               *no_timeout = 1;
+
+       log_debug("read_lockspace_info %s %u found sector_size %d align_size %d no_timeout %d",
+                 path, host_id, *sector_size, *align_size, *no_timeout);
        return 0;
 }
 
@@ -670,43 +702,52 @@ static int read_lockspace_info(char *path, uint32_t host_id, int *sector_size, i
 
 #define MAX_VERSION 16
 
-int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args, int opt_align_mb)
+int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args, int opt_align_mb, char *other_args)
 {
        struct sanlk_lockspace ss;
        struct sanlk_resourced rd;
        struct sanlk_disk disk;
        char lock_lv_name[MAX_ARGS+1];
-       char lock_args_version[MAX_VERSION+1];
        const char *gl_name = NULL;
+       uint32_t lock_args_flags = 0;
        uint32_t rs_flags;
        uint32_t daemon_version;
        uint32_t daemon_proto;
        uint64_t offset;
        uint64_t dev_size;
+       int no_timeout;
+       int persist;
        int sector_size = 0;
        int align_size = 0;
        int align_mb = 0;
        int i, rv;
 
+       if (other_args && (lockd_lockargs_get_user_flags(other_args, &lock_args_flags) < 0)) {
+               log_error("S %s init_vg_san unknown other args %s", ls_name, other_args);
+               return -EARGS;
+       }
+       no_timeout = (lock_args_flags & LOCKARGS_NOTIMEOUT) ? 1 : 0;
+       persist = (lock_args_flags & LOCKARGS_PERSIST) ? 1 : 0;
+
+#if LOCKDSANLOCK_SUPPORT < 420
+       if (no_timeout || persist) {
+               log_error("S %s init_vg_san sanlock 4.2 required for args %s", ls_name, other_args);
+               return -EARGS;
+       }
+#endif
+
        memset(&ss, 0, sizeof(ss));
        memset(&rd, 0, sizeof(rd));
        memset(&disk, 0, sizeof(disk));
-       memset(lock_args_version, 0, sizeof(lock_args_version));
 
        if (!vg_args || !vg_args[0] || !strcmp(vg_args, "none")) {
                log_error("S %s init_vg_san vg_args missing", ls_name);
                return -EARGS;
        }
 
-       snprintf(lock_args_version, MAX_VERSION, "%u.%u.%u",
-                VG_LOCK_ARGS_MAJOR, VG_LOCK_ARGS_MINOR, VG_LOCK_ARGS_PATCH);
-
        /* see comment above about input vg_args being only lock_lv_name */
        dm_strncpy(lock_lv_name, vg_args, sizeof(lock_lv_name));
 
-       if (strlen(lock_lv_name) + strlen(lock_args_version) + 2 > MAX_ARGS)
-               return -EARGS;
-
        if ((rv = build_dm_path(disk.path, SANLK_PATH_LEN, vg_name, lock_lv_name)))
                return rv;
 
@@ -715,7 +756,7 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar
        if (daemon_test) {
                if (!gl_lsname_sanlock[0])
                        strncpy(gl_lsname_sanlock, ls_name, MAX_NAME);
-               rv = snprintf(vg_args, MAX_ARGS, "%s:%s", lock_args_version, lock_lv_name);
+               rv = snprintf(vg_args, MAX_ARGS, "%s:%s", VG_LOCK_ARGS_V1, lock_lv_name);
                if (rv >= MAX_ARGS)
                        log_debug("init_vg_san vg_args may be too long %d %s", rv, vg_args);
                return 0;
@@ -787,6 +828,9 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar
                return -EARGS;
        }
 
+       if (no_timeout)
+               ss.flags |= SANLK_LSF_NO_TIMEOUT;
+
        rv = sanlock_write_lockspace(&ss, 0, 0, sanlock_io_timeout);
        if (rv < 0) {
                log_error("S %s init_vg_san write_lockspace error %d %s",
@@ -841,15 +885,6 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar
                return rv;
        }
 
-       if (!strcmp(gl_name, R_NAME_GL))
-               dm_strncpy(gl_lsname_sanlock, ls_name, sizeof(gl_lsname_sanlock));
-       rv = snprintf(vg_args, MAX_ARGS, "%s:%s", lock_args_version, lock_lv_name);
-       if (rv >= MAX_ARGS)
-               log_debug("init_vg_san vg_args may be too long %d %s", rv, vg_args);
-
-       log_debug("S %s init_vg_san done vg_args %s", ls_name, vg_args);
-
        /*
         * Go through all lv resource slots and initialize them with the
         * correct lockspace name but a special resource name that indicates
@@ -888,6 +923,25 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar
                offset += align_size;
        }
 
+       if (no_timeout && persist)
+               rv = snprintf(vg_args, MAX_ARGS, "%s:%s:notimeout:persist", VG_LOCK_ARGS_V2, lock_lv_name);
+       else if (no_timeout)
+               rv = snprintf(vg_args, MAX_ARGS, "%s:%s:notimeout", VG_LOCK_ARGS_V2, lock_lv_name);
+       else if (persist)
+               rv = snprintf(vg_args, MAX_ARGS, "%s:%s:persist", VG_LOCK_ARGS_V2, lock_lv_name);
+       else
+               rv = snprintf(vg_args, MAX_ARGS, "%s:%s", VG_LOCK_ARGS_V1, lock_lv_name);
+
+       if (rv >= MAX_ARGS) {
+               log_error("S %s init_vg_san vg_args string too long %d %s", ls_name, rv, vg_args);
+               return -EINVAL;
+       }
+
+       if (!strcmp(gl_name, R_NAME_GL))
+               dm_strncpy(gl_lsname_sanlock, ls_name, sizeof(gl_lsname_sanlock));
+
+       log_debug("S %s init_vg_san done vg_args %s", ls_name, vg_args);
+
        return 0;
 }
 
@@ -905,12 +959,12 @@ int lm_init_lv_sanlock(struct lockspace *ls, char *ls_name, char *vg_name, char
        struct lm_sanlock *lms;
        struct sanlk_resourced rd;
        char lock_lv_name[MAX_ARGS+1];
-       char lock_args_version[MAX_VERSION+1];
        uint64_t offset;
        uint64_t prev_offset = 0;
        int sector_size = 0;
        int align_size = 0;
        int align_mb;
+       int no_timeout = 0;
        uint32_t ss_flags;
        uint32_t rs_flags = 0;
        uint32_t tries = 1;
@@ -918,24 +972,20 @@ int lm_init_lv_sanlock(struct lockspace *ls, char *ls_name, char *vg_name, char
 
        memset(&rd, 0, sizeof(rd));
        memset(lock_lv_name, 0, sizeof(lock_lv_name));
-       memset(lock_args_version, 0, sizeof(lock_args_version));
        memset(disk_path, 0, sizeof(disk_path));
 
-       snprintf(lock_args_version, MAX_VERSION, "%u.%u.%u",
-                LV_LOCK_ARGS_MAJOR, LV_LOCK_ARGS_MINOR, LV_LOCK_ARGS_PATCH);
-
        if (daemon_test) {
                align_size = 1024 * 1024;
                snprintf(lv_args, MAX_ARGS, "%s:%llu",
-                        lock_args_version,
+                        LV_LOCK_ARGS_V1,
                         (unsigned long long)((align_size * LV_LOCK_BEGIN) + (align_size * daemon_test_lv_count)));
                daemon_test_lv_count++;
                return 0;
        }
 
-       rv = lock_lv_name_from_args(vg_args, lock_lv_name);
+       rv = lockd_lockargs_get_locklv(vg_args, lock_lv_name);
        if (rv < 0) {
-               log_error("S %s init_lv_san lock_lv_name_from_args error %d %s",
+               log_error("S %s init_lv_san lockd_lockargs_get_locklv error %d %s",
                          ls_name, rv, vg_args);
                return rv;
        }
@@ -957,7 +1007,7 @@ int lm_init_lv_sanlock(struct lockspace *ls, char *ls_name, char *vg_name, char
 
                /* using host_id 1 to get sizes since we don't need host-specific info */
 
-               rv = read_lockspace_info(disk_path, 1, &sector_size, &align_size, &align_mb, &ss_flags, &rs_flags, NULL);
+               rv = read_lockspace_info(disk_path, 1, &sector_size, &align_size, &align_mb, &ss_flags, &rs_flags, &no_timeout, NULL);
                if (rv < 0) {
                        log_error("S %s init_lv_san read_lockspace_info error %d %s",
                                  ls_name, rv, disk_path);
@@ -1025,7 +1075,7 @@ int lm_init_lv_sanlock(struct lockspace *ls, char *ls_name, char *vg_name, char
                        rv = sanlock_write_resource(&rd.rs, 0, 0, 0);
                        if (!rv) {
                                snprintf(lv_args, MAX_ARGS, "%s:%llu",
-                                        lock_args_version, (unsigned long long)offset);
+                                        LV_LOCK_ARGS_V1, (unsigned long long)offset);
                        } else {
                                log_error("S %s init_lv_san write error %d offset %llu",
                                          ls_name, rv, (unsigned long long)rv);
@@ -1065,9 +1115,9 @@ int lm_rename_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_
                return -EINVAL;
        }
 
-       rv = lock_lv_name_from_args(vg_args, lock_lv_name);
+       rv = lockd_lockargs_get_locklv(vg_args, lock_lv_name);
        if (rv < 0) {
-               log_error("S %s init_lv_san lock_lv_name_from_args error %d %s",
+               log_error("S %s init_lv_san lockd_lockargs_get_locklv error %d %s",
                          ls_name, rv, vg_args);
                return rv;
        }
@@ -1587,6 +1637,7 @@ int lm_prepare_lockspace_sanlock(struct lockspace *ls, uint64_t *prev_generation
        int sector_size = 0;
        int align_size = 0;
        int align_mb = 0;
+       int no_timeout = 0;
        int retries = 0;
        int gl_found;
        int ret, rv;
@@ -1612,9 +1663,9 @@ int lm_prepare_lockspace_sanlock(struct lockspace *ls, uint64_t *prev_generation
                goto fail;
        }
 
-       rv = lock_lv_name_from_args(ls->vg_args, lock_lv_name);
+       rv = lockd_lockargs_get_locklv(ls->vg_args, lock_lv_name);
        if (rv < 0) {
-               log_error("S %s prepare_lockspace_san lock_lv_name_from_args error %d %s",
+               log_error("S %s prepare_lockspace_san lockd_lockargs_get_locklv error %d %s",
                          ls->name, rv, ls->vg_args);
                ret = -EARGS;
                goto fail;
@@ -1711,15 +1762,16 @@ int lm_prepare_lockspace_sanlock(struct lockspace *ls, uint64_t *prev_generation
 #endif
        sector_size = 0;
        align_size = 0;
+       no_timeout = 0;
 
-       rv = read_lockspace_info(disk_path, lms->ss.host_id, &sector_size, &align_size, &align_mb, &ss_flags, &rs_flags, &hs);
+       rv = read_lockspace_info(disk_path, lms->ss.host_id, &sector_size, &align_size, &align_mb, &ss_flags, &rs_flags, &no_timeout, &hs);
 
 #if LOCKDSANLOCK_SUPPORT >= 410
        if ((rv == -ELOCKREPAIR) && repair && !retries) {
                uint64_t generation = 0;
                uint32_t host_id = 0;
 
-               rv = read_info_file(ls, &host_id, &generation, &sector_size, &align_size);
+               rv = read_info_file(ls->vg_name, &host_id, &generation, &sector_size, &align_size, &no_timeout);
                if (rv < 0) {
                        log_error("S %s prepare_lockspace_san cannot repair lockspace no info file", lsname);
                        ret = -EINVAL;
@@ -1750,6 +1802,9 @@ int lm_prepare_lockspace_sanlock(struct lockspace *ls, uint64_t *prev_generation
                        ret = -EINVAL;
                }
 
+               if (no_timeout)
+                       lms->ss.flags |= SANLK_LSF_NO_TIMEOUT;
+
                log_debug("S %s prepare_lockspace_san repair host %u lease", lsname, host_id);
 
                rv = sanlock_init_lockspace_host(&lms->ss, NULL, generation, 0, 0, 0);
@@ -1899,7 +1954,7 @@ int lm_add_lockspace_sanlock(struct lockspace *ls, int adopt_only, int adopt_ok,
 
        free(hs);
 
-       write_info_file(ls);
+       write_info_file(ls->vg_name, ls->host_id, ls->generation, lms->sector_size, lms->align_size, ls->no_timeout);
 
        /*
         * Don't let the lockspace be cleanly released if orphan locks
@@ -2203,6 +2258,7 @@ int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
            rv == SANLK_ACQUIRE_OWNED ||
            rv == SANLK_ACQUIRE_OTHER ||
            rv == SANLK_ACQUIRE_OWNED_RETRY ||
+           rv == SANLK_ACQUIRE_OWNED_NO_TIMEOUT ||
            rv == -EAGAIN) {
 
                /*
@@ -2231,6 +2287,9 @@ int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
                if (rv == SANLK_ACQUIRE_OWNED_RETRY)
                        *retry = 0;
 
+               if (rv == SANLK_ACQUIRE_OWNED_NO_TIMEOUT)
+                       *retry = 0;
+
                if (owner && owner_host.host_id) {
                        const char *host_state;
 
@@ -2421,6 +2480,7 @@ int lm_convert_sanlock(struct lockspace *ls, struct resource *r,
        case SANLK_ACQUIRE_IDLIVE:
        case SANLK_ACQUIRE_OWNED:
        case SANLK_ACQUIRE_OWNED_RETRY:
+       case SANLK_ACQUIRE_OWNED_NO_TIMEOUT:
        case SANLK_ACQUIRE_OTHER:
        case SANLK_AIO_TIMEOUT:
                /* expected errors from known/normal cases like lock contention or io timeouts */
@@ -2729,3 +2789,181 @@ int lm_is_running_sanlock(void)
        return 1;
 }
 
+#if LOCKDSANLOCK_SUPPORT >= 420
+
+static void update_info_file(char *vg_name, int no_timeout_new)
+{
+       uint32_t host_id;
+       uint64_t generation;
+       int sector_size;
+       int align_size;
+       int no_timeout = 0;
+       int rv;
+
+       rv = read_info_file(vg_name, &host_id, &generation, &sector_size, &align_size, &no_timeout);
+       if (rv < 0)
+               return;
+
+       write_info_file(vg_name, host_id, generation, sector_size, align_size, no_timeout_new);
+}
+
+void lm_set_host_dead_sanlock(struct lockspace *ls, struct owner *owner)
+{
+       struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data;
+       struct sanlk_host host = { 0 };
+       int rv;
+
+       log_debug("S %s set_host_dead_sanlock host_id %u gen %llu", ls->name, owner->host_id, (unsigned long long)owner->generation);
+
+       host.host_id = owner->host_id;
+       host.generation = owner->generation;
+
+       rv = sanlock_set_host(&lms->ss, SANLK_SET_HOST_DEAD_EXT, 0, 0, &host);
+       if (rv)
+               log_error("S %s set_host_dead_sanlock host_id %u gen %llu error %d", ls->name, owner->host_id, (unsigned long long)owner->generation, rv);
+}
+
+int lm_setlockargs_supported_sanlock(struct lockspace *ls, struct action *act)
+{
+       uint32_t daemon_version;
+       uint32_t daemon_proto;
+       uint32_t lock_args_flags = 0;
+       uint32_t ver_major, ver_minor;
+       int rv;
+
+       if (!act->other_args[0]) {
+               log_error("S %s setlockargs_supported empty user lock args", ls->name);
+               return 0;
+       }
+
+       if (lockd_lockargs_get_user_flags(act->other_args, &lock_args_flags) < 0) {
+               log_error("S %s setlockargs_supported invalid user lock args %s", ls->name, act->other_args);
+               return 0;
+       }
+
+       if (!(lock_args_flags & LOCKARGS_NOTIMEOUT) && !(lock_args_flags & LOCKARGS_PERSIST))
+               return 1;
+
+       rv = sanlock_version(0, &daemon_version, &daemon_proto);
+       if (rv < 0) {
+               log_error("S %s setlockargs failed to connect to sanlock daemon", ls->name);
+               return 0;
+       }
+
+       log_debug("S %s setlockargs sanlock version 0x%x lock_args_flags 0x%x", ls->name, daemon_version, lock_args_flags);
+
+       ver_major = (daemon_version & 0xFF000000) >> 24;
+       ver_minor = (daemon_version & 0x00FF0000) >> 16;
+
+       /* sanlock 4.2.0 added support for LOCKARGS_NOTIMEOUT or LOCKARGS_PERSIST. */
+
+       if (ver_major < 4)
+               return 0;
+
+       if ((ver_major == 4) && (ver_minor < 2))
+               return 0;
+
+       return 1;
+}
+
+int lm_setlockargs_vg_sanlock(char *ls_name, char *vg_name, struct action *act)
+{
+       struct sanlk_lockspace ss = {0};
+       char lock_lv_name[MAX_ARGS+1] = {0};
+       char disk_path[SANLK_PATH_LEN] = {0};
+       uint32_t ss_size_flags = 0;
+       uint32_t rs_size_flags = 0;
+       uint32_t lock_args_flags = 0;
+       int sector_size = 0;
+       int align_size = 0;
+       int align_mb = 0;
+       int no_timeout = 0;
+       int persist;
+       int rv;
+
+       if (!act->other_args[0]) {
+               log_error("S %s setlockargs empty user lock args", ls_name);
+               return 0;
+       }
+
+       if (lockd_lockargs_get_user_flags(act->other_args, &lock_args_flags) < 0) {
+               log_error("S %s setlockargs invalid user lock args %s", ls_name, act->other_args);
+               return 0;
+       }
+
+       rv = lockd_lockargs_get_locklv(act->vg_args, lock_lv_name);
+       if (rv < 0) {
+               log_error("S %s setlockargs lockd_lockargs_get_locklv error %d %s",
+                         ls_name, rv, act->vg_args);
+               return rv;
+       }
+
+       if ((rv = build_dm_path(disk_path, SANLK_PATH_LEN, vg_name, lock_lv_name)))
+               return rv;
+
+       /* get the sector and align flags from host_id 1 in the current lockspace */
+
+       rv = read_lockspace_info(disk_path, 1, &sector_size, &align_size, &align_mb, &ss_size_flags, &rs_size_flags, &no_timeout, NULL);
+       if (rv < 0) {
+               log_error("S %s setlockargs read_lockspace_info error %d %s", ls_name, rv, disk_path);
+               return rv;
+       }
+
+       /* initialize lockspace */
+
+       no_timeout = (lock_args_flags & LOCKARGS_NOTIMEOUT) ? 1 : 0;
+       persist = (lock_args_flags & LOCKARGS_PERSIST) ? 1 : 0;
+
+       strcpy_name_len(ss.name, ls_name, SANLK_NAME_LEN);
+       memcpy(ss.host_id_disk.path, disk_path, SANLK_PATH_LEN);
+       ss.host_id_disk.offset = 0;
+       ss.flags = ss_size_flags;
+
+       if (no_timeout)
+               ss.flags |= SANLK_LSF_NO_TIMEOUT;
+
+       log_debug("S %s setlockargs write_lockspace no_timeout %d flags 0x%x", ls_name, no_timeout, ss.flags);
+
+       rv = sanlock_write_lockspace(&ss, 0, 0, sanlock_io_timeout);
+       if (rv < 0) {
+               log_error("S %s setlockargs write_lockspace error %d %s", ls_name, rv, ss.host_id_disk.path);
+               return rv;
+       }
+
+       update_info_file(vg_name, no_timeout);
+
+       if (no_timeout && persist)
+               rv = snprintf(act->vg_args, MAX_ARGS, "%s:%s:notimeout:persist", VG_LOCK_ARGS_V2, lock_lv_name);
+       else if (no_timeout)
+               rv = snprintf(act->vg_args, MAX_ARGS, "%s:%s:notimeout", VG_LOCK_ARGS_V2, lock_lv_name);
+       else if (persist)
+               rv = snprintf(act->vg_args, MAX_ARGS, "%s:%s:persist", VG_LOCK_ARGS_V2, lock_lv_name);
+       else
+               rv = snprintf(act->vg_args, MAX_ARGS, "%s:%s", VG_LOCK_ARGS_V1, lock_lv_name);
+
+       log_debug("S %s setlockargs new args %s", ls_name, act->vg_args);
+
+       if (rv >= MAX_ARGS) {
+               log_error("S %s setlockargs vg_args string too long %d %s", ls_name, rv, act->vg_args);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+#else
+
+void lm_set_host_dead_sanlock(struct lockspace *ls, struct owner *owner)
+{
+}
+
+int lm_setlockargs_supported_sanlock(struct lockspace *ls, struct action *act)
+{
+       return 0;
+}
+
+int lm_setlockargs_vg_sanlock(char *ls_name, char *vg_name, struct action *act)
+{
+       return -EINVAL;
+}
+#endif /* LOCKDSANLOCK_SUPPORT >= 420 */
index 2a65e7cfd915ae53f8e0b13e7285608d180c1079..0d041ce9e08715084f4522d04245e49b7f6bdffb 100644 (file)
@@ -813,7 +813,7 @@ int vg_is_registered(struct cmd_context *cmd, struct volume_group *vg, uint64_t
        }
 }
 
-int persist_is_started(struct cmd_context *cmd, struct volume_group *vg, int may_fail)
+int persist_is_started(struct cmd_context *cmd, struct volume_group *vg, int may_fail, uint64_t *our_key_ret)
 {
        struct pv_list *pvl;
        struct device *dev;
@@ -826,6 +826,9 @@ int persist_is_started(struct cmd_context *cmd, struct volume_group *vg, int may
        if (!vg_is_registered(cmd, vg, &our_key_val, &partial))
                goto out;
 
+       if (our_key_ret)
+               *our_key_ret = our_key_val;
+
        if (partial) {
                log_debug("PR is started: partial");
                goto out;
@@ -1093,7 +1096,7 @@ int persist_key_update(struct cmd_context *cmd, struct volume_group *vg, uint32_
 
        /*
         * When using an explicit pr_key setting, there's
-        * not sanlock generation number that needs updating.
+        * no sanlock generation number that needs updating.
         */
        if (local_key)
                return 1;
@@ -1794,6 +1797,117 @@ static int _persist_extend_shared(struct cmd_context *cmd, struct volume_group *
        return error ? 0 : 1;
 }
 
+/*
+ * Remove this host's PR registration from all devices in the VG,
+ * using the key value (our_key_val) the registration was made with.
+ * NOTE(review): presumably used to back out the exclusive PR taken
+ * by persist_upgrade_ex() — confirm against callers.
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+int persist_upgrade_stop(struct cmd_context *cmd, struct volume_group *vg, uint64_t our_key_val)
+{
+       DM_LIST_INIT(devs);
+       char our_key_buf[PR_KEY_BUF_SIZE] = { 0 };
+
+       /* Collect the devices underlying all PVs in the VG. */
+       if (!pv_list_to_dev_list(cmd->mem, &vg->pvs, &devs))
+               return_0;
+
+       /* Format the key as the hex string form used by lvmpersist. */
+       if (dm_snprintf(our_key_buf, PR_KEY_BUF_SIZE-1, "0x%llx", (unsigned long long)our_key_val) < 0)
+               return_0;
+
+       if (!_run_stop(cmd, vg, &devs, our_key_buf, 0))
+               return_0;
+
+       return 1;
+}
+
+/*
+ * Host currently holds a normal sh access PR on shared VG,
+ * and wants to switch to an ex access PR on that VG
+ * (to prevent other hosts from using it while it's making
+ * changes.)
+ */
+
+/*
+ * Returns 1 on success (or when no local PR identity is configured,
+ * in which case nothing is done), 0 on failure.  On success the key
+ * now held exclusively is returned in *our_key_held.
+ */
+int persist_upgrade_ex(struct cmd_context *cmd, struct volume_group *vg, uint64_t *our_key_held)
+{
+       DM_LIST_INIT(devs);
+       struct device_list *devl;
+       char *local_key = (char *)find_config_tree_str(cmd, local_pr_key_CFG, NULL);
+       int local_host_id = find_config_tree_int(cmd, local_host_id_CFG, NULL);
+       char our_key_buf[PR_KEY_BUF_SIZE] = { 0 };
+       char new_key_buf[PR_KEY_BUF_SIZE] = { 0 };
+       uint64_t our_key_val = 0;
+       uint64_t new_key_val = 0;
+       const char *devname;
+       const char **argv;
+       int pv_count;
+       int args;
+       int status;
+
+       /* No local PR identity configured; nothing to upgrade. */
+       if (!local_key && !local_host_id)
+               return 1;
+
+       if (!get_our_key(cmd, vg, local_key, local_host_id, our_key_buf, &our_key_val))
+               return_0;
+
+       if (!pv_list_to_dev_list(cmd->mem, &vg->pvs, &devs))
+               return_0;
+
+       log_debug("persist_upgrade_ex stop PR %s", our_key_buf);
+
+       /* Drop the current sh registration before restarting with ex access. */
+       if (!_run_stop(cmd, vg, &devs, our_key_buf, 0))
+               return_0;
+
+       if (local_key) {
+               /* Explicit local key configured: reuse the same key for the ex PR. */
+               new_key_val = our_key_val;
+               memcpy(new_key_buf, our_key_buf, PR_KEY_BUF_SIZE);
+       } else if (local_host_id) {
+               /* Derive a key from host_id: "0x" plus 16 hex digits = 18 chars. */
+               if (dm_snprintf(new_key_buf, PR_KEY_BUF_SIZE-1, "0x100000000000%04x", local_host_id) != 18) {
+                       log_error("Failed to format key string for host_id %d", local_host_id);
+                       return 0;
+               }
+               if (!parse_prkey(new_key_buf, &new_key_val)) {
+                       log_error("Failed to parse generated key %s", new_key_buf);
+                       return 0;
+               }
+       }
+
+       pv_count = dm_list_size(&devs);
+
+       log_debug("persist_upgrade_ex start PR on %d devs with local key %llx", pv_count, (unsigned long long)new_key_val);
+
+       /* 8 fixed argv entries + optional --ptpl + 2 per device + NULL terminator. */
+       args = 9 + pv_count*2;
+       if (vg->pr & VG_PR_PTPL)
+               args += 1;
+
+       if (!(argv = dm_pool_alloc(cmd->mem, args * sizeof(char *))))
+               return_0;
+
+       args = 0;
+       argv[0] = LVMPERSIST_PATH;
+       argv[++args] = "start";
+       argv[++args] = "--ourkey";
+       argv[++args] = new_key_buf;
+       argv[++args] = "--access";
+       argv[++args] = "ex";
+       argv[++args] = "--vg";
+       argv[++args] = vg->name;
+       if (vg->pr & VG_PR_PTPL)
+               argv[++args] = "--ptpl";
+
+       dm_list_iterate_items(devl, &devs) {
+               if (!(devname = dm_pool_strdup(cmd->mem, dev_name(devl->dev))))
+                       return_0;
+               argv[++args] = "--device";
+               argv[++args] = devname;
+       }
+
+       argv[++args] = NULL;
+
+       if (!exec_cmd(cmd, argv, &status, 1)) {
+               log_error("persistent reservation exclusive start failed: lvmpersist command error.");
+               log_error("(Use vgchange --persist stop to stop PR on other hosts.)");
+               return 0;
+       }
+
+       *our_key_held = new_key_val;
+
+       return 1;
+}
+
 /*
  * Start PR on devices that are being used for vgcreate.
  * This is somewhat awkward because it happens early in
@@ -1817,6 +1931,8 @@ int persist_vgcreate_begin(struct cmd_context *cmd, char *vg_name, char *local_k
        int args;
        int status;
 
+       persist_key_file_remove_name(cmd, vg_name);
+
        if (local_key) {
                if (!parse_prkey(local_key, &our_key_val)) {
                        log_error("Failed to parse local key %s", local_key);
@@ -1883,7 +1999,8 @@ int persist_vgcreate_begin(struct cmd_context *cmd, char *vg_name, char *local_k
  * access PR (typically WE), and starts PR with the normal sh access
  * PR (typically WEAR), allowing other hosts to also use the new VG.
  */
-int persist_vgcreate_update(struct cmd_context *cmd, struct volume_group *vg, uint32_t set_flags)
+int persist_vgcreate_update(struct cmd_context *cmd, struct volume_group *vg, uint32_t set_flags,
+                           uint64_t *our_key_ret)
 {
        DM_LIST_INIT(devs);
        struct device_list *devl;
@@ -1987,9 +2104,13 @@ int persist_vgcreate_update(struct cmd_context *cmd, struct volume_group *vg, ui
                return 0;
        }
 
+       /* key file is an optimization, not an error condition */
        if (!write_key_file(cmd, vg, our_key_val))
                stack;
 
+       if (our_key_ret)
+               *our_key_ret = our_key_val;
+
        return 1;
 }
 
index 461f104c7e1cb12d468d206ae755d6eb90d85e56..6c534d2ccfd9a4aa487e732f331f1e9f3e15771e 100644 (file)
@@ -62,9 +62,13 @@ int persist_start_extend(struct cmd_context *cmd, struct volume_group *vg);
 
 int persist_vgcreate_begin(struct cmd_context *cmd, char *vg_name, char *local_key, int local_host_id,
                           uint32_t set_flags, struct dm_list *devs);
-int persist_vgcreate_update(struct cmd_context *cmd, struct volume_group *vg, uint32_t set_flags);
+int persist_vgcreate_update(struct cmd_context *cmd, struct volume_group *vg, uint32_t set_flags,
+                           uint64_t *our_key_ret);
 
-int persist_is_started(struct cmd_context *cmd, struct volume_group *vg, int may_fail);
+int persist_upgrade_ex(struct cmd_context *cmd, struct volume_group *vg, uint64_t *our_key_held);
+int persist_upgrade_stop(struct cmd_context *cmd, struct volume_group *vg, uint64_t our_key_val);
+
+int persist_is_started(struct cmd_context *cmd, struct volume_group *vg, int may_fail, uint64_t *our_key);
 
 int persist_key_update(struct cmd_context *cmd, struct volume_group *vg, uint32_t prev_gen);
 
index 11e7df477d9a5a1dc80cbbbc7285f73f5c0ae49a..c441ad6da42556e50abc1d3abd86d150fa2d6822 100644 (file)
@@ -117,6 +117,62 @@ void lvmlockd_disconnect(void)
        _lvmlockd_connected = 0;
 }
 
+#define MAX_LOCKARGS 8
+
+/* parse lock_args string for values that may appear in command line --setlockargs */
+
+/*
+ * Parse a comma-separated --setlockargs option string (e.g.
+ * "persist,notimeout") and OR the corresponding LOCKARGS_* bits
+ * into *flags.  The caller zeroes *flags first; this function
+ * only adds bits.
+ *
+ * Returns 1 on success, 0 for an unknown value or an invalid
+ * combination of values.
+ */
+int lockd_lockargs_get_user_flags(const char *str, uint32_t *flags)
+{
+       char buf[PATH_MAX];
+       char *argv[MAX_LOCKARGS];
+       int argc;
+       int i;
+
+       if (!str)
+               return 0;
+       dm_strncpy(buf, str, sizeof(buf));
+
+       split_line(buf, &argc, argv, MAX_LOCKARGS, ',');
+
+       for (i = 0; i < argc; i++) {
+               if (!strcmp(argv[i], "persist"))
+                       *flags |= LOCKARGS_PERSIST;
+               else if (!strcmp(argv[i], "nopersist"))
+                       *flags |= LOCKARGS_NOPERSIST;
+               else if (!strcmp(argv[i], "timeout"))
+                       *flags |= LOCKARGS_TIMEOUT;
+               else if (!strcmp(argv[i], "notimeout"))
+                       *flags |= LOCKARGS_NOTIMEOUT;
+               else {
+                       log_error("Unknown lockargs option value: %s", argv[i]);
+                       return 0;
+               }
+       }
+
+       /* Reject directly contradictory settings. */
+       if (((*flags & LOCKARGS_PERSIST) && (*flags & LOCKARGS_NOPERSIST)) ||
+           ((*flags & LOCKARGS_TIMEOUT) && (*flags & LOCKARGS_NOTIMEOUT))) {
+               log_error("Invalid setlockargs option combination: %s", str);
+               return 0;
+       }
+
+       /*
+        * . nopersist and timeout: default
+        * . persist and notimeout: permitted with setlockargs
+        *
+        * FIXME: when tested, allow
+        * . nopersist and notimeout: requires manual set host dead
+        * . persist and timeout: watchdog still resets host when PR is used
+        */
+       if (((*flags & LOCKARGS_PERSIST) && !(*flags & LOCKARGS_NOTIMEOUT)) ||
+           ((*flags & LOCKARGS_NOTIMEOUT) && !(*flags & LOCKARGS_PERSIST))) {
+               log_error("setlockargs persist and notimeout are currently required together.");
+               return 0;
+       }
+
+       return 1;
+}
+
 /* Translate the result strings from lvmlockd to bit flags. */
 static void _flags_str_to_lockd_flags(const char *flags_str, uint32_t *lockd_flags)
 {
@@ -169,7 +225,7 @@ static char *_owner_str(struct owner *owner)
 #define NO_LOCKD_RESULT (-1000)
 
 static int _lockd_result(struct cmd_context *cmd, const char *req_name, daemon_reply reply,
-                        int *result, uint32_t *lockd_flags, struct owner *owner)
+                        int *result, uint32_t *lockd_flags, struct owner *owner, uint64_t *our_generation)
 {
        int reply_result;
        const char *str;
@@ -206,6 +262,9 @@ static int _lockd_result(struct cmd_context *cmd, const char *req_name, daemon_r
                        owner->name = dm_pool_strdup(cmd->mem, str);
        }
 
+       if (our_generation)
+               *our_generation = (uint64_t)daemon_reply_int(reply, "our_generation", 0);
+
        log_debug("lockd %s result: %d", req_name, reply_result);
        return 1;
 }
@@ -420,7 +479,8 @@ static int _lockd_request(struct cmd_context *cmd,
                          const struct lvmlockd_pvs *lock_pvs,
                          int *result,
                          uint32_t *lockd_flags,
-                         struct owner *owner)
+                         struct owner *owner,
+                         uint64_t *our_generation)
 {
        const char *cmd_name = get_cmd_name();
        daemon_reply reply;
@@ -457,7 +517,7 @@ static int _lockd_request(struct cmd_context *cmd,
                                        "lv_lock_args = %s", lv_lock_args ?: "none",
                                        NULL);
 
-               if (!_lockd_result(cmd, req_name, reply, result, lockd_flags, owner))
+               if (!_lockd_result(cmd, req_name, reply, result, lockd_flags, owner, our_generation))
                        goto fail;
 
                /*
@@ -477,7 +537,7 @@ static int _lockd_request(struct cmd_context *cmd,
                                        "vg_lock_args = %s", vg_lock_args ?: "none",
                                        NULL);
 
-               if (!_lockd_result(cmd, req_name, reply, result, lockd_flags, owner))
+               if (!_lockd_result(cmd, req_name, reply, result, lockd_flags, owner, our_generation))
                        goto fail;
 
                /*
@@ -495,7 +555,7 @@ static int _lockd_request(struct cmd_context *cmd,
                                        "vg_lock_type = %s", vg_lock_type ?: "none",
                                        NULL);
 
-               if (!_lockd_result(cmd, req_name, reply, result, lockd_flags, owner))
+               if (!_lockd_result(cmd, req_name, reply, result, lockd_flags, owner, our_generation))
                        goto fail;
 
                log_debug("lockd %s %s result %d %x",
@@ -766,7 +826,7 @@ static int _handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
                        "lv_size_bytes = " FMTd64, (int64_t) lv_size_bytes,
                        NULL);
 
-       if (!_lockd_result(cmd, "find_free_lock", reply, &result, NULL, NULL)) {
+       if (!_lockd_result(cmd, "find_free_lock", reply, &result, NULL, NULL, NULL)) {
                ret = 0;
        } else {
                ret = (result < 0) ? 0 : 1;
@@ -821,7 +881,7 @@ static int _init_vg(struct cmd_context *cmd, struct volume_group *vg,
                                "vg_lock_type = %s", lock_type,
                                NULL);
 
-       if (!_lockd_result(cmd, "init_vg", reply, &result, NULL, NULL)) {
+       if (!_lockd_result(cmd, "init_vg", reply, &result, NULL, NULL, NULL)) {
                ret = 0;
                result = -ELOCKD;
        } else {
@@ -892,7 +952,7 @@ static int _init_vg_idm(struct cmd_context *cmd, struct volume_group *vg)
        return _init_vg(cmd, vg, "idm");
 }
 
-static int _init_vg_sanlock(struct cmd_context *cmd, struct volume_group *vg, int lv_lock_count)
+static int _init_vg_sanlock(struct cmd_context *cmd, struct volume_group *vg, int lv_lock_count, const char *set_args)
 {
        daemon_reply reply;
        const char *reply_str;
@@ -908,9 +968,9 @@ static int _init_vg_sanlock(struct cmd_context *cmd, struct volume_group *vg, in
        int ret;
 
        if (!_use_lvmlockd)
-               return 0;
+               return_0;
        if (!_lvmlockd_connected)
-               return 0;
+               return_0;
 
        /*
         * We need the sector size to know what size to create the LV,
@@ -1014,11 +1074,12 @@ static int _init_vg_sanlock(struct cmd_context *cmd, struct volume_group *vg, in
                                "vg_name = %s", vg->name,
                                "vg_lock_type = %s", "sanlock",
                                "vg_lock_args = %s", vg->sanlock_lv->name,
+                               "set_lock_args = %s", set_args ?: "none",
                                "align_mb = " FMTd64, (int64_t) align_size,
                                "opts = %s", opts ?: "none",
                                NULL);
 
-       if (!_lockd_result(cmd, "init_vg", reply, &result, NULL, NULL)) {
+       if (!_lockd_result(cmd, "init_vg", reply, &result, NULL, NULL, NULL)) {
                ret = 0;
                result = -ELOCKD;
        } else {
@@ -1120,7 +1181,7 @@ static int _free_vg(struct cmd_context *cmd, struct volume_group *vg)
                                "vg_lock_args = %s", vg->lock_args,
                                NULL);
 
-       if (!_lockd_result(cmd, "free_vg", reply, &result, &lockd_flags, NULL)) {
+       if (!_lockd_result(cmd, "free_vg", reply, &result, &lockd_flags, NULL, NULL)) {
                ret = 0;
        } else {
                ret = (result < 0) ? 0 : 1;
@@ -1181,7 +1242,7 @@ int lockd_vg_is_busy(struct cmd_context *cmd, struct volume_group *vg)
                                "vg_lock_args = %s", vg->lock_args,
                                NULL);
 
-       if (!_lockd_result(cmd, "busy_vg", reply, &result, &lockd_flags, NULL)) {
+       if (!_lockd_result(cmd, "busy_vg", reply, &result, &lockd_flags, NULL, NULL)) {
                ret = 1;
                goto out;
        }
@@ -1244,7 +1305,7 @@ static int _free_vg_sanlock(struct cmd_context *cmd, struct volume_group *vg)
                                "vg_lock_args = %s", vg->lock_args,
                                NULL);
 
-       if (!_lockd_result(cmd, "free_vg", reply, &result, &lockd_flags, NULL)) {
+       if (!_lockd_result(cmd, "free_vg", reply, &result, &lockd_flags, NULL, NULL)) {
                ret = 0;
        } else {
                ret = (result < 0) ? 0 : 1;
@@ -1301,7 +1362,7 @@ static int _free_vg_sanlock(struct cmd_context *cmd, struct volume_group *vg)
 /* vgcreate */
 
 int lockd_init_vg(struct cmd_context *cmd, struct volume_group *vg,
-                 const char *lock_type, int lv_lock_count)
+                 const char *lock_type, int lv_lock_count, const char *set_args)
 {
        switch (get_lock_type_from_string(lock_type)) {
        case LOCK_TYPE_NONE:
@@ -1311,7 +1372,7 @@ int lockd_init_vg(struct cmd_context *cmd, struct volume_group *vg,
        case LOCK_TYPE_DLM:
                return _init_vg_dlm(cmd, vg);
        case LOCK_TYPE_SANLOCK:
-               return _init_vg_sanlock(cmd, vg, lv_lock_count);
+               return _init_vg_sanlock(cmd, vg, lv_lock_count, set_args);
        case LOCK_TYPE_IDM:
                return _init_vg_idm(cmd, vg);
        default:
@@ -1437,7 +1498,7 @@ void lockd_free_vg_final(struct cmd_context *cmd, struct volume_group *vg)
  * lock the vg, read/use/write the vg, unlock the vg.
  */
 
-int lockd_start_vg(struct cmd_context *cmd, struct volume_group *vg, int *exists)
+int lockd_start_vg(struct cmd_context *cmd, struct volume_group *vg, uint64_t our_key, int *exists)
 {
        char uuid[64] __attribute__((aligned(8)));
        const char *opts = NULL;
@@ -1515,6 +1576,7 @@ int lockd_start_vg(struct cmd_context *cmd, struct volume_group *vg, int *exists
                                "vg_uuid = %s", uuid[0] ? uuid : "none",
                                "version = " FMTd64, (int64_t) vg->seqno,
                                "host_id = " FMTd64, (int64_t) host_id,
+                               "our_key = " FMTd64, (int64_t) our_key,
                                "opts = %s", opts ?:  "none",
                                NULL);
                _lockd_free_pv_list(&lock_pvs);
@@ -1528,11 +1590,12 @@ int lockd_start_vg(struct cmd_context *cmd, struct volume_group *vg, int *exists
                                "vg_uuid = %s", uuid[0] ? uuid : "none",
                                "version = " FMTd64, (int64_t) vg->seqno,
                                "host_id = " FMTd64, (int64_t) host_id,
+                               "our_key = " FMTd64, (int64_t) our_key,
                                "opts = %s", opts ?:  "none",
                                NULL);
        }
 
-       if (!_lockd_result(cmd, "start_vg", reply, &result, &lockd_flags, NULL)) {
+       if (!_lockd_result(cmd, "start_vg", reply, &result, &lockd_flags, NULL, NULL)) {
                ret = 0;
                result = -ELOCKD;
        } else {
@@ -1622,7 +1685,7 @@ int lockd_stop_vg(struct cmd_context *cmd, struct volume_group *vg)
                        "vg_name = %s", vg->name,
                        NULL);
 
-       if (!_lockd_result(cmd, "stop_vg", reply, &result, NULL, NULL)) {
+       if (!_lockd_result(cmd, "stop_vg", reply, &result, NULL, NULL, NULL)) {
                ret = 0;
        } else {
                ret = (result < 0) ? 0 : 1;
@@ -1668,7 +1731,7 @@ int lockd_start_wait(struct cmd_context *cmd)
                        "pid = " FMTd64, (int64_t) getpid(),
                        NULL);
 
-       if (!_lockd_result(cmd, "start_wait", reply, &result, NULL, NULL)) {
+       if (!_lockd_result(cmd, "start_wait", reply, &result, NULL, NULL, NULL)) {
                ret = 0;
        } else {
                ret = (result < 0) ? 0 : 1;
@@ -1787,7 +1850,7 @@ int lockd_global_create(struct cmd_context *cmd, const char *def_mode, const cha
  req:
        if (!_lockd_request(cmd, "lock_gl",
                              NULL, vg_lock_type, NULL, NULL, NULL, NULL, mode, NULL,
-                             NULL, &result, &lockd_flags, &owner)) {
+                             NULL, &result, &lockd_flags, &owner, NULL)) {
                /* No result from lvmlockd, it is probably not running. */
                log_error("Global lock failed: check that lvmlockd is running.");
                return 0;
@@ -2051,7 +2114,7 @@ int lockd_global(struct cmd_context *cmd, const char *def_mode)
 
        if (!_lockd_request(cmd, "lock_gl",
                            NULL, NULL, NULL, NULL, NULL, NULL, mode, opts,
-                           NULL, &result, &lockd_flags, &owner)) {
+                           NULL, &result, &lockd_flags, &owner, NULL)) {
                /* No result from lvmlockd, it is probably not running. */
 
                /* We don't care if an unlock fails. */
@@ -2288,6 +2351,7 @@ int lockd_vg(struct cmd_context *cmd, const char *vg_name, const char *def_mode,
             uint32_t flags, uint32_t *lockd_state)
 {
        struct owner owner = { 0 };
+       uint64_t our_generation = 0;
        char opt_buf[64] = {};
        const char *mode = NULL;
        const char *opts = NULL;
@@ -2402,7 +2466,7 @@ int lockd_vg(struct cmd_context *cmd, const char *vg_name, const char *def_mode,
 
        if (!_lockd_request(cmd, "lock_vg",
                              vg_name, NULL, NULL, NULL, NULL, NULL, mode, opts,
-                             NULL, &result, &lockd_flags, &owner)) {
+                             NULL, &result, &lockd_flags, &owner, &our_generation)) {
                /*
                 * No result from lvmlockd, it is probably not running.
                 * Decide if it is ok to continue without a lock in
@@ -2615,7 +2679,7 @@ out:
         */
        if ((lockd_flags & LD_RF_DUP_GL_LS) && strcmp(mode, "un"))
                log_warn("Duplicate sanlock global lock in VG %s", vg_name);
+
        return ret;
 }
 
@@ -2660,7 +2724,7 @@ int lockd_vg_update(struct volume_group *vg)
                                "version = " FMTd64, (int64_t) vg->seqno,
                                NULL);
 
-       if (!_lockd_result(vg->cmd, "vg_update", reply, &result, NULL, NULL)) {
+       if (!_lockd_result(vg->cmd, "vg_update", reply, &result, NULL, NULL, NULL)) {
                ret = 0;
        } else {
                ret = (result < 0) ? 0 : 1;
@@ -2674,6 +2738,7 @@ int lockd_vg_is_started(struct cmd_context *cmd, struct volume_group *vg, uint32
 {
        daemon_reply reply;
        struct owner owner = { 0 };
+       uint64_t our_generation = 0;
        int result;
        int ret = 0;
 
@@ -2691,7 +2756,7 @@ int lockd_vg_is_started(struct cmd_context *cmd, struct volume_group *vg, uint32
                                "vg_name = %s", vg->name,
                                NULL);
 
-       if (!_lockd_result(vg->cmd, "vg_status", reply, &result, NULL, &owner)) {
+       if (!_lockd_result(vg->cmd, "vg_status", reply, &result, NULL, &owner, &our_generation)) {
                log_debug("lockd_vg_status %s no result", vg->name);
                goto out;
        }
@@ -2701,6 +2766,16 @@ int lockd_vg_is_started(struct cmd_context *cmd, struct volume_group *vg, uint32
                goto out;
        }
 
+       /*
+        * The local host generation number is returned
+        * in both fields, they should always match.
+        */
+       if (our_generation && owner.generation &&
+           ((uint32_t)our_generation != owner.generation)) {
+               log_warn("WARNING: lvmlockd local host generation mismatch %llu vs %u",
+                        (unsigned long long)our_generation, owner.generation);
+       }
+
        log_debug("lockd_vg_status %s host_id %u gen %u",
                  vg->name, owner.host_id, owner.generation);
 
@@ -2734,7 +2809,7 @@ static int _query_lv(struct cmd_context *cmd, struct volume_group *vg,
                                "lv_lock_args = %s", lock_args ?: "none",
                                NULL);
 
-       if (!_lockd_result(cmd, "query_lock_lv", reply, &result, NULL, NULL)) {
+       if (!_lockd_result(cmd, "query_lock_lv", reply, &result, NULL, NULL, NULL)) {
                /* No result from lvmlockd, it is probably not running. */
                log_error("Lock query failed for LV %s/%s", vg->name, lv_name);
                return 0;
@@ -2807,6 +2882,7 @@ int lockd_lv_name(struct cmd_context *cmd, struct volume_group *vg,
        const char *opts = NULL;
        const char *mode = NULL;
        uint32_t lockd_flags;
+       uint64_t our_generation = 0;
        int refreshed = 0;
        int result;
        struct lvmlockd_pvs lock_pvs;
@@ -2905,7 +2981,7 @@ int lockd_lv_name(struct cmd_context *cmd, struct volume_group *vg,
                if (!_lockd_request(cmd, "lock_lv",
                                       vg->name, vg->lock_type, vg->lock_args,
                                       lv_name, lv_uuid, lock_args, mode, opts,
-                                      &lock_pvs, &result, &lockd_flags, NULL)) {
+                                      &lock_pvs, &result, &lockd_flags, NULL, NULL)) {
                        _lockd_free_pv_list(&lock_pvs);
                        /* No result from lvmlockd, it is probably not running. */
                        log_error("Locking failed for LV %s/%s", vg->name, lv_name);
@@ -2916,7 +2992,7 @@ int lockd_lv_name(struct cmd_context *cmd, struct volume_group *vg,
                if (!_lockd_request(cmd, "lock_lv",
                                       vg->name, vg->lock_type, vg->lock_args,
                                       lv_name, lv_uuid, lock_args, mode, opts,
-                                      NULL, &result, &lockd_flags, &owner)) {
+                                      NULL, &result, &lockd_flags, &owner, &our_generation)) {
                        /* No result from lvmlockd, it is probably not running. */
                        log_error("Locking failed for LV %s/%s", vg->name, lv_name);
                        return 0;
@@ -3846,7 +3922,7 @@ static int _init_lv_sanlock(struct cmd_context *cmd, struct volume_group *vg,
                                "vg_lock_args = %s", vg->lock_args,
                                NULL);
 
-       if (!_lockd_result(cmd, "init_lv", reply, &result, NULL, NULL)) {
+       if (!_lockd_result(cmd, "init_lv", reply, &result, NULL, NULL, NULL)) {
                ret = 0;
        } else {
                ret = (result < 0) ? 0 : 1;
@@ -3921,7 +3997,7 @@ static int _free_lv(struct cmd_context *cmd, struct volume_group *vg,
                                "lv_lock_args = %s", lock_args ?: "none",
                                NULL);
 
-       if (!_lockd_result(cmd, "free_lv", reply, &result, NULL, NULL)) {
+       if (!_lockd_result(cmd, "free_lv", reply, &result, NULL, NULL, NULL)) {
                ret = 0;
        } else {
                ret = (result < 0) ? 0 : 1;
@@ -4186,7 +4262,7 @@ int lockd_rename_vg_before(struct cmd_context *cmd, struct volume_group *vg)
                        "vg_lock_args = %s", vg->lock_args,
                        NULL);
 
-       if (!_lockd_result(cmd, "rename_vg_before", reply, &result, NULL, NULL)) {
+       if (!_lockd_result(cmd, "rename_vg_before", reply, &result, NULL, NULL, NULL)) {
                ret = 0;
        } else {
                ret = (result < 0) ? 0 : 1;
@@ -4231,7 +4307,7 @@ int lockd_rename_vg_final(struct cmd_context *cmd, struct volume_group *vg, int
                 * Depending on the problem that caused the rename to
                 * fail, it may make sense to not restart the VG here.
                 */
-               if (!lockd_start_vg(cmd, vg, NULL))
+               if (!lockd_start_vg(cmd, vg, 0, NULL))
                        log_error("Failed to restart VG %s lockspace.", vg->name);
                return 1;
        }
@@ -4251,7 +4327,7 @@ int lockd_rename_vg_final(struct cmd_context *cmd, struct volume_group *vg, int
                                "vg_lock_args = %s", vg->lock_args,
                                NULL);
 
-               if (!_lockd_result(cmd, "rename_vg_final", reply, &result, NULL, NULL)) {
+               if (!_lockd_result(cmd, "rename_vg_final", reply, &result, NULL, NULL, NULL)) {
                        ret = 0;
                } else {
                        ret = (result < 0) ? 0 : 1;
@@ -4271,7 +4347,7 @@ int lockd_rename_vg_final(struct cmd_context *cmd, struct volume_group *vg, int
                }
        }
 
-       if (!lockd_start_vg(cmd, vg, NULL))
+       if (!lockd_start_vg(cmd, vg, 0, NULL))
                log_error("Failed to start VG %s lockspace.", vg->name);
 
        return 1;
@@ -4292,7 +4368,7 @@ const char *lockd_running_lock_type(struct cmd_context *cmd, int *found_multiple
                        "pid = " FMTd64, (int64_t) getpid(),
                        NULL);
 
-       if (!_lockd_result(cmd, "running_lm", reply, &result, NULL, NULL)) {
+       if (!_lockd_result(cmd, "running_lm", reply, &result, NULL, NULL, NULL)) {
                log_error("Failed to get result from lvmlockd");
                goto out;
        }
@@ -4413,7 +4489,7 @@ int lockd_lv_refresh(struct cmd_context *cmd, struct lvresize_params *lp)
                                "path = %s", path,
                                NULL);
 
-       if (!_lockd_result(cmd, "refresh_lv", reply, &result, NULL, NULL)) {
+       if (!_lockd_result(cmd, "refresh_lv", reply, &result, NULL, NULL, NULL)) {
                /* No result from lvmlockd, it is probably not running. */
                log_error("LV refresh failed for LV %s", path);
                return 0;
@@ -4487,3 +4563,171 @@ void lockd_lockopt_get_flags(const char *str, uint32_t *flags)
                        log_warn("Ignoring unknown lockopt value: %s", argv[i]);
        }
 }
+
+int lockd_setlockargs(struct cmd_context *cmd, struct volume_group *vg, const char *set_args, uint64_t *our_key_held)
+{
+       daemon_reply reply;
+       const char *reply_str;
+       const char *vg_lock_args = NULL;
+       uint32_t lockd_flags = 0;
+       uint32_t lock_args_flags = 0;
+       int result;
+       int ret;
+
+       if (!_use_lvmlockd) {
+               log_error("lvmlockd is not in use.");
+               return 0;
+       }
+       if (!_lvmlockd_connected) {
+               log_error("lvmlockd is not connected.");
+               return 0;
+       }
+
+       if (!vg->lock_type || strcmp(vg->lock_type, "sanlock")) {
+               log_error("setlockargs is only supported for lock type sanlock.");
+               return 0;
+       }
+
+       if (!set_args)
+               return_0;
+
+       if (!lockd_lockargs_get_user_flags(set_args, &lock_args_flags))
+               return_0;
+
+       if ((lock_args_flags & LOCKARGS_PERSIST) && !(vg->pr & VG_PR_REQUIRE)) {
+               log_error("lockargs \"persist\" requires persistent reservation setting \"require\".");
+               return 0;
+       }
+
+       /*
+        * Check if other PR keys are registered, which would
+        * cause the persist_upgrade_ex below to fail.
+        */
+       if (vg->pr & (VG_PR_REQUIRE | VG_PR_AUTOSTART)) {
+               struct pv_list *pvl;
+               struct device *dev;
+               int key_count;
+
+               dm_list_iterate_items(pvl, &vg->pvs) {
+                       if (!(dev = pvl->pv->dev))
+                               continue;
+                       if (dm_list_empty(&dev->aliases))
+                               continue;
+                       if (!dev_find_key(cmd, dev, 0, 0, NULL, 0, NULL, 1, &key_count, NULL)) {
+                               /* Shouldn't happen if persist_is_started already passed. */
+                               log_error("No PR key found on %s.", dev_name(dev));
+                               return 0;
+                       }
+                       if (key_count != 1) {
+                               log_error("Found %d PR keys on %s, stop PR and lockspace on other hosts.", key_count, dev_name(dev));
+                               log_error("(See vgchange --lockstop --persist stop.)");
+                               return 0;
+                       }
+               }
+       }
+
+       /*
+        * setlockargs_before checks that sanlock version supports
+        * the new set_lock_args, checks that no LV locks are held,
+        * checks we are the only host in the lockspace, and stops
+        * the lockspace.
+        */
+
+       log_debug("lockd setlockargs_vg_before %s", vg->name);
+
+       reply = _lockd_send("setlockargs_vg_before",
+                               "pid = " FMTd64, (int64_t) getpid(),
+                               "vg_name = %s", vg->name,
+                               "vg_lock_type = %s", vg->lock_type,
+                               "vg_lock_args = %s", vg->lock_args,
+                               "set_lock_args = %s", set_args,
+                               NULL);
+
+       if (!_lockd_result(cmd, "setlockargs_vg_before", reply, &result, &lockd_flags, NULL, NULL)) {
+               ret = 0;
+               goto out;
+       }
+
+       if (result == -EBUSY) {
+               log_error("Lockspace for \"%s\" not stopped on other hosts", vg->name);
+               ret = 0;
+               goto out;
+       } else if (result < 0) {
+               log_error("Lockspace setlockargs error %d for \"%s\"", result, vg->name);
+               ret = 0;
+               goto out;
+       }
+
+       daemon_reply_destroy(reply);
+
+       /*
+        * When the VG has the ability to use PR, change the
+        * current PR to an exclusive mode (WE), using a key
+        * with our host_id and gen 0.  The exclusive PR protects
+        * the VG from other hosts while the locking parameters
+        * are being changed (since locking can't be used while
+        * the locking is being changed.)  The lockspace is stopped
+        * while it's being changed.  At the end of the vgchange
+        * setlockargs command, persist_upgrade_stop() releases
+        * the exclusive PR.  After this, any host can do a normal
+        * start of PR/locking using the new lockargs.
+        */
+       if (vg->pr & (VG_PR_REQUIRE | VG_PR_AUTOSTART)) {
+               if (!persist_upgrade_ex(cmd, vg, our_key_held)) {
+                       log_error("Failed to upgrade to exclusive PR.");
+                       log_error("Restart PR and locking to retry setlockargs.");
+                       return 0;
+               }
+       }
+
+       /*
+        * setlockargs_final reformats sanlock leases on the lvmlock LV.
+        * The host generation numbers will all be reset back to 0, and
+        * the PR keys containing the gen will start over from gen 1.
+        * lvmlockd returns a new lock_args string that this command
+        * writes in VG metadata.
+        */
+
+ retry_final:
+       log_debug("lockd setlockargs_vg_final %s", vg->name);
+
+       reply = _lockd_send("setlockargs_vg_final",
+                               "pid = " FMTd64, (int64_t) getpid(),
+                               "vg_name = %s", vg->name,
+                               "vg_lock_type = %s", vg->lock_type,
+                               "vg_lock_args = %s", vg->lock_args,
+                               "set_lock_args = %s", set_args,
+                               NULL);
+
+       if (!_lockd_result(cmd, "setlockargs_vg_final", reply, &result, &lockd_flags, NULL, NULL)) {
+               ret = 0;
+               goto out;
+       }
+
+       if (result == -EAGAIN) {
+               daemon_reply_destroy(reply);
+               sleep(1);
+               goto retry_final;
+       }
+
+       if (!(reply_str = daemon_reply_str(reply, "vg_lock_args", NULL))) {
+               log_error("VG %s setlockargs failed: result %d new lock_args not returned", vg->name, result);
+               ret = 0;
+               goto out;
+       }
+
+       if (!(vg_lock_args = dm_pool_strdup(cmd->mem, reply_str))) {
+               ret = 0;
+               goto out;
+       }
+
+       log_debug("lockd setlockargs_vg %s result %d new lock_args %s", vg->name, result, vg_lock_args);
+
+       vg->lock_args = vg_lock_args;
+       ret = 1;
+
+out:
+       daemon_reply_destroy(reply);
+       return ret;
+}
+
index dc196765a8ad6024711a370a57a4de12003148bd..26ed5cd12388d7c3c0ccf199c6b52a2d48ade186 100644 (file)
@@ -14,6 +14,7 @@
 #include "libdaemon/client/config-util.h"
 #include "libdaemon/client/daemon-client.h"
 #include "lib/metadata/metadata-exported.h" /* is_lockd_type() */
+#include "daemons/lvmlockd/lvmlockd-client.h"
 
 #define LOCKD_SANLOCK_LV_NAME "lvmlock"
 
@@ -66,6 +67,7 @@
 #ifdef LVMLOCKD_SUPPORT
 
 void lockd_lockopt_get_flags(const char *str, uint32_t *flags);
+int lockd_lockargs_get_user_flags(const char *str, uint32_t *flags);
 
 struct lvresize_params;
 struct lvcreate_params;
@@ -82,7 +84,8 @@ void lvmlockd_disconnect(void);
 
 /* vgcreate/vgremove use init/free */
 
-int lockd_init_vg(struct cmd_context *cmd, struct volume_group *vg, const char *lock_type, int lv_lock_count);
+int lockd_init_vg(struct cmd_context *cmd, struct volume_group *vg,
+                  const char *lock_type, int lv_lock_count, const char *set_args);
 int lockd_free_vg_before(struct cmd_context *cmd, struct volume_group *vg, int changing, int yes);
 void lockd_free_vg_final(struct cmd_context *cmd, struct volume_group *vg);
 
@@ -93,7 +96,7 @@ int lockd_rename_vg_final(struct cmd_context *cmd, struct volume_group *vg, int
 
 /* start and stop the lockspace for a vg */
 
-int lockd_start_vg(struct cmd_context *cmd, struct volume_group *vg, int *exists);
+int lockd_start_vg(struct cmd_context *cmd, struct volume_group *vg, uint64_t our_key, int *exists);
 int lockd_stop_vg(struct cmd_context *cmd, struct volume_group *vg);
 int lockd_start_wait(struct cmd_context *cmd);
 int lockd_vg_is_started(struct cmd_context *cmd, struct volume_group *vg, uint32_t *cur_gen);
@@ -142,12 +145,19 @@ void lockd_lvcreate_done(struct cmd_context *cmd, struct volume_group *vg, struc
 int lockd_lvremove_lock(struct cmd_context *cmd, struct logical_volume *lv, struct logical_volume **lv_other, int *other_unlock);
 void lockd_lvremove_done(struct cmd_context *cmd, struct logical_volume *lv, struct logical_volume *lv_other, int other_unlock);
 
+int lockd_setlockargs(struct cmd_context *cmd, struct volume_group *vg, const char *set_args, uint64_t *our_key_held);
+
 #else /* LVMLOCKD_SUPPORT */
 
 static inline void lockd_lockopt_get_flags(const char *str, uint32_t *flags)
 {
 }
 
+static inline int lockd_lockargs_get_user_flags(const char *str, uint32_t *flags)
+{
+       return 0;
+}
+
 static inline void lvmlockd_set_socket(const char *sock)
 {
 }
@@ -173,7 +183,8 @@ static inline int lvmlockd_use(void)
        return 0;
 }
 
-static inline int lockd_init_vg(struct cmd_context *cmd, struct volume_group *vg, const char *lock_type, int lv_lock_count)
+static inline int lockd_init_vg(struct cmd_context *cmd, struct volume_group *vg,
+                  const char *lock_type, int lv_lock_count, const char *set_args)
 {
        return 1;
 }
@@ -345,6 +356,11 @@ static inline int lockd_vg_is_busy(struct cmd_context *cmd, struct volume_group
        return 0;
 }
 
+static inline int lockd_setlockargs(struct cmd_context *cmd, struct volume_group *vg, const char *set_args, uint64_t *our_key_held)
+{
+       return 0;
+}
+
 #endif /* LVMLOCKD_SUPPORT */
 
 #endif /* _LVMLOCKD_H */
index 096d26b42a70aa7a90341a2d86e96cc369c7f965..071f3806738ec089140dcf3b9fa1961c4a45984d 100644 (file)
@@ -2230,16 +2230,6 @@ static int _validate_lock_args_chars(const char *lock_args)
        return r;
 }
 
-static int _validate_vg_lock_args(struct volume_group *vg)
-{
-       if (!vg->lock_args || !_validate_lock_args_chars(vg->lock_args)) {
-               log_error(INTERNAL_ERROR "VG %s has invalid lock_args chars", vg->name);
-               return 0;
-       }
-
-       return 1;
-}
-
 /*
  * For lock_type sanlock, LV lock_args are <version>:<info>
  * For lock_type dlm, LV lock_args are not used, and lock_args is
@@ -2606,8 +2596,6 @@ int vg_validate(struct volume_group *vg)
                        r = 0;
                }
 
-               if (!_validate_vg_lock_args(vg))
-                       r = 0;
        } else {
                if (vg->lock_args) {
                        log_error(INTERNAL_ERROR "VG %s has lock_args %s without lock_type",
@@ -5150,7 +5138,7 @@ struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name, const
        }
 
        if ((vg->pr & VG_PR_REQUIRE) && (writing || activating) && !cmd->disable_pr_required) {
-               if (!persist_is_started(cmd, vg, 0)) {
+               if (!persist_is_started(cmd, vg, 0, NULL)) {
                        failure |= FAILED_PR_REQUIRED;
                        goto_bad;
                }
index 621224df7822111cc9f0369c3a65e2bf5509b05e..4e75f5b0f4446d6616f22e42f97a815a368595df 100644 (file)
@@ -157,6 +157,15 @@ Create a shared VG from one host (uses the running lock manager):
 .I VG
 .I devices
 .P
+Include vgcreate options to use Persistent Reservations (sanlock only):
+.br
+.B --setpersist y --setlockargs persist,notimeout
+.P
+Start Persistent Reservations (if they are used):
+.br
+.B $ vgchange --persist start
+.I VG
+.P
 Start the lockspace for the shared VG on all hosts:
 .br
 .B $ vgchange --lockstart
@@ -170,7 +179,7 @@ Regular shutdown steps:
 .br
     $ vgchange -an VG
 .br
-    $ vgchange --lockstop VG
+    $ vgchange --lockstop [--persist stop] VG
 .br
     $ stop lvmlockd and lock manager
 .br
@@ -179,7 +188,7 @@ Regular startup steps:
 .br
     $ start lvmlockd and lock manager
 .br
-    $ vgchange --lockstart VG
+    $ vgchange --lockstart [--persist start] VG
 .P
 .
 .SH SETUP DETAILS
@@ -252,6 +261,25 @@ to begin using locks (i.e. creating and joining a lockspace). Starting the
 VG may take some time, and until the start completes the VG may not be
 modified or activated. When shutting down, the lockspace is stopped with
 vgchange --lockstop VG.
+.P
+.B Persistent Reservations
+.br
+A shared VG with locktype sanlock can take advantage of Persistent
+Reservations (PR) for faster and more reliable recovery. This
+requires that all of the shared devices in the VG support PR.  Test
+if PR is supported by a device with the command:
+.br
+.B $ lvmpersist devtest --device
+.I device
+.P
+The vgcreate command options when enabling PR recovery with sanlock:
+.br
+.B $ vgcreate --shared --setpersist y --setlockargs persist,notimeout
+.P
+When enabled, PR needs to be started for the VG before locking:
+.br
+.B $ vgchange --persist start
+.I VG
 .
 .SH TOPICS
 .
@@ -310,6 +338,53 @@ $ vgs --shared
   vgfoo   1   0   0 wz--ns 992.00m 736.00m
 .fi
 .
+.SS Persistent Reservations
+.
+To enable PR-based recovery ("fencing") in an existing VG:
+.br
+.B $ vgchange --setpersist y --setlockargs persist,notimeout
+.I VG
+.P
+Changing the lock args requires the VG to be stopped on all other nodes.
+.P
+Once enabled, PR needs to be started before or with lockstart:
+.br
+.B $ vgchange --persist start
+.I VG
+.br
+.B $ vgchange --persist start --lockstart
+.I VG
+.P
+Display the VG attributes configured by setpersist and setlockargs:
+.br
+.B $ vgs -o+persist
+.I VG
+.br
+.B $ vgs -o+lockargs
+.I VG
+.P
+.B setpersist y
+.br
+With this setting, LVM requires that PR be started before
+lockstart, and any VG modifications or activations require
+that PR is started.
+.br
+.B setlockargs persist
+.br
+This lockargs setting causes lvmlockd to remove the PR key of a
+failed host when a lock request fails due to a lock owned by the
+failed host. sanlock is then permitted to grant the lock.
+.br
+.B setlockargs notimeout
+.br
+This lockargs setting causes lvmlockd to configure sanlock leases
+to not time out. Removing the PR of a failed host replaces timeouts
+as a faster mechanism for lock recovery. With timeouts disabled,
+the local watchdog is not used by sanlock for the VG lockspace.
+.P
+For more information, see
+.BR lvmpersist (8).
+.
 .SS System ID
 .br
 In contrast to a shared VG, a local VG can only be used by one host
index 0f5cec57f122aa92842db242b7a8fc1d9baf3af8..9744098cb801219e5587c8643470bd938a30750b 100644 (file)
@@ -786,6 +786,20 @@ arg(setautoactivation_ARG, '\0', "setautoactivation", bool_VAL, 0, 0,
     "If autoactivation is enabled on a VG, autoactivation can be disabled\n"
     "for individual LVs.\n")
 
+arg(setlockargs_ARG, '\0', "setlockargs", string_VAL, 0, 0,
+    "Add or remove lock_args settings for a shared VG.\n"
+    "The lock_args determine lock manager behavior for the VG.\n"
+    "These settings are only allowed for lock_type sanlock.\n"
+    "persist: use persistent reservations for lock recovery.\n"
+    "lvmlockd will preempt-abort the persistent reservation of a failed\n"
+    "lock owner so that the lock can be acquired.\n"
+    "notimeout: use locks that do not time out when the owner fails.\n"
+    "In this case, a lock owned by a failed host can only be acquired\n"
+    "using the persist feature.\n"
+    "nopersist: do not use the persist feature.\n"
+    "timeout: do not use the notimeout feature.\n"
+    "The default behavior with no settings configured is: nopersist and timeout.\n")
+
 arg(setpersist_ARG, '\0', "setpersist", string_VAL, 0, 0,
     "#vgcreate\n"
     "Set flags to control persistent reservation behavior.\n"
index 778187e44a1d692928909f6bc21b98ea9d3b6aeb..37af077fa9d67c1849fddfeba0557f4408767425 100644 (file)
@@ -1843,6 +1843,11 @@ OO: --select String, --removekey String, --majoritypvs, --force
 ID: vgchange_persist
 DESC: Perform persistent reservation commands on devices.
 
+vgchange --setlockargs String VG|Tag|Select
+OO: --select String
+ID: vgchange_setlockargs
+DESC: Set or clear lock_args flags to control lock manager behavior.
+
 vgchange --lockstart
 OO: --select String, --persist start
 OP: VG|Tag|Select ...
@@ -1856,6 +1861,7 @@ ID: vgchange_lockstop
 DESC: Stop the lockspace of a shared VG in lvmlockd.
 
 vgchange --locktype LockType VG
+OO: --setlockargs String
 ID: vgchange_locktype
 DESC: Change the lock type for a shared VG.
 
@@ -1880,7 +1886,7 @@ OO: --addtag Tag, --alloc Alloc, --autobackup Bool, --clustered Bool, --maxlogic
 --metadatasize SizeMB, --pvmetadatacopies MetadataCopiesPV, --vgmetadatacopies MetadataCopiesVG,
 --reportformat ReportFmt, --dataalignment SizeKB, --dataalignmentoffset SizeKB,
 --shared, --systemid String, --locktype LockType, --setautoactivation Bool,
---setpersist String, --persist start
+--setpersist String, --persist start, --setlockargs String
 ID: vgcreate_general
 
 ---
index 4fce42341bb9e28e1c1f5c2906ad2fdd532c944b..1ed76f6e022dec88f2f6e35a1024058045e08a84 100644 (file)
@@ -90,6 +90,7 @@ static const struct command_function _command_functions[CMD_COUNT] = {
        { vgchange_systemid_CMD, vgchange_systemid_cmd },
        { vgchange_setpersist_CMD, vgchange_setpersist_cmd },
        { vgchange_persist_CMD, vgchange_persist_cmd },
+       { vgchange_setlockargs_CMD, vgchange_setlockargs_cmd },
 
        /* lvdisplay variants */
        { lvdisplay_columns_CMD,        lvdisplay_columns_cmd },
index b85c6cd250ef7e299ca73b221e0171a043dbd97f..62781047711c4f028e3f057f15ba05430e3c3fd0 100644 (file)
@@ -521,7 +521,8 @@ int vgcreate_params_set_defaults(struct cmd_context *cmd,
  */
 int vgcreate_params_set_from_args(struct cmd_context *cmd,
                                  struct vgcreate_params *vp_new,
-                                 struct vgcreate_params *vp_def)
+                                 struct vgcreate_params *vp_def,
+                                 struct pvcreate_params *pp)
 {
        const char *system_id_arg_str;
        const char *lock_type = NULL;
@@ -736,6 +737,29 @@ int vgcreate_params_set_from_args(struct cmd_context *cmd,
        vp_new->lock_type = lock_type;
 
        log_debug("Setting lock_type to %s", vp_new->lock_type);
+
+       if (arg_is_set(cmd, setlockargs_ARG)) {
+               const char *set_args;
+               uint32_t lock_args_flags = 0;
+
+               if (!lock_type || strcmp(lock_type, "sanlock")) {
+                       log_error("Using setlockargs requires sanlock lock type for shared VG.");
+                       return 0;
+               }
+
+               if (!(set_args = arg_str_value(cmd, setlockargs_ARG, NULL)))
+                       return_0;
+               if (!lockd_lockargs_get_user_flags(set_args, &lock_args_flags))
+                       return_0;
+               if (!pp)
+                       return_0;
+
+               if ((lock_args_flags & LOCKARGS_PERSIST) && !(pp->setpersist_flags & (SETPR_Y | SETPR_REQUIRE))) {
+                       log_error("Using --setlockargs persist requires --setpersist y|require.");
+                       return 0;
+               }
+       }
+
        return 1;
 }
 
index d2033fb0f0680a8a4c5fada2042deea2938661aa..b9428e4781e2a09acf9c64c700bb6df30f308ab6 100644 (file)
@@ -188,7 +188,8 @@ int vgcreate_params_set_defaults(struct cmd_context *cmd,
                                 struct volume_group *vg);
 int vgcreate_params_set_from_args(struct cmd_context *cmd,
                                  struct vgcreate_params *vp_new,
-                                 struct vgcreate_params *vp_def);
+                                 struct vgcreate_params *vp_def,
+                                 struct pvcreate_params *pp);
 int lv_change_activate(struct cmd_context *cmd, struct logical_volume *lv,
                       activation_change_t activate);
 int lv_refresh(struct cmd_context *cmd, struct logical_volume *lv);
index 2d172432a31be1c56377a8cafc61f107b691fe18..c7afc5a033ef8b6ced1bd7d219db0f362aa357ec 100644 (file)
@@ -175,6 +175,7 @@ int vgchange_lock_start_stop_cmd(struct cmd_context *cmd, int argc, char **argv)
 int vgchange_systemid_cmd(struct cmd_context *cmd, int argc, char **argv);
 int vgchange_setpersist_cmd(struct cmd_context *cmd, int argc, char **argv);
 int vgchange_persist_cmd(struct cmd_context *cmd, int argc, char **argv);
+int vgchange_setlockargs_cmd(struct cmd_context *cmd, int argc, char **argv);
 
 const struct opt_name *get_opt_name(int opt);
 const struct val_name *get_val_name(int val);
index b28af4f9b670ba1514094a014f918d987fda1068..8b2e03f902f168db2aaf1bca6a1e8dcc8c93548a 100644 (file)
@@ -683,6 +683,7 @@ static int _passes_lock_start_filter(struct cmd_context *cmd,
 static int _vgchange_lock_start(struct cmd_context *cmd, struct volume_group *vg,
                                struct vgchange_params *vp)
 {
+       uint64_t our_key = 0;
        int auto_opt = 0;
        int exists = 0;
        int r;
@@ -713,12 +714,12 @@ do_start:
        if (!persist_start_include(cmd, vg, 0, auto_opt, NULL))
                return 0;
 
-       if ((vg->pr & (VG_PR_REQUIRE|VG_PR_AUTOSTART)) && !persist_is_started(cmd, vg, 0)) {
+       if ((vg->pr & (VG_PR_REQUIRE|VG_PR_AUTOSTART)) && !persist_is_started(cmd, vg, 0, &our_key)) {
                log_error("VG %s PR should be started before locking (vgchange --persist start)", vg->name);
                return 0;
        }
 
-       r = lockd_start_vg(cmd, vg, &exists);
+       r = lockd_start_vg(cmd, vg, our_key, &exists);
 
        if (r)
                vp->lock_start_count++;
@@ -1339,7 +1340,7 @@ static int _vgchange_locktype(struct cmd_context *cmd, struct volume_group *vg,
 
                vg->system_id = NULL;
 
-               if (!lockd_init_vg(cmd, vg, lock_type, lv_lock_count)) {
+               if (!lockd_init_vg(cmd, vg, lock_type, lv_lock_count, arg_str_value(cmd, setlockargs_ARG, NULL))) {
                        log_error("Failed to initialize lock args for lock type %s", lock_type);
                        return 0;
                }
@@ -1879,7 +1880,7 @@ static int _vgchange_setpersist_single(struct cmd_context *cmd, const char *vg_n
         * enabling/starting PR, otherwise enabling/starting PR will
         * cause i/o to begin failing on those other hosts.
         */
-       if (on && vg_is_shared(vg) && !persist_is_started(cmd, vg, 1) &&
+       if (on && vg_is_shared(vg) && !persist_is_started(cmd, vg, 1, NULL) &&
            lockd_vg_is_started(cmd, vg, NULL) && lockd_vg_is_busy(cmd, vg)) {
                log_error("VG lockspace should be stopped on all hosts (vgchange --lockstop) before enabling PR.");
                return ECMD_FAILED;
@@ -1949,3 +1950,51 @@ int vgchange_setpersist_cmd(struct cmd_context *cmd, int argc, char **argv)
        return ret;
 }
 
+static int _vgchange_setlockargs_single(struct cmd_context *cmd, const char *vg_name,
+                                    struct volume_group *vg,
+                                    struct processing_handle *handle)
+{
+       const char *set = arg_str_value(cmd, setlockargs_ARG, NULL);
+       uint64_t our_key_held = 0;
+
+       if (!set)
+               return_ECMD_FAILED;
+
+       /*
+        * lockd_setlockargs gets exclusive PR (if the VG is using PR),
+        * stops the lockspace, and sets new vg->lock_args that are
+        * written below.  If lockd_setlockargs got the ex PR, then
+        * persist_upgrade_stop releases the PR.
+        */
+       if (!lockd_setlockargs(cmd, vg, set, &our_key_held))
+               return_ECMD_FAILED;
+
+       if (!vg_write(vg) || !vg_commit(vg))
+               return_ECMD_FAILED;
+
+       if (our_key_held && !persist_upgrade_stop(cmd, vg, our_key_held))
+               log_warn("Failed to stop PR.");
+       persist_key_file_remove(cmd, vg);
+
+       log_print_unless_silent("Volume group \"%s\" successfully changed.", vg->name);
+
+       return ECMD_PROCESSED;
+}
+
+int vgchange_setlockargs_cmd(struct cmd_context *cmd, int argc, char **argv)
+{
+       struct processing_handle *handle;
+       uint32_t flags = READ_FOR_UPDATE;
+       int ret;
+
+       if (!(handle = init_processing_handle(cmd, NULL))) {
+               log_error("Failed to initialize processing handle.");
+               return ECMD_FAILED;
+       }
+
+       ret = process_each_vg(cmd, argc, argv, NULL, NULL, flags, 0, handle, &_vgchange_setlockargs_single);
+
+       destroy_processing_handle(cmd, handle);
+       return ret;
+}
+
index d1cdfea1745027db392e248f0893c61374dba7ad..1d06d416e711d2a03b2f4dddc762b8d6f1111690 100644 (file)
@@ -46,13 +46,12 @@ int vgcreate(struct cmd_context *cmd, int argc, char **argv)
        pp.pv_names = argv;
        pp.vg_name = vg_name;
        pp.preserve_existing = 1; /* Don't create a new PV on top of an existing PV like pvcreate does. */
-
        pp.check_consistent_block_size = 1;
 
        if (!vgcreate_params_set_defaults(cmd, &vp_def, NULL))
                return EINVALID_CMD_LINE;
        vp_def.vg_name = vg_name;
-       if (!vgcreate_params_set_from_args(cmd, &vp_new, &vp_def))
+       if (!vgcreate_params_set_from_args(cmd, &vp_new, &vp_def, &pp))
                return EINVALID_CMD_LINE;
 
        if (!vgcreate_params_validate(cmd, &vp_new))
@@ -161,7 +160,7 @@ int vgcreate(struct cmd_context *cmd, int argc, char **argv)
         * a local VG.  lockd_init_vg() then writes the VG a second time with
         * both lock_type and lock_args set.
         */
-       if (!lockd_init_vg(cmd, vg, vp_new.lock_type, 0)) {
+       if (!lockd_init_vg(cmd, vg, vp_new.lock_type, 0, arg_str_value(cmd, setlockargs_ARG, NULL))) {
                log_error("Failed to initialize lock args for lock type %s",
                          vp_new.lock_type);
                vg_remove_pvs(vg);
@@ -182,13 +181,15 @@ int vgcreate(struct cmd_context *cmd, int argc, char **argv)
         * read without locks until the lockspace is done starting.)
         */
        if (vg_is_shared(vg)) {
+               uint64_t our_key = 0;
+
                if (pp.setpersist_flags &&
-                   !persist_vgcreate_update(cmd, vg, pp.setpersist_flags)) {
+                   !persist_vgcreate_update(cmd, vg, pp.setpersist_flags, &our_key)) {
                        log_error("Failed to start PR");
                        goto out;
                }
 
-               if (!lockd_start_vg(cmd, vg, NULL)) {
+               if (!lockd_start_vg(cmd, vg, our_key, NULL)) {
                        log_error("Failed to start locking");
                        goto out;
                }
index 8fcfa6ec31b95fca81e39c6d0291052dfa8622fb..73f058a56582ff54a73e1b5022add4e5ec6e544e 100644 (file)
@@ -609,7 +609,7 @@ int vgsplit(struct cmd_context *cmd, int argc, char **argv)
                        goto_bad;
                }
                vp_def.vg_name = vg_name_to;
-               if (!vgcreate_params_set_from_args(cmd, &vp_new, &vp_def)) {
+               if (!vgcreate_params_set_from_args(cmd, &vp_new, &vp_def, NULL)) {
                        r = EINVALID_CMD_LINE;
                        goto_bad;
                }
This page took 0.240705 seconds and 5 git commands to generate.