dnl -- Look for sanlock libraries
AS_IF([test "$BUILD_LOCKDSANLOCK" = "yes"], [
- LOCKDSANLOCK_SUPPORT=370
+# FIXME: forcing sanlock 4.2.0 for testing, default should be 370
+# LOCKDSANLOCK_SUPPORT=370
PKG_CHECK_EXISTS(libsanlock_client >= 4.0.0, [LOCKDSANLOCK_SUPPORT=400])
PKG_CHECK_EXISTS(libsanlock_client >= 4.1.0, [LOCKDSANLOCK_SUPPORT=410])
+ PKG_CHECK_EXISTS(libsanlock_client >= 4.2.0, [LOCKDSANLOCK_SUPPORT=420])
+ LOCKDSANLOCK_SUPPORT=420
PKG_CHECK_MODULES(LIBSANLOCKCLIENT, libsanlock_client >= 3.7.0, [BUILD_LVMLOCKD="yes"])
AC_DEFINE_UNQUOTED([LOCKDSANLOCK_SUPPORT], [$LOCKDSANLOCK_SUPPORT], [Define version of sanlock.])
])
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
-SOURCES = lvmlockd-core.c
+SOURCES = lvmlockd-core.c lvmlockd-helper.c
SOURCES2 = lvmlockctl.c
TARGETS = lvmlockd lvmlockctl
#define EIOTIMEOUT 225
#define ELOCKREPAIR 226
+#define LOCKARGS_VERSION 0x00000001 /* meta only */
+#define LOCKARGS_LVMLOCK 0x00000002 /* meta only */
+#define LOCKARGS_TIMEOUT 0x00000004 /* user only */
+#define LOCKARGS_NOTIMEOUT 0x00000008 /* meta or user */
+#define LOCKARGS_PERSIST 0x00000010 /* meta or user */
+#define LOCKARGS_NOPERSIST 0x00000020 /* user only */
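+
+/* "meta" values may appear in VG metadata lock_args strings; "user"
+   values may come from the command line (--setlockargs). */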
+
#endif /* _LVM_LVMLOCKD_CLIENT_H */
#include <syslog.h>
#include <dirent.h>
#include <time.h>
+#include <fcntl.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/utsname.h>
#include <sys/un.h>
+#include <sys/wait.h>
#ifdef SD_NOTIFY_SUPPORT
#include <systemd/sd-daemon.h>
static int restart_pi;
static int restart_fds[2];
+static int helper_send_fd = -1; /* main loop sends requests to helper */
+static int helper_recv_fd = -1; /* main loop receives results from helper */
+static int helper_pid = -1;
+static int helper_pi = -1;
+static uint32_t helper_msg_id = 1;
+
/*
* Each lockspace has its own thread to do locking.
* The lockspace thread makes synchronous lock requests to dlm/sanlock.
#define DO_FORCE 1
#define NO_FORCE 0
+static int add_fence_action(struct lockspace *ls, struct owner *owner);
+static int send_helper_request(struct action *act, char *ls_name, uint32_t new_msg_id);
static int add_lock_action(struct action *act);
static int str_to_lm(const char *str);
static int setup_dump_socket(void);
return 0;
}
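+/*
+ * Split buf in place at each sep character, saving up to max_args
+ * pointers in argv and the field count in *argc.  buf must be
+ * writable; argv[0] is always set, so *argc is at least 1.
+ */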
+static void split_line(char *buf, int *argc, char **argv, int max_args, char sep)
+{
+ char *p = buf;
+ int i;
+
+ argv[0] = p;
+
+ for (i = 1; i < max_args; i++) {
+ p = strchr(p, sep);
+ if (!p)
+ break;
+ *p++ = '\0';
+
+ argv[i] = p;
+ }
+ *argc = i;
+}
+
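+/*
+ * Parse the leading "MAJOR.MINOR.PATCH" version from a lock_args
+ * string such as "2.0.0:lvname:persist".  Returns 0 on success,
+ * or -1 if the string does not begin with a three-part version.
+ */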
+int lockd_lockargs_get_version(char *str, unsigned int *major, unsigned int *minor, unsigned int *patch)
+{
+ char version[16] = {0};
+ char *major_str, *minor_str, *patch_str;
+ char *n, *d1, *d2;
+
+ strncpy(version, str, 15);
+
+ n = strchr(version, ':');
+ if (n)
+ *n = '\0';
+
+ d1 = strchr(version, '.');
+ if (!d1)
+ return -1;
+
+ d2 = strchr(d1 + 1, '.');
+ if (!d2)
+ return -1;
+
+ major_str = version;
+ minor_str = d1 + 1;
+ patch_str = d2 + 1;
+
+ *d1 = '\0';
+ *d2 = '\0';
+
+ if (major)
+ *major = atoi(major_str);
+ if (minor)
+ *minor = atoi(minor_str);
+ if (patch)
+ *patch = atoi(patch_str);
+
+ return 0;
+}
+
+#define MAX_LOCKARGS 8
+
+/* parse lock_args string for values that may appear in VG metadata lock_args */
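+/* e.g. "2.0.0:lvmlock:notimeout:persist" sets LOCKARGS_VERSION,
+   LOCKARGS_LVMLOCK, LOCKARGS_NOTIMEOUT and LOCKARGS_PERSIST */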
+
+static int lockd_lockargs_get_meta_flags(const char *str, uint32_t *flags)
+{
+ char buf[PATH_MAX];
+ char *argv[MAX_LOCKARGS];
+ int argc;
+ int i;
+
+ if (!str)
+ return -1;
+
+ dm_strncpy(buf, str, sizeof(buf));
+
+ split_line(buf, &argc, argv, MAX_LOCKARGS, ':');
+
+ for (i = 0; i < argc; i++) {
+ if (!i && !lockd_lockargs_get_version(argv[i], NULL, NULL, NULL))
+ *flags |= LOCKARGS_VERSION;
+ else if ((i == 1) && !strcmp(argv[i], "lvmlock"))
+ *flags |= LOCKARGS_LVMLOCK;
+ else if (!strcmp(argv[i], "persist"))
+ *flags |= LOCKARGS_PERSIST;
+ else if (!strcmp(argv[i], "notimeout"))
+ *flags |= LOCKARGS_NOTIMEOUT;
+ else {
+ log_error("Unknown lockargs meta value: %s", argv[i]);
+ return -1;
+ }
+ }
+ log_debug("lockd_lockargs_get_meta_flags %s = 0x%x", str, *flags);
+ return 0;
+}
+
+/* parse lock_args string for values that may appear in command line --setlockargs */
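+/* e.g. "persist,notimeout" sets LOCKARGS_PERSIST and LOCKARGS_NOTIMEOUT */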
+
+int lockd_lockargs_get_user_flags(const char *str, uint32_t *flags)
+{
+ char buf[PATH_MAX];
+ char *argv[MAX_LOCKARGS];
+ int argc;
+ int i;
+
+ if (!str)
+ return -1;
+
+ dm_strncpy(buf, str, sizeof(buf));
+
+ split_line(buf, &argc, argv, MAX_LOCKARGS, ',');
+
+ for (i = 0; i < argc; i++) {
+ if (!strcmp(argv[i], "persist"))
+ *flags |= LOCKARGS_PERSIST;
+ else if (!strcmp(argv[i], "nopersist"))
+ *flags |= LOCKARGS_NOPERSIST;
+ else if (!strcmp(argv[i], "timeout"))
+ *flags |= LOCKARGS_TIMEOUT;
+ else if (!strcmp(argv[i], "notimeout"))
+ *flags |= LOCKARGS_NOTIMEOUT;
+ else {
+ log_error("Unknown lockargs option value: %s", argv[i]);
+ return -1;
+ }
+ }
+ log_debug("lockd_lockargs_get_user_flags %s = 0x%x", str, *flags);
+ return 0;
+}
+
struct lockspace *alloc_lockspace(void)
{
struct lockspace *ls;
INIT_LIST_HEAD(&ls->actions);
INIT_LIST_HEAD(&ls->resources);
INIT_LIST_HEAD(&ls->dispose);
+ INIT_LIST_HEAD(&ls->fence_history);
pthread_mutex_init(&ls->mutex, NULL);
pthread_cond_init(&ls->cond, NULL);
return ls;
memset(r, 0, sizeof(struct resource) + resource_lm_data_size);
INIT_LIST_HEAD(&r->locks);
INIT_LIST_HEAD(&r->actions);
+ INIT_LIST_HEAD(&r->fence_wait_actions);
} else {
log_error("out of memory for resource");
}
static void free_resource(struct resource *r)
{
+ struct action *act, *act2;
+
+ list_for_each_entry_safe(act, act2, &r->actions, list) {
+ list_del(&act->list);
+ free_action(act);
+ }
+ list_for_each_entry_safe(act, act2, &r->fence_wait_actions, list) {
+ list_del(&act->list);
+ free_action(act);
+ }
+
pthread_mutex_lock(&unused_struct_mutex);
if (unused_resource_count >= MAX_UNUSED_RESOURCE) {
free(r);
return "busy";
case LD_OP_REFRESH_LV:
return "refresh_lv";
+ case LD_OP_FENCE:
+ return "fence";
+ case LD_OP_FENCE_RESULT:
+ return "fence_result";
+ case LD_OP_SETLOCKARGS_BEFORE:
+ return "setlockargs_before";
+ case LD_OP_SETLOCKARGS_FINAL:
+ return "setlockargs_final";
default:
return "op_unknown";
};
return -1;
}
-int version_from_args(char *args, unsigned int *major, unsigned int *minor, unsigned int *patch)
-{
- char version[MAX_ARGS+1];
- char *major_str, *minor_str, *patch_str;
- char *n, *d1, *d2;
-
- memset(version, 0, sizeof(version));
- strncpy(version, args, MAX_ARGS);
- version[MAX_ARGS] = '\0';
-
- n = strstr(version, ":");
- if (n)
- *n = '\0';
-
- d1 = strstr(version, ".");
- if (!d1)
- return -1;
-
- d2 = strstr(d1 + 1, ".");
- if (!d2)
- return -1;
-
- major_str = version;
- minor_str = d1 + 1;
- patch_str = d2 + 1;
-
- *d1 = '\0';
- *d2 = '\0';
-
- if (major)
- *major = atoi(major_str);
- if (minor)
- *minor = atoi(minor_str);
- if (patch)
- *patch = atoi(patch_str);
-
- return 0;
-}
-
/*
* Write new info when a command exits if that command has acquired a new LV
* lock. If the command has released an LV lock we don't bother updating the
* closed/terminated their lvmlockd connection, and whose locks should
* be released. Do not remove these actions from act_close_list.
*
+ * act_fence_done: list of OP_FENCE_RESULT actions, identifying hosts that
+ * have been fenced. LOCK actions waiting for this fencing are moved from
+ * the r->fence_wait_actions list back to the r->actions list for retrying.
+ * Do not remove the FENCE_RESULT actions from act_fence_done list since
+ * these act structs are applied to multiple resources in the lockspace
+ * (like act_close_list.)
+ *
* retry_out: set to 1 if the lock manager said we should retry,
* meaning we should call res_process() again in a short while to retry.
*/
static void res_process(struct lockspace *ls, struct resource *r,
- struct list_head *act_close_list, int *retry_out)
+ struct list_head *act_close_list,
+ struct list_head *act_fence_done,
+ int *retry_out)
{
struct owner owner = { 0 };
- struct action *act, *safe, *act_close;
+ struct action *act, *safe, *act_close, *act_fence, *act_lock;
struct lock *lk;
uint32_t unlock_by_client_id = 0;
int lm_retry;
res_cancel(ls, r, act_close);
}
+ if (!list_empty(&r->fence_wait_actions)) {
+ list_for_each_entry(act_fence, act_fence_done, list) {
+ list_for_each_entry_safe(act_lock, safe, &r->fence_wait_actions, list) {
+ /*
+ * act_lock->owner identifies the failed host that owned the
+ * lock which we submitted a fence request for. If a fence
+ * result identifies that same owner, then the lock request
+ * action can continue.
+ */
+ if ((act_lock->owner.host_id == act_fence->owner.host_id) &&
+ (act_lock->owner.generation == act_fence->owner.generation)) {
+ list_del(&act_lock->list);
+ if (act_fence->result) {
+ /* fencing failed, return locking error to command */
+ log_debug("%s:%s lock error after fence error for %u %u",
+ ls->name, r->name, act_fence->owner.host_id, act_fence->owner.generation);
+ act_lock->result = -EAGAIN;
+ add_client_result(act_lock);
+ } else {
+ /* fencing done, retry lock request which should no
+ longer be blocked by the failed owner */
+ log_debug("%s:%s lock retry after fence success for %u %u",
+ ls->name, r->name, act_fence->owner.host_id, act_fence->owner.generation);
+ memset(&act_lock->owner, 0, sizeof(struct owner));
+ list_add_tail(&act_lock->list, &r->actions);
+ }
+ }
+ }
+ }
+ }
+
/*
* handle enable/disable
*/
rv = res_lock(ls, r, act, &lm_retry, &owner);
- /* TODO: if lock fails because it's owned by a failed host,
- and persistent reservations are enabled, then remove the
- pr of failed host_id, tell sanlock the host_id is now
- dead, and retry lock request. */
+ /*
+ * If lock fails because it's owned by a failed host,
+ * and persistent reservation fencing is enabled, then
+ * remove the pr of failed host_id, tell sanlock the
+ * host_id is now dead, and retry lock request.
+ */
+ if (ls->fence_pr && (rv == -EAGAIN) &&
+ owner.host_id && owner.generation &&
+ !strcmp(owner.state, "FAIL")) {
+ log_debug("%s:%s res_lock fence_pr %u:%u",
+ ls->name, r->name, owner.host_id, owner.generation);
+ /* after fencing is done for owner, the acts from
+ r->fence_wait_actions are moved back to r->actions. */
+ act->owner = owner;
+ list_del(&act->list);
+ list_add(&act->list, &r->fence_wait_actions);
+ add_fence_action(ls, &owner);
+ *retry_out = 1;
- if ((rv == -EAGAIN) &&
+ } else if ((rv == -EAGAIN) &&
(act->retries <= act->max_retries) &&
(lm_retry || (r->type != LD_RT_LV))) {
/* leave act on list */
rv = res_lock(ls, r, act, &lm_retry, &owner);
- if ((rv == -EAGAIN) &&
+ /*
+ * If lock fails because it's owned by a failed host,
+ * and persistent reservation fencing is enabled, then
+ * remove the pr of failed host_id, tell sanlock the
+ * host_id is now dead, and retry lock request.
+ */
+ if (ls->fence_pr && (rv == -EAGAIN) &&
+ owner.host_id && owner.generation &&
+ !strcmp(owner.state, "FAIL")) {
+ log_debug("%s:%s res_lock fence_pr %u:%u",
+ ls->name, r->name, owner.host_id, owner.generation);
+ /* after fencing is done for owner, the acts from
+ r->fence_wait_actions are moved back to r->actions. */
+ act->owner = owner;
+ list_del(&act->list);
+ list_add(&act->list, &r->fence_wait_actions);
+ add_fence_action(ls, &owner);
+ *retry_out = 1;
+ } else if ((rv == -EAGAIN) &&
(act->retries <= act->max_retries) &&
(lm_retry || (r->type != LD_RT_LV))) {
/* leave act on list */
* processing the OP_CLOSE for the client.
*/
if ((r->type == LD_RT_LV) && (r->mode == LD_LK_UN) &&
- list_empty(&r->locks) && list_empty(&r->actions)) {
+ list_empty(&r->locks) && list_empty(&r->actions) && list_empty(&r->fence_wait_actions)) {
/* An implicit unlock of a transient lock. */
if (!unlock_by_client_id)
struct action *act_op_free = NULL;
struct list_head tmp_act;
struct list_head act_close;
+ struct list_head act_fence;
char tmp_name[MAX_NAME+5];
int fail_stop_busy;
int free_vg = 0;
int rv;
INIT_LIST_HEAD(&act_close);
+ INIT_LIST_HEAD(&act_fence);
INIT_LIST_HEAD(&tmp_act);
/* first action may be client add */
adopt_ok = 1;
}
- log_debug("S %s lm_add_lockspace %s act %d wait %d adopt_only %d adopt_ok %d repair %d",
- ls->name, lm_str(ls->lm_type), add_act ? 1 : 0, wait_flag, adopt_only, adopt_ok, repair);
+ log_debug("S %s lm_add_lockspace %s act %d wait %d adopt_only %d adopt_ok %d repair %d no_timeout %d key 0x%llx",
+ ls->name, lm_str(ls->lm_type), add_act ? 1 : 0, wait_flag, adopt_only, adopt_ok, repair, ls->no_timeout,
+ (unsigned long long)ls->ourkey);
/*
* The prepare step does not wait for anything and is quick;
act = list_first_entry(&ls->actions, struct action, list);
+ log_debug("S %s ls actions entry: %s", ls->name, op_str(act->op));
+
+ act->ls_generation = ls->generation;
+
if (act->op == LD_OP_KILL_VG && act->rt == LD_RT_VG) {
/* Continue processing until DROP_VG arrives. */
log_debug("S %s kill_vg", ls->name);
ls->thread_work = 0;
ls->thread_stop = 1;
drop_vg = 1;
+ /* list_del(&act->list) is done at end of lockspace_thread function */
break;
}
if (act->op == LD_OP_STOP) {
- /* thread_stop is already set */
ls->thread_work = 0;
+ /* ls->thread_stop = 1 is already set */
+ /* list_del(&act->list) is done at end of lockspace_thread function */
break;
}
ls->thread_work = 0;
ls->thread_stop = 1;
free_vg = 1;
+ /* list_del(&act->list) is done at end of lockspace_thread function */
break;
}
continue;
}
+ if (act->op == LD_OP_SETLOCKARGS_BEFORE && act->rt == LD_RT_VG) {
+ /* check if sanlock version supports the new args */
+ if (!lm_setlockargs_supported_sanlock(ls, act)) {
+ list_del(&act->list);
+ act->result = -EPROTONOSUPPORT;
+ add_client_result(act);
+ continue;
+ }
+
+ /* check that no LV locks are held; a VG lock is usually held */
+ if (for_each_lock(ls, LOCKS_EXIST_LV)) {
+ list_del(&act->list);
+ act->result = -ENOTEMPTY;
+ add_client_result(act);
+ continue;
+ }
+
+ /* check that we are the only lockspace user */
+ rv = lm_hosts(ls, 1);
+ if (rv) {
+ /*
+ * rv < 0: error (don't change lock args)
+ * rv > 0: other hosts in lockspace (cannot change lock args)
+ * rv = 0: only local host in lockspace (can change lock args)
+ * Checking for hosts here allows setlockargs to fail
+ * and be rerun after the ls is stopped on other hosts.
+ */
+ log_error("S %s setlockargs_before hosts %d", ls->name, rv);
+ list_del(&act->list);
+ act->result = (rv < 0) ? rv : -EBUSY;
+ add_client_result(act);
+ continue;
+ }
+
+ /* return success, allow the change */
+ /* list_del act and add_client_result done after rem_lockspace */
+
+ /* the lockspace needs to be stopped for setlockargs_final */
+ ls->thread_work = 0;
+ ls->thread_stop = 1;
+ break;
+ }
+
if (act->op == LD_OP_RENAME_BEFORE && act->rt == LD_RT_VG) {
/* vgrename */
log_debug("S %s checking for lockspace hosts", ls->name);
}
ls->thread_work = 0;
ls->thread_stop = 1;
+ /* list_del(&act->list) is done at end of lockspace_thread function */
/* Do we want to check hosts again below like vgremove? */
break;
}
}
if (act->op == LD_OP_FREE && act->rt == LD_RT_LV) {
+ /* lvremove */
list_del(&act->list);
r = find_dispose_act(ls, act); /* removes r from dispose list */
continue;
}
+ /*
+ * check all resources for lock actions that are waiting
+ * for this fence result
+ */
+ if (act->op == LD_OP_FENCE_RESULT) {
+ list_del(&act->list);
+ list_add(&act->list, &act_fence);
+ log_debug("S %s apply fence result %d for host %u %u",
+ ls->name, act->result, act->owner.host_id, act->owner.generation);
+ continue;
+ }
+
/*
* All the other op's are for locking.
* Find the specific resource that the lock op is for,
log_debug("%s:%s action %s %s", ls->name, r->name,
op_str(act->op), mode_str(act->mode));
}
+ /* end processing ls->actions */
pthread_mutex_unlock(&ls->mutex);
+ /*
+ * If the fence result was a success, then tell the
+ * sanlock lockspace that the fenced host is dead
+ * so it will grant locks held by the fenced host.
+ */
+ if (ls->lm_type == LD_LM_SANLOCK) {
+ list_for_each_entry(act, &act_fence, list) {
+ if (!act->result)
+ lm_set_host_dead_sanlock(ls, &act->owner);
+ }
+ }
+
/*
* Process the lock operations that have been queued for each
* resource.
retry = 0;
list_for_each_entry_safe(r, r2, &ls->resources, list)
- res_process(ls, r, &act_close, &retry);
+ res_process(ls, r, &act_close, &act_fence, &retry);
list_for_each_entry_safe(act, safe, &act_close, list) {
list_del(&act->list);
free_action(act);
}
+ list_for_each_entry_safe(act, safe, &act_fence, list) {
+ list_del(&act->list);
+ free_action(act);
+ }
+
if (retry) {
ls->thread_work = 1;
usleep(LOCK_RETRY_MS * 1000);
out_act:
/*
- * Move remaining actions to results; this will usually (always?)
- * be only the stop action.
+ * Move remaining actions to results; this will usually (always?)
+ * be the act processed above which resulted in the lockspace thread
+ * being stopped. That act is not removed from ls->actions by
+ * the main action processing loop, but remains on ls->actions
+ * and is removed here. (TODO: wouldn't it be nicer
+ * to always list_del every action above, and save a pointer
+ * to the act struct that caused thread_stop=1? This seems
+ * to incorrectly return success for any/all acts, not just
+ * the one act that was processed leading to thread_stop.)
*/
pthread_mutex_lock(&ls->mutex);
list_for_each_entry_safe(act, safe, &ls->actions, list) {
if (act->op == LD_OP_FREE) {
+ /* vgremove */
act_op_free = act;
act->result = 0;
} else if (act->op == LD_OP_STOP)
act->result = 0;
else if (act->op == LD_OP_RENAME_BEFORE)
act->result = 0;
+ else if (act->op == LD_OP_SETLOCKARGS_BEFORE)
+ act->result = 0;
else
act->result = -ENOLS;
list_del(&act->list);
pthread_mutex_lock(&lockspaces_mutex);
ls->thread_done = 1;
- ls->free_vg = free_vg;
- ls->drop_vg = drop_vg;
+
if (ls->lm_type == LD_LM_DLM && !strcmp(ls->name, gl_lsname_dlm))
global_dlm_lockspace_exists = 0;
if (ls->lm_type == LD_LM_IDM && !strcmp(ls->name, gl_lsname_idm))
struct resource *r;
int rv;
- log_debug("add_lockspace_thread %s %s version %u",
- lm_str(lm_type), ls_name, act ? act->version : 0);
+ log_debug("add_lockspace_thread %s %s version %u vg_args %s",
+ lm_str(lm_type), ls_name, act ? act->version : 0, vg_args);
if (!(ls = alloc_lockspace()))
return -ENOMEM;
strncpy(ls->name, ls_name, MAX_NAME);
ls->lm_type = lm_type;
+ if (act)
+ ls->ourkey = act->ourkey;
+
+ if (lockd_lockargs_get_meta_flags(vg_args, &ls->lock_args_flags) < 0) {
+ log_error("add_lockspace_thread %s lock_args invalid %s", ls->name, vg_args);
+ free(ls);
+ return -EARGS;
+ }
+ ls->no_timeout = (ls->lock_args_flags & LOCKARGS_NOTIMEOUT) ? 1 : 0;
+ ls->fence_pr = (ls->lock_args_flags & LOCKARGS_PERSIST) ? 1 : 0;
if (act) {
ls->start_client_id = act->client_id;
* unlock it when stopping.
*
* Should we attempt to stop the lockspace containing the gl last?
+ *
+ * FIXME: why is OP_STOP partly processed here rather than just being
+ * added to ls->actions and processed by the lockspace thread?
*/
static int rem_lockspace(struct action *act)
list_del(&act->list);
free_action(act);
}
+ list_for_each_entry_safe(act, act2, &ls->fence_history, list) {
+ list_del(&act->list);
+ free_action(act);
+ }
free_ls_resources(ls);
free_pvs_path(&ls->pvs);
free(ls);
}
if (act->lm_type == LD_LM_SANLOCK)
- rv = lm_init_vg_sanlock(ls_name, act->vg_name, act->flags, act->vg_args, act->align_mb);
+ rv = lm_init_vg_sanlock(ls_name, act->vg_name, act->flags, act->vg_args, act->align_mb,
+ act->other_args[0] ? act->other_args : NULL);
else if (act->lm_type == LD_LM_DLM)
rv = lm_init_vg_dlm(ls_name, act->vg_name, act->flags, act->vg_args);
else if (act->lm_type == LD_LM_IDM)
return rv;
}
+static int work_setlockargs_vg_final(struct action *act)
+{
+ char ls_name[MAX_NAME+1] = {0};
+ int found;
+ int rv = -EINVAL;
+
+ if (act->lm_type == LD_LM_SANLOCK) {
+ vg_ls_name(act->vg_name, ls_name);
+
+ /*
+ * The lockspace thread was stopped in setlockargs_before,
+ * but has likely not been fully cleaned up yet; return
+ * -EAGAIN so the caller can retry once it's cleared.
+ */
+ pthread_mutex_lock(&lockspaces_mutex);
+ found = find_lockspace_name(ls_name) ? 1 : 0;
+ pthread_mutex_unlock(&lockspaces_mutex);
+ if (found) {
+ log_debug("S %s work_setlockargs_vg_final ls not cleared, retry", ls_name);
+ return -EAGAIN;
+ }
+ rv = lm_setlockargs_vg_sanlock(ls_name, act->vg_name, act);
+ }
+
+ return rv;
+}
+
static void work_test_gl(void)
{
struct lockspace *ls;
if (lm_type == LD_LM_SANLOCK) {
/* ls is NULL if the lockspace is not started, which happens
for vgchange --locktype sanlock. */
- rv = lm_init_lv_sanlock(ls, ls_name, act->vg_name, act->lv_uuid, vg_args, lv_args, act->prev_lv_args);
+ rv = lm_init_lv_sanlock(ls, ls_name, act->vg_name, act->lv_uuid, vg_args, lv_args, act->other_args);
memcpy(act->lv_args, lv_args, MAX_ARGS);
return rv;
return rv;
}
+static void work_fence(struct action *act, int *retry)
+{
+ char ls_name[MAX_NAME+1];
+ char vg_name[MAX_NAME+1];
+ struct lockspace *ls;
+ struct action *ah;
+ struct owner ah_owner = { 0 };
+ uint32_t new_msg_id = 0;
+ int ah_result = 0;
+ int found_busy = 0;
+ int found_done = 0;
+ int rv;
+
+ /*
+ * if the new fencing act matches a previous, completed fencing act in
+ * fence_history, then take the previous result from the previous act.
+ *
+ * if the new fencing act matches a current, in-progress fencing act in
+ * fence_history, then leave the new fencing act as a delayed work item
+ * that will be retried later.
+ */
+
+ memset(ls_name, 0, sizeof(ls_name));
+ memcpy(vg_name, act->vg_name, sizeof(act->vg_name));
+
+ pthread_mutex_lock(&lockspaces_mutex);
+ vg_ls_name(vg_name, ls_name);
+ ls = find_lockspace_name(ls_name);
+ if (!ls) {
+ pthread_mutex_unlock(&lockspaces_mutex);
+ log_error("no lockspace for fence action %s.", ls_name);
+ return;
+ }
+
+ pthread_mutex_lock(&ls->mutex);
+ list_for_each_entry(ah, &ls->fence_history, list) {
+ if (ah->owner.host_id != act->owner.host_id)
+ continue;
+ if (ah->owner.generation != act->owner.generation)
+ continue;
+
+ if (ah->op == LD_OP_FENCE) {
+ /* new act matches an in-progress fence act */
+ found_busy = 1;
+ } else if (ah->op == LD_OP_FENCE_RESULT) {
+ /* new act matches a completed fence act */
+ found_done = 1;
+ ah_result = ah->result;
+ ah_owner = ah->owner;
+ }
+ break;
+ }
+
+ if (!found_done && !found_busy) {
+ /*
+ * send the helper a fencing request for this act.
+ * keep this new act in fence_history while the helper
+ * is working on it. when it's completed, this act will
+ * be changed from OP_FENCE to OP_FENCE_RESULT and kept
+ * in fence_history.
+ */
+ list_add(&act->list, &ls->fence_history);
+ new_msg_id = helper_msg_id++;
+
+ log_debug("work_fence %s found_done %d found_busy %d send helper new_msg_id %u", vg_name, found_done, found_busy, new_msg_id);
+
+ } else if (found_done) {
+ /*
+ * A matching OP_FENCE was already completed.
+ * Reuse this act as an OP_FENCE_RESULT.
+ */
+ act->op = LD_OP_FENCE_RESULT;
+ act->result = ah_result;
+ act->owner = ah_owner;
+
+ if (!ls->thread_stop) {
+ list_add_tail(&act->list, &ls->actions);
+ ls->thread_work = 1;
+ pthread_cond_signal(&ls->cond);
+ } else {
+ free_action(act);
+ }
+
+ log_debug("work_fence %s found_done %d found_busy %d fence result %d", vg_name, found_done, found_busy, ah_result);
+
+ } else if (found_busy) {
+ /* when retried, the result will eventually be found in history above */
+ *retry = 1;
+
+ log_debug("work_fence %s found_done %d found_busy %d retry", vg_name, found_done, found_busy);
+ }
+ pthread_mutex_unlock(&ls->mutex);
+
+ if (!found_done && !found_busy) {
+ rv = send_helper_request(act, ls_name, new_msg_id);
+ if (rv < 0) {
+ /* change act to FENCE_RESULT error and move it to ls->actions */
+ log_error("work_fence %s failed to send helper request %u", vg_name, new_msg_id);
+ pthread_mutex_lock(&ls->mutex);
+ list_del(&act->list);
+ act->op = LD_OP_FENCE_RESULT;
+ act->result = -ENOTCONN;
+ list_add_tail(&act->list, &ls->actions);
+ pthread_mutex_unlock(&ls->mutex);
+ }
+ }
+ pthread_mutex_unlock(&lockspaces_mutex);
+}
+
/*
* When an action is queued for the worker_thread, it is processed right away.
* After processing, some actions need to be retried again in a short while.
act->result = work_rename_vg(act);
add_client_result(act);
+ } else if ((act->op == LD_OP_SETLOCKARGS_FINAL) && (act->rt == LD_RT_VG)) {
+ log_debug("work setlockargs_vg_final %s", act->vg_name);
+ act->result = work_setlockargs_vg_final(act);
+ add_client_result(act);
+
} else if (act->op == LD_OP_START_WAIT) {
act->result = count_lockspace_starting(0);
if (!act->result)
} else
list_add(&act->list, &delayed_list);
+ } else if (act->op == LD_OP_FENCE) {
+ int retry = 0;
+ log_debug("work_fence %s %u %u", act->vg_name, act->owner.host_id, act->owner.generation);
+ work_fence(act, &retry);
+ if (retry)
+ list_add(&act->list, &delayed_list);
} else {
log_error("work unknown op %d", act->op);
act->result = -EINVAL;
if (act->flags & LD_AF_SH_EXISTS)
strcat(result_flags, "SH_EXISTS,");
- if (act->op == LD_OP_INIT) {
+ if (act->op == LD_OP_INIT || act->op == LD_OP_SETLOCKARGS_FINAL) {
/*
- * init is a special case where lock args need
- * to be passed back to the client.
+ * init and setlockargs send lock_args back to the client.
*/
const char *vg_args = "none";
const char *lv_args = "none";
"op_result = " FMTd64, (int64_t) act->result,
"lm_result = " FMTd64, (int64_t) act->lm_rv,
"result_flags = %s", result_flags[0] ? result_flags : "none",
+ "ls_generation = " FMTd64, (int64_t) act->ls_generation,
NULL);
}
return 0;
}
if (!strcmp(req_name, "free_vg")) {
+ /* TODO: use LD_OP_REMOVE_VG */
*op = LD_OP_FREE;
*rt = LD_RT_VG;
return 0;
return 0;
}
if (!strcmp(req_name, "free_lv")) {
+ /* TODO: use LD_OP_REMOVE_LV */
*op = LD_OP_FREE;
*rt = LD_RT_LV;
return 0;
*rt = 0;
return 0;
}
+ if (!strcmp(req_name, "setlockargs_vg_before")) {
+ *op = LD_OP_SETLOCKARGS_BEFORE;
+ *rt = LD_RT_VG;
+ return 0;
+ }
+ if (!strcmp(req_name, "setlockargs_vg_final")) {
+ *op = LD_OP_SETLOCKARGS_FINAL;
+ *rt = LD_RT_VG;
+ return 0;
+ }
out:
return -1;
}
"vg_args=%s "
"lm_type=%s "
"host_id=%u "
+ "generation=%llu "
"create_fail=%d "
"create_done=%d "
"thread_work=%d "
"thread_stop=%d "
"thread_done=%d "
"kill_vg=%d "
- "drop_vg=%d "
+ "fence_pr=%d "
+ "no_timeout=%d "
"sanlock_gl_enabled=%d\n",
prefix,
ls->name,
ls->vg_args,
lm_str(ls->lm_type),
ls->host_id,
+ (unsigned long long)ls->generation,
ls->create_fail ? 1 : 0,
ls->create_done ? 1 : 0,
ls->thread_work ? 1 : 0,
ls->thread_stop ? 1 : 0,
ls->thread_done ? 1 : 0,
ls->kill_vg,
- ls->drop_vg,
+ ls->fence_pr,
+ ls->no_timeout,
ls->sanlock_gl_enabled ? 1 : 0);
}
char buf[18]; /* "path[%d]\0", %d outputs signed integer so max to 10 bytes */
int64_t val;
uint32_t opts = 0;
+ uint64_t ourkey;
int result = 0;
int cl_pid;
int op, rt, lm, mode;
str = daemon_request_str(req, "prev_lv_args", NULL);
if (str && strcmp(str, "none"))
- strncpy(act->prev_lv_args, str, MAX_ARGS);
+ strncpy(act->other_args, str, MAX_ARGS);
+
+ str = daemon_request_str(req, "set_lock_args", NULL);
+ if (str && strcmp(str, "none"))
+ strncpy(act->other_args, str, MAX_ARGS);
/* start_vg will include lvmlocal.conf local/host_id here */
val = daemon_request_int(req, "host_id", 0);
act->lv_size_bytes = (uint64_t)dm_config_find_int64(req.cft->root, "lv_size_bytes", 0);
+ ourkey = (uint64_t)dm_config_find_int64(req.cft->root, "our_key", 0);
+ if (ourkey)
+ act->ourkey = ourkey;
+
/* Create PV list for idm */
if (lm == LD_LM_IDM) {
memset(&pvs, 0x0, sizeof(pvs));
case LD_OP_RENAME_FINAL:
case LD_OP_RUNNING_LM:
case LD_OP_REFRESH_LV:
+ case LD_OP_SETLOCKARGS_FINAL:
add_work_action(act);
rv = 0;
break;
case LD_OP_KILL_VG:
case LD_OP_DROP_VG:
case LD_OP_BUSY:
+ case LD_OP_SETLOCKARGS_BEFORE:
rv = add_lock_action(act);
break;
default:
log_debug("process_restart error %d", errno);
}
+/*
+ * Fencing
+ *
+ * lockspace thread
+ * . res_process() lock action fails due to a failed host
+ * . add_fence_action() creates new action OP_FENCE with owner info
+ * . adds it to work actions
+ *
+ * worker thread
+ * . takes new OP_FENCE
+ * . compares it against lockspace's fence_history list
+ * (completed fence actions for hosts)
+ * . if action for same host is complete, add OP_FENCE_RESULT to
+ * actions for the lockspace thread
+ * . if action for same host is in progress, return and have worker
+ * thread retry after delay
+ * . else send new fence command to helper process
+ *
+ * helper process
+ * . receives fencing command
+ * . runs fencing command:
+ * lvmpersist remove --ourkey OURKEY --removekey REMKEY --vg VG
+ * . sends result back to main thread
+ *
+ * main thread
+ * . receive fencing result from helper process, process_helper
+ * . process_fence_result() finds original OP_FENCE act in
+ * ls fence_history and changes it to OP_FENCE_RESULT
+ * . adds a new OP_FENCE_RESULT action to the lockspace actions list
+ *
+ * lockspace thread
+ * . applies OP_FENCE_RESULT to each resource's fence_wait_actions
+ * . moves matching fence_wait_actions entries to r->actions
+ * to be retried
+ */
+
+/*
+ * We cannot block the main thread on this write, so the pipe is
+ * NONBLOCK, and write fails with EAGAIN when the pipe is full.
+ * With 1k msg size and 64k default pipe size, the pipe will be full
+ * if we quickly send 64 messages.
+ *
+ * By setting the pipe size to 1MB in setup_helper (the F_SETPIPE_SZ
+ * call there is currently commented out), we could quickly send 1024
+ * msgs before getting EAGAIN.
+ */
+
+static int send_helper_request(struct action *act, char *ls_name, uint32_t new_msg_id)
+{
+ struct helper_msg msg = { 0 };
+ int retries = 0;
+ int rv;
+
+ if (helper_send_fd == -1) {
+ log_error("send_helper_request no send fd");
+ return -1;
+ }
+
+ if (act->op == LD_OP_FENCE) {
+ strncpy(msg.ls_name, ls_name, MAX_NAME);
+ msg.type = HELPER_COMMAND;
+ msg.act = LD_OP_FENCE;
+ msg.msg_id = new_msg_id;
+ act->msg_id = new_msg_id;
+ snprintf(msg.command, RUN_COMMAND_LEN-1, "/usr/sbin/lvmpersist remove --ourkey 0x%llx --removekey 0x%llx --vg %s",
+ (unsigned long long)act->ourkey,
+ (unsigned long long)act->remkey,
+ act->vg_name);
+ log_debug("send_helper_request fence msg %u %s", new_msg_id, msg.command);
+ } else {
+ return -1;
+ }
+
+ retry:
+ rv = write(helper_send_fd, &msg, sizeof(msg));
+ if (rv == -1 && errno == EINTR)
+ goto retry;
+
+ if (rv == -1 && errno == EAGAIN) {
+ /* pipe is full */
+ if (!retries) {
+ retries++;
+ sleep(1);
+ goto retry;
+ }
+ log_error("send_helper_request write EAGAIN");
+ return -1;
+ }
+
+ /* helper exited or closed fd */
+ if (rv == -1 && errno == EPIPE) {
+ log_error("send_helper_request write EPIPE");
+ return -1;
+ }
+
+ if (rv != sizeof(msg)) {
+ /* this shouldn't happen */
+ log_error("send_helper_request write error %d %d", rv, errno);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* lockspace threads call add_fence_action() */
+
+static int add_fence_action(struct lockspace *ls, struct owner *owner)
+{
+ struct action *act;
+
+ if (!(act = alloc_action()))
+ return -1;
+
+ /*
+ * The creation of a key here for host_id X generation Y must match the
+ * logic that lvm commands use to generate keys for sanlock hosts:
+ *
+ * key 0x100000YYYYYYXXXX where XXXX are the hex digits for the host_id,
+ * and YYYYYY are the hex digits for the generation number.
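+ *
+ * e.g. host_id 7 with generation 3 produces key 0x1000000000030007.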
+ */
+
+ memcpy(act->vg_name, ls->vg_name, sizeof(act->vg_name));
+ memcpy(act->vg_uuid, ls->vg_uuid, sizeof(act->vg_uuid));
+ act->op = LD_OP_FENCE;
+ act->ourkey = ls->ourkey;
+ act->remkey = 0x1000000000000000ULL | ((uint64_t)(owner->generation & 0xFFFFFF) << 16) | (owner->host_id & 0xFFFF);
+ memcpy(&act->owner, owner, sizeof(struct owner));
+
+ log_debug("add_fence_action vg %s for host_id %u gen %u ourkey 0x%llx remkey 0x%llx",
+ act->vg_name, act->owner.host_id, act->owner.generation,
+ (unsigned long long)act->ourkey, (unsigned long long)act->remkey);
+
+ add_work_action(act);
+ return 0;
+}
+
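+/*
+ * Fork the helper process, connected to the main daemon by a pair
+ * of non-blocking pipes: the parent writes requests on helper_send_fd
+ * and reads results on helper_recv_fd (polled as helper_pi).
+ */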
+static int setup_helper(void)
+{
+ int pid;
+ int pw_fd = -1; /* parent write */
+ int cr_fd = -1; /* child read */
+ int pr_fd = -1; /* parent read */
+ int cw_fd = -1; /* child write */
+ int pfd[2];
+
+ /* we can't allow the main daemon loop to block */
+ if (pipe2(pfd, O_NONBLOCK | O_CLOEXEC))
+ return -errno;
+
+ /* fcntl(pfd[1], F_SETPIPE_SZ, 1024*1024); */
+
+ cr_fd = pfd[0];
+ pw_fd = pfd[1];
+
+ if (pipe2(pfd, O_NONBLOCK | O_CLOEXEC)) {
+ close(cr_fd);
+ close(pw_fd);
+ return -errno;
+ }
+
+ pr_fd = pfd[0];
+ cw_fd = pfd[1];
+
+ pid = fork();
+ if (pid < 0) {
+ close(cr_fd);
+ close(pw_fd);
+ close(pr_fd);
+ close(cw_fd);
+ return -errno;
+ }
+
+ if (pid) {
+ close(cr_fd);
+ close(cw_fd);
+ helper_send_fd = pw_fd;
+ helper_recv_fd = pr_fd;
+ helper_pid = pid;
+ return 0;
+ } else {
+ close(pr_fd);
+ close(pw_fd);
+ helper_main(cr_fd, cw_fd, daemon_debug);
+ exit(0);
+ }
+}
+
+static void close_helper(void)
+{
+ close(helper_send_fd);
+ close(helper_recv_fd);
+ helper_send_fd = -1;
+ helper_recv_fd = -1;
+ rem_pollfd(helper_pi);
+ helper_pi = -1;
+ /* don't set helper_pid = -1 until we've tried waitpid */
+}
+
+static void helper_dead(int fd)
+{
+ int pid = helper_pid;
+ int rv, status;
+
+ close_helper();
+
+ helper_pid = -1;
+
+ rv = waitpid(pid, &status, WNOHANG);
+
+ if (rv != pid) {
+ /* should not happen */
+ log_error("helper pid %d dead wait %d", pid, rv);
+ return;
+ }
+
+ if (WIFEXITED(status)) {
+ log_error("helper pid %d exit status %d", pid,
+ WEXITSTATUS(status));
+ return;
+ }
+
+ if (WIFSIGNALED(status)) {
+ log_error("helper pid %d term signal %d", pid,
+ WTERMSIG(status));
+ return;
+ }
+
+ /* should not happen */
+ log_error("helper pid %d state change", pid);
+}
+
+/*
+ * main thread runs process_helper() and process_fence_result();
+ * the result is given to each lockspace as an action to process.
+ */
+
+static void process_fence_result(struct helper_msg *msg)
+{
+ struct lockspace *ls;
+ struct action *ah, *act;
+ int found = 0;
+
+ log_debug("process_fence_result %s msg_id %u result %d", msg->ls_name, msg->msg_id, msg->result);
+
+ /* create a fence result act to pass the result from ah */
+ if (!(act = alloc_action()))
+ return;
+
+ /*
+ * find the OP_FENCE action that initiated the fence request,
+ * it was saved on the fence_history list.
+ */
+ pthread_mutex_lock(&lockspaces_mutex);
+ ls = find_lockspace_name(msg->ls_name);
+ if (!ls) {
+ pthread_mutex_unlock(&lockspaces_mutex);
+ log_error("No lockspace for fence result %s", msg->ls_name);
+ free_action(act);
+ return;
+ }
+
+ pthread_mutex_lock(&ls->mutex);
+ list_for_each_entry(ah, &ls->fence_history, list) {
+ if (ah->msg_id != msg->msg_id)
+ continue;
+
+ if (ah->op != LD_OP_FENCE) {
+ /* shouldn't happen */
+ log_error("process_fence_result wrong history op for msg_id %u", ah->msg_id);
+ }
+
+ /*
+ * change the OP_FENCE action into an OP_FENCE_RESULT action
+ * that is saved in the fence_history.
+ *
+ * TODO: limit history, one per host_id?
+ * e.g. remove older gen results?
+ */
+ ah->op = LD_OP_FENCE_RESULT;
+ ah->result = msg->result;
+
+ /* if the result is failure, then the lock requests
+ waiting on this fence result will return an error */
+
+ found = 1;
+ break;
+ }
+
+ if (!found) {
+ log_error("fence result does not match a fence request");
+ free_action(act);
+ goto out;
+ }
+
+ act->op = LD_OP_FENCE_RESULT;
+ act->owner = ah->owner;
+ act->result = ah->result;
+
+ if (!ls->thread_stop) {
+ list_add_tail(&act->list, &ls->actions);
+ ls->thread_work = 1;
+ pthread_cond_signal(&ls->cond);
+ } else {
+ free_action(act);
+ }
+out:
+ pthread_mutex_unlock(&ls->mutex);
+ pthread_mutex_unlock(&lockspaces_mutex);
+}
+
+static void process_helper(int fd)
+{
+ struct helper_msg msg;
+ int rv;
+
+ memset(&msg, 0, sizeof(msg));
+
+ rv = read(fd, &msg, sizeof(msg));
+ if (!rv || (rv == -1 && errno == EAGAIN))
+ return;
+ if (rv < 0) {
+ log_error("process_helper rv %d errno %d", rv, errno);
+ goto fail;
+ }
+ if (rv != sizeof(msg)) {
+ log_error("process_helper recv size %d", rv);
+ goto fail;
+ }
+
+ if ((msg.type == HELPER_COMMAND_RESULT) && (msg.act == LD_OP_FENCE))
+ process_fence_result(&msg);
+ else
+ log_error("process_helper unknown msg %u %u %u", msg.type, msg.act, msg.msg_id);
+ return;
+
+ fail:
+ close_helper();
+}
+
static void sigterm_handler(int sig __attribute__((unused)))
{
daemon_quit = 1;
struct client *cl;
int i, rv, is_recv, is_dead;
+ rv = setup_helper();
+ if (rv < 0) {
+ log_error("Can't setup helper process");
+ return rv;
+ }
+
signal(SIGTERM, &sigterm_handler);
rv = setup_structs();
if (rv < 0) {
log_error("Can't allocate memory");
+ close_helper();
return rv;
}
listen_fd = ds_arg->socket_fd;
listen_pi = add_pollfd(listen_fd);
+ helper_pi = add_pollfd(helper_recv_fd);
+
setup_client_thread();
setup_worker_thread();
setup_restart();
continue;
}
+ if (i == helper_pi) {
+ if (is_recv)
+ process_helper(pollfd[i].fd);
+ if (is_dead)
+ helper_dead(pollfd[i].fd);
+ continue;
+ }
+
/*
log_debug("poll pi %d fd %d revents %x",
i, pollfd[i].fd, pollfd[i].revents);
unsigned int major = 0;
int rv;
- rv = version_from_args(vg_args, &major, NULL, NULL);
+ rv = lockd_lockargs_get_version(vg_args, &major, NULL, NULL);
if (rv < 0) {
log_error("check_args_version %s error %d", vg_args, rv);
return rv;
--- /dev/null
+/*
+ * Copyright 2025 Red Hat, Inc.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v2 or (at your option) any later version.
+ */
+
+#include <inttypes.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <poll.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <time.h>
+#include <stdarg.h>
+#include <signal.h>
+#include <ctype.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <grp.h>
+#include <syslog.h>
+
+#include "lvmlockd-internal.h"
+
+static struct list_head commands; /* helper_msg_list entries */
+
+static int _log_stderr;
+
+#define log_helper(fmt, args...) \
+do { \
+ if (_log_stderr) \
+ fprintf(stderr, fmt "\n", ##args); \
+} while (0)
+
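+/* remember a forked command, keyed by child pid, until the child exits */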
+static void _save_command(struct helper_msg *msg)
+{
+ struct helper_msg_list *ml;
+
+ ml = malloc(sizeof(struct helper_msg_list));
+ if (!ml)
+ return;
+
+ memcpy(&ml->msg, msg, sizeof(struct helper_msg));
+ list_add_tail(&ml->list, &commands);
+}
+
+static struct helper_msg_list *_get_command(int pid)
+{
+ struct helper_msg_list *ml;
+
+ list_for_each_entry(ml, &commands, list) {
+ if (ml->msg.pid == pid)
+ return ml;
+ }
+ return NULL;
+}
+
+static int read_msg(int fd, struct helper_msg *msg)
+{
+ int rv;
+ retry:
+ rv = read(fd, msg, sizeof(struct helper_msg));
+ if (rv == -1 && errno == EINTR)
+ goto retry;
+
+ if (rv != sizeof(struct helper_msg))
+ return -1;
+ return 0;
+}
+
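+/*
+ * Split cmd_str on whitespace into an argv array (a backslash escapes
+ * a literal backslash or space) and exec it.  Runs in the forked child;
+ * returns only on error, and the caller then exits.
+ */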
+static void exec_command(char *cmd_str)
+{
+ char arg[ONE_ARG_LEN];
+ char *av[MAX_AV_COUNT + 1]; /* +1 for NULL */
+ int av_count = 0;
+ int i, arg_len, cmd_len;
+
+ for (i = 0; i < MAX_AV_COUNT + 1; i++)
+ av[i] = NULL;
+
+ if (!cmd_str[0])
+ return;
+
+ /* this should already be done, but make sure */
+ cmd_str[RUN_COMMAND_LEN - 1] = '\0';
+
+ memset(&arg, 0, sizeof(arg));
+ arg_len = 0;
+ cmd_len = strlen(cmd_str);
+
+ for (i = 0; i < cmd_len; i++) {
+ if (!cmd_str[i])
+ break;
+
+ if (av_count == MAX_AV_COUNT)
+ break;
+
+ /* leave room for the terminating nul in arg */
+ if (arg_len == ONE_ARG_LEN - 1)
+ break;
+
+ if (cmd_str[i] == '\\') {
+ if (i == (cmd_len - 1))
+ break;
+ i++;
+
+ if (cmd_str[i] == '\\') {
+ arg[arg_len++] = cmd_str[i];
+ continue;
+ }
+ if (isspace(cmd_str[i])) {
+ arg[arg_len++] = cmd_str[i];
+ continue;
+ } else {
+ break;
+ }
+ }
+
+ if (isalnum(cmd_str[i]) || ispunct(cmd_str[i])) {
+ arg[arg_len++] = cmd_str[i];
+ } else if (isspace(cmd_str[i])) {
+ if (arg_len)
+ av[av_count++] = strdup(arg);
+
+ memset(arg, 0, sizeof(arg));
+ arg_len = 0;
+ } else {
+ break;
+ }
+ }
+
+ if ((av_count < MAX_AV_COUNT) && arg_len) {
+ av[av_count++] = strdup(arg);
+ }
+
+ if (!av[0])
+ return;
+
+ execvp(av[0], av);
+}
+
+static int send_result(struct helper_msg *msg, int fd)
+{
+ int rv;
+
+ rv = write(fd, msg, sizeof(struct helper_msg));
+
+ if (rv == sizeof(struct helper_msg))
+ return 0;
+ return -1;
+}
+
+#define IDLE_TIMEOUT_MS (30 * 1000)
+#define ACTIVE_TIMEOUT_MS 500
+
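+/*
+ * Helper process main loop: read helper_msg commands from in_fd,
+ * fork and exec each command, and when a child exits send the saved
+ * msg back on out_fd as a HELPER_COMMAND_RESULT with the exit status.
+ */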
+__attribute__((noreturn)) void helper_main(int in_fd, int out_fd, int log_stderr)
+{
+ struct pollfd pollfd;
+ struct helper_msg msg;
+ struct helper_msg_list *ml;
+ siginfo_t info;
+ unsigned int fork_count = 0;
+ unsigned int done_count = 0;
+ int timeout = IDLE_TIMEOUT_MS;
+ int rv, pid;
+
+ INIT_LIST_HEAD(&commands);
+
+ _log_stderr = log_stderr;
+
+ rv = setgroups(0, NULL);
+ if (rv < 0)
+ log_helper("error clearing helper groups errno %i", errno);
+
+ memset(&pollfd, 0, sizeof(pollfd));
+ pollfd.fd = in_fd;
+ pollfd.events = POLLIN;
+
+ openlog("lvmlockd-helper", LOG_CONS | LOG_PID, LOG_LOCAL4);
+
+ while (1) {
+ rv = poll(&pollfd, 1, timeout);
+ if (rv == -1 && errno == EINTR)
+ continue;
+
+ if (rv < 0)
+ exit(0);
+
+ if (pollfd.revents & POLLIN) {
+ memset(&msg, 0, sizeof(msg));
+
+ rv = read_msg(in_fd, &msg);
+ if (rv)
+ continue;
+
+ if (msg.type == HELPER_COMMAND) {
+ pid = fork();
+ if (!pid) {
+ exec_command(msg.command);
+ exit(1);
+ }
+ if (pid < 0) {
+ /* fork failed; send an error result so the
+ request is not left waiting forever */
+ msg.type = HELPER_COMMAND_RESULT;
+ msg.result = -errno;
+ send_result(&msg, out_fd);
+ continue;
+ }
+
+ msg.pid = pid;
+
+ _save_command(&msg);
+
+ fork_count++;
+ }
+ }
+
+ if (pollfd.revents & (POLLERR | POLLHUP | POLLNVAL))
+ exit(0);
+
+ /* collect child exits until no more children exist (ECHILD)
+ or none are ready (WNOHANG) */
+
+ while (1) {
+ memset(&info, 0, sizeof(info));
+
+ rv = waitid(P_ALL, 0, &info, WEXITED | WNOHANG);
+
+ if ((rv < 0) && (errno == ECHILD)) {
+ /*
+ log_helper("helper no children exist fork_count %d done_count %d", fork_count, done_count);
+ */
+ timeout = IDLE_TIMEOUT_MS;
+ }
+
+ else if (!rv && !info.si_pid) {
+ log_helper("helper no children ready fork_count %d done_count %d", fork_count, done_count);
+ timeout = ACTIVE_TIMEOUT_MS;
+ }
+
+ else if (!rv && info.si_pid) {
+ done_count++;
+
+ if (!(ml = _get_command(info.si_pid))) {
+ log_helper("command for pid %d result %d not found",
+ info.si_pid, info.si_status);
+ continue;
+ }
+
+ log_helper("command for pid %d result %d done", info.si_pid, info.si_status);
+
+ ml->msg.type = HELPER_COMMAND_RESULT;
+ ml->msg.result = info.si_status;
+
+ send_result(&ml->msg, out_fd);
+
+ list_del(&ml->list);
+ free(ml);
+ continue;
+ }
+
+ else {
+ log_helper("helper waitid rv %d errno %d fork_count %d done_count %d",
+ rv, errno, fork_count, done_count);
+ }
+
+ break;
+ }
+ }
+}
LD_OP_QUERY_LOCK,
LD_OP_REFRESH_LV,
LD_OP_VG_STATUS,
+ LD_OP_FENCE,
+ LD_OP_FENCE_RESULT,
+ LD_OP_SETLOCKARGS_BEFORE,
+ LD_OP_SETLOCKARGS_FINAL,
};
/* resource types */
#define LD_AF_ADOPT_ONLY 0x00200000 /* adopt orphan or fail */
#define LD_AF_NODELAY 0x00400000
#define LD_AF_REPAIR 0x00800000
+#define LD_AF_NO_TIMEOUT 0x01000000
/*
* Number of times to repeat a lock request after
int num;
};
+#define RUN_COMMAND_LEN 1024
+#define MAX_AV_COUNT 32
+#define ONE_ARG_LEN 256
+
+/* helper_msg types */
+#define HELPER_COMMAND 0x1
+#define HELPER_COMMAND_RESULT 0x2
+
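+/* The main daemon sends a helper_msg with type HELPER_COMMAND to the
+   helper process, which runs msg.command and sends the same msg back
+   with type HELPER_COMMAND_RESULT and pid/result filled in. */
+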
+struct helper_msg {
+ uint8_t type;
+ uint8_t act;
+ uint16_t unused1;
+ uint32_t msg_id;
+ int pid;
+ int result;
+ char ls_name[MAX_NAME+1];
+ uint8_t unused2;
+ uint16_t unused3;
+ char command[RUN_COMMAND_LEN];
+};
+
+struct helper_msg_list {
+ struct helper_msg msg;
+ struct list_head list;
+};
+
#define OWNER_NAME_SIZE 64
#define OWNER_STATE_SIZE 32
struct list_head list;
uint32_t client_id;
uint32_t flags; /* LD_AF_ */
+ uint32_t msg_id;
uint32_t version;
uint32_t host_id;
+ uint64_t ourkey;
+ uint64_t remkey;
uint64_t lv_size_bytes;
+ uint64_t ls_generation;
int8_t op; /* operation type LD_OP_ */
int8_t rt; /* resource type LD_RT_ */
int8_t mode; /* lock mode LD_LK_ */
char lv_uuid[MAX_NAME+1];
char vg_args[MAX_ARGS+1];
char lv_args[MAX_ARGS+1];
- char prev_lv_args[MAX_ARGS+1];
+ char other_args[MAX_ARGS+1];
struct owner owner;
struct pvs pvs; /* PV list for idm */
};
unsigned int use_vb : 1;
struct list_head locks;
struct list_head actions;
+ struct list_head fence_wait_actions;
char lv_args[MAX_ARGS+1];
char lm_data[]; /* lock manager specific data */
};
char vg_args[MAX_ARGS+1]; /* lock manager specific args */
int8_t lm_type; /* lock manager: LM_DLM, LM_SANLOCK */
void *lm_data;
+ uint32_t lock_args_flags;
uint32_t host_id;
uint64_t generation;
+ uint64_t ourkey;
uint64_t free_lock_offset; /* for sanlock, start search for free lock here */
struct pvs pvs; /* for idm: PV list */
unsigned int thread_done : 1;
unsigned int sanlock_gl_enabled: 1;
unsigned int sanlock_gl_dup: 1;
- unsigned int free_vg: 1;
unsigned int kill_vg: 1;
- unsigned int drop_vg: 1;
+ unsigned int fence_pr: 1;
+ unsigned int no_timeout: 1;
struct list_head actions; /* new client actions */
struct list_head resources; /* resource/lock state for gl/vg/lv */
struct list_head dispose; /* resources to free */
+ struct list_head fence_history; /* internally created actions for fencing */
};
/* val_blk version */
struct lockspace *alloc_lockspace(void);
int lockspaces_empty(void);
int last_string_from_args(char *args_in, char *last);
-int version_from_args(char *args, unsigned int *major, unsigned int *minor, unsigned int *patch);
+void helper_main(int in_fd, int out_fd, int log_stderr);
+int lockd_lockargs_get_user_flags(const char *str, uint32_t *flags);
+int lockd_lockargs_get_version(char *str, unsigned int *major, unsigned int *minor, unsigned int *patch);
static inline const char *mode_str(int x)
{
#ifdef LOCKDSANLOCK_SUPPORT
-int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args, int opt_align_mb);
+int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args, int opt_align_mb, char *other_args);
int lm_init_lv_sanlock(struct lockspace *ls, char *ls_name, char *vg_name, char *lv_name, char *vg_args, char *lv_args, char *prev_args);
int lm_free_lv_sanlock(struct lockspace *ls, struct resource *r);
int lm_rename_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args);
int lm_is_running_sanlock(void);
int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes);
int lm_vg_status_sanlock(struct lockspace *ls, struct action *act);
+void lm_set_host_dead_sanlock(struct lockspace *ls, struct owner *owner);
+int lm_setlockargs_supported_sanlock(struct lockspace *ls, struct action *act);
+int lm_setlockargs_vg_sanlock(char *ls_name, char *vg_name, struct action *act);
static inline int lm_support_sanlock(void)
{
#else
-static inline int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args, int opt_align_mb)
+static inline int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args, int opt_align_mb, char *other_args)
{
return -1;
}
return 0;
}
+static inline void lm_set_host_dead_sanlock(struct lockspace *ls, struct owner *owner)
+{
+}
+
+static inline int lm_setlockargs_supported_sanlock(struct lockspace *ls, struct action *act)
+{
+ return 0;
+}
+
+static inline int lm_setlockargs_vg_sanlock(char *ls_name, char *vg_name, struct action *act)
+{
+ return -1;
+}
+
#endif /* sanlock support */
#ifdef LOCKDIDM_SUPPORT
#define SANLK_ADD_NODELAY 0x00000002
/* FIXME: copied from sanlock header until the sanlock update is more widespread */
#define SANLK_GET_HOST_LOCAL 0x00000001
+/* FIXME: copied from sanlock header until the sanlock update is more widespread */
+#define SANLK_LSF_NO_TIMEOUT 0x00000004
#include <stddef.h>
#include <poll.h>
}
/*
- * lock_args format
- *
- * vg_lock_args format for sanlock is
- * vg_version_string:undefined:lock_lv_name
- *
- * lv_lock_args format for sanlock is
- * lv_version_string:undefined:offset
+ * If a new variant of the lock_args string cannot be
+ * handled by the previous version of lvmlockd, then the
+ * new variant should contain a larger major number.
*
- * version_string is MAJOR.MINOR.PATCH
- * undefined may contain ":"
+ * VG_LOCK_ARGS_V1 format:
+ * 1.0.0:lvname
*
- * If a new version of the lock_args string cannot be
- * handled by an old version of lvmlockd, then the
- * new lock_args string should contain a larger major number.
+ * VG_LOCK_ARGS_V2 format:
+ * 2.0.0:lvname:notimeout:persist
+ * 2.0.0:lvname:notimeout
+ * 2.0.0:lvname:persist
*/
-#define VG_LOCK_ARGS_MAJOR 1
+#define VG_LOCK_ARGS_MAJOR 2
#define VG_LOCK_ARGS_MINOR 0
#define VG_LOCK_ARGS_PATCH 0
+#define VG_LOCK_ARGS_V1 "1.0.0"
+#define VG_LOCK_ARGS_V2 "2.0.0"
+
#define LV_LOCK_ARGS_MAJOR 1
#define LV_LOCK_ARGS_MINOR 0
#define LV_LOCK_ARGS_PATCH 0
+#define LV_LOCK_ARGS_V1 "1.0.0"
+
/*
* offset 0 is lockspace
* offset align_size * 1 is unused
memccpy(buf, str, 0, len);
}
-static int lock_lv_name_from_args(char *vg_args, char *lock_lv_name)
+/*
+ * copy out lvname from lock_args string:
+ * 1.0.0:lvname
+ * 2.0.0:lvname
+ * 2.0.0:lvname:other
+ */
+static int lockd_lockargs_get_locklv(char *vg_args, char *lock_lv_name)
{
- return last_string_from_args(vg_args, lock_lv_name);
+ char args[MAX_ARGS+1] = {0};
+ char *p, *name;
+
+ strncpy(args, vg_args, MAX_ARGS);
+
+ if (!(p = strchr(args, ':')))
+ return -1;
+
+ name = p+1;
+ if (!*name)
+ return -1;
+
+ if ((p = strchr(name, ':')))
+ *p = '\0';
+
+ strncpy(lock_lv_name, name, MAX_ARGS);
+ return 0;
}
static int lock_lv_offset_from_args(char *lv_args, uint64_t *lock_lv_offset)
unsigned int major = 0;
int rv;
- rv = version_from_args(args, &major, NULL, NULL);
+ rv = lockd_lockargs_get_version(args, &major, NULL, NULL);
if (rv < 0) {
log_error("check_args_version %s error %d", args, rv);
return rv;
}
#if LOCKDSANLOCK_SUPPORT >= 410
-static int read_info_file(struct lockspace *ls, uint32_t *host_id, uint64_t *generation, int *sector_size, int *align_size)
+static int read_info_file(char *vg_name, uint32_t *host_id, uint64_t *generation, int *sector_size, int *align_size, int *no_timeout)
{
char line[MAX_LINE];
char path[PATH_MAX] = { 0 };
FILE *fp;
- if (dm_snprintf(path, sizeof(path), "/var/lib/lvm/lvmlockd_info_%s", ls->vg_name) < 0)
+ if (dm_snprintf(path, sizeof(path), "/var/lib/lvm/lvmlockd_info_%s", vg_name) < 0)
return -1;
if (!(fp = fopen(path, "r"))) {
} else if (!strncmp(line, "align_size ", 11)) {
if (sscanf(line, "align_size %d", align_size) != 1)
goto fail;
+ } else if (!strncmp(line, "no_timeout ", 11)) {
+ if (sscanf(line, "no_timeout %d", no_timeout) != 1)
+ goto fail;
}
}
_fclose(fp, path);
- log_debug("info file: read %u %llu %d %d", *host_id, (unsigned long long)*generation, *sector_size, *align_size);
+ log_debug("info file: read %u %llu %d %d %d", *host_id, (unsigned long long)*generation, *sector_size, *align_size, *no_timeout);
return 0;
fail:
}
#endif
-static int write_info_file(struct lockspace *ls)
+static int write_info_file(char *vg_name, uint32_t host_id, uint64_t generation, int sector_size, int align_size, int no_timeout)
{
- struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data;
char path[PATH_MAX] = { 0 };
FILE *fp;
time_t t = time(NULL);
- if (dm_snprintf(path, sizeof(path), "/var/lib/lvm/lvmlockd_info_%s", ls->vg_name) < 0)
+ if (dm_snprintf(path, sizeof(path), "/var/lib/lvm/lvmlockd_info_%s", vg_name) < 0)
return -1;
if (!(fp = fopen(path, "w"))) {
return -1;
}
- fprintf(fp, "# vg %s %s created %s", ls->vg_name, ls->vg_uuid, ctime(&t));
- fprintf(fp, "host_id %u\n", ls->host_id);
- fprintf(fp, "generation %llu\n", (unsigned long long)ls->generation);
- fprintf(fp, "sector_size %d\n", lms->sector_size);
- fprintf(fp, "align_size %d\n", lms->align_size);
+ fprintf(fp, "# vg %s created %s", vg_name, ctime(&t));
+ fprintf(fp, "host_id %u\n", host_id);
+ fprintf(fp, "generation %llu\n", (unsigned long long)generation);
+ fprintf(fp, "sector_size %d\n", sector_size);
+ fprintf(fp, "align_size %d\n", align_size);
+ fprintf(fp, "no_timeout %d\n", no_timeout);
if (fflush(fp))
log_warn("Failed to write/flush %s", path);
_fclose(fp, path);
- log_debug("info file: wrote %u %llu %d %d", ls->host_id, (unsigned long long)ls->generation, lms->sector_size, lms->align_size);
+ log_debug("info file: wrote %u %llu %d %d %d", host_id, (unsigned long long)generation, sector_size, align_size, no_timeout);
return 0;
}
sanlock encoded this in the lockspace/resource structs on disk. */
static int read_lockspace_info(char *path, uint32_t host_id, int *sector_size, int *align_size, int *align_mb,
- uint32_t *ss_flags, uint32_t *rs_flags, struct sanlk_host *hs)
+ uint32_t *ss_size_flags, uint32_t *rs_size_flags, int *no_timeout, struct sanlk_host *hs)
{
struct sanlk_lockspace ss;
uint32_t io_timeout = 0;
*sector_size = 4096;
*align_mb = 8;
*align_size = 8 * ONE_MB;
- *ss_flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN8M;
- *rs_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M;
+ *ss_size_flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN8M;
+ *rs_size_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M;
} else if ((ss.flags & SANLK_LSF_SECTOR4K) && (ss.flags & SANLK_LSF_ALIGN4M)) {
*sector_size = 4096;
*align_mb = 4;
*align_size = 4 * ONE_MB;
- *ss_flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN4M;
- *rs_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN4M;
+ *ss_size_flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN4M;
+ *rs_size_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN4M;
} else if ((ss.flags & SANLK_LSF_SECTOR4K) && (ss.flags & SANLK_LSF_ALIGN2M)) {
*sector_size = 4096;
*align_mb = 2;
*align_size = 2 * ONE_MB;
- *ss_flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN2M;
- *rs_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN2M;
+ *ss_size_flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN2M;
+ *rs_size_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN2M;
} else if ((ss.flags & SANLK_LSF_SECTOR4K) && (ss.flags & SANLK_LSF_ALIGN1M)) {
*sector_size = 4096;
*align_mb = 1;
*align_size = ONE_MB;
- *ss_flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN1M;
- *rs_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN1M;
+ *ss_size_flags = SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN1M;
+ *rs_size_flags = SANLK_RES_SECTOR4K | SANLK_RES_ALIGN1M;
} else if ((ss.flags & SANLK_LSF_SECTOR512) && (ss.flags & SANLK_LSF_ALIGN1M)) {
*sector_size = 512;
*align_mb = 1;
*align_size = ONE_MB;
- *ss_flags = SANLK_LSF_SECTOR512 | SANLK_LSF_ALIGN1M;
- *rs_flags = SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M;
+ *ss_size_flags = SANLK_LSF_SECTOR512 | SANLK_LSF_ALIGN1M;
+ *rs_size_flags = SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M;
}
- log_debug("read_lockspace_info %s %u found sector_size %d align_size %d",
- path, host_id, *sector_size, *align_size);
+ if (ss.flags & SANLK_LSF_NO_TIMEOUT)
+ *no_timeout = 1;
+
+ log_debug("read_lockspace_info %s %u found sector_size %d align_size %d no_timeout %d",
+ path, host_id, *sector_size, *align_size, *no_timeout);
return 0;
}
#define MAX_VERSION 16
-int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args, int opt_align_mb)
+int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args, int opt_align_mb, char *other_args)
{
struct sanlk_lockspace ss;
struct sanlk_resourced rd;
struct sanlk_disk disk;
char lock_lv_name[MAX_ARGS+1];
- char lock_args_version[MAX_VERSION+1];
const char *gl_name = NULL;
+ uint32_t lock_args_flags = 0;
uint32_t rs_flags;
uint32_t daemon_version;
uint32_t daemon_proto;
uint64_t offset;
uint64_t dev_size;
+ int no_timeout;
+ int persist;
int sector_size = 0;
int align_size = 0;
int align_mb = 0;
int i, rv;
+ if (other_args && (lockd_lockargs_get_user_flags(other_args, &lock_args_flags) < 0)) {
+ log_error("S %s init_vg_san unknown other args %s", ls_name, other_args);
+ return -EARGS;
+ }
+ no_timeout = (lock_args_flags & LOCKARGS_NOTIMEOUT) ? 1 : 0;
+ persist = (lock_args_flags & LOCKARGS_PERSIST) ? 1 : 0;
+
+#if LOCKDSANLOCK_SUPPORT < 420
+ if (no_timeout || persist) {
+ log_error("S %s init_vg_san sanlock 4.2 required for args %s", ls_name, other_args);
+ return -EARGS;
+ }
+#endif
+
memset(&ss, 0, sizeof(ss));
memset(&rd, 0, sizeof(rd));
memset(&disk, 0, sizeof(disk));
- memset(lock_args_version, 0, sizeof(lock_args_version));
if (!vg_args || !vg_args[0] || !strcmp(vg_args, "none")) {
log_error("S %s init_vg_san vg_args missing", ls_name);
return -EARGS;
}
- snprintf(lock_args_version, MAX_VERSION, "%u.%u.%u",
- VG_LOCK_ARGS_MAJOR, VG_LOCK_ARGS_MINOR, VG_LOCK_ARGS_PATCH);
-
/* see comment above about input vg_args being only lock_lv_name */
dm_strncpy(lock_lv_name, vg_args, sizeof(lock_lv_name));
- if (strlen(lock_lv_name) + strlen(lock_args_version) + 2 > MAX_ARGS)
- return -EARGS;
-
if ((rv = build_dm_path(disk.path, SANLK_PATH_LEN, vg_name, lock_lv_name)))
return rv;
if (daemon_test) {
if (!gl_lsname_sanlock[0])
strncpy(gl_lsname_sanlock, ls_name, MAX_NAME);
- rv = snprintf(vg_args, MAX_ARGS, "%s:%s", lock_args_version, lock_lv_name);
+ rv = snprintf(vg_args, MAX_ARGS, "%s:%s", VG_LOCK_ARGS_V1, lock_lv_name);
if (rv >= MAX_ARGS)
log_debug("init_vg_san vg_args may be too long %d %s", rv, vg_args);
return 0;
return -EARGS;
}
+ if (no_timeout)
+ ss.flags |= SANLK_LSF_NO_TIMEOUT;
+
rv = sanlock_write_lockspace(&ss, 0, 0, sanlock_io_timeout);
if (rv < 0) {
log_error("S %s init_vg_san write_lockspace error %d %s",
return rv;
}
- if (!strcmp(gl_name, R_NAME_GL))
- dm_strncpy(gl_lsname_sanlock, ls_name, sizeof(gl_lsname_sanlock));
-
- rv = snprintf(vg_args, MAX_ARGS, "%s:%s", lock_args_version, lock_lv_name);
- if (rv >= MAX_ARGS)
- log_debug("init_vg_san vg_args may be too long %d %s", rv, vg_args);
-
- log_debug("S %s init_vg_san done vg_args %s", ls_name, vg_args);
-
/*
* Go through all lv resource slots and initialize them with the
* correct lockspace name but a special resource name that indicates
offset += align_size;
}
+ if (no_timeout && persist)
+ rv = snprintf(vg_args, MAX_ARGS, "%s:%s:notimeout:persist", VG_LOCK_ARGS_V2, lock_lv_name);
+ else if (no_timeout)
+ rv = snprintf(vg_args, MAX_ARGS, "%s:%s:notimeout", VG_LOCK_ARGS_V2, lock_lv_name);
+ else if (persist)
+ rv = snprintf(vg_args, MAX_ARGS, "%s:%s:persist", VG_LOCK_ARGS_V2, lock_lv_name);
+ else
+ rv = snprintf(vg_args, MAX_ARGS, "%s:%s", VG_LOCK_ARGS_V1, lock_lv_name);
+
+ if (rv >= MAX_ARGS) {
+ log_error("S %s init_vg_san vg_args string too long %d %s", ls_name, rv, vg_args);
+ return -EINVAL;
+ }
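+ /*
+ * Example resulting vg_args, assuming VG_LOCK_ARGS_V2 expands to
+ * "2.0.0" and the lock LV has the default name "lvmlock":
+ * "2.0.0:lvmlock:notimeout:persist"
+ */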
+
+ if (!strcmp(gl_name, R_NAME_GL))
+ dm_strncpy(gl_lsname_sanlock, ls_name, sizeof(gl_lsname_sanlock));
+
+ log_debug("S %s init_vg_san done vg_args %s", ls_name, vg_args);
+
return 0;
}
struct lm_sanlock *lms;
struct sanlk_resourced rd;
char lock_lv_name[MAX_ARGS+1];
- char lock_args_version[MAX_VERSION+1];
uint64_t offset;
uint64_t prev_offset = 0;
int sector_size = 0;
int align_size = 0;
int align_mb;
+ int no_timeout = 0;
uint32_t ss_flags;
uint32_t rs_flags = 0;
uint32_t tries = 1;
memset(&rd, 0, sizeof(rd));
memset(lock_lv_name, 0, sizeof(lock_lv_name));
- memset(lock_args_version, 0, sizeof(lock_args_version));
memset(disk_path, 0, sizeof(disk_path));
- snprintf(lock_args_version, MAX_VERSION, "%u.%u.%u",
- LV_LOCK_ARGS_MAJOR, LV_LOCK_ARGS_MINOR, LV_LOCK_ARGS_PATCH);
-
if (daemon_test) {
align_size = 1024 * 1024;
snprintf(lv_args, MAX_ARGS, "%s:%llu",
- lock_args_version,
+ LV_LOCK_ARGS_V1,
(unsigned long long)((align_size * LV_LOCK_BEGIN) + (align_size * daemon_test_lv_count)));
daemon_test_lv_count++;
return 0;
}
- rv = lock_lv_name_from_args(vg_args, lock_lv_name);
+ rv = lockd_lockargs_get_locklv(vg_args, lock_lv_name);
if (rv < 0) {
- log_error("S %s init_lv_san lock_lv_name_from_args error %d %s",
+ log_error("S %s init_lv_san lockd_lockargs_get_locklv error %d %s",
ls_name, rv, vg_args);
return rv;
}
/* using host_id 1 to get sizes since we don't need host-specific info */
- rv = read_lockspace_info(disk_path, 1, &sector_size, &align_size, &align_mb, &ss_flags, &rs_flags, NULL);
+ rv = read_lockspace_info(disk_path, 1, &sector_size, &align_size, &align_mb, &ss_flags, &rs_flags, &no_timeout, NULL);
if (rv < 0) {
log_error("S %s init_lv_san read_lockspace_info error %d %s",
ls_name, rv, disk_path);
rv = sanlock_write_resource(&rd.rs, 0, 0, 0);
if (!rv) {
snprintf(lv_args, MAX_ARGS, "%s:%llu",
- lock_args_version, (unsigned long long)offset);
+ LV_LOCK_ARGS_V1, (unsigned long long)offset);
} else {
log_error("S %s init_lv_san write error %d offset %llu",
ls_name, rv, (unsigned long long)offset);
return -EINVAL;
}
- rv = lock_lv_name_from_args(vg_args, lock_lv_name);
+ rv = lockd_lockargs_get_locklv(vg_args, lock_lv_name);
if (rv < 0) {
- log_error("S %s init_lv_san lock_lv_name_from_args error %d %s",
+ log_error("S %s init_lv_san lockd_lockargs_get_locklv error %d %s",
ls_name, rv, vg_args);
return rv;
}
int sector_size = 0;
int align_size = 0;
int align_mb = 0;
+ int no_timeout = 0;
int retries = 0;
int gl_found;
int ret, rv;
goto fail;
}
- rv = lock_lv_name_from_args(ls->vg_args, lock_lv_name);
+ rv = lockd_lockargs_get_locklv(ls->vg_args, lock_lv_name);
if (rv < 0) {
- log_error("S %s prepare_lockspace_san lock_lv_name_from_args error %d %s",
+ log_error("S %s prepare_lockspace_san lockd_lockargs_get_locklv error %d %s",
ls->name, rv, ls->vg_args);
ret = -EARGS;
goto fail;
#endif
sector_size = 0;
align_size = 0;
+ no_timeout = 0;
- rv = read_lockspace_info(disk_path, lms->ss.host_id, &sector_size, &align_size, &align_mb, &ss_flags, &rs_flags, &hs);
+ rv = read_lockspace_info(disk_path, lms->ss.host_id, &sector_size, &align_size, &align_mb, &ss_flags, &rs_flags, &no_timeout, &hs);
#if LOCKDSANLOCK_SUPPORT >= 410
if ((rv == -ELOCKREPAIR) && repair && !retries) {
uint64_t generation = 0;
uint32_t host_id = 0;
- rv = read_info_file(ls, &host_id, &generation, &sector_size, &align_size);
+ rv = read_info_file(ls->vg_name, &host_id, &generation, &sector_size, &align_size, &no_timeout);
if (rv < 0) {
log_error("S %s prepare_lockspace_san cannot repair lockspace no info file", lsname);
ret = -EINVAL;
ret = -EINVAL;
}
+ if (no_timeout)
+ lms->ss.flags |= SANLK_LSF_NO_TIMEOUT;
+
log_debug("S %s prepare_lockspace_san repair host %u lease", lsname, host_id);
rv = sanlock_init_lockspace_host(&lms->ss, NULL, generation, 0, 0, 0);
free(hs);
- write_info_file(ls);
+ write_info_file(ls->vg_name, ls->host_id, ls->generation, lms->sector_size, lms->align_size, ls->no_timeout);
/*
* Don't let the lockspace be cleanly released if orphan locks
rv == SANLK_ACQUIRE_OWNED ||
rv == SANLK_ACQUIRE_OTHER ||
rv == SANLK_ACQUIRE_OWNED_RETRY ||
+ rv == SANLK_ACQUIRE_OWNED_NO_TIMEOUT ||
rv == -EAGAIN) {
/*
if (rv == SANLK_ACQUIRE_OWNED_RETRY)
*retry = 0;
+ if (rv == SANLK_ACQUIRE_OWNED_NO_TIMEOUT)
+ *retry = 0;
+
if (owner && owner_host.host_id) {
const char *host_state;
case SANLK_ACQUIRE_IDLIVE:
case SANLK_ACQUIRE_OWNED:
case SANLK_ACQUIRE_OWNED_RETRY:
+ case SANLK_ACQUIRE_OWNED_NO_TIMEOUT:
case SANLK_ACQUIRE_OTHER:
case SANLK_AIO_TIMEOUT:
/* expected errors from known/normal cases like lock contention or io timeouts */
return 1;
}
+#if LOCKDSANLOCK_SUPPORT >= 420
+
+static void update_info_file(char *vg_name, int no_timeout_new)
+{
+ uint32_t host_id;
+ uint64_t generation;
+ int sector_size;
+ int align_size;
+ int no_timeout;
+ int rv;
+
+ rv = read_info_file(vg_name, &host_id, &generation, §or_size, &align_size, &no_timeout);
+ if (rv < 0)
+ return;
+
+ write_info_file(vg_name, host_id, generation, sector_size, align_size, no_timeout_new);
+}
+
+void lm_set_host_dead_sanlock(struct lockspace *ls, struct owner *owner)
+{
+ struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data;
+ struct sanlk_host host = { 0 };
+ int rv;
+
+ log_debug("S %s set_host_dead_sanlock host_id %u gen %u", ls->name, owner->host_id, owner->generation);
+
+ host.host_id = owner->host_id;
+ host.generation = owner->generation;
+
+ rv = sanlock_set_host(&lms->ss, SANLK_SET_HOST_DEAD_EXT, 0, 0, &host);
+ if (rv)
+ log_error("S %s set_host_dead_sanlock host_id %u gen %u error %d", ls->name, owner->host_id, owner->generation, rv);
+}
+
+int lm_setlockargs_supported_sanlock(struct lockspace *ls, struct action *act)
+{
+ uint32_t daemon_version;
+ uint32_t daemon_proto;
+ uint32_t lock_args_flags = 0;
+ uint32_t ver_major, ver_minor;
+ int rv;
+
+ if (!act->other_args[0]) {
+ log_error("S %s setlockargs_supported empty user lock args", ls->name);
+ return 0;
+ }
+
+ if (lockd_lockargs_get_user_flags(act->other_args, &lock_args_flags) < 0) {
+ log_error("S %s setlockargs_supported invalid user lock args %s", ls->name, act->other_args);
+ return 0;
+ }
+
+ if (!(lock_args_flags & LOCKARGS_NOTIMEOUT) && !(lock_args_flags & LOCKARGS_PERSIST))
+ return 1;
+
+ rv = sanlock_version(0, &daemon_version, &daemon_proto);
+ if (rv < 0) {
+ log_error("S %s setlockargs failed to connect to sanlock daemon", ls->name);
+ return 0;
+ }
+
+ log_debug("S %s setlockargs sanlock version 0x%x lock_args_flags 0x%x", ls->name, daemon_version, lock_args_flags);
+
+ ver_major = (daemon_version & 0xFF000000) >> 24;
+ ver_minor = (daemon_version & 0x00FF0000) >> 16;
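+
+ /* e.g. a daemon_version of 0x04020000 (sanlock 4.2.0) decodes to ver_major 4, ver_minor 2 */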
+
+ /* sanlock 4.2.0 added support for LOCKARGS_NOTIMEOUT and LOCKARGS_PERSIST. */
+
+ if (ver_major < 4)
+ return 0;
+
+ if ((ver_major == 4) && (ver_minor < 2))
+ return 0;
+
+ return 1;
+}
+
+int lm_setlockargs_vg_sanlock(char *ls_name, char *vg_name, struct action *act)
+{
+ struct sanlk_lockspace ss = {0};
+ char lock_lv_name[MAX_ARGS+1] = {0};
+ char disk_path[SANLK_PATH_LEN] = {0};
+ uint32_t ss_size_flags = 0;
+ uint32_t rs_size_flags = 0;
+ uint32_t lock_args_flags = 0;
+ int sector_size = 0;
+ int align_size = 0;
+ int align_mb = 0;
+ int no_timeout;
+ int persist;
+ int rv;
+
+ if (!act->other_args[0]) {
+ log_error("S %s setlockargs empty user lock args", ls_name);
+ return 0;
+ }
+
+ if (lockd_lockargs_get_user_flags(act->other_args, &lock_args_flags) < 0) {
+ log_error("S %s setlockargs invalid user lock args %s", ls_name, act->other_args);
+ return 0;
+ }
+
+ rv = lockd_lockargs_get_locklv(act->vg_args, lock_lv_name);
+ if (rv < 0) {
+ log_error("S %s setlockargs lockd_lockargs_get_locklv error %d %s",
+ ls_name, rv, act->vg_args);
+ return rv;
+ }
+
+ if ((rv = build_dm_path(disk_path, SANLK_PATH_LEN, vg_name, lock_lv_name)))
+ return rv;
+
+ /* get the sector and align flags from host_id 1 in the current lockspace */
+
+ rv = read_lockspace_info(disk_path, 1, &sector_size, &align_size, &align_mb, &ss_size_flags, &rs_size_flags, &no_timeout, NULL);
+ if (rv < 0) {
+ log_error("S %s setlockargs read_lockspace_info error %d %s", ls_name, rv, disk_path);
+ return rv;
+ }
+
+ /* initialize lockspace */
+
+ no_timeout = (lock_args_flags & LOCKARGS_NOTIMEOUT) ? 1 : 0;
+ persist = (lock_args_flags & LOCKARGS_PERSIST) ? 1 : 0;
+
+ strcpy_name_len(ss.name, ls_name, SANLK_NAME_LEN);
+ memcpy(ss.host_id_disk.path, disk_path, SANLK_PATH_LEN);
+ ss.host_id_disk.offset = 0;
+ ss.flags = ss_size_flags;
+
+ if (no_timeout)
+ ss.flags |= SANLK_LSF_NO_TIMEOUT;
+
+ log_debug("S %s setlockargs write_lockspace no_timeout %d flags 0x%x", ls_name, no_timeout, ss.flags);
+
+ rv = sanlock_write_lockspace(&ss, 0, 0, sanlock_io_timeout);
+ if (rv < 0) {
+ log_error("S %s setlockargs write_lockspace error %d %s", ls_name, rv, ss.host_id_disk.path);
+ return rv;
+ }
+
+ update_info_file(vg_name, no_timeout);
+
+ if (no_timeout && persist)
+ rv = snprintf(act->vg_args, MAX_ARGS, "%s:%s:notimeout:persist", VG_LOCK_ARGS_V2, lock_lv_name);
+ else if (no_timeout)
+ rv = snprintf(act->vg_args, MAX_ARGS, "%s:%s:notimeout", VG_LOCK_ARGS_V2, lock_lv_name);
+ else if (persist)
+ rv = snprintf(act->vg_args, MAX_ARGS, "%s:%s:persist", VG_LOCK_ARGS_V2, lock_lv_name);
+ else
+ rv = snprintf(act->vg_args, MAX_ARGS, "%s:%s", VG_LOCK_ARGS_V1, lock_lv_name);
+
+ log_debug("S %s setlockargs new args %s", ls_name, act->vg_args);
+
+ if (rv >= MAX_ARGS) {
+ log_error("S %s setlockargs vg_args string too long %d %s", ls_name, rv, act->vg_args);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+#else
+
+void lm_set_host_dead_sanlock(struct lockspace *ls, struct owner *owner)
+{
+}
+
+int lm_setlockargs_supported_sanlock(struct lockspace *ls, struct action *act)
+{
+ return 0;
+}
+
+int lm_setlockargs_vg_sanlock(char *ls_name, char *vg_name, struct action *act)
+{
+ return -EINVAL;
+}
+#endif /* LOCKDSANLOCK_SUPPORT >= 420 */
}
}
-int persist_is_started(struct cmd_context *cmd, struct volume_group *vg, int may_fail)
+int persist_is_started(struct cmd_context *cmd, struct volume_group *vg, int may_fail, uint64_t *our_key_ret)
{
struct pv_list *pvl;
struct device *dev;
if (!vg_is_registered(cmd, vg, &our_key_val, &partial))
goto out;
+ if (our_key_ret)
+ *our_key_ret = our_key_val;
+
if (partial) {
log_debug("PR is started: partial");
goto out;
/*
* When using an explicit pr_key setting, there's
- * not sanlock generation number that needs updating.
+ * no sanlock generation number that needs updating.
*/
if (local_key)
return 1;
return error ? 0 : 1;
}
+int persist_upgrade_stop(struct cmd_context *cmd, struct volume_group *vg, uint64_t our_key_val)
+{
+ DM_LIST_INIT(devs);
+ char our_key_buf[PR_KEY_BUF_SIZE] = { 0 };
+
+ if (!pv_list_to_dev_list(cmd->mem, &vg->pvs, &devs))
+ return_0;
+
+ if (dm_snprintf(our_key_buf, PR_KEY_BUF_SIZE-1, "0x%llx", (unsigned long long)our_key_val) < 0)
+ return_0;
+
+ if (!_run_stop(cmd, vg, &devs, our_key_buf, 0))
+ return_0;
+
+ return 1;
+}
+
+/*
+ * Host currently holds a normal sh access PR on shared VG,
+ * and wants to switch to an ex access PR on that VG
+ * (to prevent other hosts from using it while it's making
+ * changes.)
+ */
+
+int persist_upgrade_ex(struct cmd_context *cmd, struct volume_group *vg, uint64_t *our_key_held)
+{
+ DM_LIST_INIT(devs);
+ struct device_list *devl;
+ char *local_key = (char *)find_config_tree_str(cmd, local_pr_key_CFG, NULL);
+ int local_host_id = find_config_tree_int(cmd, local_host_id_CFG, NULL);
+ char our_key_buf[PR_KEY_BUF_SIZE] = { 0 };
+ char new_key_buf[PR_KEY_BUF_SIZE] = { 0 };
+ uint64_t our_key_val = 0;
+ uint64_t new_key_val = 0;
+ const char *devname;
+ const char **argv;
+ int pv_count;
+ int args;
+ int status;
+
+ if (!local_key && !local_host_id)
+ return 1;
+
+ if (!get_our_key(cmd, vg, local_key, local_host_id, our_key_buf, &our_key_val))
+ return_0;
+
+ if (!pv_list_to_dev_list(cmd->mem, &vg->pvs, &devs))
+ return_0;
+
+ log_debug("persist_upgrade_ex stop PR %s", our_key_buf);
+
+ if (!_run_stop(cmd, vg, &devs, our_key_buf, 0))
+ return_0;
+
+ if (local_key) {
+ new_key_val = our_key_val;
+ memcpy(new_key_buf, our_key_buf, PR_KEY_BUF_SIZE);
+ } else if (local_host_id) {
+ if (dm_snprintf(new_key_buf, PR_KEY_BUF_SIZE-1, "0x100000000000%04x", local_host_id) != 18) {
+ log_error("Failed to format key string for host_id %d", local_host_id);
+ return 0;
+ }
+ if (!parse_prkey(new_key_buf, &new_key_val)) {
+ log_error("Failed to parse generated key %s", new_key_buf);
+ return 0;
+ }
+ }
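+ /* e.g. local_host_id 5 yields new_key_buf "0x1000000000000005" (host_id 5, gen 0) */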
+
+ pv_count = dm_list_size(&devs);
+
+ log_debug("persist_upgrade_ex start PR on %d devs with local key %llx", pv_count, (unsigned long long)new_key_val);
+
+ args = 9 + pv_count*2;
+ if (vg->pr & VG_PR_PTPL)
+ args += 1;
+
+ if (!(argv = dm_pool_alloc(cmd->mem, args * sizeof(char *))))
+ return_0;
+
+ args = 0;
+ argv[0] = LVMPERSIST_PATH;
+ argv[++args] = "start";
+ argv[++args] = "--ourkey";
+ argv[++args] = new_key_buf;
+ argv[++args] = "--access";
+ argv[++args] = "ex";
+ argv[++args] = "--vg";
+ argv[++args] = vg->name;
+ if (vg->pr & VG_PR_PTPL)
+ argv[++args] = "--ptpl";
+
+ dm_list_iterate_items(devl, &devs) {
+ if (!(devname = dm_pool_strdup(cmd->mem, dev_name(devl->dev))))
+ return_0;
+ argv[++args] = "--device";
+ argv[++args] = devname;
+ }
+
+ argv[++args] = NULL;
+
+ if (!exec_cmd(cmd, argv, &status, 1)) {
+ log_error("persistent reservation exclusive start failed: lvmpersist command error.");
+ log_error("(Use vgchange --persist stop to stop PR on other hosts.");
+ return 0;
+ }
+
+ *our_key_held = new_key_val;
+
+ return 1;
+}
+
/*
* Start PR on devices that are being used for vgcreate.
* This is somewhat awkward because it happens early in
int args;
int status;
+ persist_key_file_remove_name(cmd, vg_name);
+
if (local_key) {
if (!parse_prkey(local_key, &our_key_val)) {
log_error("Failed to parse local key %s", local_key);
* access PR (typically WE), and starts PR with the normal sh access
* PR (typically WEAR), allowing other hosts to also use the new VG.
*/
-int persist_vgcreate_update(struct cmd_context *cmd, struct volume_group *vg, uint32_t set_flags)
+int persist_vgcreate_update(struct cmd_context *cmd, struct volume_group *vg, uint32_t set_flags,
+ uint64_t *our_key_ret)
{
DM_LIST_INIT(devs);
struct device_list *devl;
return 0;
}
+ /* key file is an optimization, not an error condition */
if (!write_key_file(cmd, vg, our_key_val))
stack;
+ if (our_key_ret)
+ *our_key_ret = our_key_val;
+
return 1;
}
int persist_vgcreate_begin(struct cmd_context *cmd, char *vg_name, char *local_key, int local_host_id,
uint32_t set_flags, struct dm_list *devs);
-int persist_vgcreate_update(struct cmd_context *cmd, struct volume_group *vg, uint32_t set_flags);
+int persist_vgcreate_update(struct cmd_context *cmd, struct volume_group *vg, uint32_t set_flags,
+ uint64_t *our_key_ret);
-int persist_is_started(struct cmd_context *cmd, struct volume_group *vg, int may_fail);
+int persist_upgrade_ex(struct cmd_context *cmd, struct volume_group *vg, uint64_t *our_key_held);
+int persist_upgrade_stop(struct cmd_context *cmd, struct volume_group *vg, uint64_t our_key_val);
+
+int persist_is_started(struct cmd_context *cmd, struct volume_group *vg, int may_fail, uint64_t *our_key);
int persist_key_update(struct cmd_context *cmd, struct volume_group *vg, uint32_t prev_gen);
_lvmlockd_connected = 0;
}
+#define MAX_LOCKARGS 8
+
+/* parse lock_args string for values that may appear in command line --setlockargs */
+
+int lockd_lockargs_get_user_flags(const char *str, uint32_t *flags)
+{
+ char buf[PATH_MAX];
+ char *argv[MAX_LOCKARGS];
+ int argc;
+ int i;
+
+ if (!str)
+ return 0;
+
+ dm_strncpy(buf, str, sizeof(buf));
+
+ split_line(buf, &argc, argv, MAX_LOCKARGS, ',');
+
+ for (i = 0; i < argc; i++) {
+ if (!strcmp(argv[i], "persist"))
+ *flags |= LOCKARGS_PERSIST;
+ else if (!strcmp(argv[i], "nopersist"))
+ *flags |= LOCKARGS_NOPERSIST;
+ else if (!strcmp(argv[i], "timeout"))
+ *flags |= LOCKARGS_TIMEOUT;
+ else if (!strcmp(argv[i], "notimeout"))
+ *flags |= LOCKARGS_NOTIMEOUT;
+ else {
+ log_error("Unknown lockargs option value: %s", argv[i]);
+ return 0;
+ }
+ }
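+ /* e.g. str "persist,notimeout" sets LOCKARGS_PERSIST | LOCKARGS_NOTIMEOUT */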
+
+ if (((*flags & LOCKARGS_PERSIST) && (*flags & LOCKARGS_NOPERSIST)) ||
+ ((*flags & LOCKARGS_TIMEOUT) && (*flags & LOCKARGS_NOTIMEOUT))) {
+ log_error("Invalid setlockargs option combination: %s", str);
+ return 0;
+ }
+
+ /*
+ * . nopersist and timeout: default
+ * . persist and notimeout: permitted with setlockargs
+ *
+ * FIXME: when tested, allow
+ * . nopersist and notimeout: requires manual set host dead
+ * . persist and timeout: watchdog still resets host when PR is used
+ */
+ if (((*flags & LOCKARGS_PERSIST) && !(*flags & LOCKARGS_NOTIMEOUT)) ||
+ ((*flags & LOCKARGS_NOTIMEOUT) && !(*flags & LOCKARGS_PERSIST))) {
+ log_error("setlockargs persist and notimeout are currently required together.");
+ return 0;
+ }
+
+ return 1;
+}
+
/* Translate the result strings from lvmlockd to bit flags. */
static void _flags_str_to_lockd_flags(const char *flags_str, uint32_t *lockd_flags)
{
#define NO_LOCKD_RESULT (-1000)
static int _lockd_result(struct cmd_context *cmd, const char *req_name, daemon_reply reply,
- int *result, uint32_t *lockd_flags, struct owner *owner)
+ int *result, uint32_t *lockd_flags, struct owner *owner, uint64_t *our_generation)
{
int reply_result;
const char *str;
owner->name = dm_pool_strdup(cmd->mem, str);
}
+ if (our_generation)
+ *our_generation = (uint64_t)daemon_reply_int(reply, "our_generation", 0);
+
log_debug("lockd %s result: %d", req_name, reply_result);
return 1;
}
const struct lvmlockd_pvs *lock_pvs,
int *result,
uint32_t *lockd_flags,
- struct owner *owner)
+ struct owner *owner,
+ uint64_t *our_generation)
{
const char *cmd_name = get_cmd_name();
daemon_reply reply;
"lv_lock_args = %s", lv_lock_args ?: "none",
NULL);
- if (!_lockd_result(cmd, req_name, reply, result, lockd_flags, owner))
+ if (!_lockd_result(cmd, req_name, reply, result, lockd_flags, owner, our_generation))
goto fail;
/*
"vg_lock_args = %s", vg_lock_args ?: "none",
NULL);
- if (!_lockd_result(cmd, req_name, reply, result, lockd_flags, owner))
+ if (!_lockd_result(cmd, req_name, reply, result, lockd_flags, owner, our_generation))
goto fail;
/*
"vg_lock_type = %s", vg_lock_type ?: "none",
NULL);
- if (!_lockd_result(cmd, req_name, reply, result, lockd_flags, owner))
+ if (!_lockd_result(cmd, req_name, reply, result, lockd_flags, owner, our_generation))
goto fail;
log_debug("lockd %s %s result %d %x",
"lv_size_bytes = " FMTd64, (int64_t) lv_size_bytes,
NULL);
- if (!_lockd_result(cmd, "find_free_lock", reply, &result, NULL, NULL)) {
+ if (!_lockd_result(cmd, "find_free_lock", reply, &result, NULL, NULL, NULL)) {
ret = 0;
} else {
ret = (result < 0) ? 0 : 1;
"vg_lock_type = %s", lock_type,
NULL);
- if (!_lockd_result(cmd, "init_vg", reply, &result, NULL, NULL)) {
+ if (!_lockd_result(cmd, "init_vg", reply, &result, NULL, NULL, NULL)) {
ret = 0;
result = -ELOCKD;
} else {
return _init_vg(cmd, vg, "idm");
}
-static int _init_vg_sanlock(struct cmd_context *cmd, struct volume_group *vg, int lv_lock_count)
+static int _init_vg_sanlock(struct cmd_context *cmd, struct volume_group *vg, int lv_lock_count, const char *set_args)
{
daemon_reply reply;
const char *reply_str;
int ret;
if (!_use_lvmlockd)
- return 0;
+ return_0;
if (!_lvmlockd_connected)
- return 0;
+ return_0;
/*
* We need the sector size to know what size to create the LV,
"vg_name = %s", vg->name,
"vg_lock_type = %s", "sanlock",
"vg_lock_args = %s", vg->sanlock_lv->name,
+ "set_lock_args = %s", set_args ?: "none",
"align_mb = " FMTd64, (int64_t) align_size,
"opts = %s", opts ?: "none",
NULL);
- if (!_lockd_result(cmd, "init_vg", reply, &result, NULL, NULL)) {
+ if (!_lockd_result(cmd, "init_vg", reply, &result, NULL, NULL, NULL)) {
ret = 0;
result = -ELOCKD;
} else {
"vg_lock_args = %s", vg->lock_args,
NULL);
- if (!_lockd_result(cmd, "free_vg", reply, &result, &lockd_flags, NULL)) {
+ if (!_lockd_result(cmd, "free_vg", reply, &result, &lockd_flags, NULL, NULL)) {
ret = 0;
} else {
ret = (result < 0) ? 0 : 1;
"vg_lock_args = %s", vg->lock_args,
NULL);
- if (!_lockd_result(cmd, "busy_vg", reply, &result, &lockd_flags, NULL)) {
+ if (!_lockd_result(cmd, "busy_vg", reply, &result, &lockd_flags, NULL, NULL)) {
ret = 1;
goto out;
}
"vg_lock_args = %s", vg->lock_args,
NULL);
- if (!_lockd_result(cmd, "free_vg", reply, &result, &lockd_flags, NULL)) {
+ if (!_lockd_result(cmd, "free_vg", reply, &result, &lockd_flags, NULL, NULL)) {
ret = 0;
} else {
ret = (result < 0) ? 0 : 1;
/* vgcreate */
int lockd_init_vg(struct cmd_context *cmd, struct volume_group *vg,
- const char *lock_type, int lv_lock_count)
+ const char *lock_type, int lv_lock_count, const char *set_args)
{
switch (get_lock_type_from_string(lock_type)) {
case LOCK_TYPE_NONE:
case LOCK_TYPE_DLM:
return _init_vg_dlm(cmd, vg);
case LOCK_TYPE_SANLOCK:
- return _init_vg_sanlock(cmd, vg, lv_lock_count);
+ return _init_vg_sanlock(cmd, vg, lv_lock_count, set_args);
case LOCK_TYPE_IDM:
return _init_vg_idm(cmd, vg);
default:
* lock the vg, read/use/write the vg, unlock the vg.
*/
-int lockd_start_vg(struct cmd_context *cmd, struct volume_group *vg, int *exists)
+int lockd_start_vg(struct cmd_context *cmd, struct volume_group *vg, uint64_t our_key, int *exists)
{
char uuid[64] __attribute__((aligned(8)));
const char *opts = NULL;
"vg_uuid = %s", uuid[0] ? uuid : "none",
"version = " FMTd64, (int64_t) vg->seqno,
"host_id = " FMTd64, (int64_t) host_id,
+ "our_key = " FMTd64, (int64_t) our_key,
"opts = %s", opts ?: "none",
NULL);
_lockd_free_pv_list(&lock_pvs);
"vg_uuid = %s", uuid[0] ? uuid : "none",
"version = " FMTd64, (int64_t) vg->seqno,
"host_id = " FMTd64, (int64_t) host_id,
+ "our_key = " FMTd64, (int64_t) our_key,
"opts = %s", opts ?: "none",
NULL);
}
- if (!_lockd_result(cmd, "start_vg", reply, &result, &lockd_flags, NULL)) {
+ if (!_lockd_result(cmd, "start_vg", reply, &result, &lockd_flags, NULL, NULL)) {
ret = 0;
result = -ELOCKD;
} else {
"vg_name = %s", vg->name,
NULL);
- if (!_lockd_result(cmd, "stop_vg", reply, &result, NULL, NULL)) {
+ if (!_lockd_result(cmd, "stop_vg", reply, &result, NULL, NULL, NULL)) {
ret = 0;
} else {
ret = (result < 0) ? 0 : 1;
"pid = " FMTd64, (int64_t) getpid(),
NULL);
- if (!_lockd_result(cmd, "start_wait", reply, &result, NULL, NULL)) {
+ if (!_lockd_result(cmd, "start_wait", reply, &result, NULL, NULL, NULL)) {
ret = 0;
} else {
ret = (result < 0) ? 0 : 1;
req:
if (!_lockd_request(cmd, "lock_gl",
NULL, vg_lock_type, NULL, NULL, NULL, NULL, mode, NULL,
- NULL, &result, &lockd_flags, &owner)) {
+ NULL, &result, &lockd_flags, &owner, NULL)) {
/* No result from lvmlockd, it is probably not running. */
log_error("Global lock failed: check that lvmlockd is running.");
return 0;
if (!_lockd_request(cmd, "lock_gl",
NULL, NULL, NULL, NULL, NULL, NULL, mode, opts,
- NULL, &result, &lockd_flags, &owner)) {
+ NULL, &result, &lockd_flags, &owner, NULL)) {
/* No result from lvmlockd, it is probably not running. */
/* We don't care if an unlock fails. */
uint32_t flags, uint32_t *lockd_state)
{
struct owner owner = { 0 };
+ uint64_t our_generation = 0;
char opt_buf[64] = {};
const char *mode = NULL;
const char *opts = NULL;
if (!_lockd_request(cmd, "lock_vg",
vg_name, NULL, NULL, NULL, NULL, NULL, mode, opts,
- NULL, &result, &lockd_flags, &owner)) {
+ NULL, &result, &lockd_flags, &owner, &our_generation)) {
/*
* No result from lvmlockd, it is probably not running.
* Decide if it is ok to continue without a lock in
*/
if ((lockd_flags & LD_RF_DUP_GL_LS) && strcmp(mode, "un"))
log_warn("Duplicate sanlock global lock in VG %s", vg_name);
-
+
return ret;
}
"version = " FMTd64, (int64_t) vg->seqno,
NULL);
- if (!_lockd_result(vg->cmd, "vg_update", reply, &result, NULL, NULL)) {
+ if (!_lockd_result(vg->cmd, "vg_update", reply, &result, NULL, NULL, NULL)) {
ret = 0;
} else {
ret = (result < 0) ? 0 : 1;
{
daemon_reply reply;
struct owner owner = { 0 };
+ uint64_t our_generation = 0;
int result;
int ret = 0;
"vg_name = %s", vg->name,
NULL);
- if (!_lockd_result(vg->cmd, "vg_status", reply, &result, NULL, &owner)) {
+ if (!_lockd_result(vg->cmd, "vg_status", reply, &result, NULL, &owner, &our_generation)) {
log_debug("lockd_vg_status %s no result", vg->name);
goto out;
}
goto out;
}
+ /*
+ * The local host generation number is returned
+ * in both fields; they should always match.
+ */
+ if (our_generation && owner.generation &&
+ ((uint32_t)our_generation != owner.generation)) {
+ log_warn("WARNING: lvmlockd local host generation mismatch %llu vs %u",
+ (unsigned long long)our_generation, owner.generation);
+ }
+
log_debug("lockd_vg_status %s host_id %u gen %u",
vg->name, owner.host_id, owner.generation);
"lv_lock_args = %s", lock_args ?: "none",
NULL);
- if (!_lockd_result(cmd, "query_lock_lv", reply, &result, NULL, NULL)) {
+ if (!_lockd_result(cmd, "query_lock_lv", reply, &result, NULL, NULL, NULL)) {
/* No result from lvmlockd, it is probably not running. */
log_error("Lock query failed for LV %s/%s", vg->name, lv_name);
return 0;
const char *opts = NULL;
const char *mode = NULL;
uint32_t lockd_flags;
+ uint64_t our_generation = 0;
int refreshed = 0;
int result;
struct lvmlockd_pvs lock_pvs;
if (!_lockd_request(cmd, "lock_lv",
vg->name, vg->lock_type, vg->lock_args,
lv_name, lv_uuid, lock_args, mode, opts,
- &lock_pvs, &result, &lockd_flags, NULL)) {
+ &lock_pvs, &result, &lockd_flags, NULL, NULL)) {
_lockd_free_pv_list(&lock_pvs);
/* No result from lvmlockd, it is probably not running. */
log_error("Locking failed for LV %s/%s", vg->name, lv_name);
if (!_lockd_request(cmd, "lock_lv",
vg->name, vg->lock_type, vg->lock_args,
lv_name, lv_uuid, lock_args, mode, opts,
- NULL, &result, &lockd_flags, &owner)) {
+ NULL, &result, &lockd_flags, &owner, &our_generation)) {
/* No result from lvmlockd, it is probably not running. */
log_error("Locking failed for LV %s/%s", vg->name, lv_name);
return 0;
"vg_lock_args = %s", vg->lock_args,
NULL);
- if (!_lockd_result(cmd, "init_lv", reply, &result, NULL, NULL)) {
+ if (!_lockd_result(cmd, "init_lv", reply, &result, NULL, NULL, NULL)) {
ret = 0;
} else {
ret = (result < 0) ? 0 : 1;
"lv_lock_args = %s", lock_args ?: "none",
NULL);
- if (!_lockd_result(cmd, "free_lv", reply, &result, NULL, NULL)) {
+ if (!_lockd_result(cmd, "free_lv", reply, &result, NULL, NULL, NULL)) {
ret = 0;
} else {
ret = (result < 0) ? 0 : 1;
"vg_lock_args = %s", vg->lock_args,
NULL);
- if (!_lockd_result(cmd, "rename_vg_before", reply, &result, NULL, NULL)) {
+ if (!_lockd_result(cmd, "rename_vg_before", reply, &result, NULL, NULL, NULL)) {
ret = 0;
} else {
ret = (result < 0) ? 0 : 1;
* Depending on the problem that caused the rename to
* fail, it may make sense to not restart the VG here.
*/
- if (!lockd_start_vg(cmd, vg, NULL))
+ if (!lockd_start_vg(cmd, vg, 0, NULL))
log_error("Failed to restart VG %s lockspace.", vg->name);
return 1;
}
"vg_lock_args = %s", vg->lock_args,
NULL);
- if (!_lockd_result(cmd, "rename_vg_final", reply, &result, NULL, NULL)) {
+ if (!_lockd_result(cmd, "rename_vg_final", reply, &result, NULL, NULL, NULL)) {
ret = 0;
} else {
ret = (result < 0) ? 0 : 1;
}
}
- if (!lockd_start_vg(cmd, vg, NULL))
+ if (!lockd_start_vg(cmd, vg, 0, NULL))
log_error("Failed to start VG %s lockspace.", vg->name);
return 1;
"pid = " FMTd64, (int64_t) getpid(),
NULL);
- if (!_lockd_result(cmd, "running_lm", reply, &result, NULL, NULL)) {
+ if (!_lockd_result(cmd, "running_lm", reply, &result, NULL, NULL, NULL)) {
log_error("Failed to get result from lvmlockd");
goto out;
}
"path = %s", path,
NULL);
- if (!_lockd_result(cmd, "refresh_lv", reply, &result, NULL, NULL)) {
+ if (!_lockd_result(cmd, "refresh_lv", reply, &result, NULL, NULL, NULL)) {
/* No result from lvmlockd, it is probably not running. */
log_error("LV refresh failed for LV %s", path);
return 0;
log_warn("Ignoring unknown lockopt value: %s", argv[i]);
}
}
+
+int lockd_setlockargs(struct cmd_context *cmd, struct volume_group *vg, const char *set_args, uint64_t *our_key_held)
+{
+ daemon_reply reply;
+ const char *reply_str;
+ const char *vg_lock_args = NULL;
+ uint32_t lockd_flags = 0;
+ uint32_t lock_args_flags = 0;
+ int result;
+ int ret;
+
+ if (!_use_lvmlockd) {
+ log_error("lvmlockd is not in use.");
+ return 0;
+ }
+ if (!_lvmlockd_connected) {
+ log_error("lvmlockd is not connected.");
+ return 0;
+ }
+
+ if (!vg->lock_type || strcmp(vg->lock_type, "sanlock")) {
+ log_error("setlockargs is only supported for lock type sanlock.");
+ return 0;
+ }
+
+ if (!set_args)
+ return_0;
+
+ if (!lockd_lockargs_get_user_flags(set_args, &lock_args_flags))
+ return_0;
+
+ if ((lock_args_flags & LOCKARGS_PERSIST) && !(vg->pr & VG_PR_REQUIRE)) {
+ log_error("lockargs \"persist\" requires persistent reservation setting \"require\".");
+ return 0;
+ }
+
+ /*
+ * Check if other PR keys are registered, which would
+ * cause the persist_upgrade_ex below to fail.
+ */
+ if (vg->pr & (VG_PR_REQUIRE | VG_PR_AUTOSTART)) {
+ struct pv_list *pvl;
+ struct device *dev;
+ int key_count;
+
+ dm_list_iterate_items(pvl, &vg->pvs) {
+ if (!(dev = pvl->pv->dev))
+ continue;
+ if (dm_list_empty(&dev->aliases))
+ continue;
+ if (!dev_find_key(cmd, dev, 0, 0, NULL, 0, NULL, 1, &key_count, NULL)) {
+ /* Shouldn't happen if persist_is_started already passed. */
+ log_error("No PR key found on %s.", dev_name(dev));
+ return 0;
+ }
+ if (key_count != 1) {
+ log_error("Found %d PR keys on %s, stop PR and lockspace on other hosts.", key_count, dev_name(dev));
+ log_error("(See vgchange --lockstop --persist stop.)");
+ return 0;
+ }
+ }
+ }
+
+ /*
+ * setlockargs_before checks that sanlock version supports
+ * the new set_lock_args, checks that no LV locks are held,
+ * checks we are the only host in the lockspace, and stops
+ * the lockspace.
+ */
+
+ log_debug("lockd setlockargs_vg_before %s", vg->name);
+
+ reply = _lockd_send("setlockargs_vg_before",
+ "pid = " FMTd64, (int64_t) getpid(),
+ "vg_name = %s", vg->name,
+ "vg_lock_type = %s", vg->lock_type,
+ "vg_lock_args = %s", vg->lock_args,
+ "set_lock_args = %s", set_args,
+ NULL);
+
+ if (!_lockd_result(cmd, "setlockargs_vg_before", reply, &result, &lockd_flags, NULL, NULL)) {
+ ret = 0;
+ goto out;
+ }
+
+ if (result == -EBUSY) {
+ log_error("Lockspace for \"%s\" not stopped on other hosts", vg->name);
+ ret = 0;
+ goto out;
+ } else if (result < 0) {
+ log_error("Lockspace setlockargs error %d for \"%s\"", result, vg->name);
+ ret = 0;
+ goto out;
+ }
+
+ daemon_reply_destroy(reply);
+
+ /*
+ * When the VG has the ability to use PR, change the
+ * current PR to an exclusive mode (WE), using a key
+ * with our host_id and gen 0. The exclusive PR protects
+ * the VG from other hosts while the locking parameters
+ * are being changed (since locking can't be used while
+ * its lock args are being changed.) The lockspace is stopped
+ * while the change is made. At the end of the vgchange
+ * setlockargs command, persist_upgrade_stop() releases
+ * the exclusive PR. After this, any host can do a normal
+ * start of PR/locking using the new lockargs.
+ */
+ if (vg->pr & (VG_PR_REQUIRE | VG_PR_AUTOSTART)) {
+ if (!persist_upgrade_ex(cmd, vg, our_key_held)) {
+ log_error("Failed to upgrade to exclusive PR.");
+ log_error("Restart PR and locking to retry setlockargs.");
+ return 0;
+ }
+ }
+
+ /*
+ * setlockargs_final reformats sanlock leases on the lvmlock LV.
+ * The host generation numbers will all be reset back to 0, and
+ * the PR keys containing the gen will start over from gen 1.
+ * lvmlockd returns a new lock_args string that this command
+ * writes in VG metadata.
+ */
+
+ retry_final:
+ log_debug("lockd setlockargs_vg_final %s", vg->name);
+
+ reply = _lockd_send("setlockargs_vg_final",
+ "pid = " FMTd64, (int64_t) getpid(),
+ "vg_name = %s", vg->name,
+ "vg_lock_type = %s", vg->lock_type,
+ "vg_lock_args = %s", vg->lock_args,
+ "set_lock_args = %s", set_args,
+ NULL);
+
+ if (!_lockd_result(cmd, "setlockargs_vg_final", reply, &result, &lockd_flags, NULL, NULL)) {
+ ret = 0;
+ goto out;
+ }
+
+ if (result == -EAGAIN) {
+ daemon_reply_destroy(reply);
+ sleep(1);
+ goto retry_final;
+ }
+
+ if (!(reply_str = daemon_reply_str(reply, "vg_lock_args", NULL))) {
+ log_error("VG %s setlockargs failed: result %d new lock_args not returned", vg->name, result);
+ ret = 0;
+ goto out;
+ }
+
+ if (!(vg_lock_args = dm_pool_strdup(cmd->mem, reply_str))) {
+ ret = 0;
+ goto out;
+ }
+
+ log_debug("lockd setlockargs_vg %s result %d new lock_args %s", vg->name, result, vg_lock_args);
+
+ vg->lock_args = vg_lock_args;
+ ret = 1;
+
+out:
+ daemon_reply_destroy(reply);
+ return ret;
+}
+
#include "libdaemon/client/config-util.h"
#include "libdaemon/client/daemon-client.h"
#include "lib/metadata/metadata-exported.h" /* is_lockd_type() */
+#include "daemons/lvmlockd/lvmlockd-client.h"
#define LOCKD_SANLOCK_LV_NAME "lvmlock"
#ifdef LVMLOCKD_SUPPORT
void lockd_lockopt_get_flags(const char *str, uint32_t *flags);
+int lockd_lockargs_get_user_flags(const char *str, uint32_t *flags);
struct lvresize_params;
struct lvcreate_params;
/* vgcreate/vgremove use init/free */
-int lockd_init_vg(struct cmd_context *cmd, struct volume_group *vg, const char *lock_type, int lv_lock_count);
+int lockd_init_vg(struct cmd_context *cmd, struct volume_group *vg,
+ const char *lock_type, int lv_lock_count, const char *set_args);
int lockd_free_vg_before(struct cmd_context *cmd, struct volume_group *vg, int changing, int yes);
void lockd_free_vg_final(struct cmd_context *cmd, struct volume_group *vg);
/* start and stop the lockspace for a vg */
-int lockd_start_vg(struct cmd_context *cmd, struct volume_group *vg, int *exists);
+int lockd_start_vg(struct cmd_context *cmd, struct volume_group *vg, uint64_t our_key, int *exists);
int lockd_stop_vg(struct cmd_context *cmd, struct volume_group *vg);
int lockd_start_wait(struct cmd_context *cmd);
int lockd_vg_is_started(struct cmd_context *cmd, struct volume_group *vg, uint32_t *cur_gen);
int lockd_lvremove_lock(struct cmd_context *cmd, struct logical_volume *lv, struct logical_volume **lv_other, int *other_unlock);
void lockd_lvremove_done(struct cmd_context *cmd, struct logical_volume *lv, struct logical_volume *lv_other, int other_unlock);
+int lockd_setlockargs(struct cmd_context *cmd, struct volume_group *vg, const char *set_args, uint64_t *our_key_held);
+
#else /* LVMLOCKD_SUPPORT */
static inline void lockd_lockopt_get_flags(const char *str, uint32_t *flags)
{
}
+static inline int lockd_lockargs_get_user_flags(const char *str, uint32_t *flags)
+{
+ return 0;
+}
+
static inline void lvmlockd_set_socket(const char *sock)
{
}
return 0;
}
-static inline int lockd_init_vg(struct cmd_context *cmd, struct volume_group *vg, const char *lock_type, int lv_lock_count)
+static inline int lockd_init_vg(struct cmd_context *cmd, struct volume_group *vg,
+ const char *lock_type, int lv_lock_count, const char *set_args)
{
return 1;
}
return 0;
}
+static inline int lockd_setlockargs(struct cmd_context *cmd, struct volume_group *vg, const char *set_args, uint64_t *our_key_held)
+{
+ return 0;
+}
+
#endif /* LVMLOCKD_SUPPORT */
#endif /* _LVMLOCKD_H */
return r;
}
-static int _validate_vg_lock_args(struct volume_group *vg)
-{
- if (!vg->lock_args || !_validate_lock_args_chars(vg->lock_args)) {
- log_error(INTERNAL_ERROR "VG %s has invalid lock_args chars", vg->name);
- return 0;
- }
-
- return 1;
-}
-
/*
* For lock_type sanlock, LV lock_args are <version>:<info>
* For lock_type dlm, LV lock_args are not used, and lock_args is
r = 0;
}
- if (!_validate_vg_lock_args(vg))
- r = 0;
} else {
if (vg->lock_args) {
log_error(INTERNAL_ERROR "VG %s has lock_args %s without lock_type",
}
if ((vg->pr & VG_PR_REQUIRE) && (writing || activating) && !cmd->disable_pr_required) {
- if (!persist_is_started(cmd, vg, 0)) {
+ if (!persist_is_started(cmd, vg, 0, NULL)) {
failure |= FAILED_PR_REQUIRED;
goto_bad;
}
.I VG
.I devices
.P
+Include vgcreate options to use Persistent Reservations (sanlock only):
+.br
+.B --setpersist y --setlockargs persist,notimeout
+.P
+Start Persistent Reservations (if they are used):
+.br
+.B $ vgchange --persist start
+.I VG
+.P
Start the lockspace for the shared VG on all hosts:
.br
.B $ vgchange --lockstart
.br
$ vgchange -an VG
.br
- $ vgchange --lockstop VG
+ $ vgchange --lockstop [--persist stop] VG
.br
$ stop lvmlockd and lock manager
.br
.br
$ start lvmlockd and lock manager
.br
- $ vgchange --lockstart VG
+ $ vgchange --lockstart [--persist start] VG
.P
.
.SH SETUP DETAILS
VG may take some time, and until the start completes the VG may not be
modified or activated. When shutting down, the lockspace is stopped with
vgchange --lockstop VG.
+.P
+.B Persistent Reservations
+.br
+A shared VG with lock type sanlock can take advantage of Persistent
+Reservations (PR) for faster and more reliable recovery. This
+requires that all of the shared devices in the VG support PR. Test
+whether a device supports PR with the command:
+.br
+.B $ lvmpersist devtest --device
+.I device
+.P
+The vgcreate command options when enabling PR recovery with sanlock:
+.br
+.B $ vgcreate --shared --setpersist y --setlockargs persist,notimeout
+.P
+When enabled, PR needs to be started for the VG before locking:
+.br
+.B $ vgchange --persist start
+.I VG
.
.SH TOPICS
.
vgfoo 1 0 0 wz--ns 992.00m 736.00m
.fi
.
+.SS Persistent Reservations
+.
+To enable PR-based recovery ("fencing") in an existing VG:
+.br
+.B $ vgchange --setpersist y --setlockargs persist,notimeout
+.I VG
+.P
+Changing the lock args requires the VG lockspace to be stopped on all other hosts.
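+.br
+For example, stop the lockspace on each other host:
+.br
+.B $ vgchange --lockstop
+.I VG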
+.P
+Once enabled, PR needs to be started before or with lockstart:
+.br
+.B $ vgchange --persist start
+.I VG
+.br
+.B $ vgchange --persist start --lockstart
+.I VG
+.P
+Display the VG attributes configured by setpersist and setlockargs:
+.br
+.B $ vgs -o+persist
+.I VG
+.br
+.B $ vgs -o+lockargs
+.I VG
+.P
+.B setpersist y
+.br
+With this setting, LVM requires that PR be started before
+lockstart, and that PR be started for any VG modification
+or activation.
+.br
+.B setlockargs persist
+.br
+This lockargs setting causes lvmlockd to remove the PR key of a
+failed host when a lock request fails due to a lock owned by the
+failed host. sanlock is then permitted to grant the lock.
+.br
+.B setlockargs notimeout
+.br
+This lockargs setting causes lvmlockd to configure sanlock leases
+to not time out. Removing the PR key of a failed host replaces timeouts
+as a faster mechanism for lock recovery. With timeouts disabled,
+the local watchdog is not used by sanlock for the VG lockspace.
+.P
+For more information, see
+.BR lvmpersist (8).
+.
.SS System ID
.br
In contrast to a shared VG, a local VG can only be used by one host
"If autoactivation is enabled on a VG, autoactivation can be disabled\n"
"for individual LVs.\n")
+arg(setlockargs_ARG, '\0', "setlockargs", string_VAL, 0, 0,
+ "Add or remove lock_args settings for a shared VG.\n"
+ "The lock_args determine lock manager behavior for the VG.\n"
+ "These settings are only allowed for lock_type sanlock.\n"
+ "persist: use persistent reservations for lock recovery.\n"
+ "lvmlockd will preempt-abort the persistent reservation of a failed\n"
+ "lock owner so that the lock can be acquired.\n"
+ "notimeout: use locks that do not time out when the owner fails.\n"
+ "In this case, a lock owned by a failed host can only be acquired\n"
+ "using the persist feature.\n"
+ "nopersist: do not use the persist feature.\n"
+ "timeout: do not use the notimeout feature.\n"
+ "The default behavior with no settings configured is: nopersist and timeout.\n")
+
arg(setpersist_ARG, '\0', "setpersist", string_VAL, 0, 0,
"#vgcreate\n"
"Set flags to control persistent reservation behavior.\n"
ID: vgchange_persist
DESC: Perform persistent reservation commands on devices.
+vgchange --setlockargs String VG|Tag|Select
+OO: --select String
+ID: vgchange_setlockargs
+DESC: Set or clear lock_args flags to control lock manager behavior.
+
vgchange --lockstart
OO: --select String, --persist start
OP: VG|Tag|Select ...
DESC: Stop the lockspace of a shared VG in lvmlockd.
vgchange --locktype LockType VG
+OO: --setlockargs String
ID: vgchange_locktype
DESC: Change the lock type for a shared VG.
--metadatasize SizeMB, --pvmetadatacopies MetadataCopiesPV, --vgmetadatacopies MetadataCopiesVG,
--reportformat ReportFmt, --dataalignment SizeKB, --dataalignmentoffset SizeKB,
--shared, --systemid String, --locktype LockType, --setautoactivation Bool,
---setpersist String, --persist start
+--setpersist String, --persist start, --setlockargs String
ID: vgcreate_general
---
{ vgchange_systemid_CMD, vgchange_systemid_cmd },
{ vgchange_setpersist_CMD, vgchange_setpersist_cmd },
{ vgchange_persist_CMD, vgchange_persist_cmd },
+ { vgchange_setlockargs_CMD, vgchange_setlockargs_cmd },
/* lvdisplay variants */
{ lvdisplay_columns_CMD, lvdisplay_columns_cmd },
*/
int vgcreate_params_set_from_args(struct cmd_context *cmd,
struct vgcreate_params *vp_new,
- struct vgcreate_params *vp_def)
+ struct vgcreate_params *vp_def,
+ struct pvcreate_params *pp)
{
const char *system_id_arg_str;
const char *lock_type = NULL;
vp_new->lock_type = lock_type;
log_debug("Setting lock_type to %s", vp_new->lock_type);
+
+ if (arg_is_set(cmd, setlockargs_ARG)) {
+ const char *set_args;
+ uint32_t lock_args_flags = 0;
+
+ if (!lock_type || strcmp(lock_type, "sanlock")) {
+ log_error("Using setlockargs requires sanlock lock type for shared VG.");
+ return 0;
+ }
+
+ if (!(set_args = arg_str_value(cmd, setlockargs_ARG, NULL)))
+ return_0;
+ if (!lockd_lockargs_get_user_flags(set_args, &lock_args_flags))
+ return_0;
+ if (!pp)
+ return_0;
+
+ if ((lock_args_flags & LOCKARGS_PERSIST) && !(pp->setpersist_flags & (SETPR_Y | SETPR_REQUIRE))) {
+ log_error("Using --setlockargs persist requires --setpersist y|require.");
+ return 0;
+ }
+ }
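+ /* e.g. "vgcreate --shared --setpersist y --setlockargs persist,notimeout" satisfies these checks */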
+
return 1;
}
struct volume_group *vg);
int vgcreate_params_set_from_args(struct cmd_context *cmd,
struct vgcreate_params *vp_new,
- struct vgcreate_params *vp_def);
+ struct vgcreate_params *vp_def,
+ struct pvcreate_params *pp);
int lv_change_activate(struct cmd_context *cmd, struct logical_volume *lv,
activation_change_t activate);
int lv_refresh(struct cmd_context *cmd, struct logical_volume *lv);
int vgchange_systemid_cmd(struct cmd_context *cmd, int argc, char **argv);
int vgchange_setpersist_cmd(struct cmd_context *cmd, int argc, char **argv);
int vgchange_persist_cmd(struct cmd_context *cmd, int argc, char **argv);
+int vgchange_setlockargs_cmd(struct cmd_context *cmd, int argc, char **argv);
const struct opt_name *get_opt_name(int opt);
const struct val_name *get_val_name(int val);
static int _vgchange_lock_start(struct cmd_context *cmd, struct volume_group *vg,
struct vgchange_params *vp)
{
+ uint64_t our_key = 0;
int auto_opt = 0;
int exists = 0;
int r;
if (!persist_start_include(cmd, vg, 0, auto_opt, NULL))
return 0;
- if ((vg->pr & (VG_PR_REQUIRE|VG_PR_AUTOSTART)) && !persist_is_started(cmd, vg, 0)) {
+ if ((vg->pr & (VG_PR_REQUIRE|VG_PR_AUTOSTART)) && !persist_is_started(cmd, vg, 0, &our_key)) {
log_error("VG %s PR should be started before locking (vgchange --persist start)", vg->name);
return 0;
}
- r = lockd_start_vg(cmd, vg, &exists);
+ r = lockd_start_vg(cmd, vg, our_key, &exists);
if (r)
vp->lock_start_count++;
vg->system_id = NULL;
- if (!lockd_init_vg(cmd, vg, lock_type, lv_lock_count)) {
+ if (!lockd_init_vg(cmd, vg, lock_type, lv_lock_count, arg_str_value(cmd, setlockargs_ARG, NULL))) {
log_error("Failed to initialize lock args for lock type %s", lock_type);
return 0;
}
* enabling/starting PR, otherwise enabling/starting PR will
* cause i/o to begin failing on those other hosts.
*/
- if (on && vg_is_shared(vg) && !persist_is_started(cmd, vg, 1) &&
+ if (on && vg_is_shared(vg) && !persist_is_started(cmd, vg, 1, NULL) &&
lockd_vg_is_started(cmd, vg, NULL) && lockd_vg_is_busy(cmd, vg)) {
log_error("VG lockspace should be stopped on all hosts (vgchange --lockstop) before enabling PR.");
return ECMD_FAILED;
return ret;
}
+static int _vgchange_setlockargs_single(struct cmd_context *cmd, const char *vg_name,
+ struct volume_group *vg,
+ struct processing_handle *handle)
+{
+ const char *set = arg_str_value(cmd, setlockargs_ARG, NULL);
+ uint64_t our_key_held = 0;
+
+ if (!set)
+ return_ECMD_FAILED;
+
+ /*
+ * lockd_setlockargs gets exclusive PR (if the VG is using PR),
+ * stops the lockspace, and sets new vg->lock_args that are
+ * written below. If lockd_setlockargs got the ex PR, then
+ * persist_upgrade_stop releases the PR.
+ */
+ if (!lockd_setlockargs(cmd, vg, set, &our_key_held))
+ return_ECMD_FAILED;
+
+ if (!vg_write(vg) || !vg_commit(vg))
+ return_ECMD_FAILED;
+
+ if (our_key_held && !persist_upgrade_stop(cmd, vg, our_key_held))
+ log_warn("Failed to stop PR.");
+ persist_key_file_remove(cmd, vg);
+
+ log_print_unless_silent("Volume group \"%s\" successfully changed.", vg->name);
+
+ return ECMD_PROCESSED;
+}
+
+int vgchange_setlockargs_cmd(struct cmd_context *cmd, int argc, char **argv)
+{
+ struct processing_handle *handle;
+ uint32_t flags = READ_FOR_UPDATE;
+ int ret;
+
+ if (!(handle = init_processing_handle(cmd, NULL))) {
+ log_error("Failed to initialize processing handle.");
+ return ECMD_FAILED;
+ }
+
+ ret = process_each_vg(cmd, argc, argv, NULL, NULL, flags, 0, handle, &_vgchange_setlockargs_single);
+
+ destroy_processing_handle(cmd, handle);
+ return ret;
+}
+
pp.pv_names = argv;
pp.vg_name = vg_name;
pp.preserve_existing = 1; /* Don't create a new PV on top of an existing PV like pvcreate does. */
-
pp.check_consistent_block_size = 1;
if (!vgcreate_params_set_defaults(cmd, &vp_def, NULL))
return EINVALID_CMD_LINE;
vp_def.vg_name = vg_name;
- if (!vgcreate_params_set_from_args(cmd, &vp_new, &vp_def))
+ if (!vgcreate_params_set_from_args(cmd, &vp_new, &vp_def, &pp))
return EINVALID_CMD_LINE;
if (!vgcreate_params_validate(cmd, &vp_new))
* a local VG. lockd_init_vg() then writes the VG a second time with
* both lock_type and lock_args set.
*/
- if (!lockd_init_vg(cmd, vg, vp_new.lock_type, 0)) {
+ if (!lockd_init_vg(cmd, vg, vp_new.lock_type, 0, arg_str_value(cmd, setlockargs_ARG, NULL))) {
log_error("Failed to initialize lock args for lock type %s",
vp_new.lock_type);
vg_remove_pvs(vg);
* read without locks until the lockspace is done starting.)
*/
if (vg_is_shared(vg)) {
+ uint64_t our_key = 0;
+
if (pp.setpersist_flags &&
- !persist_vgcreate_update(cmd, vg, pp.setpersist_flags)) {
+ !persist_vgcreate_update(cmd, vg, pp.setpersist_flags, &our_key)) {
log_error("Failed to start PR");
goto out;
}
- if (!lockd_start_vg(cmd, vg, NULL)) {
+ if (!lockd_start_vg(cmd, vg, our_key, NULL)) {
log_error("Failed to start locking");
goto out;
}
goto_bad;
}
vp_def.vg_name = vg_name_to;
- if (!vgcreate_params_set_from_args(cmd, &vp_new, &vp_def)) {
+ if (!vgcreate_params_set_from_args(cmd, &vp_new, &vp_def, NULL)) {
r = EINVALID_CMD_LINE;
goto_bad;
}