[lxc-users] Unable to join cluster

Joshua Schaeffer jschaeffer at harmonywave.com
Fri Mar 20 19:25:02 UTC 2020


I ran an strace as well. Below is the output from the last lxd init question until I killed the process. All I can see is a connection to the unix socket and then an HTTP 200. I can provide the full strace if that would be helpful to anyone:

futex(0x17e5628, FUTEX_WAIT, 0, NULLChoose the local disk or dataset for storage pool "btrfspool1" (empty for loop disk): /dev/sdj
Would you like a YAML "lxd init" preseed to be printed? (yes/no) [default=no]: no
)   = 0
futex(0xc4200d3548, FUTEX_WAKE, 1)      = 1
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = 0
sched_yield()                           = 0
futex(0x17e4af0, FUTEX_WAKE, 1)         = 0
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = 0
futex(0xc4203ff148, FUTEX_WAKE, 1)      = 1
sched_yield()                           = 0
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = 0
sched_yield()                           = 0
futex(0x17e4af0, FUTEX_WAIT, 2, NULL)   = -1 EAGAIN (Resource temporarily unavailable)
futex(0x17e4af0, FUTEX_WAKE, 1)         = 0
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = 0
futex(0xc42009cd48, FUTEX_WAKE, 1)      = 1
sched_yield()                           = 0
futex(0x17e4af0, FUTEX_WAIT, 2, NULL)   = -1 EAGAIN (Resource temporarily unavailable)
futex(0x17e4af0, FUTEX_WAKE, 1)         = 0
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = -1 EAGAIN (Resource temporarily unavailable)
sched_yield()                           = 0
futex(0x17e4af0, FUTEX_WAIT, 2, NULL)   = 0
futex(0x17e4af0, FUTEX_WAKE, 1)         = 1
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = 0
futex(0xc4204e1548, FUTEX_WAKE, 1)      = 1
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = 0
sched_yield()                           = 0
futex(0x17e4af0, FUTEX_WAIT, 2, NULL)   = -1 EAGAIN (Resource temporarily unavailable)
futex(0x17e4af0, FUTEX_WAKE, 1)         = 1
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = 0
futex(0xc4204e1148, FUTEX_WAKE, 1)      = 1
sched_yield()                           = 0
futex(0x17e4af0, FUTEX_WAIT, 2, NULL)   = -1 EAGAIN (Resource temporarily unavailable)
futex(0x17e4af0, FUTEX_WAKE, 1)         = 1
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = 0
sched_yield()                           = 0
futex(0x17e4af0, FUTEX_WAIT, 2, NULL)   = 0
futex(0x17e4af0, FUTEX_WAKE, 1)         = 0
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = 0
futex(0xc4200d3948, FUTEX_WAKE, 1)      = 1
sched_yield()                           = 0
futex(0x17e4af0, FUTEX_WAKE, 1)         = 0
futex(0x17e4be8, FUTEX_WAKE, 1)         = 0
futex(0x17e4af0, FUTEX_WAKE, 1)         = 1
futex(0x17e4bc0, FUTEX_WAIT, 0, {tv_sec=0, tv_nsec=100000}) = -1 EAGAIN (Resource temporarily unavailable)
futex(0xc42027a148, FUTEX_WAKE, 1)      = 1
futex(0xc4200d3948, FUTEX_WAKE, 1)      = 1
futex(0xc4200d2d48, FUTEX_WAKE, 1)      = 1
futex(0xc4204e1d48, FUTEX_WAKE, 1)      = 1
futex(0xc42027a548, FUTEX_WAKE, 1)      = 1
futex(0xc4203ff948, FUTEX_WAKE, 1)      = 1
futex(0xc42009dd48, FUTEX_WAKE, 1)      = 1
futex(0xc42027b948, FUTEX_WAKE, 1)      = 1
futex(0xc4203fe148, FUTEX_WAKE, 1)      = 1
futex(0xc42009cd48, FUTEX_WAKE, 1)      = 1
futex(0xc4203fed48, FUTEX_WAKE, 1)      = 1
futex(0xc4203ff148, FUTEX_WAKE, 1)      = 1
futex(0xc42009d548, FUTEX_WAKE, 1)      = 1
futex(0xc4200d3148, FUTEX_WAKE, 1)      = 1
futex(0xc4204e0148, FUTEX_WAKE, 1)      = 1
futex(0xc4204e1148, FUTEX_WAKE, 1)      = 1
futex(0xc4204e1948, FUTEX_WAKE, 1)      = 1
futex(0xc4204e1548, FUTEX_WAKE, 1)      = 1
futex(0xc42027a948, FUTEX_WAKE, 1)      = 1
futex(0xc420628148, FUTEX_WAKE, 1)      = 1
futex(0xc420628548, FUTEX_WAKE, 1)      = 1
futex(0xc42027bd48, FUTEX_WAKE, 1)      = 1
futex(0xc42009d148, FUTEX_WAKE, 1)      = 1
futex(0xc4203ff548, FUTEX_WAKE, 1)      = 1
futex(0xc4204e0948, FUTEX_WAKE, 1)      = 1
futex(0xc4200d3548, FUTEX_WAKE, 1)      = 1
futex(0xc4203ffd48, FUTEX_WAKE, 1)      = 1
futex(0xc42027b148, FUTEX_WAKE, 1)      = 1
futex(0xc4203fe548, FUTEX_WAKE, 1)      = 1
futex(0xc42027b548, FUTEX_WAKE, 1)      = 1
futex(0xc4204e0548, FUTEX_WAKE, 1)      = 1
futex(0x17e4af0, FUTEX_WAKE, 1)         = 1
epoll_pwait(4, [], 128, 0, NULL, 34)    = 0
futex(0xc4200d2d48, FUTEX_WAKE, 1)      = 1
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [], 8) = 0
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7fc751ffc000
mprotect(0x7fc751ffd000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7fc7527fbe70, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7fc7527fc9d0, tls=0x7fc7527fc700, child_tidptr=0x7fc7527fc9d0) = 13862
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
futex(0xc420628548, FUTEX_WAKE, 1)      = 1
futex(0x17e4af0, FUTEX_WAKE, 1)         = 1
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = 0
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = 0
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = 0
epoll_pwait(4, [], 128, 0, NULL, 0)     = 0
epoll_pwait(4, [{EPOLLIN|EPOLLOUT, {u32=90537520, u64=140497060724272}}], 128, -1, NULL, 25054624) = 1
futex(0x17e4bd0, FUTEX_WAKE, 1)         = 1
read(5, "\26\3\3\0001\2\0\0-\3\3[cp*\373\264A\34[\234\326\304\3104_yi,v\214\370"..., 1024) = 1024
read(5, "\"\372\347=\244\257\32\vxu\323J5\204\301'\3274\330\332\1\344\316\326,\342`\355\251\350E\334"..., 1078) = 1043
getrandom("\x74\xc4\x77\xb7\x69\xa7\x5f\xe2\x17\x03\x6c\x40\xfd\xb1\x71\xd5\x62\xc9\x02\xa1\x1f\xf0\xf9\xc4\x5b\xe9\x19\xc8\xe2\xb5\xf8\x10", 32, 0) = 32
write(5, "\26\3\3\0\7\v\0\0\3\0\0\0\26\3\3\0%\20\0\0! \33O%\336`\277\340\321\16\362"..., 105) = 105
read(5, 0xc42059e400, 1024)             = -1 EAGAIN (Resource temporarily unavailable)
epoll_pwait(4, [], 128, 0, NULL, 0)     = 0
epoll_pwait(4, [{EPOLLIN|EPOLLOUT, {u32=90537520, u64=140497060724272}}], 128, -1, NULL, 0) = 1
futex(0x17e4bd0, FUTEX_WAKE, 1)         = 1
futex(0x17e4af0, FUTEX_WAKE, 1)         = 1
read(5, "\24\3\3\0\1\1\26\3\3\0(\0\0\0\0\0\0\0\0\235\203\332>S\214\214\nz]\211\215\4"..., 1024) = 51
futex(0xc42027ad48, FUTEX_WAKE, 1)      = 1
write(5, "\27\3\3\0\243\0\0\0\0\0\0\0\1\354tY\204\0\272\36\302W\200\337S\270\315b\300\2231\201"..., 168) = 168
futex(0xc42027ad48, FUTEX_WAKE, 1)      = 1
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = 0
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = 0
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = 0
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = 0
epoll_pwait(4, [], 128, 0, NULL, 0)     = 0
epoll_pwait(4, [{EPOLLIN|EPOLLOUT, {u32=90537520, u64=140497060724272}}], 128, -1, NULL, 25054624) = 1
futex(0x17e4bd0, FUTEX_WAKE, 1)         = 1
futex(0x17e4af0, FUTEX_WAKE, 1)         = 1
read(5, "\26\3\3\0001\2\0\0-\3\0038E$W\267\2146\332\252\241\256$Hw\333\24\314@\f\v\337"..., 1024) = 1024
read(5, "\"\372\347=\244\257\32\vxu\323J5\204\301'\3274\330\332\1\344\316\326,\342`\355\251\350E\334"..., 1078) = 1043
getrandom("\x02\xb0\xb3\x20\xd4\x8d\x58\x56\x0b\x6f\x21\x5e\xe4\x29\x11\xd3\x50\x69\x62\x70\xbe\x6a\x09\x45\x50\xf7\x3a\xeb\xc7\x28\x5e\x2a", 32, 0) = 32
write(5, "\26\3\3\0\7\v\0\0\3\0\0\0\26\3\3\0%\20\0\0! \23\273\263nk\221\227>\305\337"..., 105) = 105
read(5, 0xc42028d000, 1024)             = -1 EAGAIN (Resource temporarily unavailable)
epoll_pwait(4, [], 128, 0, NULL, 0)     = 0
epoll_pwait(4, [{EPOLLIN|EPOLLOUT, {u32=90537520, u64=140497060724272}}], 128, -1, NULL, 0) = 1
futex(0x17e4bd0, FUTEX_WAKE, 1)         = 1
futex(0x17e4af0, FUTEX_WAKE, 1)         = 1
read(5, "\24\3\3\0\1\1\26\3\3\0(\0\0\0\0\0\0\0\0005T\350\7F\326U\241?\246\7\365\346"..., 1024) = 51
futex(0xc42027ad48, FUTEX_WAKE, 1)      = 1
read(5, 0xc420344800, 2048)             = -1 EAGAIN (Resource temporarily unavailable)
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = 0
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = 0
epoll_pwait(4, [], 128, 0, NULL, 0)     = 0
epoll_pwait(4, [{EPOLLOUT, {u32=90537520, u64=140497060724272}}], 128, -1, NULL, 25054624) = 1
epoll_pwait(4, [{EPOLLOUT, {u32=90537520, u64=140497060724272}}], 128, -1, NULL, 25054624) = 1
epoll_pwait(4, [{EPOLLIN|EPOLLOUT, {u32=90537520, u64=140497060724272}}], 128, -1, NULL, 25054624) = 1
futex(0x17e4bd0, FUTEX_WAKE, 1)         = 1
futex(0x17e4af0, FUTEX_WAKE, 1)         = 1
read(5, "HTTP/1.1 101 Switching Protocols"..., 4096) = 311
futex(0xc4200d2d48, FUTEX_WAKE, 1)      = 1
futex(0xc4200d2d48, FUTEX_WAKE, 1)      = 1
socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0) = 6
setsockopt(6, SOL_SOCKET, SO_BROADCAST, [1], 4) = 0
connect(6, {sa_family=AF_UNIX, sun_path="/var/lib/lxd/unix.socket"}, 27) = 0
epoll_ctl(4, EPOLL_CTL_ADD, 6, {EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLET, {u32=90537312, u64=140497060724064}}) = 0
getsockname(6, {sa_family=AF_UNIX}, [112->2]) = 0
getpeername(6, {sa_family=AF_UNIX, sun_path="/var/lib/lxd/unix.socket"}, [112->27]) = 0
futex(0xc4200d2d48, FUTEX_WAKE, 1)      = 1
read(6, 0xc4203f0000, 4096)             = -1 EAGAIN (Resource temporarily unavailable)
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = -1 EAGAIN (Resource temporarily unavailable)
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = 0
epoll_pwait(4, [], 128, 0, NULL, 0)     = 0
epoll_pwait(4, [{EPOLLOUT, {u32=90537312, u64=140497060724064}}], 128, -1, NULL, 25054624) = 1
epoll_pwait(4, [{EPOLLOUT, {u32=90537312, u64=140497060724064}}], 128, -1, NULL, 25054624) = 1
epoll_pwait(4, [{EPOLLIN|EPOLLOUT, {u32=90537312, u64=140497060724064}}], 128, -1, NULL, 25054624) = 1
futex(0x17e4bd0, FUTEX_WAKE, 1)         = 1
futex(0x17e4af0, FUTEX_WAKE, 1)         = 1
read(6, "HTTP/1.1 200 OK\r\nContent-Type: a"..., 4096) = 538
futex(0xc4204e0548, FUTEX_WAKE, 1)      = 1
epoll_ctl(4, EPOLL_CTL_DEL, 6, 0xc4205d79d4) = 0
close(6)                                = 0
futex(0xc4200d2d48, FUTEX_WAKE, 1)      = 1
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = 0
futex(0xc42027b548, FUTEX_WAKE, 1)      = 1
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = -1 EAGAIN (Resource temporarily unavailable)
futex(0x17e5628, FUTEX_WAIT, 0, NULL)   = -1 EAGAIN (Resource temporarily unavailable)
futex(0xc4200d2d48, FUTEX_WAKE, 1)      = 1
sched_yield()                           = 0
futex(0x17e4af0, FUTEX_WAIT, 2, NULL)   = -1 EAGAIN (Resource temporarily unavailable)
futex(0x17e4af0, FUTEX_WAKE, 1)         = 0
futex(0x17e4be8, FUTEX_WAKE, 1)         = 1
futex(0x17e4af0, FUTEX_WAKE, 1)         = 1
futex(0x17e4bc0, FUTEX_WAKE, 1)         = 1
futex(0x17e4af0, FUTEX_WAKE, 1)         = 1
sched_yield()                           = 0
futex(0x17e4af0, FUTEX_WAIT, 2, NULL)   = 0
futex(0x17e4af0, FUTEX_WAKE, 1)         = 1
futex(0x17e5628, FUTEX_WAIT, 0, NULL

^C)   = ? ERESTARTSYS (To be restarted if SA_RESTART is set)
strace: Process 13785 detached


On 3/19/20 11:41, Joshua Schaeffer wrote:
> Hey all, I'm trying to build a cluster on Ubuntu 18.04 with lxd 3.0.3. I was able to bootstrap the first node without any issues, but when I try to add a second node it just hangs and never returns to the terminal prompt. Here is my bootstrapped node:
>
> lxcuser at blllxc02:~$ lxc cluster list
> +----------+----------------------------------------------+----------+--------+-------------------+
> |   NAME   |                     URL                      | DATABASE | STATE  |      MESSAGE      |
> +----------+----------------------------------------------+----------+--------+-------------------+
> | blllxc02 | https://blllxc02-mgmt.harmonywave.cloud:8443 | YES      | ONLINE | fully operational |
> +----------+----------------------------------------------+----------+--------+-------------------+
>
> And here is the second node I am trying to add:
>
> lxcuser at blllxc01:~$ sudo lxd init
> Would you like to use LXD clustering? (yes/no) [default=no]: yes
> What name should be used to identify this node in the cluster? [default=blllxc01]:
> What IP address or DNS name should be used to reach this node? [default=fe80::6a1c:a2ff:fe13:1ec6]: blllxc01-mgmt.harmonywave.cloud
> Are you joining an existing cluster? (yes/no) [default=no]: yes
> IP address or FQDN of an existing cluster node: blllxc02-mgmt.harmonywave.cloud
> Cluster fingerprint: 20b51145761f3444278317331feeded8492c263920889f5dccd83772da0c42cf
> You can validate this fingerpring by running "lxc info" locally on an existing node.
> Is this the correct fingerprint? (yes/no) [default=no]: yes
> Cluster trust password:
> All existing data is lost when joining a cluster, continue? (yes/no) [default=no] yes
> Choose the local disk or dataset for storage pool "btrfspool1" (empty for loop disk): /dev/sdj
> Would you like a YAML "lxd init" preseed to be printed? (yes/no) [default=no]:
>
> ^C
> lxcuser at blllxc01:~$ lxc cluster list
> Error: LXD server isn't part of a cluster
>
> After the last question from lxd init my terminal never returns. I've left it like this overnight with no change. This is all I'm seeing in the logs as well from the time I run lxd init to when I abort the process:
>
> Logs from the node being added:
> t=2020-03-18T20:17:07-0600 lvl=info msg="Creating BTRFS storage pool \"btrfspool1\""
> t=2020-03-18T20:17:08-0600 lvl=warn msg="Failed to detect UUID by looking at /dev/disk/by-uuid"
> t=2020-03-18T20:17:08-0600 lvl=info msg="Created BTRFS storage pool \"btrfspool1\""
> t=2020-03-19T02:12:27-0600 lvl=info msg="Updating images"
> t=2020-03-19T02:12:27-0600 lvl=info msg="Done updating images"
> t=2020-03-19T08:12:27-0600 lvl=info msg="Updating images"
> t=2020-03-19T08:12:27-0600 lvl=info msg="Done updating images"
>
> Logs from the bootstrapped node:
> t=2020-03-18T17:05:58-0600 lvl=info msg="Initializing global database"
> t=2020-03-18T17:06:02-0600 lvl=warn msg="Raft: Heartbeat timeout from \"\" reached, starting election"
> t=2020-03-18T17:06:03-0600 lvl=info msg="Initializing storage pools"
> t=2020-03-18T17:06:03-0600 lvl=info msg="Initializing networks"
> t=2020-03-18T17:06:03-0600 lvl=info msg="Pruning leftover image files"
> t=2020-03-18T17:06:03-0600 lvl=info msg="Done pruning leftover image files"
> t=2020-03-18T17:06:03-0600 lvl=info msg="Loading daemon configuration"
> t=2020-03-18T17:06:03-0600 lvl=info msg="Pruning expired images"
> t=2020-03-18T17:06:03-0600 lvl=info msg="Done pruning expired images"
> t=2020-03-18T17:06:03-0600 lvl=info msg="Expiring log files"
> t=2020-03-18T17:06:03-0600 lvl=info msg="Done expiring log files"
> t=2020-03-18T17:06:03-0600 lvl=info msg="Updating images"
> t=2020-03-18T17:06:03-0600 lvl=info msg="Done updating images"
> t=2020-03-18T17:06:03-0600 lvl=info msg="Updating instance types"
> t=2020-03-18T17:06:03-0600 lvl=info msg="Done updating instance types"
> t=2020-03-18T23:06:03-0600 lvl=info msg="Updating images"
> t=2020-03-18T23:06:03-0600 lvl=info msg="Done updating images"
> t=2020-03-19T05:06:03-0600 lvl=info msg="Updating images"
> t=2020-03-19T05:06:03-0600 lvl=info msg="Done updating images"
> t=2020-03-19T11:06:03-0600 lvl=info msg="Updating images"
> t=2020-03-19T11:06:03-0600 lvl=info msg="Done updating images"
>
> Any idea where I can get more information about what is going on to successfully add the node to the cluster?
> -- 
> Thanks,
> Joshua Schaeffer
>
> _______________________________________________
> lxc-users mailing list
> lxc-users at lists.linuxcontainers.org
> http://lists.linuxcontainers.org/listinfo/lxc-users

-- 
Thanks,
Joshua Schaeffer

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.linuxcontainers.org/pipermail/lxc-users/attachments/20200320/f90e4783/attachment-0001.htm>


More information about the lxc-users mailing list