commit 9a52a161cfe09ef54d78262ed723e2067a4b42a4 Author: Mikolaj Golub Date: Sat Aug 31 17:05:07 2013 +0300 In remote_recv_thread(), for memsync requests, the detection of whether a reply is "remote memsync" or "remote final" does not work correctly when processing pending requests after secondary disconnect or when a request fails on remote node. As a result such requests may leak. Fix this by adding hio_memsyncacked flag to hio to properly handle memsync requests with error. diff --git a/sbin/hastd/primary.c b/sbin/hastd/primary.c index 5a6cf90..2f8b624 100644 --- a/sbin/hastd/primary.c +++ b/sbin/hastd/primary.c @@ -98,6 +98,10 @@ struct hio { * so we won't get confused when replication changes on reload. */ int hio_replication; + /* + * Memsync request was acknowledged by remote. + */ + bool hio_memsyncacked; TAILQ_ENTRY(hio) *hio_next; }; #define hio_free_next hio_next[0] @@ -1184,6 +1188,7 @@ ggate_recv_thread(void *arg) ggio->gctl_error = 0; hio->hio_done = false; hio->hio_replication = res->hr_replication; + hio->hio_memsyncacked = false; pjdlog_debug(2, "ggate_recv: (%p) Waiting for request from the kernel.", hio); @@ -1720,6 +1725,7 @@ remote_recv_thread(void *arg) TAILQ_REMOVE(&hio_recv_list[ncomp], hio, hio_next[ncomp]); mtx_unlock(&hio_recv_list_lock[ncomp]); + hio->hio_errors[ncomp] = ENOTCONN; goto done_queue; } if (hast_proto_recv_hdr(res->hr_remotein, &nv) == -1) { @@ -1797,6 +1803,18 @@ done_queue: if (refcnt_release(&hio->hio_countdown) > 0) continue; } else { + if (hio->hio_errors[ncomp] != 0) { + if (!hio->hio_memsyncacked) { + /* + * Don't wait for remote final after + * the error, emulate it instead. + */ + refcnt_release(&hio->hio_countdown); + hio->hio_memsyncacked = true; + } + } + PJDLOG_ASSERT((memsyncack && !hio->hio_memsyncacked) || + (!memsyncack && hio->hio_memsyncacked)); /* * Depending on hio_countdown value, requests finished * in the following order: @@ -1818,42 +1836,21 @@ done_queue: /* * Remote final reply arrived. 
*/ - PJDLOG_ASSERT(!memsyncack); + PJDLOG_ASSERT(hio->hio_memsyncacked); break; case 1: - if (memsyncack) { - /* - * Local request already finished, so we - * can complete the write. - */ - if (hio->hio_errors[0] == 0) - write_complete(res, hio); - /* - * We still need to wait for final - * remote reply. - */ - pjdlog_debug(2, - "remote_recv: (%p) Moving request back to the recv queue.", - hio); - mtx_lock(&hio_recv_list_lock[ncomp]); - TAILQ_INSERT_TAIL(&hio_recv_list[ncomp], - hio, hio_next[ncomp]); - mtx_unlock(&hio_recv_list_lock[ncomp]); - } else { - /* - * Remote final reply arrived before - * local write finished. - * Nothing to do in such case. - */ - } - continue; - case 2: + if (hio->hio_memsyncacked) + continue; /* - * We received remote memsync reply even before - * local write finished. + * Local request already finished, so we can + * complete the write. */ - PJDLOG_ASSERT(memsyncack); - + if (hio->hio_errors[0] == 0) + write_complete(res, hio); + /* FALLTHROUGH */ + case 2: + PJDLOG_ASSERT(!hio->hio_memsyncacked); + hio->hio_memsyncacked = true; pjdlog_debug(2, "remote_recv: (%p) Moving request back to the recv queue.", hio); @@ -2130,6 +2127,7 @@ sync_thread(void *arg __unused) ggio->gctl_error = 0; hio->hio_done = false; hio->hio_replication = res->hr_replication; + hio->hio_memsyncacked = false; for (ii = 0; ii < ncomps; ii++) hio->hio_errors[ii] = EINVAL; reqlog(LOG_DEBUG, 2, ggio, "sync: (%p) Sending sync request: ",