Wait for the socket to become ready to receive more data before attempting to
author Pavan Deolasee <[email protected]>
Tue, 17 Nov 2015 12:28:49 +0000 (17:58 +0530)
committer Pavan Deolasee <[email protected]>
Tue, 17 Nov 2015 16:55:39 +0000 (22:25 +0530)
write again

We'd seen that the coordinator can become CPU bound while loading large chunks
of data. Upon investigation, it was found that the coordinator process keeps
trying to send more data even though the underlying networking layer is not
yet ready to accept it, most likely because the kernel send-buffer is
full. Instead of retrying in a tight loop, we should check for socket readiness
and only then write more.

This should also fix the problem of the COPY process running out of memory on
the coordinator (exhibited by the "invalid memory alloc request size" error
seen during pg_restore as well as COPY).

src/backend/pgxc/pool/pgxcnode.c

index 274dd51bfd5674e8f0118f6244ee4e4f408847a4..cd700b92d28005f90d80b2015c664b7a6f61d0f6 100644 (file)
@@ -1080,12 +1080,42 @@ send_some(PGXCNodeHandle *handle, int len)
 
                if (len > 0)
                {
+                       struct pollfd pool_fd;
+                       int poll_ret;
+
                        /*
-                        * We did not send it all
-                        * return 1 to indicate that data is still pending.
+                        * Wait for the socket to become ready again to receive more data.
+                        * For some cases, especially while writing large sums of data
+                        * during COPY protocol and when the remote node is not capable of
+                        * handling data at the same speed, we might otherwise go in a
+                        * useless tight loop, consuming all available local resources
+                        *
+                        * Use a small timeout of 1s to avoid infinite wait
                         */
-                       result = 1;
-                       break;
+                       pool_fd.fd = handle->sock;
+                       pool_fd.events = POLLOUT;
+
+                       poll_ret = poll(&pool_fd, 1, 1000);
+                       if (poll_ret < 0)
+                       {
+                               if (errno == EAGAIN || errno == EINTR)
+                                       continue;
+                               else
+                               {
+                                       add_error_message(handle, "poll failed ");
+                                       handle->outEnd = 0;
+                                       return -1;
+                               }
+                       }
+                       else if (poll_ret == 1)
+                       {
+                               if (pool_fd.revents & POLLHUP)
+                               {
+                                       add_error_message(handle, "remote end disconnected");
+                                       handle->outEnd = 0;
+                                       return -1;
+                               }
+                       }
                }
        }