summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Smith <psmith@gnu.org>1999-08-01 08:12:06 +0000
committerPaul Smith <psmith@gnu.org>1999-08-01 08:12:06 +0000
commite34540f79be9c915ce1d6191575267066370f6ec (patch)
tree8418cbb560875512845504a0bd504db81cf4232e
parent09f1e4cf83853e3636a19da17e28e7291aac9bbd (diff)
downloadgunmake-e34540f79be9c915ce1d6191575267066370f6ec.tar.gz
* Release 3.77.92.3.77.92
* Complete implementation of new jobserver algorithm. * A few minor fixups.
-rw-r--r--ChangeLog39
-rw-r--r--arscan.c7
-rw-r--r--configure.in2
-rw-r--r--function.c6
-rw-r--r--job.c109
-rw-r--r--main.c11
-rw-r--r--make.h8
-rw-r--r--remake.c7
8 files changed, 117 insertions, 72 deletions
diff --git a/ChangeLog b/ChangeLog
index 59d403d..14f2c6d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,43 @@
+1999-08-01 Paul D. Smith <psmith@gnu.org>
+
+ New jobserver algorithm to avoid a possible hole where we could
+ miss SIGCHLDs and get into a deadlock. The original algorithm was
+ suggested by Roland McGrath with a nice refinement by Paul Eggert.
+ Many thanks as well to Tim Magill and Howard Chu, who also
+ provided many viable ideas and critiques. We all had a fun week
+ dreaming up interesting ways to use and abuse UNIX syscalls :).
+
+ Previously we could miss a SIGCHLD if it happened after we reaped
+ the children but before we re-entered the blocking read. If this
+ happened to all makes and/or all children, make would never wake
+ up.
+
+ We avoid this by having the SIGCHLD handler reset the blocking bit
+ on the jobserver pipe read FD (normally read does block in this
+ algorithm). Now if the handler is called between the time we reap
+ and the time we read(), and there are no tokens available, the
+ read will merely return with EAGAIN instead of blocking.
+
+ * main.c (main): Set the blocking bit explicitly here.
+ * job.c (child_handler): If we have a jobserver pipe, set the
+ non-blocking bit for it.
+ (start_waiting_job): Move the token stuff back to new_job; if we
+ do it here then we're not controlling the number of remote jobs
+ started!
+ (new_job): Move the check for job slots to _after_ we've created a
+ child structure. If the read returns without getting a token, set
+ the blocking bit then try to reap_children.
+
+ * make.h (EINTR_SET): Define to test errno if EINTR is available,
+ or 0 otherwise. Just some code cleanup.
+ * arscan.c (ar_member_touch): Use it.
+ * function.c (func_shell): Use it.
+ * job.c (reap_children): Use it.
+ * remake.c (touch_file): Use it.
+
1999-07-28 Paul D. Smith <psmith@gnu.org>
- * make.h: Define _() and N_() macros as a passthrough to initiate
+ * make.h: Define _() and N_() macros as passthrough to initiate
NLS support.
* <all>: Add _()/N_() around translatable strings.
diff --git a/arscan.c b/arscan.c
index cc9b9fd..25e739a 100644
--- a/arscan.c
+++ b/arscan.c
@@ -775,11 +775,8 @@ ar_member_touch (arname, memname)
if (AR_HDR_SIZE != write (fd, (char *) &ar_hdr, AR_HDR_SIZE))
goto lose;
/* The file's mtime is the time we we want. */
-#ifdef EINTR
- while (fstat (fd, &statbuf) < 0 && errno == EINTR);
-#else
- fstat (fd, &statbuf);
-#endif
+ while (fstat (fd, &statbuf) < 0 && EINTR_SET)
+ ;
#if defined(ARFMAG) || defined(ARFZMAG) || defined(AIAMAG) || defined(WINDOWS32)
/* Advance member's time to that time */
for (i = 0; i < sizeof ar_hdr.ar_date; i++)
diff --git a/configure.in b/configure.in
index 096df54..a864e17 100644
--- a/configure.in
+++ b/configure.in
@@ -3,7 +3,7 @@ AC_REVISION([$Id$])
AC_PREREQ(2.13)dnl dnl Minimum Autoconf version required.
AC_INIT(vpath.c)dnl dnl A distinctive file to look for in srcdir.
-AM_INIT_AUTOMAKE(make, 3.77.91)
+AM_INIT_AUTOMAKE(make, 3.77.92)
AM_CONFIG_HEADER(config.h)
dnl Regular configure stuff
diff --git a/function.c b/function.c
index 0771274..4047339 100644
--- a/function.c
+++ b/function.c
@@ -1381,11 +1381,7 @@ func_shell (o, argv, funcname)
if (cc > 0)
i += cc;
}
-#ifdef EINTR
- while (cc > 0 || errno == EINTR);
-#else
- while (cc > 0);
-#endif
+ while (cc > 0 || EINTR_SET);
/* Close the read side of the pipe. */
#ifdef __MSDOS__
diff --git a/job.c b/job.c
index 17743a2..575a030 100644
--- a/job.c
+++ b/job.c
@@ -145,6 +145,17 @@ extern int wait ();
#endif /* Don't have `union wait'. */
+/* How to set close-on-exec for a file descriptor. */
+
+#if !defined F_SETFD
+# define CLOSE_ON_EXEC(_d)
+#else
+# ifndef FD_CLOEXEC
+# define FD_CLOEXEC 1
+# endif
+# define CLOSE_ON_EXEC(_d) (void) fcntl ((_d), F_SETFD, FD_CLOEXEC)
+#endif
+
#ifdef VMS
static int vms_jobsefnmask = 0;
#endif /* !VMS */
@@ -218,9 +229,6 @@ int w32_kill(int pid, int sig)
#endif /* WINDOWS32 */
-#ifndef MAKE_JOBSERVER
-# define free_job_token(c)
-#else
static void
free_job_token (child)
struct child *child;
@@ -248,7 +256,6 @@ free_job_token (child)
child->job_token = '-';
}
-#endif
/* Write an error message describing the exit status given in
@@ -302,14 +309,13 @@ vmsWaitForChildren(int *status)
/* Handle a dead child. This handler may or may not ever be installed.
If we're using the jobserver blocking read, we need it. First, installing
- it ensures the read will interrupt on SIGCHLD. Second, we close the dup'd
- read side of the pipe to ensure we don't enter another blocking read
- without reaping all the dead children. In this case we don't need the
- dead_children count.
+ it ensures the read will interrupt on SIGCHLD. Second, we reset the
+ blocking bit on the read side of the pipe to ensure we don't enter another
+ blocking read without reaping all the dead children. In this case we
+ don't need the dead_children count.
If we don't have either waitpid or wait3, then make is unreliable, but we
- use the dead_children count to reap children as best we can. In this case
- job_rfd will never be >= 0. */
+ use the dead_children count to reap children as best we can. */
static unsigned int dead_children = 0;
@@ -319,9 +325,15 @@ child_handler (sig)
{
++dead_children;
- if (job_rfd >= 0)
- close (job_rfd);
- job_rfd = -1;
+#ifdef HAVE_JOBSERVER
+ if (job_fds[0] >= 0)
+ {
+ int fl = fcntl(job_fds[0], F_GETFL, 0);
+
+ if (fl >= 0)
+ fcntl(job_fds[0], F_SETFL, fl | O_NONBLOCK);
+ }
+#endif
if (debug_flag)
printf (_("Got a SIGCHLD; %u unreaped children.\n"), dead_children);
@@ -330,11 +342,12 @@ child_handler (sig)
extern int shell_function_pid, shell_function_completed;
-/* Reap dead children, storing the returned status and the new command
+/* Reap all dead children, storing the returned status and the new command
state (`cs_finished') in the `file' member of the `struct child' for the
- dead child, and removing the child from the chain. If BLOCK nonzero,
- reap at least one child, waiting for it to die if necessary. If ERR is
- nonzero, print an error message first. */
+ dead child, and removing the child from the chain. In addition, if BLOCK
+ nonzero, we block in this function until we've reaped at least one
+ complete child, waiting for it to die if necessary. If ERR is nonzero,
+ print an error message first. */
void
reap_children (block, err)
@@ -415,19 +428,18 @@ reap_children (block, err)
{
/* A remote status command failed miserably. Punt. */
remote_status_lose:
-#ifdef EINTR
- if (errno == EINTR)
+ if (EINTR_SET)
continue;
-#endif
+
pfatal_with_name ("remote_status");
}
else
{
/* No remote children. Check for local children. */
-
#if !defined(__MSDOS__) && !defined(_AMIGA) && !defined(WINDOWS32)
if (any_local)
{
+ local_wait:
#ifdef VMS
vmsWaitForChildren (&status);
pid = c->pid;
@@ -446,15 +458,14 @@ reap_children (block, err)
if (pid < 0)
{
/* The wait*() failed miserably. Punt. */
-#ifdef EINTR
- if (errno == EINTR)
- continue;
-#endif
+ if (EINTR_SET)
+ goto local_wait;
+
pfatal_with_name ("wait");
}
else if (pid > 0)
{
- /* We got one; chop the status word up. */
+ /* We got a child exit; chop the status word up. */
exit_code = WEXITSTATUS (status);
exit_sig = WIFSIGNALED (status) ? WTERMSIG (status) : 0;
coredump = WCOREDUMP (status);
@@ -1171,20 +1182,16 @@ start_waiting_job (c)
c->remote = start_remote_job_p (1);
- /* If not, start it locally. */
- if (!c->remote)
+ /* If we are running at least one job already and the load average
+ is too high, make this one wait. */
+ if (!c->remote && job_slots_used > 0 && load_too_high ())
{
- /* If we are running at least one job already and the load average
- is too high, make this one wait. */
- if (job_slots_used > 0 && load_too_high ())
- {
- /* Put this child on the chain of children waiting
- for the load average to go down. */
- set_command_state (f, cs_running);
- c->next = waiting_jobs;
- waiting_jobs = c;
- return 0;
- }
+ /* Put this child on the chain of children waiting for the load average
+ to go down. */
+ set_command_state (f, cs_running);
+ c->next = waiting_jobs;
+ waiting_jobs = c;
+ return 0;
}
/* Start the first command; reap_children will run later command lines. */
@@ -1379,19 +1386,25 @@ new_job (file)
}
/* Read a token. As long as there's no token available we'll block.
If we get a SIGCHLD we'll return with EINTR. If one happened
- before we got here we'll return with EBADF. */
- else if (read (job_rfd, &c->job_token, 1) < 1)
+ before we got here we'll return immediately with EAGAIN because
+ the signal handler unsets the blocking bit. */
+ else if (read (job_fds[0], &c->job_token, 1) < 1)
{
- if (errno == EINTR)
- ;
+ int fl;
- /* If EBADF, we got a SIGCHLD before. Otherwise, who knows? */
- else if (errno != EBADF)
+#if !defined(EAGAIN)
+# define EAGAIN EWOULDBLOCK
+#endif
+ if (errno != EINTR && errno != EAGAIN)
pfatal_with_name (_("read jobs pipe"));
- /* Something's done; reap it. We don't force a block here; if
- something strange happened and nothing's ready, just come back
- and wait some more. */
+ /* Set the blocking bit on the read FD again, just in case. */
+ fl = fcntl(job_fds[0], F_GETFL, 0);
+ if (fl >= 0)
+ fcntl(job_fds[0], F_SETFL, fl & ~O_NONBLOCK);
+
+ /* Something's done. We don't want to block for a whole child,
+ just reap whatever's there. */
reap_children (0, 0);
}
diff --git a/main.c b/main.c
index d40ae3f..c51fe7c 100644
--- a/main.c
+++ b/main.c
@@ -205,7 +205,6 @@ static unsigned int inf_jobs = 0;
/* File descriptors for the jobs pipe. */
int job_fds[2] = { -1, -1 };
-int job_rfd = -1;
/* Maximum load average at which multiple jobs will be run.
Negative values mean unlimited, while zero means limit to
@@ -1353,15 +1352,13 @@ int main (int argc, char ** argv)
job_slots_str = xstrdup(buf);
}
- /* If we have a jobserver pipe, dup(2) the read end. We'll use that in
- the child handler to note a child has died. See job.c. */
-
+ /* Be sure the blocking bit on the read FD is set to start with. */
if (job_fds[0] >= 0)
{
- job_rfds = dup (job_fds[0]);
- CLOSE_ON_EXEC (job_rfds);
+ int fl = fcntl(job_fds[0], F_GETFL, 0);
+ if (fl >= 0)
+ fcntl(job_fds[0], F_SETFL, fl & ~O_NONBLOCK);
}
-
#endif
/* Set up MAKEFLAGS and MFLAGS again, so they will be right. */
diff --git a/make.h b/make.h
index 0496ce7..26eaec9 100644
--- a/make.h
+++ b/make.h
@@ -77,6 +77,14 @@ Boston, MA 02111-1307, USA. */
extern int errno;
#endif
+/* A shortcut for EINTR checking. Note you should never negate this! That
+ very likely doesn't mean what you want if EINTR is not available. */
+#ifdef EINTR
+# define EINTR_SET (errno == EINTR)
+#else
+# define EINTR_SET (0)
+#endif
+
#ifndef isblank
# define isblank(c) ((c) == ' ' || (c) == '\t')
#endif
diff --git a/remake.c b/remake.c
index 6464f34..74c4a9a 100644
--- a/remake.c
+++ b/remake.c
@@ -918,13 +918,10 @@ touch_file (file)
char buf;
int status;
-#ifdef EINTR
do
-#endif
status = fstat (fd, &statbuf);
-#ifdef EINTR
- while (status < 0 && errno == EINTR);
-#endif
+ while (status < 0 && EINTR_SET);
+
if (status < 0)
TOUCH_ERROR ("touch: fstat: ");
/* Rewrite character 0 same as it already is. */