From cb64352c75f0da8979c9addd37b2b78bb97d5301 Mon Sep 17 00:00:00 2001 From: Frank Heckenbach Date: Sun, 14 Apr 2013 15:38:07 -0400 Subject: Initial patch for output synchronization. See Savannah bug #33138. Based on work by David Boyce . --- doc/make.texi | 27 +++- job.c | 253 +++++++++++++++++++++++++++++++++++ job.h | 8 ++ main.c | 37 ++++- makeint.h | 7 +- misc.c | 24 +++- tests/scripts/features/parallel-sync | 143 ++++++++++++++++++++ 7 files changed, 486 insertions(+), 13 deletions(-) create mode 100644 tests/scripts/features/parallel-sync diff --git a/doc/make.texi b/doc/make.texi index dffa14d..1a760e1 100644 --- a/doc/make.texi +++ b/doc/make.texi @@ -4057,9 +4057,12 @@ If there is nothing looking like an integer after the @samp{-j} option, there is no limit on the number of job slots. The default number of job slots is one, which means serial execution (one thing at a time). -One unpleasant consequence of running several recipes simultaneously is -that output generated by the recipes appears whenever each recipe -sends it, so messages from different recipes may be interspersed. +One consequence of running several recipes simultaneously is that by +default, output from each recipe appears as soon as it is generated, +with the result that messages from different recipes may be interspersed. +This may create problems in interpreting output. If the @samp{-P} option +is used, however, recipes will save their output until completion and +then take turns writing it, with a more coherent result. Another problem is that two processes cannot both take input from the same device; so to make sure that only one recipe tries to take input @@ -8612,6 +8615,24 @@ The data base output contains file name and line number information for recipe and variable definitions, so it can be a useful debugging tool in complex environments. +@item -P +@cindex @code{-P} +@itemx --parallel-sync +@cindex @code{--parallel-sync} +@cindex parallel recipe execution, output +When jobs are running in parallel under @samp{--jobs}, the output of +each job is held until the job is complete thus ensuring that the output +of each recipe is grouped together. + +With no argument or the argument @samp{1}, messages from each job in +recursive makes are grouped together. With the argument @samp{2}, the +complete output from any recursive make is grouped together. The latter +achieves better grouping of output from related jobs, but causes longer +delay, since messages do not appear until the recursive make has +completed. Therefore @samp{-P} is more useful when watching the output +while make runs, and @samp{-P2} is better suited when running a complex +parallel build in the background and checking its output afterwards. + @item -q @cindex @code{-q} @itemx --question diff --git a/job.c b/job.c index 2e2d3ff..dedd526 100644 --- a/job.c +++ b/job.c @@ -242,6 +242,16 @@ unsigned long job_counter = 0; /* Number of jobserver tokens this instance is currently using. */ unsigned int jobserver_tokens = 0; + +#ifdef PARALLEL_SYNC +/* Semaphore for use in -j mode with parallel_sync. */ + +int sync_handle = -1; + +#define STREAM_OK(strm) ((fcntl (fileno ((strm)), F_GETFD) != -1) || (errno != EBADF)) + +#define FD_NOT_EMPTY(FD) ((FD) >= 0 && lseek ((FD), 0, SEEK_CUR) > 0) +#endif /* PARALLEL_SYNC */ #ifdef WINDOWS32 /* @@ -539,6 +549,184 @@ child_handler (int sig UNUSED) */ } +#ifdef PARALLEL_SYNC +/* Adds file descriptors to the child structure to support parallel_sync; + one for stdout and one for stderr as long as they are (a) open and + (b) separate. If stdout and stderr share a device they can share a + temp file too. */ +static int +assign_child_tempfiles (struct child *c, int combined) +{ + FILE *outstrm = NULL, *errstrm = NULL; + int o_ok, e_ok; + const char *suppressed = "parallel-sync suppressed: "; + char *failmode = NULL; + + /* Check status of stdout and stderr before hooking up temp files. */ + o_ok = STREAM_OK (stdout); + e_ok = STREAM_OK (stderr); + + /* The tmpfile() function returns a FILE pointer but those can be in + limited supply, so we immediately dup its file descriptor and keep + only that, closing the FILE pointer. */ + + if (combined) + { + if (!(outstrm = tmpfile ())) + failmode = "tmpfile()"; + else + errstrm = outstrm; + } + else if (o_ok && e_ok) + { + if (!(outstrm = tmpfile ()) || !(errstrm = tmpfile ())) + failmode = "tmpfile()"; + } + else if (o_ok) + { + if (!(outstrm = tmpfile ())) + failmode = "tmpfile()"; + } + else if (e_ok) + { + if (!(errstrm = tmpfile ())) + failmode = "tmpfile()"; + } + else + failmode = "stdout"; + + if (!failmode && outstrm) + { + if ((c->outfd = dup (fileno (outstrm))) == -1) + failmode = "dup2()"; + else + CLOSE_ON_EXEC (c->outfd); + } + + if (!failmode && errstrm) + { + if (combined) + c->errfd = c->outfd; + else + { + if ((c->errfd = dup (fileno (errstrm))) == -1) + failmode = "dup2()"; + else + CLOSE_ON_EXEC (c->errfd); + } + } + + if (failmode) + perror_with_name (suppressed, failmode); + + if (outstrm) + (void) fclose (outstrm); + + if (errstrm && errstrm != outstrm) + (void) fclose (errstrm); + + return failmode == NULL; +} + +/* Support routine for sync_output() */ +static void +pump_from_tmp_fd (int from_fd, int to_fd) +{ + ssize_t nleft, nwrite; + char buffer[8192]; + + if (lseek (from_fd, 0, SEEK_SET) == -1) + perror ("lseek()"); + + while (1) + { + char *write_buf = buffer; + EINTRLOOP (nleft, read (from_fd, buffer, sizeof (buffer))); + if (nleft < 0) + perror ("read()"); + if (nleft <= 0) + break; + while (nleft > 0) + { + EINTRLOOP (nwrite, write (to_fd, write_buf, nleft)); + if (nwrite < 0) + { + perror ("write()"); + return; + } + + write_buf += nwrite; + nleft -= nwrite; + } + } +} + +/* Support routine for sync_output() */ +static void * +acquire_semaphore (void) +{ + static struct flock fl; + + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; /* lock just one byte according to pid */ + fl.l_len = 1; + if (fcntl (sync_handle, F_SETLKW, &fl) != -1) + return &fl; + perror ("fcntl()"); + return NULL; +} + +/* Support routine for sync_output() */ +static void +release_semaphore (void *sem) +{ + struct flock *flp = (struct flock *)sem; + flp->l_type = F_UNLCK; + if (fcntl (sync_handle, F_SETLKW, flp) == -1) + perror ("fcntl()"); +} + +/* Synchronize the output of jobs in -j mode to keep the results of + each job together. This is done by holding the results in temp files, + one for stdout and potentially another for stderr, and only releasing + them to "real" stdout/stderr when a semaphore can be obtained. */ + +static void +sync_output (struct child *c) +{ + void *sem; + + int outfd_not_empty = FD_NOT_EMPTY (c->outfd); + int errfd_not_empty = FD_NOT_EMPTY (c->errfd); + + if ((outfd_not_empty || errfd_not_empty) && (sem = acquire_semaphore ())) + { + /* + * We've entered the "critical section" during which a lock is held. + * We want to keep it as short as possible. + */ + if (outfd_not_empty) + { + log_working_directory (1, 1); + pump_from_tmp_fd (c->outfd, fileno (stdout)); + log_working_directory (0, 1); + } + if (errfd_not_empty && c->errfd != c->outfd) + pump_from_tmp_fd (c->errfd, fileno (stderr)); + + /* Exit the critical section */ + release_semaphore (sem); + } + + if (c->outfd >= 0) + close (c->outfd); + if (c->errfd >= 0) + close (c->errfd); + c->outfd = c->errfd = -1; +} +#endif /* PARALLEL_SYNC */ + extern int shell_function_pid, shell_function_completed; /* Reap all dead children, storing the returned status and the new command @@ -833,6 +1021,12 @@ reap_children (int block, int err) c->sh_batch_file = NULL; } +#ifdef PARALLEL_SYNC + /* Synchronize parallel output if requested */ + if (parallel_sync) + sync_output (c); +#endif /* PARALLEL_SYNC */ + /* If this child had the good stdin, say it is now free. */ if (c->good_stdin) good_stdin_used = 0; @@ -1413,6 +1607,45 @@ start_job_command (struct child *child) #else /* !__EMX__ */ +#ifdef PARALLEL_SYNC + if (parallel_sync) + { + static int combined_output; + /* If parallel_sync is turned on, find a resource to + synchronize on. This block is traversed only once. */ + if (sync_handle == -1) + { + struct stat stbuf_o, stbuf_e; + + if (STREAM_OK (stdout)) + { + sync_handle = fileno (stdout); + combined_output = + fstat (fileno (stdout), &stbuf_o) == 0 && + fstat (fileno (stderr), &stbuf_e) == 0 && + stbuf_o.st_dev == stbuf_e.st_dev && + stbuf_o.st_ino == stbuf_e.st_ino; + } + else if (STREAM_OK (stderr)) + sync_handle = fileno (stderr); + else + { + perror_with_name ("parallel-sync suppressed: ", "stderr"); + parallel_sync = 0; + } + } + + /* If it still looks like we can synchronize, create a temp + file to hold stdout (and one for stderr if separate). */ + if (parallel_sync >= PARALLEL_SYNC_COARSE + || (parallel_sync == PARALLEL_SYNC_FINE && !(flags & COMMANDS_RECURSE))) + { + if (!assign_child_tempfiles (child, combined_output)) + parallel_sync = 0; + } + } +#endif /* PARALLEL_SYNC */ + child->pid = vfork (); environ = parent_environ; /* Restore value child may have clobbered. */ if (child->pid == 0) @@ -1436,6 +1669,23 @@ start_job_command (struct child *child) setrlimit (RLIMIT_STACK, &stack_limit); #endif +#ifdef PARALLEL_SYNC + /* Divert child output into tempfile(s) if parallel_sync in use. */ + if (parallel_sync) + { + int outfd = fileno (stdout); + int errfd = fileno (stderr); + + if ((child->outfd >= 0 && + (close (outfd) == -1 || dup2 (child->outfd, outfd) == -1)) + || (child->errfd >= 0 && + (close (errfd) == -1 || dup2 (child->errfd, errfd) == -1))) + { + perror_with_name ("parallel-sync: ", "dup2()"); + } + } +#endif /* PARALLEL_SYNC */ + child_execute_job (child->good_stdin ? 0 : bad_stdin, 1, argv, child->environment); } @@ -1769,6 +2019,9 @@ new_job (struct file *file) c->file = file; c->command_lines = lines; c->sh_batch_file = NULL; +#ifdef PARALLEL_SYNC + c->outfd = c->errfd = -1; +#endif /* Cache dontcare flag because file->dontcare can be changed once we return. Check dontcare inheritance mechanism for details. */ diff --git a/job.h b/job.h index 0b594a1..8d32bd7 100644 --- a/job.h +++ b/job.h @@ -34,6 +34,10 @@ this program. If not, see . */ # define CLOSE_ON_EXEC(_d) (void) fcntl ((_d), F_SETFD, FD_CLOEXEC) #endif +#ifdef POSIX /* PARALLEL_SYNC */ +#define PARALLEL_SYNC +#endif /* POSIX */ + /* Structure describing a running or dead child process. */ struct child @@ -60,6 +64,10 @@ struct child unsigned int good_stdin:1; /* Nonzero if this child has a good stdin. */ unsigned int deleted:1; /* Nonzero if targets have been deleted. */ unsigned int dontcare:1; /* Saved dontcare flag. */ +#ifdef PARALLEL_SYNC + int outfd; /* Optional file descriptor for saving stdout */ + int errfd; /* Optional file descriptor for saving stderr */ +#endif /* PARALLEL_SYNC */ }; extern struct child *children; diff --git a/main.c b/main.c index 0a2774c..0447888 100644 --- a/main.c +++ b/main.c @@ -228,6 +228,15 @@ static unsigned int master_job_slots = 0; static unsigned int inf_jobs = 0; +#ifdef PARALLEL_SYNC + +/* Default value for parallel sync without an argument. */ + +static unsigned int no_parallel_sync = 0; +static unsigned int default_parallel_sync = PARALLEL_SYNC_FINE; + +#endif + /* File descriptors for the jobs pipe. */ static struct stringlist *jobserver_fds = 0; @@ -344,6 +353,10 @@ static const char *const usage[] = Consider FILE to be very old and don't remake it.\n"), N_("\ -p, --print-data-base Print make's internal database.\n"), +#ifdef PARALLEL_SYNC + N_("\ + -P [2], --parallel-sync[=2] Synchronize output of parallel jobs [coarse].\n"), +#endif N_("\ -q, --question Run no recipe; exit status says if up to date.\n"), N_("\ @@ -408,6 +421,11 @@ static const struct command_switch switches[] = { 'n', flag, &just_print_flag, 1, 1, 1, 0, 0, "just-print" }, { 'o', filename, &old_files, 0, 0, 0, 0, 0, "old-file" }, { 'p', flag, &print_data_base_flag, 1, 1, 0, 0, 0, "print-data-base" }, +#ifdef PARALLEL_SYNC + // { 'P', flag, ¶llel_sync, 1, 1, 0, 0, 0, "parallel-sync" }, // two-state + { 'P', positive_int, ¶llel_sync, 1, 1, 0, &default_parallel_sync, + &no_parallel_sync, "parallel-sync" }, +#endif { 'q', flag, &question_flag, 1, 1, 1, 0, 0, "question" }, { 'r', flag, &no_builtin_rules_flag, 1, 1, 0, 0, 0, "no-builtin-rules" }, { 'R', flag, &no_builtin_variables_flag, 1, 1, 0, 0, 0, @@ -503,6 +521,14 @@ int second_expansion; int one_shell; +/* Either PARALLEL_SYNC_FINE or PARALLEL_SYNC_COARSE + if the "--parallel-sync" option was given. + This attempts to synchronize the output of parallel + jobs such that the results of each job stay together. + It works best in combination with .ONESHELL. */ + +int parallel_sync; + /* Nonzero if we have seen the '.NOTPARALLEL' target. This turns off parallel builds for this invocation of make. */ @@ -2095,7 +2121,7 @@ main (int argc, char **argv, char **envp) if (print_data_base_flag) print_data_base (); - log_working_directory (0); + log_working_directory (0, 0); clean_jobserver (0); @@ -3280,7 +3306,7 @@ die (int status) _x = chdir (directory_before_chdir); } - log_working_directory (0); + log_working_directory (0, 0); } exit (status); @@ -3290,17 +3316,18 @@ die (int status) left (according to ENTERING) the current directory. */ void -log_working_directory (int entering) +log_working_directory (int entering, int force) { static int entered = 0; /* Print nothing without the flag. Don't print the entering message again if we already have. Don't print the leaving message if we haven't printed the entering message. */ - if (! print_directory_flag || entering == entered) + if (! print_directory_flag || (!force && entering == entered)) return; - entered = entering; + if (!force) + entered = entering; if (print_data_base_flag) fputs ("# ", stdout); diff --git a/makeint.h b/makeint.h index 6f2ee67..2273fad 100644 --- a/makeint.h +++ b/makeint.h @@ -396,7 +396,7 @@ void fatal (); #endif void die (int) __attribute__ ((noreturn)); -void log_working_directory (int); +void log_working_directory (int, int); void pfatal_with_name (const char *) __attribute__ ((noreturn)); void perror_with_name (const char *, const char *); void *xmalloc (unsigned int); @@ -525,6 +525,9 @@ int strncasecmp (const char *s1, const char *s2, int n); # endif #endif +#define PARALLEL_SYNC_FINE 1 +#define PARALLEL_SYNC_COARSE 2 + extern const gmk_floc *reading_file; extern const gmk_floc **expanding_var; @@ -536,7 +539,7 @@ extern int env_overrides, no_builtin_rules_flag, no_builtin_variables_flag; extern int print_version_flag, print_directory_flag, check_symlink_flag; extern int warn_undefined_variables_flag, trace_flag, posix_pedantic; extern int not_parallel, second_expansion, clock_skew_detected; -extern int rebuilding_makefiles, one_shell; +extern int rebuilding_makefiles, one_shell, parallel_sync; /* can we run commands via 'sh -c xxx' or must we use batch files? */ extern int batch_mode_shell; diff --git a/misc.c b/misc.c index 37fa1e1..ba8cc2e 100644 --- a/misc.c +++ b/misc.c @@ -231,10 +231,13 @@ message (prefix, fmt, va_alist) va_list args; #endif - log_working_directory (1); + log_working_directory (1, 0); if (fmt != 0) { + if (parallel_sync) + log_working_directory (1, 1); + if (prefix) { if (makelevel == 0) @@ -246,6 +249,9 @@ message (prefix, fmt, va_alist) VA_PRINTF (stdout, fmt, args); VA_END (args); putchar ('\n'); + + if (parallel_sync) + log_working_directory (0, 1); } fflush (stdout); @@ -267,7 +273,10 @@ error (flocp, fmt, va_alist) va_list args; #endif - log_working_directory (1); + if (parallel_sync) + log_working_directory (1, 1); + else + log_working_directory (1, 0); if (flocp && flocp->filenm) fprintf (stderr, "%s:%lu: ", flocp->filenm, flocp->lineno); @@ -282,6 +291,9 @@ error (flocp, fmt, va_alist) putc ('\n', stderr); fflush (stderr); + + if (parallel_sync) + log_working_directory (0, 1); } /* Print an error message and exit. */ @@ -300,7 +312,10 @@ fatal (flocp, fmt, va_alist) va_list args; #endif - log_working_directory (1); + if (parallel_sync) + log_working_directory (1, 1); + else + log_working_directory (1, 0); if (flocp && flocp->filenm) fprintf (stderr, "%s:%lu: *** ", flocp->filenm, flocp->lineno); @@ -315,6 +330,9 @@ fatal (flocp, fmt, va_alist) fputs (_(". Stop.\n"), stderr); + if (parallel_sync) + log_working_directory (0, 1); + die (2); } diff --git a/tests/scripts/features/parallel-sync b/tests/scripts/features/parallel-sync new file mode 100644 index 0000000..29e5564 --- /dev/null +++ b/tests/scripts/features/parallel-sync @@ -0,0 +1,143 @@ +# -*-perl-*- + +$description = "Test parallel-sync (-P) option."; + +$details = "Test the synchronization of output from parallel jobs."; + +if (!$parallel_jobs) { + return -1; +} + +if ($vos) { + $sleep_command = "sleep -seconds"; +} +else { + $sleep_command = "sleep"; +} + +# The following subdirectories with Makefiles are used in several +# of the following tests. +mkdir('foo', 0777); +mkdir('bar', 0777); + +open(MAKEFILE,"> foo/Makefile"); +print MAKEFILE < bar/Makefile"); +print MAKEFILE <