git.oblomov.eu Git - git/blob - builtin/fsmonitor--daemon.c

   1 #include "builtin.h"
   2 #include "config.h"
   3 #include "parse-options.h"
   4 #include "fsmonitor.h"
   5 #include "fsmonitor-ipc.h"
   6 #include "compat/fsmonitor/fsmonitor-fs-listen.h"
   7 #include "fsmonitor--daemon.h"
   8 #include "simple-ipc.h"
   9 #include "khash.h"
  10 #include "pkt-line.h"
  11
  12 static const char * const builtin_fsmonitor__daemon_usage[] = {
  13         N_("git fsmonitor--daemon start [<options>]"),
  14         N_("git fsmonitor--daemon run [<options>]"),
  15         N_("git fsmonitor--daemon stop"),
  16         N_("git fsmonitor--daemon status"),
  17         NULL
  18 };
  19
  20 #ifdef HAVE_FSMONITOR_DAEMON_BACKEND
  21 /*
  22  * Global state loaded from config.
  23  */
  24 #define FSMONITOR__IPC_THREADS "fsmonitor.ipcthreads"
  25 static int fsmonitor__ipc_threads = 8;
  26
  27 #define FSMONITOR__START_TIMEOUT "fsmonitor.starttimeout"
  28 static int fsmonitor__start_timeout_sec = 60;
  29
  30 static int fsmonitor_config(const char *var, const char *value, void *cb)
  31 {
  32         if (!strcmp(var, FSMONITOR__IPC_THREADS)) {
  33                 int i = git_config_int(var, value);
  34                 if (i < 1)
  35                         return error(_("value of '%s' out of range: %d"),
  36                                      FSMONITOR__IPC_THREADS, i);
  37                 fsmonitor__ipc_threads = i;
  38                 return 0;
  39         }
  40
  41         if (!strcmp(var, FSMONITOR__START_TIMEOUT)) {
  42                 int i = git_config_int(var, value);
  43                 if (i < 0)
  44                         return error(_("value of '%s' out of range: %d"),
  45                                      FSMONITOR__START_TIMEOUT, i);
  46                 fsmonitor__start_timeout_sec = i;
  47                 return 0;
  48         }
  49
  50         return git_default_config(var, value, cb);
  51 }
  52
  53 /*
  54  * Acting as a CLIENT.
  55  *
  56  * Send a "quit" command to the `git-fsmonitor--daemon` (if running)
  57  * and wait for it to shutdown.
  58  */
  59 static int do_as_client__send_stop(void)
  60 {
  61         struct strbuf answer = STRBUF_INIT;
  62         int ret;
  63
  64         ret = fsmonitor_ipc__send_command("quit", &answer);
  65
  66         /* The quit command does not return any response data. */
  67         strbuf_release(&answer);
  68
  69         if (ret)
  70                 return ret;
  71
  72         trace2_region_enter("fsm_client", "polling-for-daemon-exit", NULL);
  73         while (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
  74                 sleep_millisec(50);
  75         trace2_region_leave("fsm_client", "polling-for-daemon-exit", NULL);
  76
  77         return 0;
  78 }
  79
  80 static int do_as_client__status(void)
  81 {
  82         enum ipc_active_state state = fsmonitor_ipc__get_state();
  83
  84         switch (state) {
  85         case IPC_STATE__LISTENING:
  86                 printf(_("The built-in file system monitor is active\n"));
  87                 return 0;
  88
  89         default:
  90                 printf(_("The built-in file system monitor is not active\n"));
  91                 return 1;
  92         }
  93 }
  94
  95 enum fsmonitor_cookie_item_result {
  96         FCIR_ERROR = -1, /* could not create cookie file ? */
  97         FCIR_INIT = 0,
  98         FCIR_SEEN,
  99         FCIR_ABORT,
 100 };
 101
 102 struct fsmonitor_cookie_item {
 103         struct hashmap_entry entry;
 104         const char *name;
 105         enum fsmonitor_cookie_item_result result;
 106 };
 107
 108 static int cookies_cmp(const void *data, const struct hashmap_entry *he1,
 109                      const struct hashmap_entry *he2, const void *keydata)
 110 {
 111         const struct fsmonitor_cookie_item *a =
 112                 container_of(he1, const struct fsmonitor_cookie_item, entry);
 113         const struct fsmonitor_cookie_item *b =
 114                 container_of(he2, const struct fsmonitor_cookie_item, entry);
 115
 116         return strcmp(a->name, keydata ? keydata : b->name);
 117 }
 118
 119 static enum fsmonitor_cookie_item_result with_lock__wait_for_cookie(
 120         struct fsmonitor_daemon_state *state)
 121 {
 122         /* assert current thread holding state->main_lock */
 123
 124         int fd;
 125         struct fsmonitor_cookie_item *cookie;
 126         struct strbuf cookie_pathname = STRBUF_INIT;
 127         struct strbuf cookie_filename = STRBUF_INIT;
 128         enum fsmonitor_cookie_item_result result;
 129         int my_cookie_seq;
 130
 131         CALLOC_ARRAY(cookie, 1);
 132
 133         my_cookie_seq = state->cookie_seq++;
 134
 135         strbuf_addf(&cookie_filename, "%i-%i", getpid(), my_cookie_seq);
 136
 137         strbuf_addbuf(&cookie_pathname, &state->path_cookie_prefix);
 138         strbuf_addbuf(&cookie_pathname, &cookie_filename);
 139
 140         cookie->name = strbuf_detach(&cookie_filename, NULL);
 141         cookie->result = FCIR_INIT;
 142         hashmap_entry_init(&cookie->entry, strhash(cookie->name));
 143
 144         hashmap_add(&state->cookies, &cookie->entry);
 145
 146         trace_printf_key(&trace_fsmonitor, "cookie-wait: '%s' '%s'",
 147                          cookie->name, cookie_pathname.buf);
 148
 149         /*
 150          * Create the cookie file on disk and then wait for a notification
 151          * that the listener thread has seen it.
 152          */
 153         fd = open(cookie_pathname.buf, O_WRONLY | O_CREAT | O_EXCL, 0600);
 154         if (fd >= 0) {
 155                 close(fd);
 156                 unlink(cookie_pathname.buf);
 157
 158                 /*
 159                  * NEEDSWORK: This is an infinite wait (well, unless another
 160                  * thread sends us an abort).  I'd like to change this to
 161                  * use `pthread_cond_timedwait()` and return an error/timeout
 162                  * and let the caller do the trivial response thing.
 163                  */
 164                 while (cookie->result == FCIR_INIT)
 165                         pthread_cond_wait(&state->cookies_cond,
 166                                           &state->main_lock);
 167         } else {
 168                 error_errno(_("could not create fsmonitor cookie '%s'"),
 169                             cookie->name);
 170
 171                 cookie->result = FCIR_ERROR;
 172         }
 173
 174         hashmap_remove(&state->cookies, &cookie->entry, NULL);
 175
 176         result = cookie->result;
 177
 178         free((char*)cookie->name);
 179         free(cookie);
 180         strbuf_release(&cookie_pathname);
 181
 182         return result;
 183 }
 184
 185 /*
 186  * Mark these cookies as _SEEN and wake up the corresponding client threads.
 187  */
 188 static void with_lock__mark_cookies_seen(struct fsmonitor_daemon_state *state,
 189                                          const struct string_list *cookie_names)
 190 {
 191         /* assert current thread holding state->main_lock */
 192
 193         int k;
 194         int nr_seen = 0;
 195
 196         for (k = 0; k < cookie_names->nr; k++) {
 197                 struct fsmonitor_cookie_item key;
 198                 struct fsmonitor_cookie_item *cookie;
 199
 200                 key.name = cookie_names->items[k].string;
 201                 hashmap_entry_init(&key.entry, strhash(key.name));
 202
 203                 cookie = hashmap_get_entry(&state->cookies, &key, entry, NULL);
 204                 if (cookie) {
 205                         trace_printf_key(&trace_fsmonitor, "cookie-seen: '%s'",
 206                                          cookie->name);
 207                         cookie->result = FCIR_SEEN;
 208                         nr_seen++;
 209                 }
 210         }
 211
 212         if (nr_seen)
 213                 pthread_cond_broadcast(&state->cookies_cond);
 214 }
 215
 216 /*
 217  * Set _ABORT on all pending cookies and wake up all client threads.
 218  */
 219 static void with_lock__abort_all_cookies(struct fsmonitor_daemon_state *state)
 220 {
 221         /* assert current thread holding state->main_lock */
 222
 223         struct hashmap_iter iter;
 224         struct fsmonitor_cookie_item *cookie;
 225         int nr_aborted = 0;
 226
 227         hashmap_for_each_entry(&state->cookies, &iter, cookie, entry) {
 228                 trace_printf_key(&trace_fsmonitor, "cookie-abort: '%s'",
 229                                  cookie->name);
 230                 cookie->result = FCIR_ABORT;
 231                 nr_aborted++;
 232         }
 233
 234         if (nr_aborted)
 235                 pthread_cond_broadcast(&state->cookies_cond);
 236 }
 237
 238 /*
 239  * Requests to and from a FSMonitor Protocol V2 provider use an opaque
 240  * "token" as a virtual timestamp.  Clients can request a summary of all
 241  * created/deleted/modified files relative to a token.  In the response,
 242  * clients receive a new token for the next (relative) request.
 243  *
 244  *
 245  * Token Format
 246  * ============
 247  *
 248  * The contents of the token are private and provider-specific.
 249  *
 250  * For the built-in fsmonitor--daemon, we define a token as follows:
 251  *
 252  *     "builtin" ":" <token_id> ":" <sequence_nr>
 253  *
 254  * The "builtin" prefix is used as a namespace to avoid conflicts
 255  * with other providers (such as Watchman).
 256  *
 257  * The <token_id> is an arbitrary OPAQUE string, such as a GUID,
 258  * UUID, or {timestamp,pid}.  It is used to group all filesystem
 259  * events that happened while the daemon was monitoring (and in-sync
 260  * with the filesystem).
 261  *
 262  *     Unlike FSMonitor Protocol V1, it is not defined as a timestamp
 263  *     and does not define less-than/greater-than relationships.
 264  *     (There are too many race conditions to rely on file system
 265  *     event timestamps.)
 266  *
 267  * The <sequence_nr> is a simple integer incremented whenever the
 268  * daemon needs to make its state public.  For example, if 1000 file
 269  * system events come in, but no clients have requested the data,
 270  * the daemon can continue to accumulate file changes in the same
 271  * bin and does not need to advance the sequence number.  However,
 272  * as soon as a client does arrive, the daemon needs to start a new
 273  * bin and increment the sequence number.
 274  *
 275  *     The sequence number serves as the boundary between 2 sets
 276  *     of bins -- the older ones that the client has already seen
 277  *     and the newer ones that it hasn't.
 278  *
 279  * When a new <token_id> is created, the <sequence_nr> is reset to
 280  * zero.
 281  *
 282  *
 283  * About Token Ids
 284  * ===============
 285  *
 286  * A new token_id is created:
 287  *
 288  * [1] each time the daemon is started.
 289  *
 290  * [2] any time that the daemon must re-sync with the filesystem
 291  *     (such as when the kernel drops or we miss events on a very
 292  *     active volume).
 293  *
 294  * [3] in response to a client "flush" command (for dropped event
 295  *     testing).
 296  *
 297  * When a new token_id is created, the daemon is free to discard all
 298  * cached filesystem events associated with any previous token_ids.
 299  * Events associated with a non-current token_id will never be sent
 300  * to a client.  A token_id change implicitly means that the daemon
  301  * has a gap in its event history.
 302  *
 303  * Therefore, clients that present a token with a stale (non-current)
 304  * token_id will always be given a trivial response.
 305  */
 306 struct fsmonitor_token_data {
 307         struct strbuf token_id;
 308         struct fsmonitor_batch *batch_head;
 309         struct fsmonitor_batch *batch_tail;
 310         uint64_t client_ref_count;
 311 };
 312
 313 struct fsmonitor_batch {
 314         struct fsmonitor_batch *next;
 315         uint64_t batch_seq_nr;
 316         const char **interned_paths;
 317         size_t nr, alloc;
 318         time_t pinned_time;
 319 };
 320
 321 static struct fsmonitor_token_data *fsmonitor_new_token_data(void)
 322 {
 323         static int test_env_value = -1;
 324         static uint64_t flush_count = 0;
 325         struct fsmonitor_token_data *token;
 326         struct fsmonitor_batch *batch;
 327
 328         CALLOC_ARRAY(token, 1);
 329         batch = fsmonitor_batch__new();
 330
 331         strbuf_init(&token->token_id, 0);
 332         token->batch_head = batch;
 333         token->batch_tail = batch;
 334         token->client_ref_count = 0;
 335
 336         if (test_env_value < 0)
 337                 test_env_value = git_env_bool("GIT_TEST_FSMONITOR_TOKEN", 0);
 338
 339         if (!test_env_value) {
 340                 struct timeval tv;
 341                 struct tm tm;
 342                 time_t secs;
 343
 344                 gettimeofday(&tv, NULL);
 345                 secs = tv.tv_sec;
 346                 gmtime_r(&secs, &tm);
 347
 348                 strbuf_addf(&token->token_id,
 349                             "%"PRIu64".%d.%4d%02d%02dT%02d%02d%02d.%06ldZ",
 350                             flush_count++,
 351                             getpid(),
 352                             tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
 353                             tm.tm_hour, tm.tm_min, tm.tm_sec,
 354                             (long)tv.tv_usec);
 355         } else {
 356                 strbuf_addf(&token->token_id, "test_%08x", test_env_value++);
 357         }
 358
 359         /*
 360          * We created a new <token_id> and are starting a new series
 361          * of tokens with a zero <seq_nr>.
 362          *
 363          * Since clients cannot guess our new (non test) <token_id>
 364          * they will always receive a trivial response (because of the
 365          * mismatch on the <token_id>).  The trivial response will
 366          * tell them our new <token_id> so that subsequent requests
 367          * will be relative to our new series.  (And when sending that
 368          * response, we pin the current head of the batch list.)
 369          *
 370          * Even if the client correctly guesses the <token_id>, their
 371          * request of "builtin:<token_id>:0" asks for all changes MORE
 372          * RECENT than batch/bin 0.
 373          *
 374          * This implies that it is a waste to accumulate paths in the
 375          * initial batch/bin (because they will never be transmitted).
 376          *
 377          * So the daemon could be running for days and watching the
 378          * file system, but doesn't need to actually accumulate any
 379          * paths UNTIL we need to set a reference point for a later
 380          * relative request.
 381          *
 382          * However, it is very useful for testing to always have a
 383          * reference point set.  Pin batch 0 to force early file system
 384          * events to accumulate.
 385          */
 386         if (test_env_value)
 387                 batch->pinned_time = time(NULL);
 388
 389         return token;
 390 }
 391
 392 struct fsmonitor_batch *fsmonitor_batch__new(void)
 393 {
 394         struct fsmonitor_batch *batch;
 395
 396         CALLOC_ARRAY(batch, 1);
 397
 398         return batch;
 399 }
 400
 401 struct fsmonitor_batch *fsmonitor_batch__pop(struct fsmonitor_batch *batch)
 402 {
 403         struct fsmonitor_batch *next;
 404
 405         if (!batch)
 406                 return NULL;
 407
 408         next = batch->next;
 409
 410         /*
 411          * The actual strings within the array are interned, so we don't
 412          * own them.
 413          */
 414         free(batch->interned_paths);
 415
 416         return next;
 417 }
 418
 419 void fsmonitor_batch__add_path(struct fsmonitor_batch *batch,
 420                                const char *path)
 421 {
 422         const char *interned_path = strintern(path);
 423
 424         trace_printf_key(&trace_fsmonitor, "event: %s", interned_path);
 425
 426         ALLOC_GROW(batch->interned_paths, batch->nr + 1, batch->alloc);
 427         batch->interned_paths[batch->nr++] = interned_path;
 428 }
 429
 430 static void fsmonitor_batch__combine(struct fsmonitor_batch *batch_dest,
 431                                      const struct fsmonitor_batch *batch_src)
 432 {
 433         size_t k;
 434
 435         ALLOC_GROW(batch_dest->interned_paths,
 436                    batch_dest->nr + batch_src->nr + 1,
 437                    batch_dest->alloc);
 438
 439         for (k = 0; k < batch_src->nr; k++)
 440                 batch_dest->interned_paths[batch_dest->nr++] =
 441                         batch_src->interned_paths[k];
 442 }
 443
 444 /*
 445  * To keep the batch list from growing unbounded in response to filesystem
 446  * activity, we try to truncate old batches from the end of the list as
 447  * they become irrelevant.
 448  *
 449  * We assume that the .git/index will be updated with the most recent token
 450  * any time the index is updated.  And future commands will only ask for
 451  * recent changes *since* that new token.  So as tokens advance into the
 452  * future, older batch items will never be requested/needed.  So we can
 453  * truncate them without loss of functionality.
 454  *
 455  * However, multiple commands may be talking to the daemon concurrently
 456  * or perform a slow command, so a little "token skew" is possible.
 457  * Therefore, we want this to be a little bit lazy and have a generous
 458  * delay.
 459  *
 460  * The current reader thread walked backwards in time from `token->batch_head`
 461  * back to `batch_marker` somewhere in the middle of the batch list.
 462  *
 463  * Let's walk backwards in time from that marker an arbitrary delay
 464  * and truncate the list there.  Note that these timestamps are completely
 465  * artificial (based on when we pinned the batch item) and not on any
 466  * filesystem activity.
 467  */
 468 #define MY_TIME_DELAY_SECONDS (5 * 60) /* seconds */
 469
 470 static void with_lock__truncate_old_batches(
 471         struct fsmonitor_daemon_state *state,
 472         const struct fsmonitor_batch *batch_marker)
 473 {
 474         /* assert current thread holding state->main_lock */
 475
 476         const struct fsmonitor_batch *batch;
 477         struct fsmonitor_batch *rest;
 478         struct fsmonitor_batch *p;
 479
 480         if (!batch_marker)
 481                 return;
 482
 483         trace_printf_key(&trace_fsmonitor, "Truncate: mark (%"PRIu64",%"PRIu64")",
 484                          batch_marker->batch_seq_nr,
 485                          (uint64_t)batch_marker->pinned_time);
 486
 487         for (batch = batch_marker; batch; batch = batch->next) {
 488                 time_t t;
 489
 490                 if (!batch->pinned_time) /* an overflow batch */
 491                         continue;
 492
 493                 t = batch->pinned_time + MY_TIME_DELAY_SECONDS;
 494                 if (t > batch_marker->pinned_time) /* too close to marker */
 495                         continue;
 496
 497                 goto truncate_past_here;
 498         }
 499
 500         return;
 501
 502 truncate_past_here:
 503         state->current_token_data->batch_tail = (struct fsmonitor_batch *)batch;
 504
 505         rest = ((struct fsmonitor_batch *)batch)->next;
 506         ((struct fsmonitor_batch *)batch)->next = NULL;
 507
 508         for (p = rest; p; p = fsmonitor_batch__pop(p)) {
 509                 trace_printf_key(&trace_fsmonitor,
 510                                  "Truncate: kill (%"PRIu64",%"PRIu64")",
 511                                  p->batch_seq_nr, (uint64_t)p->pinned_time);
 512         }
 513 }
 514
 515 static void fsmonitor_free_token_data(struct fsmonitor_token_data *token)
 516 {
 517         struct fsmonitor_batch *p;
 518
 519         if (!token)
 520                 return;
 521
 522         assert(token->client_ref_count == 0);
 523
 524         strbuf_release(&token->token_id);
 525
 526         for (p = token->batch_head; p; p = fsmonitor_batch__pop(p))
 527                 ;
 528
 529         free(token);
 530 }
 531
 532 /*
 533  * Flush all of our cached data about the filesystem.  Call this if we
 534  * lose sync with the filesystem and miss some notification events.
 535  *
 536  * [1] If we are missing events, then we no longer have a complete
 537  *     history of the directory (relative to our current start token).
 538  *     We should create a new token and start fresh (as if we just
 539  *     booted up).
 540  *
 541  * [2] Some of those lost events may have been for cookie files.  We
  542  *     should assume the worst and abort them rather than letting them starve.
 543  *
  544  * If there are no concurrent threads reading the current token data
 545  * series, we can free it now.  Otherwise, let the last reader free
 546  * it.
 547  *
 548  * Either way, the old token data series is no longer associated with
 549  * our state data.
 550  */
 551 static void with_lock__do_force_resync(struct fsmonitor_daemon_state *state)
 552 {
 553         /* assert current thread holding state->main_lock */
 554
 555         struct fsmonitor_token_data *free_me = NULL;
 556         struct fsmonitor_token_data *new_one = NULL;
 557
 558         new_one = fsmonitor_new_token_data();
 559
 560         if (state->current_token_data->client_ref_count == 0)
 561                 free_me = state->current_token_data;
 562         state->current_token_data = new_one;
 563
 564         fsmonitor_free_token_data(free_me);
 565
 566         with_lock__abort_all_cookies(state);
 567 }
 568
 569 void fsmonitor_force_resync(struct fsmonitor_daemon_state *state)
 570 {
 571         pthread_mutex_lock(&state->main_lock);
 572         with_lock__do_force_resync(state);
 573         pthread_mutex_unlock(&state->main_lock);
 574 }
 575
 576 /*
 577  * Format an opaque token string to send to the client.
 578  */
 579 static void with_lock__format_response_token(
 580         struct strbuf *response_token,
 581         const struct strbuf *response_token_id,
 582         const struct fsmonitor_batch *batch)
 583 {
 584         /* assert current thread holding state->main_lock */
 585
 586         strbuf_reset(response_token);
 587         strbuf_addf(response_token, "builtin:%s:%"PRIu64,
 588                     response_token_id->buf, batch->batch_seq_nr);
 589 }
 590
 591 /*
 592  * Parse an opaque token from the client.
 593  * Returns -1 on error.
 594  */
 595 static int fsmonitor_parse_client_token(const char *buf_token,
 596                                         struct strbuf *requested_token_id,
 597                                         uint64_t *seq_nr)
 598 {
 599         const char *p;
 600         char *p_end;
 601
 602         strbuf_reset(requested_token_id);
 603         *seq_nr = 0;
 604
 605         if (!skip_prefix(buf_token, "builtin:", &p))
 606                 return -1;
 607
 608         while (*p && *p != ':')
 609                 strbuf_addch(requested_token_id, *p++);
 610         if (!*p++)
 611                 return -1;
 612
 613         *seq_nr = (uint64_t)strtoumax(p, &p_end, 10);
 614         if (*p_end)
 615                 return -1;
 616
 617         return 0;
 618 }
 619
 620 KHASH_INIT(str, const char *, int, 0, kh_str_hash_func, kh_str_hash_equal);
 621
 622 static int do_handle_client(struct fsmonitor_daemon_state *state,
 623                             const char *command,
 624                             ipc_server_reply_cb *reply,
 625                             struct ipc_server_reply_data *reply_data)
 626 {
 627         struct fsmonitor_token_data *token_data = NULL;
 628         struct strbuf response_token = STRBUF_INIT;
 629         struct strbuf requested_token_id = STRBUF_INIT;
 630         struct strbuf payload = STRBUF_INIT;
 631         uint64_t requested_oldest_seq_nr = 0;
 632         uint64_t total_response_len = 0;
 633         const char *p;
 634         const struct fsmonitor_batch *batch_head;
 635         const struct fsmonitor_batch *batch;
 636         intmax_t count = 0, duplicates = 0;
 637         kh_str_t *shown;
 638         int hash_ret;
 639         int do_trivial = 0;
 640         int do_flush = 0;
 641         int do_cookie = 0;
 642         enum fsmonitor_cookie_item_result cookie_result;
 643
 644         /*
 645          * We expect `command` to be of the form:
 646          *
 647          * <command> := quit NUL
 648          *            | flush NUL
 649          *            | <V1-time-since-epoch-ns> NUL
 650          *            | <V2-opaque-fsmonitor-token> NUL
 651          */
 652
 653         if (!strcmp(command, "quit")) {
 654                 /*
 655                  * A client has requested over the socket/pipe that the
 656                  * daemon shutdown.
 657                  *
 658                  * Tell the IPC thread pool to shutdown (which completes
 659                  * the await in the main thread (which can stop the
 660                  * fsmonitor listener thread)).
 661                  *
 662                  * There is no reply to the client.
 663                  */
 664                 return SIMPLE_IPC_QUIT;
 665
 666         } else if (!strcmp(command, "flush")) {
 667                 /*
 668                  * Flush all of our cached data and generate a new token
 669                  * just like if we lost sync with the filesystem.
 670                  *
 671                  * Then send a trivial response using the new token.
 672                  */
 673                 do_flush = 1;
 674                 do_trivial = 1;
 675                 do_cookie = 1;
 676
 677         } else if (!skip_prefix(command, "builtin:", &p)) {
 678                 /* assume V1 timestamp or garbage */
 679
 680                 char *p_end;
 681
 682                 strtoumax(command, &p_end, 10);
 683                 trace_printf_key(&trace_fsmonitor,
 684                                  ((*p_end) ?
 685                                   "fsmonitor: invalid command line '%s'" :
 686                                   "fsmonitor: unsupported V1 protocol '%s'"),
 687                                  command);
 688                 do_trivial = 1;
 689                 do_cookie = 1;
 690
 691         } else {
 692                 /* We have "builtin:*" */
 693                 if (fsmonitor_parse_client_token(command, &requested_token_id,
 694                                                  &requested_oldest_seq_nr)) {
 695                         trace_printf_key(&trace_fsmonitor,
 696                                          "fsmonitor: invalid V2 protocol token '%s'",
 697                                          command);
 698                         do_trivial = 1;
 699                         do_cookie = 1;
 700
 701                 } else {
 702                         /*
 703                          * We have a V2 valid token:
 704                          *     "builtin:<token_id>:<seq_nr>"
 705                          */
 706                         do_cookie = 1;
 707                 }
 708         }
 709
 710         pthread_mutex_lock(&state->main_lock);
 711
 712         if (!state->current_token_data)
 713                 BUG("fsmonitor state does not have a current token");
 714
 715         /*
 716          * Write a cookie file inside the directory being watched in
 717          * an effort to flush out existing filesystem events that we
 718          * actually care about.  Suspend this client thread until we
 719          * see the filesystem events for this cookie file.
 720          *
 721          * Creating the cookie lets us guarantee that our FS listener
 722          * thread has drained the kernel queue and we are caught up
 723          * with the kernel.
 724          *
 725          * If we cannot create the cookie (or otherwise guarantee that
 726          * we are caught up), we send a trivial response.  We have to
 727          * assume that there might be some very, very recent activity
 728          * on the FS still in flight.
 729          */
 730         if (do_cookie) {
 731                 cookie_result = with_lock__wait_for_cookie(state);
 732                 if (cookie_result != FCIR_SEEN) {
 733                         error(_("fsmonitor: cookie_result '%d' != SEEN"),
 734                               cookie_result);
 735                         do_trivial = 1;
 736                 }
 737         }
 738
 739         if (do_flush)
 740                 with_lock__do_force_resync(state);
 741
 742         /*
 743          * We mark the current head of the batch list as "pinned" so
 744          * that the listener thread will treat this item as read-only
 745          * (and prevent any more paths from being added to it) from
 746          * now on.
 747          */
 748         token_data = state->current_token_data;
 749         batch_head = token_data->batch_head;
 750         ((struct fsmonitor_batch *)batch_head)->pinned_time = time(NULL);
 751
 752         /*
 753          * FSMonitor Protocol V2 requires that we send a response header
 754          * with a "new current token" and then all of the paths that changed
 755          * since the "requested token".  We send the seq_nr of the just-pinned
 756          * head batch so that future requests from a client will be relative
 757          * to it.
 758          */
 759         with_lock__format_response_token(&response_token,
 760                                          &token_data->token_id, batch_head);
 761
 762         reply(reply_data, response_token.buf, response_token.len + 1);
 763         total_response_len += response_token.len + 1;
 764
 765         trace2_data_string("fsmonitor", the_repository, "response/token",
 766                            response_token.buf);
 767         trace_printf_key(&trace_fsmonitor, "response token: %s",
 768                          response_token.buf);
 769
 770         if (!do_trivial) {
 771                 if (strcmp(requested_token_id.buf, token_data->token_id.buf)) {
 772                         /*
 773                          * The client last spoke to a different daemon
 774                          * instance -OR- the daemon had to resync with
 775                          * the filesystem (and lost events), so reject.
 776                          */
 777                         trace2_data_string("fsmonitor", the_repository,
 778                                            "response/token", "different");
 779                         do_trivial = 1;
 780
 781                 } else if (requested_oldest_seq_nr <
 782                            token_data->batch_tail->batch_seq_nr) {
 783                         /*
 784                          * The client wants older events than we have for
 785                          * this token_id.  This means that the end of our
 786                          * batch list was truncated and we cannot give the
 787                          * client a complete snapshot relative to their
 788                          * request.
 789                          */
 790                         trace_printf_key(&trace_fsmonitor,
 791                                          "client requested truncated data");
 792                         do_trivial = 1;
 793                 }
 794         }
 795
 796         if (do_trivial) {
 797                 pthread_mutex_unlock(&state->main_lock);
 798
 799                 reply(reply_data, "/", 2);
 800
 801                 trace2_data_intmax("fsmonitor", the_repository,
 802                                    "response/trivial", 1);
 803
 804                 strbuf_release(&response_token);
 805                 strbuf_release(&requested_token_id);
 806                 return 0;
 807         }
 808
 809         /*
 810          * We're going to hold onto a pointer to the current
 811          * token-data while we walk the list of batches of files.
 812          * During this time, we will NOT be under the lock.
 813          * So we ref-count it.
 814          *
 815          * This allows the listener thread to continue prepending
 816          * new batches of items to the token-data (which we'll ignore).
 817          *
 818          * AND it allows the listener thread to do a token-reset
 819          * (and install a new `current_token_data`).
 820          */
 821         token_data->client_ref_count++;
 822
 823         pthread_mutex_unlock(&state->main_lock);
 824
 825         /*
 826          * The client request is relative to the token that they sent,
 827          * so walk the batch list backwards from the current head back
 828          * to the batch (sequence number) they named.
 829          *
 830          * We use khash to de-dup the list of pathnames.
 831          *
 832          * NEEDSWORK: each batch contains a list of interned strings,
 833          * so we only need to do pointer comparisons here to build the
 834          * hash table.  Currently, we're still comparing the string
 835          * values.
 836          */
 837         shown = kh_init_str();
 838         for (batch = batch_head;
 839              batch && batch->batch_seq_nr > requested_oldest_seq_nr;
 840              batch = batch->next) {
 841                 size_t k;
 842
 843                 for (k = 0; k < batch->nr; k++) {
 844                         const char *s = batch->interned_paths[k];
 845                         size_t s_len;
 846
 847                         if (kh_get_str(shown, s) != kh_end(shown))
 848                                 duplicates++;
 849                         else {
 850                                 kh_put_str(shown, s, &hash_ret);
 851
 852                                 trace_printf_key(&trace_fsmonitor,
 853                                                  "send[%"PRIuMAX"]: %s",
 854                                                  count, s);
 855
 856                                 /* Each path gets written with a trailing NUL */
 857                                 s_len = strlen(s) + 1;
 858
 859                                 if (payload.len + s_len >=
 860                                     LARGE_PACKET_DATA_MAX) {
 861                                         reply(reply_data, payload.buf,
 862                                               payload.len);
 863                                         total_response_len += payload.len;
 864                                         strbuf_reset(&payload);
 865                                 }
 866
 867                                 strbuf_add(&payload, s, s_len);
 868                                 count++;
 869                         }
 870                 }
 871         }
 872
 873         if (payload.len) {
 874                 reply(reply_data, payload.buf, payload.len);
 875                 total_response_len += payload.len;
 876         }
 877
 878         kh_release_str(shown);
 879
 880         pthread_mutex_lock(&state->main_lock);
 881
 882         if (token_data->client_ref_count > 0)
 883                 token_data->client_ref_count--;
 884
 885         if (token_data->client_ref_count == 0) {
 886                 if (token_data != state->current_token_data) {
 887                         /*
 888                          * The listener thread did a token-reset while we were
 889                          * walking the batch list.  Therefore, this token is
 890                          * stale and can be discarded completely.  If we are
 891                          * the last reader thread using this token, we own
 892                          * that work.
 893                          */
 894                         fsmonitor_free_token_data(token_data);
 895                 } else if (batch) {
 896                         /*
 897                          * This batch is the first item in the list
 898                          * that is older than the requested sequence
 899                          * number and might be considered to be
 900                          * obsolete.  See if we can truncate the list
 901                          * and save some memory.
 902                          */
 903                         with_lock__truncate_old_batches(state, batch);
 904                 }
 905         }
 906
 907         pthread_mutex_unlock(&state->main_lock);
 908
 909         trace2_data_intmax("fsmonitor", the_repository, "response/length", total_response_len);
 910         trace2_data_intmax("fsmonitor", the_repository, "response/count/files", count);
 911         trace2_data_intmax("fsmonitor", the_repository, "response/count/duplicates", duplicates);
 912
 913         strbuf_release(&response_token);
 914         strbuf_release(&requested_token_id);
 915         strbuf_release(&payload);
 916
 917         return 0;
 918 }
 919
 920 static ipc_server_application_cb handle_client;
 921
 922 static int handle_client(void *data,
 923                          const char *command, size_t command_len,
 924                          ipc_server_reply_cb *reply,
 925                          struct ipc_server_reply_data *reply_data)
 926 {
 927         struct fsmonitor_daemon_state *state = data;
 928         int result;
 929
 930         /*
 931          * The Simple IPC API now supports {char*, len} arguments, but
 932          * FSMonitor always uses proper null-terminated strings, so
 933          * we can ignore the command_len argument.  (Trust, but verify.)
 934          */
 935         if (command_len != strlen(command))
 936                 BUG("FSMonitor assumes text messages");
 937
 938         trace_printf_key(&trace_fsmonitor, "requested token: %s", command);
 939
 940         trace2_region_enter("fsmonitor", "handle_client", the_repository);
 941         trace2_data_string("fsmonitor", the_repository, "request", command);
 942
 943         result = do_handle_client(state, command, reply, reply_data);
 944
 945         trace2_region_leave("fsmonitor", "handle_client", the_repository);
 946
 947         return result;
 948 }
 949
 950 #define FSMONITOR_DIR           "fsmonitor--daemon"
 951 #define FSMONITOR_COOKIE_DIR    "cookies"
 952 #define FSMONITOR_COOKIE_PREFIX (FSMONITOR_DIR "/" FSMONITOR_COOKIE_DIR "/")
 953
 954 enum fsmonitor_path_type fsmonitor_classify_path_workdir_relative(
 955         const char *rel)
 956 {
 957         if (fspathncmp(rel, ".git", 4))
 958                 return IS_WORKDIR_PATH;
 959         rel += 4;
 960
 961         if (!*rel)
 962                 return IS_DOT_GIT;
 963         if (*rel != '/')
 964                 return IS_WORKDIR_PATH; /* e.g. .gitignore */
 965         rel++;
 966
 967         if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
 968                         strlen(FSMONITOR_COOKIE_PREFIX)))
 969                 return IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX;
 970
 971         return IS_INSIDE_DOT_GIT;
 972 }
 973
 974 enum fsmonitor_path_type fsmonitor_classify_path_gitdir_relative(
 975         const char *rel)
 976 {
 977         if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
 978                         strlen(FSMONITOR_COOKIE_PREFIX)))
 979                 return IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX;
 980
 981         return IS_INSIDE_GITDIR;
 982 }
 983
 984 static enum fsmonitor_path_type try_classify_workdir_abs_path(
 985         struct fsmonitor_daemon_state *state,
 986         const char *path)
 987 {
 988         const char *rel;
 989
 990         if (fspathncmp(path, state->path_worktree_watch.buf,
 991                        state->path_worktree_watch.len))
 992                 return IS_OUTSIDE_CONE;
 993
 994         rel = path + state->path_worktree_watch.len;
 995
 996         if (!*rel)
 997                 return IS_WORKDIR_PATH; /* it is the root dir exactly */
 998         if (*rel != '/')
 999                 return IS_OUTSIDE_CONE;
1000         rel++;
1001
1002         return fsmonitor_classify_path_workdir_relative(rel);
1003 }
1004
1005 enum fsmonitor_path_type fsmonitor_classify_path_absolute(
1006         struct fsmonitor_daemon_state *state,
1007         const char *path)
1008 {
1009         const char *rel;
1010         enum fsmonitor_path_type t;
1011
1012         t = try_classify_workdir_abs_path(state, path);
1013         if (state->nr_paths_watching == 1)
1014                 return t;
1015         if (t != IS_OUTSIDE_CONE)
1016                 return t;
1017
1018         if (fspathncmp(path, state->path_gitdir_watch.buf,
1019                        state->path_gitdir_watch.len))
1020                 return IS_OUTSIDE_CONE;
1021
1022         rel = path + state->path_gitdir_watch.len;
1023
1024         if (!*rel)
1025                 return IS_GITDIR; /* it is the <gitdir> exactly */
1026         if (*rel != '/')
1027                 return IS_OUTSIDE_CONE;
1028         rel++;
1029
1030         return fsmonitor_classify_path_gitdir_relative(rel);
1031 }
1032
1033 /*
1034  * We try to combine small batches at the front of the batch-list to avoid
1035  * having a long list.  This hopefully makes it a little easier when we want
1036  * to truncate and maintain the list.  However, we don't want the paths array
1037  * to just keep growing and growing with realloc, so we insert an arbitrary
1038  * limit.
1039  */
1040 #define MY_COMBINE_LIMIT (1024)
1041
1042 void fsmonitor_publish(struct fsmonitor_daemon_state *state,
1043                        struct fsmonitor_batch *batch,
1044                        const struct string_list *cookie_names)
1045 {
1046         if (!batch && !cookie_names->nr)
1047                 return;
1048
1049         pthread_mutex_lock(&state->main_lock);
1050
1051         if (batch) {
1052                 struct fsmonitor_batch *head;
1053
1054                 head = state->current_token_data->batch_head;
1055                 if (!head) {
1056                         BUG("token does not have batch");
1057                 } else if (head->pinned_time) {
1058                         /*
1059                          * We cannot alter the current batch list
1060                          * because:
1061                          *
1062                          * [a] it is being transmitted to at least one
1063                          * client and the handle_client() thread has a
1064                          * ref-count, but not a lock on the batch list
1065                          * starting with this item.
1066                          *
1067                          * [b] it has been transmitted in the past to
1068                          * at least one client such that future
1069                          * requests are relative to this head batch.
1070                          *
1071                          * So, we can only prepend a new batch onto
1072                          * the front of the list.
1073                          */
1074                         batch->batch_seq_nr = head->batch_seq_nr + 1;
1075                         batch->next = head;
1076                         state->current_token_data->batch_head = batch;
1077                 } else if (!head->batch_seq_nr) {
1078                         /*
1079                          * Batch 0 is unpinned.  See the note in
1080                          * `fsmonitor_new_token_data()` about why we
1081                          * don't need to accumulate these paths.
1082                          */
1083                         fsmonitor_batch__pop(batch);
1084                 } else if (head->nr + batch->nr > MY_COMBINE_LIMIT) {
1085                         /*
1086                          * The head batch in the list has never been
1087                          * transmitted to a client, but folding the
1088                          * contents of the new batch onto it would
1089                          * exceed our arbitrary limit, so just prepend
1090                          * the new batch onto the list.
1091                          */
1092                         batch->batch_seq_nr = head->batch_seq_nr + 1;
1093                         batch->next = head;
1094                         state->current_token_data->batch_head = batch;
1095                 } else {
1096                         /*
1097                          * We are free to append the paths in the given
1098                          * batch onto the end of the current head batch.
1099                          */
1100                         fsmonitor_batch__combine(head, batch);
1101                         fsmonitor_batch__pop(batch);
1102                 }
1103         }
1104
1105         if (cookie_names->nr)
1106                 with_lock__mark_cookies_seen(state, cookie_names);
1107
1108         pthread_mutex_unlock(&state->main_lock);
1109 }
1110
1111 static void *fsmonitor_fs_listen__thread_proc(void *_state)
1112 {
1113         struct fsmonitor_daemon_state *state = _state;
1114
1115         trace2_thread_start("fsm-listen");
1116
1117         trace_printf_key(&trace_fsmonitor, "Watching: worktree '%s'",
1118                          state->path_worktree_watch.buf);
1119         if (state->nr_paths_watching > 1)
1120                 trace_printf_key(&trace_fsmonitor, "Watching: gitdir '%s'",
1121                                  state->path_gitdir_watch.buf);
1122
1123         fsmonitor_fs_listen__loop(state);
1124
1125         pthread_mutex_lock(&state->main_lock);
1126         if (state->current_token_data &&
1127             state->current_token_data->client_ref_count == 0)
1128                 fsmonitor_free_token_data(state->current_token_data);
1129         state->current_token_data = NULL;
1130         pthread_mutex_unlock(&state->main_lock);
1131
1132         trace2_thread_exit();
1133         return NULL;
1134 }
1135
1136 static int fsmonitor_run_daemon_1(struct fsmonitor_daemon_state *state)
1137 {
1138         struct ipc_server_opts ipc_opts = {
1139                 .nr_threads = fsmonitor__ipc_threads,
1140
1141                 /*
1142                  * We know that there are no other active threads yet,
1143                  * so we can let the IPC layer temporarily chdir() if
1144                  * it needs to when creating the server side of the
1145                  * Unix domain socket.
1146                  */
1147                 .uds_disallow_chdir = 0
1148         };
1149
1150         /*
1151          * Start the IPC thread pool before the we've started the file
1152          * system event listener thread so that we have the IPC handle
1153          * before we need it.
1154          */
1155         if (ipc_server_run_async(&state->ipc_server_data,
1156                                  fsmonitor_ipc__get_path(), &ipc_opts,
1157                                  handle_client, state))
1158                 return error(_("could not start IPC thread pool"));
1159
1160         /*
1161          * Start the fsmonitor listener thread to collect filesystem
1162          * events.
1163          */
1164         if (pthread_create(&state->listener_thread, NULL,
1165                            fsmonitor_fs_listen__thread_proc, state) < 0) {
1166                 ipc_server_stop_async(state->ipc_server_data);
1167                 ipc_server_await(state->ipc_server_data);
1168
1169                 return error(_("could not start fsmonitor listener thread"));
1170         }
1171
1172         /*
1173          * The daemon is now fully functional in background threads.
1174          * Wait for the IPC thread pool to shutdown (whether by client
1175          * request or from filesystem activity).
1176          */
1177         ipc_server_await(state->ipc_server_data);
1178
1179         /*
1180          * The fsmonitor listener thread may have received a shutdown
1181          * event from the IPC thread pool, but it doesn't hurt to tell
1182          * it again.  And wait for it to shutdown.
1183          */
1184         fsmonitor_fs_listen__stop_async(state);
1185         pthread_join(state->listener_thread, NULL);
1186
1187         return state->error_code;
1188 }
1189
1190 static int fsmonitor_run_daemon(void)
1191 {
1192         struct fsmonitor_daemon_state state;
1193         int err;
1194
1195         memset(&state, 0, sizeof(state));
1196
1197         hashmap_init(&state.cookies, cookies_cmp, NULL, 0);
1198         pthread_mutex_init(&state.main_lock, NULL);
1199         pthread_cond_init(&state.cookies_cond, NULL);
1200         state.error_code = 0;
1201         state.current_token_data = fsmonitor_new_token_data();
1202
1203         /* Prepare to (recursively) watch the <worktree-root> directory. */
1204         strbuf_init(&state.path_worktree_watch, 0);
1205         strbuf_addstr(&state.path_worktree_watch, absolute_path(get_git_work_tree()));
1206         state.nr_paths_watching = 1;
1207
1208         /*
1209          * We create/delete cookie files inside the .git directory to
1210          * help us keep sync with the file system.  If ".git" is not a
1211          * directory, then <gitdir> is not inside the cone of
1212          * <worktree-root>, so set up a second watch for it.
1213          */
1214         strbuf_init(&state.path_gitdir_watch, 0);
1215         strbuf_addbuf(&state.path_gitdir_watch, &state.path_worktree_watch);
1216         strbuf_addstr(&state.path_gitdir_watch, "/.git");
1217         if (!is_directory(state.path_gitdir_watch.buf)) {
1218                 strbuf_reset(&state.path_gitdir_watch);
1219                 strbuf_addstr(&state.path_gitdir_watch, absolute_path(get_git_dir()));
1220                 state.nr_paths_watching = 2;
1221         }
1222
1223         /*
1224          * We will write filesystem syncing cookie files into
1225          * <gitdir>/<fsmonitor-dir>/<cookie-dir>/<pid>-<seq>.
1226          */
1227         strbuf_init(&state.path_cookie_prefix, 0);
1228         strbuf_addbuf(&state.path_cookie_prefix, &state.path_gitdir_watch);
1229
1230         strbuf_addch(&state.path_cookie_prefix, '/');
1231         strbuf_addstr(&state.path_cookie_prefix, FSMONITOR_DIR);
1232         mkdir(state.path_cookie_prefix.buf, 0777);
1233
1234         strbuf_addch(&state.path_cookie_prefix, '/');
1235         strbuf_addstr(&state.path_cookie_prefix, FSMONITOR_COOKIE_DIR);
1236         mkdir(state.path_cookie_prefix.buf, 0777);
1237
1238         strbuf_addch(&state.path_cookie_prefix, '/');
1239
1240         /*
1241          * Confirm that we can create platform-specific resources for the
1242          * filesystem listener before we bother starting all the threads.
1243          */
1244         if (fsmonitor_fs_listen__ctor(&state)) {
1245                 err = error(_("could not initialize listener thread"));
1246                 goto done;
1247         }
1248
1249         err = fsmonitor_run_daemon_1(&state);
1250
1251 done:
1252         pthread_cond_destroy(&state.cookies_cond);
1253         pthread_mutex_destroy(&state.main_lock);
1254         fsmonitor_fs_listen__dtor(&state);
1255
1256         ipc_server_free(state.ipc_server_data);
1257
1258         strbuf_release(&state.path_worktree_watch);
1259         strbuf_release(&state.path_gitdir_watch);
1260         strbuf_release(&state.path_cookie_prefix);
1261
1262         /*
1263          * NEEDSWORK: Consider "rm -rf <gitdir>/<fsmonitor-dir>"
1264          */
1265
1266         return err;
1267 }
1268
1269 static int try_to_run_foreground_daemon(void)
1270 {
1271         /*
1272          * Technically, we don't need to probe for an existing daemon
1273          * process, since we could just call `fsmonitor_run_daemon()`
1274          * and let it fail if the pipe/socket is busy.
1275          *
1276          * However, this method gives us a nicer error message for a
1277          * common error case.
1278          */
1279         if (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
1280                 die("fsmonitor--daemon is already running.");
1281
1282         return !!fsmonitor_run_daemon();
1283 }
1284
1285 #ifndef GIT_WINDOWS_NATIVE
/*
 * Modelled on `daemonize()`: `fork()` and run the daemon directly in
 * the child process.
 *
 * `*pid` receives the result of `fork()`.  In the parent this is the
 * child's PID and 0 is returned so that the caller can wait for the
 * child to come up.  In the child `*pid` is 0 and the return value is
 * the (normalized) result of running the daemon.  When `fork()` fails,
 * `*pid` is -1 and an error is returned.
 */
static int spawn_background_fsmonitor_daemon(pid_t *pid)
{
        pid_t child = fork();

        *pid = child;

        if (child == -1)
                return error_errno(_("could not spawn fsmonitor--daemon in the background"));

        if (child != 0)
                return 0; /* parent: the child carries on by itself */

        /* child: start a new session and drop the inherited stdio */
        if (setsid() == -1)
                error_errno(_("setsid failed"));
        close(0);
        close(1);
        close(2);
        sanitize_stdfds();

        return !!fsmonitor_run_daemon();
}
1313 #else
1314 /*
1315  * Conceptually like `daemonize()` but different because Windows does not
1316  * have `fork(2)`.  Spawn a normal Windows child process but without the
1317  * limitations of `start_command()` and `finish_command()`.
1318  */
1319 static int spawn_background_fsmonitor_daemon(pid_t *pid)
1320 {
1321         char git_exe[MAX_PATH];
1322         struct strvec args = STRVEC_INIT;
1323         int in, out;
1324
1325         GetModuleFileNameA(NULL, git_exe, MAX_PATH);
1326
1327         in = open("/dev/null", O_RDONLY);
1328         out = open("/dev/null", O_WRONLY);
1329
1330         strvec_push(&args, git_exe);
1331         strvec_push(&args, "fsmonitor--daemon");
1332         strvec_push(&args, "run");
1333
1334         *pid = mingw_spawnvpe(args.v[0], args.v, NULL, NULL, in, out, out);
1335         close(in);
1336         close(out);
1337
1338         strvec_clear(&args);
1339
1340         if (*pid < 0)
1341                 return error(_("could not spawn fsmonitor--daemon in the background"));
1342
1343         return 0;
1344 }
1345 #endif
1346
1347 /*
1348  * This is adapted from `wait_or_whine()`.  Watch the child process and
1349  * let it get started and begin listening for requests on the socket
1350  * before reporting our success.
1351  */
1352 static int wait_for_background_startup(pid_t pid_child)
1353 {
1354         int status;
1355         pid_t pid_seen;
1356         enum ipc_active_state s;
1357         time_t time_limit, now;
1358
1359         time(&time_limit);
1360         time_limit += fsmonitor__start_timeout_sec;
1361
1362         for (;;) {
1363                 pid_seen = waitpid(pid_child, &status, WNOHANG);
1364
1365                 if (pid_seen == -1)
1366                         return error_errno(_("waitpid failed"));
1367                 else if (pid_seen == 0) {
1368                         /*
1369                          * The child is still running (this should be
1370                          * the normal case).  Try to connect to it on
1371                          * the socket and see if it is ready for
1372                          * business.
1373                          *
1374                          * If there is another daemon already running,
1375                          * our child will fail to start (possibly
1376                          * after a timeout on the lock), but we don't
1377                          * care (who responds) if the socket is live.
1378                          */
1379                         s = fsmonitor_ipc__get_state();
1380                         if (s == IPC_STATE__LISTENING)
1381                                 return 0;
1382
1383                         time(&now);
1384                         if (now > time_limit)
1385                                 return error(_("fsmonitor--daemon not online yet"));
1386                 } else if (pid_seen == pid_child) {
1387                         /*
1388                          * The new child daemon process shutdown while
1389                          * it was starting up, so it is not listening
1390                          * on the socket.
1391                          *
1392                          * Try to ping the socket in the odd chance
1393                          * that another daemon started (or was already
1394                          * running) while our child was starting.
1395                          *
1396                          * Again, we don't care who services the socket.
1397                          */
1398                         s = fsmonitor_ipc__get_state();
1399                         if (s == IPC_STATE__LISTENING)
1400                                 return 0;
1401
1402                         /*
1403                          * We don't care about the WEXITSTATUS() nor
1404                          * any of the WIF*(status) values because
1405                          * `cmd_fsmonitor__daemon()` does the `!!result`
1406                          * trick on all function return values.
1407                          *
1408                          * So it is sufficient to just report the
1409                          * early shutdown as an error.
1410                          */
1411                         return error(_("fsmonitor--daemon failed to start"));
1412                 } else
1413                         return error(_("waitpid is confused"));
1414         }
1415 }
1416
1417 static int try_to_start_background_daemon(void)
1418 {
1419         pid_t pid_child;
1420         int ret;
1421
1422         /*
1423          * Before we try to create a background daemon process, see
1424          * if a daemon process is already listening.  This makes it
1425          * easier for us to report an already-listening error to the
1426          * console, since our spawn/daemon can only report the success
1427          * of creating the background process (and not whether it
1428          * immediately exited).
1429          */
1430         if (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
1431                 die("fsmonitor--daemon is already running.");
1432
1433         /*
1434          * Run the actual daemon in a background process.
1435          */
1436         ret = spawn_background_fsmonitor_daemon(&pid_child);
1437         if (pid_child <= 0)
1438                 return ret;
1439
1440         /*
1441          * Wait (with timeout) for the background child process get
1442          * started and begin listening on the socket/pipe.  This makes
1443          * the "start" command more synchronous and more reliable in
1444          * tests.
1445          */
1446         ret = wait_for_background_startup(pid_child);
1447
1448         return ret;
1449 }
1450
1451 int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix)
1452 {
1453         const char *subcmd;
1454
1455         struct option options[] = {
1456                 OPT_INTEGER(0, "ipc-threads",
1457                             &fsmonitor__ipc_threads,
1458                             N_("use <n> ipc worker threads")),
1459                 OPT_INTEGER(0, "start-timeout",
1460                             &fsmonitor__start_timeout_sec,
1461                             N_("Max seconds to wait for background daemon startup")),
1462
1463                 OPT_END()
1464         };
1465
1466         if (argc < 2)
1467                 usage_with_options(builtin_fsmonitor__daemon_usage, options);
1468
1469         if (argc == 2 && !strcmp(argv[1], "-h"))
1470                 usage_with_options(builtin_fsmonitor__daemon_usage, options);
1471
1472         git_config(fsmonitor_config, NULL);
1473
1474         subcmd = argv[1];
1475         argv--;
1476         argc++;
1477
1478         argc = parse_options(argc, argv, prefix, options,
1479                              builtin_fsmonitor__daemon_usage, 0);
1480         if (fsmonitor__ipc_threads < 1)
1481                 die(_("invalid 'ipc-threads' value (%d)"),
1482                     fsmonitor__ipc_threads);
1483
1484         if (!strcmp(subcmd, "start"))
1485                 return !!try_to_start_background_daemon();
1486
1487         if (!strcmp(subcmd, "run"))
1488                 return !!try_to_run_foreground_daemon();
1489
1490         if (!strcmp(subcmd, "stop"))
1491                 return !!do_as_client__send_stop();
1492
1493         if (!strcmp(subcmd, "status"))
1494                 return !!do_as_client__status();
1495
1496         die(_("Unhandled subcommand '%s'"), subcmd);
1497 }
1498
1499 #else
1500 int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix)
1501 {
1502         struct option options[] = {
1503                 OPT_END()
1504         };
1505
1506         if (argc == 2 && !strcmp(argv[1], "-h"))
1507                 usage_with_options(builtin_fsmonitor__daemon_usage, options);
1508
1509         die(_("fsmonitor--daemon not supported on this platform"));
1510 }
1511 #endif