where.c   [plain text]


/* -*- c-file-style: "java"; indent-tabs-mode: nil; fill-column: 78; -*-
 * 
 * distcc -- A simple distributed compiler system
 *
 * Copyright (C) 2002, 2003 by Martin Pool <mbp@samba.org>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 */


                /* I put the shotgun in an Adidas bag and padded it
                 * out with four pairs of tennis socks, not my style
                 * at all, but that was what I was aiming for: If they
                 * think you're crude, go technical; if they think
                 * you're technical, go crude.  I'm a very technical
                 * boy.  So I decided to get as crude as possible.
                 * These days, though, you have to be pretty technical
                 * before you can even aspire to crudeness.
                 *              -- William Gibson, "Johnny Mnemonic" */

    
/**
 * @file
 *
 * Routines to decide on which machine to run a distributable job.
 *
 * The current algorithm (new in 1.2 and subject to change) is as follows.
 *
 * CPU lock is held until the job is complete.
 *
 * Once the request has been transmitted, the lock is released and a second
 * job can be sent.
 *
 * Servers which wish to limit their load can defer accepting jobs, and the
 * client will block with that lock held.
 *
 * cpp is probably cheap enough that we can allow it to run unlocked.  However
 * that is not true for local compilation or linking.
 *
 * @todo Write a test harness for the host selection algorithm.  Perhaps a
 * really simple simulation of machines taking different amounts of time to
 * build stuff?
 */

#include "config.h"

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <time.h>

#include <sys/stat.h>
#include <sys/file.h>

#include "distcc.h"
#include "trace.h"
#include "util.h"
#include "hosts.h"
#include "lock.h"
#include "where.h"
#include "exitcode.h"


static int dcc_lock_one(struct dcc_hostdef *hostlist,
                        struct dcc_hostdef **buildhost,
                        int *cpu_lock_fd);


int dcc_pick_host_from_list(struct dcc_hostdef **buildhost,
                            int *cpu_lock_fd)
{
    struct dcc_hostdef *hostlist;
    int ret;
    int n_hosts;
    
    if ((ret = dcc_get_hostlist(&hostlist, &n_hosts)) != 0) {
        return EXIT_NO_HOSTS;
    }

    if ((ret = dcc_remove_disliked(&hostlist)))
        return ret;

    if (!hostlist) {
        return EXIT_NO_HOSTS;
    }
    
    return dcc_lock_one(hostlist, buildhost, cpu_lock_fd);

    /* FIXME: Host list is leaked? */
}


static void dcc_lock_pause(void)
{
    /* This could do with some tuning.
     *
     * My assumption basically is that polling a little too often is
     * relatively cheap; sleeping when we should be working is bad.  However,
     * if we hit this code at all we're overloaded, so sleeping a while is
     * perhaps OK.
     *
     * We don't use exponential backoff, because that would tend to prefer
     * later arrivals and penalize jobs that have been waiting for a long
     * time.  This would mean more compiler processes hanging around than is
     * really necessary, and also by making jobs complete very-out-of-order is
     * more likely to find Makefile bugs. */

    unsigned pause_time = 1;

    dcc_note_state(DCC_PHASE_BLOCKED, NULL, NULL);
    
    rs_trace("nothing available, sleeping %us...", pause_time);

    sleep(pause_time);
}


/**
 * Find a host that can run a distributed compilation by examining local state.
 * It can be either a remote server or localhost (if that is in the list).
 *
 * This function does not return (except for errors) until a host has been
 * selected.  If necessary it sleeps until one is free.
 *
 * @todo We don't need transmit locks for local operations.
 **/
static int dcc_lock_one(struct dcc_hostdef *hostlist,
                        struct dcc_hostdef **buildhost,
                        int *cpu_lock_fd)
{
    struct dcc_hostdef *h;
    int i_cpu;
    int ret;

    while (1) {
        for (i_cpu = 0; i_cpu < 50; i_cpu++) {
            for (h = hostlist; h; h = h->next) {
                if (i_cpu >= h->n_slots)
                    continue;

                ret = dcc_lock_host("cpu", h, i_cpu, 0, cpu_lock_fd);
                
                if (ret == 0) {
                    *buildhost = h;
                    dcc_note_state_slot(i_cpu);
                    return 0;
                } else if (ret == EXIT_BUSY) {
                    continue;
                } else {
                    rs_log_error("failed to lock");
                    return ret;
                }
            }
        }
        
        dcc_lock_pause();
    }
}



/**
 * Lock localhost.  Used to get the right balance of jobs when some of
 * them must be local.
 **/
int dcc_lock_local(int *cpu_lock_fd)
{
    struct dcc_hostdef *chosen;
    
    return dcc_lock_one(dcc_hostdef_local, &chosen, cpu_lock_fd);
}