atat-mirror/glue/jobctrl/chl

87 lines
3.3 KiB
Perl
Executable File

#!/usr/bin/perl
# 10/2002 V. Blum, NREL
# Replacement for MAPS's original chl command to work on current Scyld Beowulf etc.
# In case of problems with this version of chl, the old version is called chl.csh
# ---- Command-line handling --------------------------------------------------
# Usage: chl [-h] [-m alternative_machine_config_file]
#
# Sets the global $machinefile, which the rest of the script reads.  The
# default (~/.machines.rc) is assigned first so the variable is always
# defined; only a well-formed "-m FILE" overrides it.
$machinefile = "$ENV{HOME}/.machines.rc" ;
if (@ARGV)
{
    # The option tests are anchored at the start of the argument.  An
    # unanchored match on "-h" also fires on arguments that merely contain
    # that substring (e.g. "node-hosts").  "--help"/"--machines" style
    # spellings are still accepted, as they were before.
    if ($ARGV[0] =~ /^--?h/ )
    {
        print "chl [-m alternative_machine_config_file]\n";
        print "Checks the load on the remote machines specified in the ~/.machines.rc file\n";
        print "or as overridden by the -m option\n";
        print "The first column of output is the load and the commands following the + sign\n";
        print "give the command prefix needed to run on that machine\n";
        print "A default ~/.machines.rc is created the first time the command is run.\n";
        print "See comments therein for the format of the file.\n";
        exit;
    }
    elsif ($ARGV[0] =~ /^--?m/ )
    {
        if (defined $ARGV[1] && length $ARGV[1])
        {
            $machinefile = $ARGV[1] ;
        }
        else
        {
            # Diagnostics go to STDERR so they never mix with the
            # "load + prefix" lines printed on STDOUT.
            print STDERR "chl: option -m needs a file name; using $machinefile\n" ;
        }
    }
    else
    {
        print STDERR "chl: ignoring unrecognized argument '$ARGV[0]'; using $machinefile\n" ;
    }
}
# ---- Locate the machine description file ------------------------------------
# If the requested file does not exist, fall back to ~/.machines.rc.  If that
# does not exist either, write a commented template there and exit so the user
# can edit it before the first real run.
if (! -e $machinefile )
{
    my $default_rc = "$ENV{HOME}/.machines.rc" ;
    print "Unable to read machine description $machinefile.\n" ;
    if (-e $default_rc )
    {
        print "Resorting to $default_rc.\n" ;
        $machinefile = $default_rc ;
    }
    else
    {
        print "Creating $default_rc ... \n" ;
        print "Please edit this file to reflect your machine setup.\n" ;
        # Three-argument open with a lexical handle; fail loudly instead of
        # printing to a closed handle and exiting as if the file was written.
        open (my $rc, '>', $default_rc)
            or die "chl: cannot create $default_rc: $!\n" ;
        print {$rc}
            "#configuration file for chl, minload, and pollmach\n",
            "#this line indicates the waiting time between the checks of machine availability\n",
            "#in seconds\n",
            "set waitbetweenpoll=60\n",
            "#The remainder of this file lists the machines\n",
            "#columns in this file are separated by a +\n",
            "#each line corresponds to a machine\n",
            "#the first column indicates the command to obtain the load on a remote machine\n",
            "#the second column is the command prefix to lauch a job on that remote machine\n",
            "#note that the command must cd into the same directory on the remote machine\n",
            "#as on the local machine. The 'node' command does that automatically.\n",
            "#Type 'node' on the command line for more info.\n",
            "# \n",
            "#the first line sets the threshold load for not starting a job (here 0.5)\n",
            "#do not remove the 'none' keyword\n",
            "echo 0.5 + none\n",
            "#list the machines here\n",
            "#example on a local network (e.g. beowolf) with shared disk\n",
            "rsh machinename uptime | getvalue average + node machinename\n",
            "#secure version of the above\n",
            "ssh user\@machinename uptime | getvalue average + node -s user\@machinename\n",
            "#if the machines do not share the same disk\n",
            "ssh user\@machinename uptime | getvalue average + node -r -s user\@machinename\n",
            "#example for queueing system\n",
            # \$USER and \$1 must reach the file literally: they are meant for
            # the shell and awk that later run this line.  Unescaped, Perl
            # interpolates its own (empty) $USER and $1 and the template line
            # comes out as "llq -u  | wc -l | awk '{print (-4)/2}'".
            "llq -u \$USER | wc -l | awk '{print (\$1-4)/2}' + qit \n" ;
        # Buffered write errors (e.g. disk full) only surface at close.
        close ($rc)
            or die "chl: error writing $default_rc: $!\n" ;
        exit ;
    }
}
# ---- Query every machine listed in $machinefile -----------------------------
# Each active line has the form "<load command> + <job prefix>".  The load
# command is run through the shell and its output is printed followed by
# " + " and the prefix, one line per machine.
open (my $mf, '<', $machinefile)
    or die "chl: cannot open $machinefile: $!\n" ;
while (my $line = <$mf>)
{
    # Drop the line terminator here and add it back when printing, so a final
    # line without a newline still produces a complete output line.
    chomp $line ;
    # Skip blank lines, comments and "set ..." directives.  \s also covers
    # tab-indented blanks/comments, which must not be run as commands.
    next if $line =~ /^\s*$/ ;
    next if $line =~ /^\s*\#/ ;
    next if $line =~ /^set/ ;
    # Split on the first + only: everything after it is the prefix, so a
    # prefix that itself contains a + is kept whole.
    my ($loadcmd, $prefix) = split /\+/, $line, 2 ;
    $prefix = '' unless defined $prefix ;
    my $load = `$loadcmd` ;
    $load = '' unless defined $load ;
    # chomp, not chop: only strip a trailing newline, never a digit of the
    # load when the command prints no newline.
    chomp $load ;
    print $load, " + ", $prefix, "\n" ;
}
close ($mf) ;