Rev 35 | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed
#!/usr/bin/env perl
# Common library for havirt. Basically, just a place to put things which may be used by any
# part of havirt. Mostly for organizational purposes.
# Copyright 2024 Daily Data, Inc.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following
# conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the distribution.
# Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
# THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# v0.0.1 20240602 RWR
# Initial setup
#
# v1.2.0 20240826 RWR
# Added some code to migrate domains if node placed in maintenance mode
# Added a lot of 'verbose' print lines, and modified for new flag structure
#
package havirt;
use warnings;
use strict;
BEGIN {
use FindBin;
use File::Spec;
# use libraries from the directory this script is in
use Cwd 'abs_path';
use File::Basename;
use lib dirname( abs_path( __FILE__ ) );
}
use Data::Dumper qw(Dumper); # Import the Dumper() subroutine
# define the version number
# see https://metacpan.org/pod/release/JPEACOCK/version-0.97/lib/version.pod
use version;
our $VERSION = version->declare("1.2.0");
# Export the public subroutines of this library into any package that
# does "use havirt;". Everything in @EXPORT is imported by default.
# The leading '&' on each name is the explicit "subroutine" sigil that
# Exporter accepts; it is optional for functions.
use Exporter;
our @ISA = qw( Exporter );
our @EXPORT = qw(
&readDB
&writeDB
&report
&scan
&makeCommand
&forceScan
&executeAndWait
&findDomain
&diffArray
&makeConfig
&readConfig
&getAvailableResources
&resource
&validateResources
&migrate
);
# Read the status DB file (just a YAML document) into $main::statusDB.
#
# Parameters:
#    $lock - if true, create "<status db filename>.lock" before reading so
#            other processes will not try to write to the DB. A custom lock
#            file is used because an flock is automatically released once
#            the file has been read. writeDB removes the lock file again.
#
# Returns the value stored in $main::statusDB, i.e. the first document of
# the YAML file (or of an empty YAML::Tiny object if the file does not exist).
#
# Dies if $lock is set and the lock file still exists after waiting for
# $lockTime one-second intervals.
#
# NOTE(review): YAML::Tiny is not loaded in this file; presumably the calling
# script loads it - confirm.
sub readDB {
    # function-scoped import so this sub is self-contained; the constants are
    # only needed for the atomic lock-file creation below
    use Fcntl qw( O_WRONLY O_CREAT O_EXCL );
    my $lock = shift;
    my $dbFileName = $main::config->{'status db filename'};
    my $lockFileName = "$dbFileName.lock";
    my $lockTime = 5; # maximum number of one second waits for lock to clear
    if ( $lock ) {
        while ( 1 ) {
            # O_EXCL makes "create only if it does not exist" one atomic step,
            # so two processes can not both believe they own the lock. It also
            # avoids running touch through a shell, which broke on file names
            # containing spaces or shell metacharacters
            if ( sysopen( my $lockHandle, $lockFileName, O_WRONLY | O_CREAT | O_EXCL ) ) {
                close $lockHandle;
                last;
            }
            unless ( $!{EEXIST} ) {
                # failure is not "already locked" (e.g. directory missing).
                # The old touch failed silently here, so keep going but say why
                warn "Could not create lock file $lockFileName: $!\n";
                last;
            }
            die "Something has $dbFileName locked, aborting\n" unless $lockTime--;
            sleep 1; # wait one second, then try again
        }
    }
    my $yaml = YAML::Tiny->new( {} );
    if ( -f $dbFileName ) {
        $yaml = YAML::Tiny->read( $dbFileName );
    }
    $main::statusDB = $yaml->[0];
}
# Save $main::statusDB to the status DB file, replacing whatever is there,
# then drop the "<status db filename>.lock" file if one is present (this
# releases a lock taken by readDB).
sub writeDB {
    my $dbFileName = $main::config->{'status db filename'};
    my $lockFileName = "$dbFileName.lock";
    YAML::Tiny->new( $main::statusDB )->write( $dbFileName );
    # release any lock we might have on it
    my $released = ( -f $lockFileName ) && unlink( $lockFileName );
    return $released;
}
# Build a report from a header row and data rows and return it as a string.
# The 'format' flag selects the layout: 'tsv' gives tab separated values,
# anything else gives the fixed width screen layout.
# All parameters are handed unchanged to the selected formatter.
sub report {
    my $formatter = $main::config->{'flags'}->{'format'} eq 'tsv'
        ? \&report_tsv
        : \&report_screen;
    return $formatter->( @_ );
}
# Report as tab separated values, no encapsulation (quoting) of the cells.
#    $header - array ref of column titles
#    $data   - array ref of rows, each row an array ref of cells
# Returns one string: the header line followed by one line per row, every
# line (including the last) ending in a newline.
sub report_tsv {
    my ( $header, $data ) = @_;
    my @lines = map { join( "\t", @$_ ) } ( $header, @$data );
    return join( "\n", @lines ) . "\n";
}
# Report suitable for screen, with fixed width, right justified columns
# separated by a single space.
#    $header - array ref of column titles; also defines the number of columns
#    $data   - array ref of rows, each row an array ref of cells
# Each column is as wide as its widest cell (title included).
# Undefined or missing cells are shown as empty strings, and cells beyond the
# last header column are ignored, so ragged rows do not raise warnings.
# Returns the header line followed by one line per row, as a single string.
sub report_screen {
    my ( $header, $data ) = @_;
    my $lastColumn = $#{$header};
    # normalise every row (header first) to exactly one defined cell per column
    my @rows;
    foreach my $row ( $header, @$data ) {
        push @rows, [ map { defined $row->[$_] ? $row->[$_] : '' } 0 .. $lastColumn ];
    }
    # widest cell in each column decides the width of that column
    my @widths = ( 0 ) x ( $lastColumn + 1 );
    foreach my $row ( @rows ) {
        foreach my $column ( 0 .. $lastColumn ) {
            my $cellWidth = length( $row->[$column] );
            $widths[$column] = $cellWidth if $cellWidth > $widths[$column];
        }
    }
    # one %<width>s per column
    my $format = join( ' ', map { '%' . $_ . 's' } @widths ) . "\n";
    return join( '', map { sprintf( $format, @$_ ) } @rows );
}
# Ask a node which domains it currently lists.
# Runs 'virsh list' on $node (via makeCommand), keeps the lines that start
# with a numeric id, and extracts the name that follows the id.
# Returns a hash ref keyed by domain name; each value is the time the domain
# was seen (the current time).
sub getDomainsOnNode {
    my $node = shift;
    my $command = &main::makeCommand( $node, 'virsh list' );
    print "havirt.pm:getDomainsOnNode, command is $command\n" if $main::config->{'flags'}->{'debug'} > 2;
    # only lines beginning with a number describe a domain
    my @domainLines = grep { /^\s*\d/ } `$command`;
    # reduce each line to the domain name; a line that does not fit is kept as is
    my @domainNames = map { m/\s*\d+\s*([^ ]+)/ ? $1 : $_ } @domainLines;
    my %lastSeen;
    foreach my $name ( @domainNames ) {
        $lastSeen{$name} = time;
    }
    return \%lastSeen;
}
# Find the node a domain is on.
#    first parameter is the domain name
#    rest of @_ is the list of nodes to search
# If no nodes are passed in, all nodes known in the status DB are searched.
# Re-reads the status DB (readDB, no lock) before searching.
# When the 'paranoid' flag is set, every node is asked directly with
# 'virsh list'; otherwise only the status DB is consulted.
# Returns the first node found with the domain, or an empty string if not found.
sub findDomain {
    my $domainName = shift;
    my @node = @_;
    &readDB();
    unless ( @node ) {
        @node = keys %{$main::statusDB->{'node'} };
        print "findDomain, nodes = " . join( "\t", @node ) . "\n" if $main::config->{'flags'}->{'debug'} > 1;
    }
    if ( $main::config->{'flags'}->{'paranoid'} ) { # we will scan all nodes just to make sure
        foreach my $thisNode ( @node ) {
            my $command = &main::makeCommand( $thisNode, 'virsh list' );
            my $output = `$command`;
            print "findDomain, $thisNode list =\n" . $output . "\n" if $main::config->{'flags'}->{'debug'} > 1;
            # compare the Name column exactly; a plain regex match on the whole
            # output would report domain 'web' as found when only 'web2' runs,
            # and would treat regex metacharacters in the name as a pattern
            foreach my $line ( split( /\n/, $output ) ) {
                return $thisNode if $line =~ m/^\s*\d+\s+(\S+)/ && $1 eq $domainName;
            }
        }
    } else { # not paranoid mode, so just look through the status file
        foreach my $thisNode ( @node ) {
            if ( $main::statusDB->{'nodePopulation'}->{$thisNode}->{'running'}->{$domainName} ) {
                return $thisNode;
            }
        }
    }
    return '';
}
# Check one or more nodes and determine which domains are running on them.
# Defaults to everything in the node database, but the target flag (-t) or
# explicit parameters can restrict it to particular nodes.
# This is the function that should be run every few minutes on one of the servers.
#
#    @_ - optional list of nodes to scan
#
# Unless the 'force' flag is set, nothing is done when the last scan is not
# older than 'min scan time' seconds (judged by the modification time of the
# 'last scan filename' file).
# Takes the status DB lock (readDB(1)), updates nodePopulation for each node,
# makes sure every domain found has an entry under 'virt', then writes the DB
# (which releases the lock).
# Returns a short status message.
sub scan {
    my @targets = @_;
    my $flags = $main::config->{'flags'};
    my $stampFile = $main::config->{'last scan filename'};
    if ( -f $stampFile && ! $flags->{'force'} ) {
        my $lastScan = time - ( stat( $stampFile ) ) [9];
        return "Scan was run $lastScan seconds ago\n" unless $lastScan > $main::config->{'min scan time'};
    }
    &main::readDB(1);
    # Record the scan time only once we hold the lock, so a scan which aborts
    # on a locked DB does not suppress the next attempts. Done in Perl rather
    # than with a shell touch so file names with spaces or shell
    # metacharacters work
    if ( open( my $stamp, '>>', $stampFile ) ) {
        close $stamp;
        utime( undef, undef, $stampFile ); # undef, undef means "now"
    } else {
        warn "Could not update $stampFile: $!\n";
    }
    print Dumper( $main::statusDB->{'nodePopulation'} ) if $flags->{'debug'} > 2;
    if ( $flags->{'target'} ) {
        push @targets, $flags->{'target'};
    }
    @targets = keys %{$main::statusDB->{'node'}} unless @targets;
    print "Scanning " . join( "\n", @targets ) . "\n" if $flags->{'debug'};
    foreach my $node (@targets) {
        print "Scanning $node\n" if $flags->{'verbose'};
        my $running = &getDomainsOnNode( $node );
        $main::statusDB->{'nodePopulation'}->{$node}->{'running'} = $running;
        $main::statusDB->{'nodePopulation'}->{$node}->{'lastchecked'} = time;
        print "Found " . scalar( keys %$running ) . " domains on node $node\n" if $flags->{'verbose'};
        foreach my $domain ( keys %$running ) {
            # make sure there is an entry for all of these domains
            $main::statusDB->{'virt'}->{$domain} = {} unless exists( $main::statusDB->{'virt'}->{$domain} );
        }
        print Dumper( $main::statusDB->{'nodePopulation'}->{$node} ) if $flags->{'debug'} > 2;
    }
    &main::writeDB();
    return "Node(s) updated\n";
}
# Makes the command that will be run on a node.
# Created as a sub so we can change the format easily.
#    $node    - node the command should run on
#    $command - shell command to run
# If $node is empty, or is the name reported by `hostname` on this machine,
# the command is returned unchanged (no remote call needed).
# Otherwise the command is wrapped in an ssh call to the node. Single quotes
# inside $command are escaped so the whole command reaches the remote shell
# as one argument.
sub makeCommand {
    my ( $node, $command ) = @_;
    return $command unless $node; # no node given, run it here
    my $me = `hostname`;
    chomp $me;
    return $command if $node eq $me;
    # inside '...' a single quote must be written as '\'' (close the quote,
    # escaped quote, reopen the quote)
    ( my $quoted = $command ) =~ s/'/'\\''/g;
    return "ssh $node '$quoted'";
}
# Force a node scan, even if the minimum scan time has not expired.
# Done by setting the 'force' flag to 1 for the duration of the call to scan.
# The flag is set with local, so the old value is put back when this sub
# ends, even if scan dies.
# Returns the value the force flag had before the call (as earlier versions did).
sub forceScan {
    my $save = $main::config->{'flags'}->{'force'};
    # was set to 0 before, which made a "forced" scan obey the time limit
    local $main::config->{'flags'}->{'force'} = 1;
    &main::scan();
    return $save;
}
# Executes command $command, then repeatedly runs virsh list
# on $scanNode, looking for $scanDomain in the list.
#    $command    - shell command to run (its output is discarded)
#    $scanNode   - node on which to run 'virsh list'
#    $scanDomain - domain name to look for (exact match on the name column)
#    $condition  - 1 (true) to wait for the domain to appear,
#                  0 (false) to wait for it to disappear
# Checks every $waitSeconds seconds, for at most 60 seconds in total.
# Prints a '. ' before each check as a progress indicator.
# Returns 1 when the condition was met, 0 if we gave up waiting.
sub executeAndWait {
    my ( $command, $scanNode, $scanDomain, $condition ) = @_;
    my $waitSeconds = 5; # number of seconds to wait before checking again
    my $maxIterations = 60 / $waitSeconds; # maximum number of tries
    print "Running [$command], then waiting $waitSeconds to check if complete\n" if $main::config->{'flags'}->{'debug'};
    `$command`;
    my $waitCommand = &makeCommand( $scanNode, 'virsh list' );
    my $running = 0;
    do {
        return 0 unless ( $maxIterations-- ); # we've waited too long, so probably not working
        print '. ';
        # was "sleep 1", which cut the intended 60 second limit down to 12
        sleep $waitSeconds;
        my $output = `$waitCommand`;
        print "[$waitCommand] returned [$output]\n" if $main::config->{'flags'}->{'debug'} > 1;
        # count lines whose name column is exactly the domain; a plain grep
        # would also match e.g. 'web2' when waiting for 'web'
        $running = grep { m/^\s*\d+\s+(\S+)/ && $1 eq $scanDomain } split( /\n/, $output );
    } until ( $condition ? $running : !$running );
    return 1; # made it successful
}
# Find the differences between two arrays (passed by reference).
# Used by domain.pm:list to find non-running domains for output.
#    $arr1 - array ref, the reference list
#    $arr2 - array ref, the list to compare against
# Both arrays are sorted IN PLACE (string sort), then walked side by side.
# The result holds every element of @$arr1 which is not in @$arr2, plus any
# element of @$arr2 which is missing from @$arr1 and sorts before the last
# element of @$arr1 (elements of @$arr2 left over once @$arr1 is used up
# are not reported).
# Returns an array ref with the differences, in sorted order.
sub diffArray {
    my ( $arr1, $arr2 ) = @_;
    my @result;
    @$arr1 = sort @$arr1;
    @$arr2 = sort @$arr2;
    my $i=0;
    my $j=0;
    while ( $i < @$arr1 ) {
        if ( $j >= @$arr2 ) {
            # nothing left to compare against, so the rest of @$arr1 is all
            # different. Without this check we compared against undef and
            # never advanced $i, looping forever
            push @result, @{$arr1}[ $i .. $#{$arr1} ];
            last;
        }
        if ( $arr1->[$i] eq $arr2->[$j] ) {
            $i++;
            $j++;
        } elsif ( $arr1->[$i] lt $arr2->[$j] ) {
            push @result, $arr1->[$i];
            $i++;
        } else {
            push @result, $arr2->[$j];
            $j++;
        }
    }
    return \@result;
}
# Fill $config with the default settings and write it to $filename as YAML.
#    $config   - hash ref which receives the defaults (modified in place;
#                keys not listed here are left alone)
#    $filename - file the configuration is written to
# All paths are derived from the directory the running script lives in
# ($FindBin::RealBin).
# NOTE(review): 'paranoid' is stored at the top level here, while findDomain
# reads it from the 'flags' hash - confirm which one is intended.
sub makeConfig {
    my ( $config, $filename ) = @_;
    my $scriptDir = $FindBin::RealBin;
    my $dbDir = $scriptDir . '/var';
    my %general = (
        'script dir'           => $scriptDir,
        'script name'          => $FindBin::Script,
        'db dir'               => $dbDir,
        'conf dir'             => $scriptDir . '/conf',
        'status db filename'   => $dbDir . '/status.yaml',
        'last scan filename'   => $scriptDir . '/var/lastscan',
        'min scan time'        => 5 * 60,           # five minutes
        'node reserved memory' => 8 * 1024 * 1024,  # 8 gigabytes, presumably counted in KiB - TODO confirm
        'node reserved vcpu'   => 0,                # turn off reserved vcpu
        'paranoid'             => 1,                # rescan all nodes on any action which will modify it
    );
    # 'help' and 'version' flags are used, but are not put in the config file
    my %flags = (
        'debug'   => 0,
        'dryrun'  => 1,
        'force'   => 0,
        'format'  => 'screen',
        'quiet'   => 0,
        'target'  => '',
        'verbose' => 1,
    );
    $config->{$_} = $general{$_} foreach keys %general;
    $config->{'flags'}->{$_} = $flags{$_} foreach keys %flags;
    return YAML::Tiny->new( $config )->write( $filename );
}
# Load the configuration from the YAML file $filename.
# Returns the first document of the file; when the file does not exist, the
# first document of an empty YAML::Tiny object is returned instead.
sub readConfig {
    my $filename = shift;
    my $document = -f $filename
        ? YAML::Tiny->read( $filename )
        : YAML::Tiny->new( {} );
    return $document->[0];
}
# Look up the total resources of a node in $main::statusDB.
#    $node - name of the node
# Returns a new hash ref with the keys 'memory' and 'cpu_count'; a value
# missing (undefined) in the node's entry is reported as 0.
# Dies if the node has no entry in the status DB.
sub resource {
    my $node = shift;
    my $nodeEntry = $main::statusDB->{'node'}->{$node};
    die "Can not find node $node in havirt.pm:resource\n" unless $nodeEntry;
    my %totals = map {
        ( $_, defined $nodeEntry->{$_} ? $nodeEntry->{$_} : 0 )
    } qw( memory cpu_count );
    return \%totals;
}
# Determine the resources still available on a node.
#    $node - name of the node
# Re-reads the status DB (no lock), takes the node's totals (see resource)
# and subtracts 'memory' and 'vcpu' of every domain recorded as running on
# the node. Domains without a recorded memory or vcpu value count as 0.
# Returns a hash ref with the keys 'memory' and 'cpu_count'.
# Dies if the node is not in the status DB.
sub getAvailableResources {
    my $node = shift;
    &readDB();
    die "Can not find node $node in havirt.pm:getAvailableResources\n" unless $main::statusDB->{'node'}->{$node};
    my $totalResources = &resource( $node );
    print Dumper( $totalResources ) if $main::config->{'flags'}->{'debug'};
    foreach my $domain ( keys %{ $main::statusDB->{'nodePopulation'}->{$node}->{'running'} } ) {
        # scan creates empty entries for newly seen domains, so these values
        # may be missing; treat them as 0 instead of subtracting undef
        my $virt = $main::statusDB->{'virt'}->{$domain} || {};
        $totalResources->{'memory'} -= $virt->{'memory'} || 0;
        $totalResources->{'cpu_count'} -= $virt->{'vcpu'} || 0;
    }
    return $totalResources;
}
# Validate that a node has enough resources for a list of domains.
#    first parameter is the node
#    rest of @_ is the list of domains which are to be placed on the node
# Starts from the resources currently available on the node, subtracts the
# configured 'node reserved memory' (and 'node reserved vcpu', if set), then
# subtracts memory and vcpu of each domain. Domains without recorded values
# count as 0.
# The vcpu check is only made when 'node reserved vcpu' is set in the config.
# Returns 0 on success, or one or more error messages in a string on failure.
sub validateResources {
    my $node = shift;
    &readDB();
    my @return;
    my $nodeResources = &getAvailableResources( $node );
    print "In havirt.pm:validateResources, checking if enough room on $node for\n" . join( "\n", @_ ) . "\n"
        if $main::config->{'flags'}->{'debug'};
    print "Checking resources on $node\n" if $main::config->{'flags'}->{'verbose'};
    my $reservedVcpu = $main::config->{'node reserved vcpu'};
    # subtract the reserved memory from the node
    $nodeResources->{'memory'} -= $main::config->{'node reserved memory'} || 0;
    $nodeResources->{'cpu_count'} -= $reservedVcpu if $reservedVcpu;
    # foreach instead of while/shift, so a false looking name does not end the list early
    foreach my $domain ( @_ ) {
        next unless defined $domain && length $domain;
        my $virt = $main::statusDB->{'virt'}->{$domain} || {};
        $nodeResources->{'memory'} -= $virt->{'memory'} || 0;
        $nodeResources->{'cpu_count'} -= $virt->{'vcpu'} || 0;
    }
    print "In havirt.pm:validateResources, $node will have $nodeResources->{memory} memory and $nodeResources->{cpu_count} vcpu's after task\n"
        if ( $main::config->{'flags'}->{'debug'} > 1 );
    push @return, "This action would result in memory of $nodeResources->{memory}" if $nodeResources->{'memory'} <= 0;
    # the setting lives at the top level of the config, not under 'flags';
    # looking under 'flags' meant this check could never trigger
    push @return, "This action would result in virtual cpu count of $nodeResources->{cpu_count}"
        if $nodeResources->{'cpu_count'} <= 0 && $reservedVcpu;
    return @return ? join( "\n", @return ) . "\n" : 0;
}
# Live migrate a domain from the node it is currently on to $target.
#    $virt   - name of the domain
#    $target - node the domain should be moved to
# Dies if the domain can not be found on any node, if the domain or the
# target node is in maintenance mode, or if the domain is already on $target.
# With the 'dryrun' flag set, nothing is executed and the command which would
# have been run is returned. Otherwise the command is executed, we wait for
# the domain to disappear from the source node, and a forced scan is run.
# Returns the command (dry run), 'Success' or a time out message, each
# followed by a newline.
sub migrate {
    my ( $virt, $target ) = @_;
    my $source = &main::findDomain( $virt );
    print Dumper( $main::statusDB->{'nodePopulation'} ) if $main::config->{'flags'}->{'debug'} > 2;
    die "I can not find $virt on any node\n" unless $source;
    die "Domain $virt in maintenance mode, can not migrate it\n" if $main::statusDB->{'virt'}->{$virt}->{'maintenance'};
    die "Node $target in maintenance mode, can not migrate anything to it\n" if $main::statusDB->{'node'}->{$target}->{'maintenance'};
    die "$virt already on $target\n" if $target eq $source;
    my $command = &main::makeCommand( $source, "virsh migrate --live --persistent --verbose $virt qemu+ssh://$target/system" );
    # dry run, so only show what would have been done
    return "$command\n" if $main::config->{'flags'}->{'dryrun'};
    # really do it, then wait for the domain to leave the source node
    my $finished = &main::executeAndWait( $command, $source, $virt, 0 );
    &main::forceScan();
    my $outcome = $finished ? 'Success' : 'Time Out waiting for shutdown';
    return "$outcome\n";
}