Subversion Repositories havirt

Rev

Rev 41 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

#!/usr/bin/env perl

# All functions related to maniplating/reporting on cluster
# part of havirt.

# Copyright 2024 Daily Data, Inc.
# 
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following 
# conditions are met:
#
#   Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
#   Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer 
#   in the documentation and/or other materials provided with the distribution.
#   Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived
#   from this software without specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
# THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# v0.0.1 20240602 RWR
# Initial setup
#
# v1.2.0 20240826 RWR
# Added some code to migrate domains if node placed in maintenance mode
# Added a lot of 'verbose' print lines, and modified for new flag structure
#
# v1.3.0 20250511 RWR
# Added balance function. If called, will attempt to balance a cluster so that the variance is lower than balance_max_variance 
# (new entry in config file). --dryrun will simply display the commands sent, and --nodryrun will execute them.



package cluster;

use warnings;
use strict;  

# define the version number
# see https://metacpan.org/pod/release/JPEACOCK/version-0.97/lib/version.pod
use version;
our $VERSION = version->declare("1.3.0");


use Data::Dumper;

use Exporter;

our @ISA = qw( Exporter );
our @EXPORT = qw( 
                  &list
                  &iscsi
                );

sub help {
   my @return;
   push @return, 'cluster status';
   push @return, "\t[--format|-f screen|tsv] - displays some stats on cluster resources used";
   push @return, 'cluster balance';
   push @return, "\tBalances resources by moving domains between nodes";
   push @return, 'cluster iscsi';
   push @return, "\tdisplays list of all iSCSI targets 'known' by system";
   push @return, 'cluster iscsi add ip-or-dns-name';
   push @return, "\tAdds iscsi target to system";
   push @return, 'cluster iscsi delete  ip-or-dns-name';
   push @return, "\tDelete iSCSI target processed by system. ip-or-dns-name MUST be exact";
   push @return, 'cluster iscsi update [node ...]';
   push @return, "\tPerforms an update to add new iSCSI targets on one or more nodes";
   push @return, "\tScans all iSCSI targets, looking for new shares on each, then performs";
   push @return, "\ta login, adding it to the node. DOES NOT delete old targets at this";
   push @return, "\ttime. If no nodes passed in, will perform function on all nodes not";
   push @return, "\tin maintenance mode";
   push @return, 'cluster balance';
   push @return, "\tAttempts to balance node memory usage by migrating domains to less used";
   push @return, "\tnodes. If a node is in maintenance mode, will attempt to move all domains";
   push @return, "\toff of it and balance them on the other nodes";

   return join( "\n", @return ) . "\n";
}

sub status {
   my $return = '';
   &main::readDB();
   my @header = ('Node','Threads','Memory','Domains','vcpu','mem_used', 'Status' );
   my @data;
   my $usedmem = 0;
   my $usedcpu = 0;
   my $availmem = 0;
   my $availcpu = 0;
   my $totalDomains = 0;
   my $maintenance = 0;
   foreach my $node (sort keys %{ $main::statusDB->{'node'} } ) {
      my $memory = 0;
      my $vcpus = 0;
      my $count = 0;
      foreach my $domain ( keys %{ $main::statusDB->{'nodePopulation'}->{$node}->{'running'} } ) {
         $memory += $main::statusDB->{'virt'}->{$domain}->{'memory'};
         $vcpus += $main::statusDB->{'virt'}->{$domain}->{'vcpu'};
         $count++;
      }
      push @data, [ $node,$main::statusDB->{'node'}->{$node}->{cpu_count},$main::statusDB->{'node'}->{$node}->{memory},$count,$vcpus,$memory, $main::statusDB->{'node'}->{$node}->{maintenance} ? 'Maintenance' : 'Online' ];
      $usedmem += $memory;
      $usedcpu += $vcpus;
      $totalDomains += $count;
      $availmem += $main::statusDB->{'node'}->{$node}->{memory};
      $availcpu += $main::statusDB->{'node'}->{$node}->{cpu_count};
      $maintenance += $main::statusDB->{'node'}->{$node}->{maintenance} ? 0 : 1;
   } # outer for
   push @data, [ 'Total',$availcpu,$availmem,$totalDomains,$usedcpu,$usedmem, $maintenance ];
   return &main::report( \@header, \@data );
}

# perform various functions on iSCSI target definitions
# on all nodes


sub iscsi {
   my $action = shift;
   my @return;
   if ( $action && $action eq 'add' ) {
      &main::readDB(1);
      while ( my $target = shift ) {
         $main::statusDB->{'cluster'}->{'iscsi'}->{$target} = '';
      }
      &main::writeDB();
   } elsif ( $action && $action eq 'delete' ) {
      my $target = shift;
      &main::readDB(1);
      delete $main::statusDB->{'cluster'}->{'iscsi'}->{$target} if exists $main::statusDB->{'cluster'}->{'iscsi'}->{$target};
      &main::writeDB();
   } elsif ( $action && $action eq 'update' ) {
      &main::readDB();
      # if they did not give us a node, do all of them
      @_ = keys %{ $main::statusDB->{'node'} } unless @_;
      while ( my $node = shift ) { # process each node on stack
         if ( $main::statusDB->{'node'}->{$node}->{'maintenance'} ) {
            print "Not processing node $node since it is in maintenance mode\n" if $main::config->{'flags'}->{'verbose'};
         } else { # actually do the work
            push @return, &updateISCITargets( $node );
         }
      } # while
   }
   &main::readDB();
   push @return, "iSCSI targets are";
   if ( $main::statusDB->{'cluster'}->{'iscsi'} ) {
      push @return, join( "\n",  keys %{ $main::statusDB->{'cluster'}->{'iscsi'} } );
   } else {
      push @return, "None Defined";
   }
   return join( "\n", @return ) . "\n";
}

# updates iSCSI targets on $node
# scans each target defined and compares it to the current session
# adding new targets if they exist
# NOTE: does not delete targets which no longer exist on server
sub updateISCITargets {
   my $node = shift;
   my $command;
   my %targets;
   my @return;
   push @return, "Processing iSCSI targets on $node";
   print Dumper( keys %{ $main::statusDB->{'cluster'}->{'iscsi'} } ) if $main::config->{'flags'}->{'debug'};
   foreach my $server (keys %{ $main::statusDB->{'cluster'}->{'iscsi'} } ) {
      print "\n" . '-'x40 . "\nGetting targets on server $server\n" . '-'x40 . "\n" if $main::config->{'flags'}->{'verbose'};
      $command = &main::makeCommand( $node, "iscsiadm -m discovery -t st -p $server" );
      my @list = `$command`;
      chomp @list;
      # @list contains lines of type
      # 10.19.209.2:3260,1 iqn.2014-11.net.dailydata.castor:simon0
      # split them apart and add them to the hash
      foreach my $entry ( @list ) {
         my ( $portal, $targetName ) = split( ' ', $entry );
         # $portal has some extra info after a comma, so clean it up
         $portal =~ m/^([0-9:.]+)/;
         $portal = $1;
         # some targets return multiple IP's for a given name, so 
         # only add them if they are in this IP
         $targets{ $targetName } = $portal if $portal =~ m/^$server/;
         print "$targetName\t$targets{ $targetName }\n" if $main::config->{'flags'}->{'verbose'};
      } # foreach
   } # while
   print "\n" . '-'x40 . "\nGetting active sessions\n". '-'x40 . "\n" if $main::config->{'flags'}->{'verbose'};
   # now, get active sessions so we can filter them
   $command = &main::makeCommand( $node, "iscsiadm -m session" );
   my @activeSessions = `$command`;;
   chomp @activeSessions;
   foreach my $session ( @activeSessions ) {
      $session =~ m/^.*[^0-9:.]([0-9,:.]+).*(iqn\S*)/;
      my ( $portal,$targetName ) = ( $1,$2 );
      print "$portal\t$targetName" if $main::config->{'flags'}->{'verbose'};
      if ( exists( $targets{$targetName} ) ) {
         print "\tNOT updating\n" if $main::config->{'flags'}->{'verbose'};
         delete $targets{ $targetName };
      } else {
         print "Needs to be added\n" if $main::config->{'flags'}->{'verbose'};
      }
   }

   # check if we have any new entries and bail if not
   if ( scalar keys %targets ) {
      # We have new entries, so run them;
      foreach my $targetName ( sort keys %targets ) {
         my $portal = $targets{$targetName};
         push @return, "Adding $targetName";
         $command = &main::makeCommand( $node, "iscsiadm -m node --targetname '$targetName' --portal '$portal' --login" );
         if ( $main::config->{'flags'}->{'dryrun'} ) {
            push @return, $command;
         } else {
          `$command`;
         }
      }
   } else {
      push @return, "No new entries";
   }
   return join( "\n", @return ) . "\n";
} # updateISCITargets

# calculate stats about the cluster, including the amount of memory/cpu used, the standard deviation
# and variance. Used mainly to balance cluster
sub getClusterStats {
   my $return = {};
   $return->{'cluster'}->{'memory'} = 0;
   $return->{'cluster'}->{'used_memory'} = 0;
   $return->{'cluster'}->{'count'} = 0;
   $return->{'cluster'}->{'used_vcpu'} = 0;
   $return->{'cluster'}->{'domain_count'} = 0;
   foreach my $node (sort keys %{ $main::statusDB->{'node'} } ) {
      # only count nodes which are not in maintenance as part of the cluster towards total memory available
      if ( ! $main::statusDB->{'node'}->{$node}->{'maintenance'} ) {
         $return->{'cluster'}->{'memory'} += $main::statusDB->{'node'}->{$node}->{'memory'};
         $return->{'cluster'}->{'vcpu'} += $main::statusDB->{'node'}->{$node}->{'cpu_count'};
         $return->{'cluster'}->{'count'}++;
      } else {
         $return->{'node'}->{$node}->{'maintenance'} = 1;
      }
      $return->{'node'}->{$node}->{'memory'} = $main::statusDB->{'node'}->{$node}->{'memory'};
      $return->{'node'}->{$node}->{'vcpu'} = $main::statusDB->{'node'}->{$node}->{'cpu_count'};
      $return->{'node'}->{$node}->{'used_memory'} = 0;
      $return->{'node'}->{$node}->{'count'} = 0;
      $return->{'node'}->{$node}->{'used_vcpu'} = 0;
      # get individual stats for every domain on the node
      foreach my $domain ( keys %{ $main::statusDB->{'nodePopulation'}->{$node}->{'running'} } ) {
         # track used memory, and count
         $return->{'node'}->{$node}->{'used_memory'} += $main::statusDB->{'virt'}->{$domain}->{'memory'};
         $return->{'node'}->{$node}->{'used_vcpu'} += $main::statusDB->{'virt'}->{$domain}->{'vcpu'};
         $return->{'node'}->{$node}->{'count'}++;
      }
      # calculate the average memory used in the node
      $return->{'node'}->{$node}->{'average_memory'} = $return->{'node'}->{$node}->{'used_memory'} / 
         (
            $main::statusDB->{'node'}->{$node}->{'maintenance'} ? 0.0001 : $main::statusDB->{'node'}->{$node}->{'memory'}
         );
      # add the used memory to the cluster
      $return->{'cluster'}->{'used_memory'} += $return->{'node'}->{$node}->{'used_memory'};
      $return->{'cluster'}->{'used_vcpu'} += $return->{'node'}->{$node}->{'used_vcpu'};
      $return->{'cluster'}->{'domain_count'} += $return->{'node'}->{$node}->{'count'};
   }
   # calculate the deviation for each active node in the cluster
   $return->{'cluster'}->{'average_memory'} = $return->{'cluster'}->{'used_memory'} / $return->{'cluster'}->{'memory'};
   
   # get the deviation for each node
   # variance in the cluster is simply the average of all deviations
   $return->{'cluster'}->{'variance'} = 0;
   foreach my $node (sort keys %{ $main::statusDB->{'node'} } ) {
      # deviation is the square of the difference between this node and the cluster overall
      $return->{'node'}->{$node}->{'deviation'} = (
         $return->{'node'}->{$node}->{'average_memory'} / $return->{'cluster'}->{'average_memory'} 
         ) ** 2;
      # we'll divide by number of active nodes after the loop
      $return->{'cluster'}->{'variance'} += $return->{'node'}->{$node}->{'deviation'};
   }
   $return->{'cluster'}->{'variance'} /= $return->{'cluster'}->{'count'};
   # now, determine how much memory needs to be added (plus) or removed (minus) for each node
   # memory_needed is calculated by taking the total amount of memory and multiplying it by the cluster average memory
   # then subtracting whatever is already used
   foreach my $node (sort keys %{ $main::statusDB->{'node'} } ) {
      if ( $main::statusDB->{'node'}->{$node}->{'maintenance'} ) {
         $return->{'node'}->{$node}->{'memory_needed'} = -1 * $return->{'node'}->{$node}->{'used_memory'};
      } else {
         $return->{'node'}->{$node}->{'memory_needed'} = int (
            ( $return->{'node'}->{$node}->{'memory'} * $return->{'cluster'}->{'average_memory'} ) -
            $return->{'node'}->{$node}->{'used_memory'} 
            );
      }
   }
   return $return;
}

sub humanReadable {
   my ( $value, $preferredUnits ) = @_;
   $value *= 1024;
   my @units =  ( '', 'k', 'M', 'G', 'T' );
   $preferredUnits = $units[-1] unless $preferredUnits;
   my $unit = 0;
   while ( $unit < @units && abs($value) > 1023 && lc $units[$unit] ne lc $preferredUnits ) {
      $unit++;
      $value /= 1024;
   }
   return sprintf( '%d%s', $value+0.5, $units[$unit] );
}


sub percent {
   my ($value, $accuracy) = @_;
   $accuracy = 0 unless $accuracy;
   return sprintf( '%2.' . $accuracy . 'f%%', $value * 100)
}

# Creates a balance report to show the user what went on
# $cluster is a hash created by sub getClusterStats, and possibly modified by
# the calling process
sub showBalanceReport {
   my $stats = shift;
   #die Dumper( $stats ) . "\n";
   my @header = ('Node','Threads','Memory','Domains','vcpu_alloc','mem_alloc', 'mem_needed', 'vcpu%', 'mem%', 'Status', 'StdDev' );
   my @data;
   foreach my $node ( sort keys %{ $stats->{'node'} } ) {
      push @data, [
         $node, 
         $stats->{'node'}->{$node}->{'vcpu'},
         &humanReadable( $stats->{'node'}->{$node}->{'memory'} ),
         $stats->{'node'}->{$node}->{'count'},
         $stats->{'node'}->{$node}->{'used_vcpu'},
         &humanReadable( $stats->{'node'}->{$node}->{'used_memory'} ),
         &humanReadable( $stats->{'node'}->{$node}->{'memory_needed'} ),
         &percent( $stats->{'node'}->{$node}->{'used_vcpu'} / $stats->{'node'}->{$node}->{'vcpu'} ),
         &percent( $stats->{'node'}->{$node}->{'used_memory'} / $stats->{'node'}->{$node}->{'memory'} ),
         $stats->{'node'}->{$node}->{'maintenance'} ? 'Maintenance' : '',
         $stats->{'node'}->{$node}->{'deviation'} < 1000 ? sprintf( "%2.2f", $stats->{'node'}->{$node}->{'deviation'} ) : 'undef'
      ];
   }
   push @data, [
         'All', 
         $stats->{'cluster'}->{'vcpu'},
         &humanReadable( $stats->{'cluster'}->{'memory'} ),
         $stats->{'cluster'}->{'domain_count'},
         $stats->{'cluster'}->{'used_vcpu'},
         &humanReadable( $stats->{'cluster'}->{'used_memory'} ),
         '',
         &percent( $stats->{'cluster'}->{'used_vcpu'} / $stats->{'cluster'}->{'vcpu'} ),
         &percent( $stats->{'cluster'}->{'used_memory'} / $stats->{'cluster'}->{'memory'} ),
         '',
         ''
      ];
   return &main::report( \@header, \@data ) . "Variance " . 
      ( $stats->{'cluster'}->{'variance'} < 100 ? sprintf( "%2.2f", $stats->{'cluster'}->{'variance'} + .005 ) : "undef" ) . "\n\n";
}

# simulates performing migrations. Simply moves entries from $from to $to in $main::statusDB->{'nodePopulation'}
sub doActions {
   my $actions = shift;
   my $return;
   for ( my $i = 0; $i < @$actions; $i++ ) {
      my ($domain, $source, $target, $size ) = split( "\t", $actions->[$i] );
      $return .= &main::migrate( $domain, $target, $source );
      delete $main::statusDB->{'nodePopulation'}->{$source}->{'running'}->{$domain};
      $main::statusDB->{'nodePopulation'}->{$target}->{'running'}->{$domain} = time;
   }
   &main::forceScan() unless $main::config->{'flags'}->{'dryrun'} || $main::config->{'flags'}->{'testing'};
   return $return;
}

# attempt to balance the domains on the active (maintenance = false) nodes
# basically, we take what is currently working, and calculate the variance
# of it (see https://en.wikipedia.org/wiki/Standard_deviation). If that is
# over about a 10, we move things around, if possible, then check our variance
# again.
sub balance {
   &main::readDB();
   my $return;
   # get the current cluster status
   my $cluster = &getClusterStats();
   #die Dumper( $cluster ) . "\n";
   # show user what it looks like at first
   print "=== Starting Status ===\n\n" . &showBalanceReport( $cluster) unless $main::config->{'flags'}->{'quiet'};
   # we will do a loop to get the variance within our preferred range ($main::config->{ 'balance variance'})
   # however, we will only do a maximum number of iterations ($main::config->{ 'balance maxiterations'})
   my $iterations = defined $main::config->{ 'balance_max_iterations'} && $main::config->{ 'balance_max_iterations'} ? $main::config->{ 'balance_max_iterations'} : 10;
   $main::config->{ 'balance_max_variance'} = 1.1 unless defined $main::config->{ 'balance_max_variance'};
   # continue until our variance is where we want it, or we have tried too many times.
   while ( $iterations-- && $cluster->{'cluster'}->{'variance'} > $main::config->{ 'balance_max_variance'} ) {
      my $actions = &moveThings( $cluster );
      $return .= &doActions( $actions );
      
      #print Dumper( $actions ) . "\n"; die;
      # rerun stats
      $cluster = &getClusterStats();
      print &showBalanceReport( $cluster) if $main::config->{'flags'}->{'verbose'} > 1;
   }
   print "=== Ending Status ===\n\n" . &showBalanceReport( $cluster) unless $main::config->{'flags'}->{'quiet'};
   return $return;
} # balance

# finds node which needs to lose ($from) and gain ($to) the most. Then, goes through $from and finds the largest
# domain which will fit on $to until exhausted.
# as each domain is found, appends to $actions (array pointer). The format of each entry is a tab separated
# list of domain name, node from, node to, domain size
# returns the modified $actions
sub moveThings {
   my $stats = shift;
   
   my $actions = [];
   # find largest and smallest node differences
   my $transfer;
   my $from = '';
   my $to = '';
   # find smallest and largest "memory needed" in group. Note that if a node has too much, the number is negative and
   # for too little (ie, needs additional), the number is positive
   foreach my $node (keys %{$stats->{'node'} } ) {
      #print "Checking $node\n";
      if ( $from ) {
         $from = $node if $stats->{'node'}->{$from}->{'memory_needed'} > $stats->{'node'}->{$node}->{'memory_needed'};
         $to = $node if $stats->{'node'}->{$to}->{'memory_needed'} < $stats->{'node'}->{$node}->{'memory_needed'};
      } else { # just initialize everything to this node
         $from = $to = $node;
      } #if .. else
   } # foreach
   # this is a poor mans min. we want to transfer the least number of bytes, ie what $from can spare, or what $to can accept
   # we need the smallest of what $from can give and $to can accept
   $transfer = abs( abs( $stats->{'node'}->{$from}->{'memory_needed'} ) > abs( $stats->{'node'}->{$to}->{'memory_needed'} ) ?
               $stats->{'node'}->{$to}->{'memory_needed'} : $stats->{'node'}->{$from}->{'memory_needed'} );
   # die "Transfer " . &humanReadable($transfer) ." bytes from $from to $to\n";
   
   # get array of domains running on $from, sorted by the size of the domain (descending, ie largest on top )
   # basically, get all keys from $main::statusDB->{'nodePopulation'}->{$from}->{'running'}, then sort them by looking them
   # up in $main::statusDB->{'virt'} and retrieving the amount of RAM
   my @sortedDomains = sort
      {
         $main::statusDB->{'virt'}->{$b}->{'memory'} <=> $main::statusDB->{'virt'}->{$a}->{'memory'}
      } keys %{ $main::statusDB->{'nodePopulation'}->{$from}->{'running'} };
   # now, "move" (fake move) largest domain that will fit into $to, and repeat until we can not do it anymore
   while ( $transfer ) {
      my $thisDomain = shift @sortedDomains;
      last unless $thisDomain; # we ran out of domains
      next unless $main::statusDB->{'virt'}->{$thisDomain}->{'memory'} <= $transfer;
      push @$actions, join( "\t", ( $thisDomain, $from, $to, $main::statusDB->{'virt'}->{$thisDomain}->{'memory'} ) );
      $transfer -= $main::statusDB->{'virt'}->{$thisDomain}->{'memory'};
   }
      
   return $actions;
}