Subversion Repositories havirt

Rev

Rev 46 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
8 rodolico 1
#!/usr/bin/env perl
2
 
3
# All functions related to manipulating/reporting on cluster
4
# part of havirt.
5
 
6
# Copyright 2024 Daily Data, Inc.
7
# 
8
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following 
9
# conditions are met:
10
#
11
#   Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
12
#   Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer 
13
#   in the documentation and/or other materials provided with the distribution.
14
#   Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived
15
#   from this software without specific prior written permission.
16
# 
17
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
18
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
19
# THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
22
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23
 
24
# v0.0.1 20240602 RWR
25
# Initial setup
26 rodolico 26
#
27
# v1.2.0 20240826 RWR
28
# Added some code to migrate domains if node placed in maintenance mode
29
# Added a lot of 'verbose' print lines, and modified for new flag structure
30
#
42 rodolico 31
# v1.3.0 20250511 RWR
32
# Added balance function. If called, will attempt to balance a cluster so that the variance is lower than balance_max_variance 
33
# (new entry in config file). --dryrun will simply display the commands sent, and --nodryrun will execute them.
44 rodolico 34
#
35
# v1.3.0 20250514 RWR
36
# Modified so it will not issue error codes if we have done iterations but not perfectly in balance. Also, added message
37
# 'already in balance' if it is as good as we can get it.
47 rodolico 38
#
39
# v1.3.2 20260102 RWR
40
# Refactored all command executions to use centralized execute() function
41
# Updated updateISCITargets function to use execute() for all iscsiadm commands
8 rodolico 42
 
26 rodolico 43
 
42 rodolico 44
 
8 rodolico 45
package cluster;
46
 
47
use warnings;
48
use strict;  
49
 
50
# define the version number
51
# see https://metacpan.org/pod/release/JPEACOCK/version-0.97/lib/version.pod
52
use version;
47 rodolico 53
our $VERSION = version->declare("1.3.2");
8 rodolico 54
 
55
 
56
use Data::Dumper;
57
 
58
use Exporter;
59
 
60
our @ISA = qw( Exporter );
61
our @EXPORT = qw( 
62
                  &list
26 rodolico 63
                  &iscsi
8 rodolico 64
                );
65
 
11 rodolico 66
# help
# Returns the usage text for the 'cluster' command group as a single,
# newline terminated string. Command lines start in column 0, their
# descriptions are indented with a tab.
# Takes no parameters.
sub help {
   my @return = (
      'cluster status',
      "\t[--format|-f screen|tsv] - displays some stats on cluster resources used",
      # 'cluster balance' used to be listed twice (a short entry here and a
      # long one at the end); it is now listed once, with the full description
      'cluster balance',
      "\tAttempts to balance node memory usage by migrating domains to less used",
      "\tnodes. If a node is in maintenance mode, will attempt to move all domains",
      "\toff of it and balance them on the other nodes",
      'cluster iscsi',
      "\tdisplays list of all iSCSI targets 'known' by system",
      'cluster iscsi add ip-or-dns-name',
      "\tAdds iscsi target to system",
      'cluster iscsi delete  ip-or-dns-name',
      "\tDelete iSCSI target processed by system. ip-or-dns-name MUST be exact",
      'cluster iscsi update [node ...]',
      "\tPerforms an update to add new iSCSI targets on one or more nodes",
      "\tScans all iSCSI targets, looking for new shares on each, then performs",
      "\ta login, adding it to the node. DOES NOT delete old targets at this",
      "\ttime. If no nodes passed in, will perform function on all nodes not",
      "\tin maintenance mode",
   );

   return join( "\n", @return ) . "\n";
}
91
 
46 rodolico 92
# status
# Builds a per-node usage report for the whole cluster.
# Re-reads the status database, then emits one row per node (sorted by
# name) holding:
#   - node name, thread count (cpu_count) and total memory
#   - number of running domains on the node
#   - vcpu's and memory summed over those running domains
#   - 'Maintenance' or 'Online'
# followed by a 'Total' row. The last column of the 'Total' row is the
# number of nodes which are NOT in maintenance mode.
# Returns whatever &main::report produces for the header and rows.
sub status {
   &main::readDB();
   my @columns = ( 'Node', 'Threads', 'Memory', 'Domains', 'vcpu', 'mem_used', 'Status' );
   my @rows;
   # running totals for the summary row
   my %total = ( 'threads' => 0, 'memory' => 0, 'domains' => 0, 'vcpu' => 0, 'mem_used' => 0, 'online' => 0 );
   foreach my $node ( sort keys %{ $main::statusDB->{'node'} } ) {
      my $nodeInfo = $main::statusDB->{'node'}->{$node};
      my @running = keys %{ $main::statusDB->{'nodePopulation'}->{$node}->{'running'} };
      my ( $memUsed, $vcpuUsed ) = ( 0, 0 );
      foreach my $domain ( @running ) {
         $memUsed += $main::statusDB->{'virt'}->{$domain}->{'memory'};
         $vcpuUsed += $main::statusDB->{'virt'}->{$domain}->{'vcpu'};
      }
      my $domainCount = scalar @running;
      push @rows, [
         $node,
         $nodeInfo->{cpu_count},
         $nodeInfo->{memory},
         $domainCount,
         $vcpuUsed,
         $memUsed,
         $nodeInfo->{maintenance} ? 'Maintenance' : 'Online'
      ];
      $total{'mem_used'} += $memUsed;
      $total{'vcpu'} += $vcpuUsed;
      $total{'domains'} += $domainCount;
      $total{'memory'} += $nodeInfo->{memory};
      $total{'threads'} += $nodeInfo->{cpu_count};
      # only nodes which are not in maintenance are counted here
      $total{'online'} += $nodeInfo->{'maintenance'} ? 0 : 1;
   }
   push @rows, [ 'Total', $total{'threads'}, $total{'memory'}, $total{'domains'}, $total{'vcpu'}, $total{'mem_used'}, $total{'online'} ];
   return &main::report( \@columns, \@rows );
}
26 rodolico 132
 
133
# iscsi
# Maintains the cluster wide list of iSCSI targets and pushes it to nodes.
#
# Parameters
#   $action - optional, one of
#      'add'    - every remaining parameter is stored as a target
#      'delete' - the first remaining parameter is removed from the targets
#      'update' - remaining parameters are node names; each one which is not
#                 in maintenance mode is processed by updateISCITargets. With
#                 no names, all known nodes are processed (sorted by name)
#      anything else (or nothing) only lists the targets
#
# Returns a newline terminated string: any output produced by 'update',
# followed by 'iSCSI targets are' and either the sorted list of targets, one
# per line, or 'None Defined'.
#
# NOTE(review): readDB(1) is always paired with writeDB() here, presumably
# lock then save - confirm against main.
sub iscsi {
   my ( $action, @args ) = @_;
   $action = '' unless defined $action;
   my @return;
   if ( $action eq 'add' ) {
      &main::readDB(1);
      # ignore undefined/empty names instead of stopping at the first one
      foreach my $target ( grep { defined $_ && length $_ } @args ) {
         $main::statusDB->{'cluster'}->{'iscsi'}->{$target} = '';
      }
      &main::writeDB();
   } elsif ( $action eq 'delete' ) {
      my $target = shift @args;
      &main::readDB(1);
      # deleting a key which is not there is a no-op, so only guard against
      # being called without a target at all
      delete $main::statusDB->{'cluster'}->{'iscsi'}->{$target} if defined $target;
      &main::writeDB();
   } elsif ( $action eq 'update' ) {
      &main::readDB();
      # if they did not give us a node, do all of them
      my @nodes = @args ? @args : sort keys %{ $main::statusDB->{'node'} };
      foreach my $node ( @nodes ) {
         if ( $main::statusDB->{'node'}->{$node}->{'maintenance'} ) {
            print "Not processing node $node since it is in maintenance mode\n" if $main::config->{'flags'}->{'verbose'};
         } else { # actually do the work
            push @return, &updateISCITargets( $node );
         }
      }
   }
   &main::readDB();
   push @return, "iSCSI targets are";
   # the hash can exist but be empty (after the last target is deleted), so
   # test for entries, not just for the presence of the hash
   my $known = $main::statusDB->{'cluster'}->{'iscsi'};
   if ( ref $known eq 'HASH' && %$known ) {
      push @return, join( "\n", sort keys %$known );
   } else {
      push @return, "None Defined";
   }
   return join( "\n", @return ) . "\n";
}
172
 
173
# updateISCITargets
# Updates the iSCSI targets on $node.
#   1. runs a sendtargets discovery against every server stored in
#      $main::statusDB->{'cluster'}->{'iscsi'} and collects target => portal
#   2. removes every target which already shows up in 'iscsiadm -m session'
#   3. logs in to whatever is left (with the dryrun flag set, the command is
#      added to the returned text instead of being executed)
# NOTE: does not delete targets which no longer exist on server
#
# Parameter: $node - name of node to run the commands on (via main::makeCommand)
# Returns:   newline terminated string describing what was done
#
# NOTE(review): a discovered portal is only accepted if it starts with the
# server string as stored, so a server stored by DNS name will presumably
# never match the numeric portal iscsiadm reports - confirm.
sub updateISCITargets {
   my $node = shift;
   my $command;
   my %targets;
   my @return;
   my $verbose = $main::config->{'flags'}->{'verbose'};
   push @return, "Processing iSCSI targets on $node";
   my @servers = sort keys %{ $main::statusDB->{'cluster'}->{'iscsi'} };
   print Dumper( @servers ) if $main::config->{'flags'}->{'debug'};
   foreach my $server ( @servers ) {
      print "\n" . '-'x40 . "\nGetting targets on server $server\n" . '-'x40 . "\n" if $verbose;
      $command = &main::makeCommand( $node, "iscsiadm -m discovery -t st -p $server" );
      my @list = &main::execute($command);
      chomp @list;
      # @list contains lines of type
      # 10.19.209.2:3260,1 iqn.2014-11.net.dailydata.castor:simon0
      # split them apart and add them to the hash
      foreach my $entry ( @list ) {
         my ( $portal, $targetName ) = split( ' ', $entry );
         # blank lines and anything which is not "portal target" are ignored
         next unless defined $portal && defined $targetName;
         # $portal has some extra info after a comma, so clean it up. Skip the
         # line if it does not start with an address, rather than reusing the
         # capture of an earlier match
         next unless $portal =~ m/^([0-9:.]+)/;
         $portal = $1;
         # some targets return multiple IP's for a given name, so
         # only add them if they are in this IP. The server is quoted so '.'
         # is literal, and must not be followed by another digit so that
         # 10.1.1.1 does not also accept 10.1.1.10
         next unless $portal =~ m/^\Q$server\E(?![0-9])/;
         $targets{ $targetName } = $portal;
         print "$targetName\t$targets{ $targetName }\n" if $verbose;
      } # foreach entry
   } # foreach server
   print "\n" . '-'x40 . "\nGetting active sessions\n". '-'x40 . "\n" if $verbose;
   # now, get active sessions so we can filter them
   $command = &main::makeCommand( $node, "iscsiadm -m session" );
   my @activeSessions = &main::execute($command);
   chomp @activeSessions;
   foreach my $session ( @activeSessions ) {
      # ignore lines which do not describe a session
      # NOTE(review): because of the greedy prefix, the first capture can be
      # just the digits after the comma; it is only used for verbose output
      next unless $session =~ m/^.*[^0-9:.]([0-9,:.]+).*(iqn\S*)/;
      my ( $portal,$targetName ) = ( $1,$2 );
      print "$portal\t$targetName" if $verbose;
      if ( exists( $targets{$targetName} ) ) {
         # already logged in, nothing to do for this one
         print "\tNOT updating\n" if $verbose;
         delete $targets{ $targetName };
      } else {
         print "Needs to be added\n" if $verbose;
      }
   }

   # check if we have any new entries and bail if not
   if ( %targets ) {
      # We have new entries, so run them;
      foreach my $targetName ( sort keys %targets ) {
         my $portal = $targets{$targetName};
         push @return, "Adding $targetName";
         $command = &main::makeCommand( $node, "iscsiadm -m node --targetname '$targetName' --portal '$portal' --login" );
         if ( $main::config->{'flags'}->{'dryrun'} ) {
            push @return, $command;
         } else {
            &main::execute($command);
         }
      }
   } else {
      push @return, "No new entries";
   }
   return join( "\n", @return ) . "\n";
} # updateISCITargets
238
 
42 rodolico 239
# getClusterStats
# Calculates stats about the cluster from $main::statusDB. Used mainly to
# balance the cluster. Takes no parameters.
#
# Returns a hashref with two keys
#   'cluster' - memory, vcpu, count (all three only over nodes NOT in
#               maintenance), used_memory, used_vcpu, domain_count (over all
#               nodes), average_memory (used_memory / memory), variance and
#               activeNodes
#   'node'    - one entry per node with memory, vcpu, used_memory, used_vcpu,
#               count, average_memory, deviation, memory_needed and, only for
#               nodes in maintenance, maintenance => 1
#
# memory_needed is positive if the node should receive domains, negative if
# it should give some up. For a node in maintenance it is minus everything
# it is using.
#
# Dies with "No active nodes in cluster\n" if every node is in maintenance
# mode (or there are no nodes).
sub getClusterStats {
   # divisor used when a node has no usable memory (maintenance mode, or no
   # memory recorded), so any use at all shows up as a massive overload
   my $noMemory = 0.0001;
   my $return = {};
   $return->{'cluster'}->{$_} = 0 foreach qw( memory vcpu used_memory count used_vcpu domain_count );
   foreach my $node (sort keys %{ $main::statusDB->{'node'} } ) {
      my $nodeInfo = $main::statusDB->{'node'}->{$node};
      my $nodeStats = {};
      $return->{'node'}->{$node} = $nodeStats;
      # only count nodes which are not in maintenance as part of the cluster towards total memory available
      if ( ! $nodeInfo->{'maintenance'} ) {
         $return->{'cluster'}->{'memory'} += $nodeInfo->{'memory'} || 0;
         $return->{'cluster'}->{'vcpu'} += $nodeInfo->{'cpu_count'} || 0;
         $return->{'cluster'}->{'count'}++;
      } else {
         $nodeStats->{'maintenance'} = 1;
      }
      $nodeStats->{'memory'} = $nodeInfo->{'memory'};
      $nodeStats->{'vcpu'} = $nodeInfo->{'cpu_count'};
      $nodeStats->{'used_memory'} = 0;
      $nodeStats->{'count'} = 0;
      $nodeStats->{'used_vcpu'} = 0;
      # get individual stats for every domain on the node
      foreach my $domain ( keys %{ $main::statusDB->{'nodePopulation'}->{$node}->{'running'} } ) {
         # track used memory, and count
         $nodeStats->{'used_memory'} += $main::statusDB->{'virt'}->{$domain}->{'memory'} || 0;
         $nodeStats->{'used_vcpu'} += $main::statusDB->{'virt'}->{$domain}->{'vcpu'} || 0;
         $nodeStats->{'count'}++;
      }
      # fraction of the node's memory which is in use. A node without
      # recorded memory is treated like one in maintenance instead of
      # dividing by zero
      my $usable = $nodeInfo->{'maintenance'} ? 0 : ( $nodeInfo->{'memory'} || 0 );
      $nodeStats->{'average_memory'} = $nodeStats->{'used_memory'} / ( $usable || $noMemory );
      # add the used memory to the cluster
      $return->{'cluster'}->{'used_memory'} += $nodeStats->{'used_memory'};
      $return->{'cluster'}->{'used_vcpu'} += $nodeStats->{'used_vcpu'};
      $return->{'cluster'}->{'domain_count'} += $nodeStats->{'count'};
   }
   # activeNodes is the number of nodes not in maintenance mode. Check it
   # before doing any math which depends on there being some
   $return->{'cluster'}->{'activeNodes'} = $return->{'cluster'}->{'count'};
   die "No active nodes in cluster\n" unless $return->{'cluster'}->{'activeNodes'};

   # fraction of the (active) cluster memory which is in use
   $return->{'cluster'}->{'average_memory'} = $return->{'cluster'}->{'memory'}
      ? $return->{'cluster'}->{'used_memory'} / $return->{'cluster'}->{'memory'}
      : 0;

   # get the deviation for each node
   # variance in the cluster is the sum of all deviations divided by the
   # number of active nodes
   $return->{'cluster'}->{'variance'} = 0;
   foreach my $node (sort keys %{ $main::statusDB->{'node'} } ) {
      # deviation is the square of the RATIO between this node's usage and
      # the cluster's, so a node loaded exactly like the cluster scores 1.
      # If nothing is in use anywhere there is nothing to compare (and
      # nothing to balance), so every node scores 0
      # NOTE(review): a squared ratio, not a squared difference, so this is
      # not a textbook variance - confirm it is what is intended
      $return->{'node'}->{$node}->{'deviation'} = $return->{'cluster'}->{'average_memory'}
         ? ( $return->{'node'}->{$node}->{'average_memory'} / $return->{'cluster'}->{'average_memory'} ) ** 2
         : 0;
      $return->{'cluster'}->{'variance'} += $return->{'node'}->{$node}->{'deviation'};
   }
   $return->{'cluster'}->{'variance'} /= $return->{'cluster'}->{'activeNodes'};
   # now, determine how much memory needs to be added (plus) or removed (minus) for each node
   # memory_needed is calculated by taking the total amount of memory and multiplying it by the cluster average memory
   # then subtracting whatever is already used
   foreach my $node (sort keys %{ $main::statusDB->{'node'} } ) {
      my $nodeStats = $return->{'node'}->{$node};
      if ( $main::statusDB->{'node'}->{$node}->{'maintenance'} ) {
         $nodeStats->{'memory_needed'} = -1 * $nodeStats->{'used_memory'};
      } else {
         $nodeStats->{'memory_needed'} = int (
            ( ( $nodeStats->{'memory'} || 0 ) * $return->{'cluster'}->{'average_memory'} ) -
            $nodeStats->{'used_memory'}
            );
      }
   }
   return $return;
}
314
 
315
# humanReadable
# Formats a size as a rounded integer followed by a unit ('', k, M, G or T).
#
# Parameters
#   $value          - the size. It is multiplied by 1024 before scaling, so
#                     presumably it is given in kibibytes (1 comes out as
#                     '1k') - TODO confirm against callers. May be negative
#   $preferredUnits - optional, largest unit to scale to (case insensitive),
#                     defaults to the largest known unit. Anything which is
#                     not a known unit behaves like the default
#
# Scaling stops as soon as the magnitude drops to 1023 or below, the
# preferred unit is reached, or there are no larger units left.
# Returns the formatted string, e.g. '2M' or '-3M'.
sub humanReadable {
   my ( $value, $preferredUnits ) = @_;
   $value = 0 unless defined $value;
   $value *= 1024;
   my @units =  ( '', 'k', 'M', 'G', 'T' );
   $preferredUnits = $units[-1] unless $preferredUnits;
   my $unit = 0;
   # stop on the last unit, so we never index past the end of @units
   while ( $unit < $#units && abs($value) > 1023 && lc $units[$unit] ne lc $preferredUnits ) {
      $unit++;
      $value /= 1024;
   }
   # %d truncates towards zero, so push the value away from zero by a half
   # to round to nearest for negative numbers as well as positive ones
   my $rounded = $value < 0 ? $value - 0.5 : $value + 0.5;
   return sprintf( '%d%s', $rounded, $units[$unit] );
}
327
 
328
 
329
# percent
# Formats a fraction as a percentage string.
#   $value    - the fraction, ie 0.5 is 50%
#   $accuracy - optional number of digits after the decimal point, default 0
# The number is padded to at least two characters and followed by '%'.
sub percent {
   my ( $value, $accuracy ) = @_;
   my $digits = $accuracy || 0;
   my $format = "%2.${digits}f%%";
   return sprintf( $format, $value * 100 );
}
334
 
40 rodolico 335
# showBalanceReport
# Creates a balance report to show the user what went on.
#
# Parameter: $stats - hashref in the layout built by sub getClusterStats
#            (keys 'node' and 'cluster'), possibly modified by the caller
# Returns:   the table produced by &main::report (one row per node, sorted
#            by name, plus an 'All' row), followed by a 'Variance' line and
#            an empty line
#
# A deviation of 1000 or more and a variance of 100 or more are shown as
# 'undef', as is a value which is missing altogether.
sub showBalanceReport {
   my $stats = shift;
   # a node (or cluster) with no recorded vcpu's or memory reports 0%
   # instead of aborting the whole report with a division by zero
   my $ratio = sub {
      my ( $used, $available ) = @_;
      return $available ? ( $used || 0 ) / $available : 0;
   };
   my @header = ('Node','Threads','Memory','Domains','vcpu_alloc','mem_alloc', 'mem_needed', 'vcpu%', 'mem%', 'Status', 'StdDev' );
   my @data;
   foreach my $node ( sort keys %{ $stats->{'node'} } ) {
      my $entry = $stats->{'node'}->{$node};
      my $deviation = $entry->{'deviation'};
      push @data, [
         $node,
         $entry->{'vcpu'},
         &humanReadable( $entry->{'memory'} ),
         $entry->{'count'},
         $entry->{'used_vcpu'},
         &humanReadable( $entry->{'used_memory'} ),
         &humanReadable( $entry->{'memory_needed'} ),
         &percent( $ratio->( $entry->{'used_vcpu'}, $entry->{'vcpu'} ) ),
         &percent( $ratio->( $entry->{'used_memory'}, $entry->{'memory'} ) ),
         $entry->{'maintenance'} ? 'Maintenance' : '',
         ( defined $deviation && $deviation < 1000 ) ? sprintf( "%2.2f", $deviation ) : 'undef'
      ];
   }
   my $all = $stats->{'cluster'};
   push @data, [
         'All',
         $all->{'vcpu'},
         &humanReadable( $all->{'memory'} ),
         $all->{'domain_count'},
         $all->{'used_vcpu'},
         &humanReadable( $all->{'used_memory'} ),
         '',
         &percent( $ratio->( $all->{'used_vcpu'}, $all->{'vcpu'} ) ),
         &percent( $ratio->( $all->{'used_memory'}, $all->{'memory'} ) ),
         '',
         ''
      ];
   # sprintf already rounds to two places; adding .005 first rounded twice
   # and pushed the displayed variance up
   my $variance = $all->{'variance'};
   return &main::report( \@header, \@data ) . "Variance " .
      ( ( defined $variance && $variance < 100 ) ? sprintf( "%2.2f", $variance ) : "undef" ) . "\n\n";
}
374
 
42 rodolico 375
# doActions
# Carries out a list of migrations.
#
# Parameter: $actions - array ref; each entry is a tab separated string of
#            domain name, node from, node to, domain size (size is not used
#            here)
#
# For each entry, &main::migrate( domain, to, from ) is called and its output
# collected, then the domain is moved from 'from' to 'to' in
# $main::statusDB->{'nodePopulation'} so later calculations see the new
# layout. When all entries are done, &main::forceScan is called unless the
# dryrun or testing flag is set.
#
# Returns the concatenated output of all migrate calls ('' if there were
# no actions).
sub doActions {
   my $actions = shift;
   my $output = '';
   my @pending = @$actions;
   foreach my $action ( @pending ) {
      my ( $domain, $source, $target ) = split( "\t", $action );
      $output .= &main::migrate( $domain, $target, $source );
      my $population = $main::statusDB->{'nodePopulation'};
      delete $population->{$source}->{'running'}->{$domain};
      $population->{$target}->{'running'}->{$domain} = time;
   }
   my $simulated = $main::config->{'flags'}->{'dryrun'} || $main::config->{'flags'}->{'testing'};
   &main::forceScan() unless $simulated;
   return $output;
}
388
 
40 rodolico 389
# balance
# Attempts to balance the domains on the active (maintenance = false) nodes.
# We take what is currently running and look at the variance calculated by
# getClusterStats. While that is above the configured maximum, domains are
# moved around, if possible, and the variance is checked again.
#
# Steps
#   1. read the status database, show the starting report (unless quiet)
#   2. move every domain off of any node in maintenance mode, to the first
#      other active node reporting enough available memory
#   3. repeat moveThings/doActions until the variance is at or below
#      balance_max_variance (default 0.5), balance_max_iterations (default
#      10) passes have been made, or a pass produces no output
#   4. show the ending report (unless quiet)
#
# Returns everything the migrations reported, or
# "Already Balanced: No actions to take\n" if there was nothing.
sub balance {
   &main::readDB();
   my $report = '';
   # get the current cluster status
   my $cluster = &getClusterStats();
   # show user what it looks like at first
   unless ( $main::config->{'flags'}->{'quiet'} ) {
      print "=== Starting Status ===\n\n" . &showBalanceReport( $cluster);
   }

   # First, ensure all domains are removed from nodes in maintenance mode
   foreach my $node (keys %{$main::statusDB->{'node'}}) {
      next unless $main::statusDB->{'node'}->{$node}->{'maintenance'};
      my @domains = keys %{$main::statusDB->{'nodePopulation'}->{$node}->{'running'}};
      next unless @domains;
      if ( $main::config->{'flags'}->{'verbose'} ) {
         print "Node $node is in maintenance mode with " . scalar(@domains) . " domains, evacuating...\n";
      }
      foreach my $domain (@domains) {
         # Skip domains that are also in maintenance
         next if $main::statusDB->{'virt'}->{$domain}->{'maintenance'};
         # Find a suitable target node (not in maintenance, with enough resources)
         my $target = '';
         foreach my $candidate (keys %{$main::statusDB->{'node'}}) {
            next if $main::statusDB->{'node'}->{$candidate}->{'maintenance'};
            next if $candidate eq $node;
            # Check if target has enough resources
            my $available = &main::getAvailableResources($candidate);
            next unless $available->{'memory'} >= $main::statusDB->{'virt'}->{$domain}->{'memory'};
            $target = $candidate;
            last;
         }
         unless ( $target ) {
            print "Warning: Could not find suitable target for $domain from maintenance node $node\n";
            next;
         }
         $report .= &main::migrate($domain, $target, $node);
         # record the move, so the following calculations see the new layout
         delete $main::statusDB->{'nodePopulation'}->{$node}->{'running'}->{$domain};
         $main::statusDB->{'nodePopulation'}->{$target}->{'running'}->{$domain} = time;
      }
      &main::forceScan() unless $main::config->{'flags'}->{'dryrun'} || $main::config->{'flags'}->{'testing'};
      # Refresh cluster stats after evacuation
      $cluster = &getClusterStats();
   }

   # we will do a loop to get the variance within our preferred range ($main::config->{'balance_max_variance'})
   # however, we will only do a maximum number of iterations ($main::config->{'balance_max_iterations'})
   my $iterations = $main::config->{ 'balance_max_iterations'} || 10;
   $main::config->{ 'balance_max_variance'} = 0.5 unless defined $main::config->{ 'balance_max_variance'};
   # continue until our variance is where we want it, or we have tried too many times.
   while ( $iterations-- && $cluster->{'cluster'}->{'variance'} > $main::config->{ 'balance_max_variance'} ) {
      my $actions = &moveThings( $cluster );
      my $output = &doActions( $actions );
      # nothing was done, so it is as good as we can get it
      last unless $output;
      $report .= $output;
      # rerun stats
      $cluster = &getClusterStats();
      print &showBalanceReport( $cluster) if $main::config->{'flags'}->{'verbose'} > 1;
   }
   unless ( $main::config->{'flags'}->{'quiet'} ) {
      print "=== Ending Status ===\n\n" . &showBalanceReport( $cluster);
   }
   return $report || "Already Balanced: No actions to take\n";
} # balance
460
 
461
# moveThings
# Plans (does not perform) one round of migrations.
# Finds the node which needs to lose the most ($from, lowest memory_needed)
# and the one which needs to gain the most ($to, highest memory_needed), then
# goes through the domains running on $from, largest first, taking each one
# which still fits into the amount to transfer.
#
# Parameter: $stats - hashref as built by sub getClusterStats; only
#            $stats->{'node'}->{name}->{'memory_needed'} and {'maintenance'}
#            are read. Domains and their sizes come from $main::statusDB
# Returns:   array ref of planned moves, each a tab separated list of
#            domain name, node from, node to, domain size. Empty if there is
#            nothing which can be moved
#
# Nodes in maintenance mode may be $from, but never $to. Domains which are
# in maintenance are never selected, matching what balance does when it
# evacuates a node.
sub moveThings {
   my $stats = shift;

   my $actions = [];
   # find smallest and largest "memory needed" in group. Note that if a node has too much, the number is negative and
   # for too little (ie, needs additional), the number is positive
   # nodes are checked in sorted order, so ties always resolve the same way
   my ( $from, $to );
   foreach my $node ( sort keys %{ $stats->{'node'} } ) {
      my $needed = $stats->{'node'}->{$node}->{'memory_needed'} || 0;
      $from = $node if ! defined $from || $needed < ( $stats->{'node'}->{$from}->{'memory_needed'} || 0 );
      # Only consider non-maintenance nodes as targets
      next if $stats->{'node'}->{$node}->{'maintenance'};
      $to = $node if ! defined $to || $needed > ( $stats->{'node'}->{$to}->{'memory_needed'} || 0 );
   }

   # Don't move anything if there is no valid target, or source and target
   # are the same node
   return $actions unless defined $from && defined $to && $from ne $to;

   my $canGive = $stats->{'node'}->{$from}->{'memory_needed'} || 0;
   my $canTake = $stats->{'node'}->{$to}->{'memory_needed'} || 0;
   # $from must have something to spare (negative) and $to must have room
   # (positive), otherwise a move would make things worse
   return $actions unless $canGive < 0 && $canTake > 0;

   # we want to transfer the least amount, ie the smallest of what $from can
   # give and $to can accept
   my $transfer = -$canGive < $canTake ? -$canGive : $canTake;

   # get array of domains running on $from, sorted by the size of the domain (descending, ie largest on top )
   # then by name, so the result is repeatable. Domains in maintenance are
   # left where they are
   my $virt = $main::statusDB->{'virt'};
   my @sortedDomains = sort
      {
         ( $virt->{$b}->{'memory'} || 0 ) <=> ( $virt->{$a}->{'memory'} || 0 )
            or $a cmp $b
      }
      grep { ! $virt->{$_}->{'maintenance'} }
      keys %{ $main::statusDB->{'nodePopulation'}->{$from}->{'running'} };

   # now, "move" (fake move) largest domain that will fit into $to, and repeat until we can not do it anymore
   foreach my $thisDomain ( @sortedDomains ) {
      last unless $transfer > 0;
      my $size = $virt->{$thisDomain}->{'memory'} || 0;
      next if $size > $transfer; # too big, maybe a smaller one will fit
      push @$actions, join( "\t", ( $thisDomain, $from, $to, $size ) );
      $transfer -= $size;
   }

   return $actions;
}