Subversion Repositories havirt

Rev

Rev 42 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
3 rodolico 1
#!/usr/bin/env perl
2
 
3
# Common library for havirt. Basically, just a place to put things which may be used by any
4 rodolico 4
# part of havirt. More for organizational purposes.
3 rodolico 5
 
4 rodolico 6
# Copyright 2024 Daily Data, Inc.
7
# 
8
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following 
9
# conditions are met:
10
#
11
#   Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
12
#   Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer 
13
#   in the documentation and/or other materials provided with the distribution.
14
#   Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived
15
#   from this software without specific prior written permission.
16
# 
17
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
18
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
19
# THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
22
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23
 
24
 
3 rodolico 25
# v0.0.1 20240602 RWR
26
# Initial setup
26 rodolico 27
#
28
# v1.2.0 20240826 RWR
29
# Added some code to migrate domains if node placed in maintenance mode
30
# Added a lot of 'verbose' print lines, and modified for new flag structure
31
#
38 rodolico 32
# v1.2.1 20240828 RWR
33
# removed forceScan from migrateAll and relying on calling scripts to do that.
42 rodolico 34
#
35
# v1.2.2 20250511 RWR
36
# added source node as a parameter for migrate. If source node is passed in, will not run readDB
46 rodolico 37
#
38
# v1.2.3 20260101 RWR
39
# modified executeAndWait to return 0 on timeout, 1 on success
40
# bugfix in diffArray to handle case where arr1 is smaller than arr2
3 rodolico 41
 
42
package havirt;

use warnings;
use strict;

BEGIN {
   use FindBin;
   use File::Spec;
   # use libraries from the directory this script is in
   use Cwd 'abs_path';
   use File::Basename;
   use lib dirname( abs_path( __FILE__ ) );
}

# NOTE(review): the subs below call YAML::Tiny->new/read/write, but no
# 'use YAML::Tiny' is visible in this file; presumably the calling script
# loads it -- confirm.

use Data::Dumper qw(Dumper); # Import the Dumper() subroutine

# define the version number
# see https://metacpan.org/pod/release/JPEACOCK/version-0.97/lib/version.pod
# kept in step with the newest entry in the change log at the top of the file
use version;
our $VERSION = version->declare("1.2.3");


use Exporter;

# everything listed here is exported by default, so it also becomes callable
# as &main::name from the script which uses this module
our @ISA = qw( Exporter );
our @EXPORT = qw( 
                  &readDB
                  &writeDB
                  &report
                  &scan
                  &makeCommand
                  &forceScan
                  &executeAndWait
                  &findDomain
                  &diffArray
                  &makeConfig
                  &readConfig
                  &getAvailableResources
                  &resource
                  &validateResources
                  &migrate
                );
84
 
12 rodolico 85
# read the status DB file (just a YAML) into $main::statusDB
# if $lock is set, will create a "lock" file (status db filename . '.lock')
# so other processes will not try to write to it. An existing lock is given
# up to five one-second waits to clear; if it is still there, we die.
# The lock is removed again by writeDB.
# if the status DB file does not exist, $main::statusDB becomes an empty hash
# returns the new value of $main::statusDB

sub readDB {
   my $lock = shift;
   my $dbFileName = $main::config->{'status db filename'};
   my $lockFileName = "$dbFileName.lock";
   my $lockTime = 5; # maximum number of one second waits for a lock to clear
   if ( $lock ) {
      # wait for lock to clear if it exists, but only for $lockTime iterations
      while ( -f $lockFileName && $lockTime-- ) {
         sleep 1; # wait one second, then try again
      }
      die "Something has $dbFileName locked, aborting\n" if -f $lockFileName;
      # create the lock file ourselves instead of shelling out to touch, so
      # a path containing spaces or shell metacharacters can not break it
      if ( open( my $lockHandle, '>>', $lockFileName ) ) {
         close $lockHandle;
      } else {
         # touch would only have complained on STDERR, so we just warn also
         warn "Could not create lock file $lockFileName: $!\n";
      }
   }
   my $yaml = YAML::Tiny->new( {} );
   if ( -f $dbFileName ) {
      $yaml = YAML::Tiny->read( $dbFileName );
   }
   $main::statusDB = $yaml->[0];
}
109
 
26 rodolico 110
# Dump $main::statusDB into the status DB file, replacing whatever is there,
# then remove the matching ".lock" file if there is one
sub writeDB {
   my $dbFile = $main::config->{'status db filename'};
   my $lockFile = "$dbFile.lock";
   YAML::Tiny->new( $main::statusDB )->write( $dbFile );
   # release any lock we might have on it
   unlink $lockFile if -f $lockFile;
}
117
 
26 rodolico 118
# build a report and return it as a string; the caller does the printing
# the 'format' flag picks the layout: 'tsv' for tab separated values,
# anything else for fixed width columns
# parameters (header array ref, data array ref) are handed on untouched
sub report {
   my $formatter = $main::config->{'flags'}->{'format'} eq 'tsv'
      ? \&report_tsv
      : \&report_screen;
   return $formatter->( @_ );
}
126
 
26 rodolico 127
# report as tab separated values, no encapsulation
# $header is a reference to an array of column names
# $data is a reference to an array of rows, each row an array reference
# returns one string, every line (header included) terminated by a newline
sub report_tsv {
   my ( $header, $data ) = @_;
   my @lines = ( join( "\t", @$header ) );
   push @lines, map { join( "\t", @{ $data->[$_] } ) } 0 .. $#{$data};
   return join( "\n", @lines ) . "\n";
}
137
 
26 rodolico 138
# report suitable for screen, with fixed width, right justified columns
# $header is a reference to an array of column names
# $data is a reference to an array of rows, each row an array reference
# the number of columns is taken from the header; each column is as wide as
# its longest value (header included), columns are separated by one space
# undefined or missing cells are shown as empty, cells beyond the last
# header column are ignored
# returns one string, every line terminated by a newline
sub report_screen {
   my ( $header, $data ) = @_;
   # First, initialize by using the length of the headers
   my @widths = map { defined $_ ? length( $_ ) : 0 } @$header;
   # now, go through all data in each row, for each column, and increase the width if it is larger
   foreach my $row ( @$data ) {
      foreach my $column ( 0 .. $#widths ) {
         my $cell = $row->[$column];
         my $size = defined $cell ? length( $cell ) : 0;
         $widths[$column] = $size if $size > $widths[$column];
      } # for column
   } # for row
   # one %Ns per column, N being the width we just worked out
   my $format = join( ' ', map { '%' . $_ . 's' } @widths ) . "\n";
   # actually build the output now, header first
   my $output = '';
   foreach my $row ( $header, @$data ) {
      # hand sprintf exactly one defined value per column so it never sees
      # undef, too few or too many arguments
      my @cells = map { defined $row->[$_] ? $row->[$_] : '' } 0 .. $#widths;
      $output .= sprintf( $format, @cells );
   } # for row
   return $output;
}
10 rodolico 168
 
15 rodolico 169
# runs 'virsh list' for a node (through makeCommand) to determine which
# domains are running on it
# only output lines which start with optional whitespace and a digit are
# used; the word following the leading number is taken as the domain name
# returns a hash reference, key is the domain name, value is the time
# (epoch seconds) it was seen by this call
sub getDomainsOnNode {
   my $node = shift;
   my $command = &main::makeCommand( $node, 'virsh list' );
   print "havirt.pm:getDomainsOnNode, command is $command\n" if $main::config->{'flags'}->{'debug'} > 2;
   my %seen;
   foreach my $line ( `$command` ) {
      next unless $line =~ /^\s*\d/; # skip header, separator and blank lines
      # if the name can not be picked out, the whole line is used as the key
      my $name = $line =~ m/\s*\d+\s*([^ ]+)/ ? $1 : $line;
      $seen{$name} = time;
   }
   return \%seen;
}
184
 
18 rodolico 185
# find node a domain is on
# first parameter is the domain name
# rest of @_ is list of nodes to search
# if no nodes passed in, will search all known nodes
# returns first node found with the domain, or an empty string if not found
# always re-reads the status DB (without locking) first
# if the 'paranoid' flag is set, each node is asked directly via 'virsh list'
# and the domain name must match the Name column exactly, so looking for
# "web" will not be satisfied by "web2". Otherwise, only the status DB is
# consulted
# NOTE(review): makeConfig stores 'paranoid' at the top level of the config,
# but this reads it from the 'flags' section -- confirm which one is intended
# possibly not being used??
sub findDomain {
   my $domainName = shift;
   my @node = @_;
   &readDB();
   unless ( @node ) {
      @node = keys %{$main::statusDB->{'node'} };
      print "findDomain, nodes = " . join( "\t", @node ) . "\n" if $main::config->{'flags'}->{'debug'} > 1;
   }
   if ( $main::config->{'flags'}->{'paranoid'} ) { # we will scan all nodes just to make sure
      foreach my $thisNode ( @node ) {
         my $command = &main::makeCommand( $thisNode, 'virsh list' );
         my $output = `$command`;
         print "findDomain, $thisNode list =\n" . $output . "\n" if $main::config->{'flags'}->{'debug'} > 1;
         # a data line is "<id> <name> <state>"; \Q..\E keeps characters in
         # the name (like . or +) from being treated as regex syntax
         return $thisNode if ( $output =~ m/^\s*\d+\s+\Q$domainName\E\s/m );
      }
   } else { # not paranoid mode, so just look through the status file
      foreach my $thisNode ( @node ) {
         if ( $main::statusDB->{'nodePopulation'}->{$thisNode}->{'running'}->{$domainName} ) {
            return $thisNode;
         }
      }
   }
   return '';
}
15 rodolico 216
 
217
# check one or more nodes and determine which domains are running on them.
# defaults to everything in the node database, but the -t can have it run on only one
# this is the function that should be run every few minutes on one of the servers
#
# parameters are node names to scan; the 'target' flag, if set, is added to
# them. With no parameters and no target, every node in the status DB is done
# unless the 'force' flag is set, nothing is done if the last scan file is
# not more than 'min scan time' seconds old
# reads the status DB with a lock, updates 'nodePopulation' for each node,
# makes sure every domain found has an entry under 'virt', then writes the
# status DB (which releases the lock)
# returns a message string for the caller to display
sub scan {
   my @targets = @_;
   my $lastScanFile = $main::config->{'last scan filename'};
   if ( -f $lastScanFile && ! $main::config->{'flags'}->{'force'} ) {
      my $lastScan = time - ( stat( $lastScanFile ) ) [9];
      return "Scan was run $lastScan seconds ago\n" unless $lastScan > $main::config->{'min scan time'};
   }
   # record the time of this scan: create the file if needed, then set its
   # timestamps. Done without a shell so a path with spaces or shell
   # metacharacters in it is handled correctly
   if ( open( my $stamp, '>>', $lastScanFile ) ) {
      close $stamp;
      my $now = time;
      utime( $now, $now, $lastScanFile );
   } else {
      warn "Could not update last scan file $lastScanFile: $!\n";
   }
   &main::readDB(1);
   print Dumper( $main::statusDB->{'nodePopulation'} ) if $main::config->{'flags'}->{'debug'} > 2;
   if ( $main::config->{'flags'}->{'target'} ) {
      push @targets, $main::config->{'flags'}->{'target'};
   }
   @targets = keys %{$main::statusDB->{'node'}} unless @targets;
   print "Scanning " . join( "\n", @targets ) . "\n" if $main::config->{'flags'}->{'debug'};
   foreach my $node (@targets) {
      print "Scanning $node\n" if $main::config->{'flags'}->{'verbose'};
      $main::statusDB->{'nodePopulation'}->{$node}->{'running'} = &getDomainsOnNode( $node );
      $main::statusDB->{'nodePopulation'}->{$node}->{'lastchecked'} = time;
      # keys is in scalar context here, so this is the number of domains
      print "Found " . (keys %{$main::statusDB->{'nodePopulation'}->{$node}->{'running'}}) . " domains on node $node\n" if $main::config->{'flags'}->{'verbose'};
      foreach my $domain ( keys %{$main::statusDB->{'nodePopulation'}->{$node}->{'running'}} ) {
         # make sure there is an entry for all of these domains
         $main::statusDB->{'virt'}->{$domain} = {} unless exists( $main::statusDB->{'virt'}->{$domain} );
      }
      print Dumper( $main::statusDB->{'nodePopulation'}->{$node} ) if $main::config->{'flags'}->{'debug'} > 2;
   }
   &main::writeDB();
   return "Node(s) updated\n";
}
248
 
18 rodolico 249
# makes the command that will be run on a node
# Created as a sub so we can change format easily
# if node is the node we're on (as reported by the hostname command), we
# don't need to do a remote call
# if node is null, we'll assume we do the command here
# otherwise, we'll do an ssh to the node and run the command there; the
# command is wrapped in single quotes for the remote shell, and any single
# quote inside the command is escaped so it can not end the quoting early
# returns the command line as a string, it does not run anything itself
sub makeCommand {
   my ( $node, $command ) = @_;
   my $me = `hostname`;
   chomp $me;
   return $command if ! $node || $node eq $me;
   # inside '...' the shell has no escape character, so a literal ' is
   # written by closing the quote, adding \' and opening the quote again
   ( my $quoted = $command ) =~ s/'/'\\''/g;
   return "ssh $node '$quoted'";
}
264
 
38 rodolico 265
# scan every node right now, no matter when the last scan was done or
# whether a target has been set
# works by remembering the 'force' and 'target' flags, setting force to 1
# and target to an empty string, calling scan, then putting the remembered
# values back
sub forceScan {
   my %saved;
   foreach my $flag ( 'force', 'target' ) {
      $saved{$flag} = $main::config->{'flags'}->{$flag};
   }
   $main::config->{'flags'}->{'force'} = 1;
   $main::config->{'flags'}->{'target'} = '';
   &main::scan();
   # back to the way the caller had things
   $main::config->{'flags'}->{'force'} = $saved{'force'};
   $main::config->{'flags'}->{'target'} = $saved{'target'};
}
278
 
279
 
280
# executes command $command, then repeatedly runs virsh list
# on $scanNode, looking for $scanDomain in the Name column
# $condition is 1, to wait for domain to start
# or 0 (false) to wait for it to shut down
# the domain name must match exactly, so waiting on "web" is not confused
# by a running "web2"
# the first check is made as soon as $command returns; after that we wait
# $waitSeconds between checks and give up once 60 seconds have been spent
# waiting
# returns 1 if the condition was met, 0 if we timed out
sub executeAndWait {
   my ( $command, $scanNode, $scanDomain, $condition ) = @_;
   my $waitSeconds = 15; # number of seconds to wait before checking again
   my $maxIterations = 60 / $waitSeconds; # maximum number of times we will wait and check again
   print "Running [$command], then waiting $waitSeconds to check if complete\n" if $main::config->{'flags'}->{'debug'};
   `$command`;
   my $waitCommand = &makeCommand( $scanNode, 'virsh list' );
   while ( 1 ) {
      print '. ';
      my $output = `$waitCommand`;
      print "[$waitCommand] returned [$output]\n" if $main::config->{'flags'}->{'debug'} > 1;
      # a data line is "<id> <name> <state>"; \Q..\E keeps characters in
      # the name from being treated as regex syntax
      my $running = ( $output =~ m/^\s*\d+\s+\Q$scanDomain\E\s/m ) ? 1 : 0;
      return 1 if ( $condition ? $running : !$running ); # made it successful
      return 0 unless ( $maxIterations-- ); # we've waited too long, so probably not working
      sleep $waitSeconds;
   }
}
301
 
18 rodolico 302
# find the differences between two arrays (passed by reference)
# first sorts the arrays (as strings), then walks through them one by one
# NOTE: the sort is done in place, so the caller's arrays are reordered
# returns a reference to a sorted array holding every element which is in
# one of the arrays but has no matching element in the other. The arrays
# may be of any size, including empty, and either may be the larger one
# used by domain.pm:list to find non-running domains for output
sub diffArray {
   my ( $arr1, $arr2 ) = @_;
   my @result;

   @$arr1 = sort @$arr1;
   @$arr2 = sort @$arr2;
   my $i=0;
   my $j=0;

   # walk both lists in step while each still has something to compare
   while ( $i < @$arr1 && $j < @$arr2) {
      if ( $arr1->[$i] eq $arr2->[$j] ) { # in both, so not a difference
         $i++;
         $j++;
      } elsif ( $arr1->[$i] lt $arr2->[$j] ) { # only in arr1
         push @result, $arr1->[$i];
         $i++;
      } else { # only in arr2
         push @result, $arr2->[$j];
         $j++;
      }
   }
   # one list is used up; whatever remains of the other has nothing to
   # match against, so all of it is a difference (at most one of these
   # slices is non-empty)
   push @result, @{$arr1}[ $i .. $#{$arr1} ], @{$arr2}[ $j .. $#{$arr2} ];
   return \@result;
}
25 rodolico 329
 
330
 
331
# fill $config (a hash reference) with the default settings and write it to
# $filename as YAML
# directories are derived from the location of the running script
# (FindBin). Only the keys listed here are set; anything else already in
# $config, or in its 'flags' section, is left as it is
sub makeConfig {
   my ( $config, $filename ) = @_;
   my $scriptDir = $FindBin::RealBin;
   my $dbDir = $scriptDir . '/var';
   my %settings = (
      'script dir'           => $scriptDir,
      'script name'          => $FindBin::Script,
      'db dir'               => $dbDir,
      'conf dir'             => $scriptDir . '/conf',
      'status db filename'   => $dbDir . '/status.yaml',
      'last scan filename'   => $scriptDir . '/var/lastscan',
      'min scan time'        => 5 * 60, # five minutes
      'node reserved memory' => 8 * 1024 * 1024, # 8 gigabytes
      'node reserved vcpu'   => 0, # turn off reserved vcpu
      'paranoid'             => 1, # rescan all nodes on any action which will modify it
   );
   # 'help' and 'version' are also used as flags, but don't go in the config file
   my %flags = (
      'debug'   => 0,
      'dryrun'  => 1,
      'force'   => 0,
      'format'  => 'screen',
      'quiet'   => 0,
      'target'  => '',
      'verbose' => 1,
   );
   # slice assignment, so only these keys are touched
   @{ $config }{ keys %settings } = values %settings;
   @{ $config->{'flags'} }{ keys %flags } = values %flags;
   YAML::Tiny->new( $config )->write( $filename );
}
356
 
357
# load the config file $filename (YAML) and return its first document
# if the file does not exist, an empty hash reference is returned instead
sub readConfig {
   my $filename = shift;
   my $yaml = -f $filename
      ? YAML::Tiny->read( $filename )
      : YAML::Tiny->new( {} );
   return $yaml->[0];
}
366
 
26 rodolico 367
# look up what the status DB has recorded for a node: 'memory' and 'cpu_count'
# dies if the node has no entry in $main::statusDB (the status DB is not
# re-read here)
# returns a reference to a new hash with exactly those two keys; a value
# which is not defined for the node is returned as 0
sub resource {
   my $node = shift;
   my $nodeInfo = $main::statusDB->{'node'}->{$node};
   die "Can not find node $node in havirt.pm:resource\n"
      unless $nodeInfo;
   my %totals;
   foreach my $key ( 'memory', 'cpu_count' ) {
      $totals{$key} = defined $nodeInfo->{$key} ? $nodeInfo->{$key} : 0;
   } # foreach
   return \%totals;
}
382
 
26 rodolico 383
# determine resources still available on a node, RAM and cpu's
# re-reads the status DB (without locking), takes the node's totals from
# resource(), then subtracts the 'memory' and 'vcpu' of every domain listed
# as running on the node
# a domain with no memory or vcpu recorded (scan creates empty entries for
# newly seen domains) is counted as using 0
# dies if the node is not in the status DB
# returns a hash reference with keys 'memory' and 'cpu_count'; values can
# be negative if the node is over committed
sub getAvailableResources {
   my $node = shift;
   &readDB();
   die "Can not find node $node in havirt.pm:getAvailableResources\n" unless $main::statusDB->{'node'}->{$node};
   my $totalResources = &resource( $node );
   print Dumper( $totalResources ) if $main::config->{'flags'}->{'debug'};
   foreach my $domain ( keys %{ $main::statusDB->{'nodePopulation'}->{$node}->{'running'} } ) {
      $totalResources->{'memory'} -= ( $main::statusDB->{'virt'}->{$domain}->{'memory'} || 0 );
      $totalResources->{'cpu_count'} -= ( $main::statusDB->{'virt'}->{$domain}->{'vcpu'} || 0 );
   }
   return $totalResources;
}
396
 
397
# validate that node has enough resources for the domains which occupy the
# remainder of the stack
# first parameter is the node, the rest are names of domains we want to add
# starts with what getAvailableResources reports for the node, takes off
# 'node reserved memory' (and 'node reserved vcpu' if that is non-zero),
# then the memory and vcpu of each domain passed in; a domain with nothing
# recorded is counted as using 0
# memory is always checked. The vcpu count is only checked when
# 'node reserved vcpu' is set to something other than 0 in the config
# returns 0 on success, or one or more error messages in a string on failure
sub validateResources {
   my $node = shift;
   &readDB();
   my @return;
   my $nodeResources = &getAvailableResources( $node );
   print "In havirt.pm:validateResources, checking if enough room on $node for\n" . join( "\n", @_ ) . "\n"
      if $main::config->{'flags'}->{'debug'};
   print "Checking resources on $node\n" if $main::config->{'flags'}->{'verbose'};
   # subtract the reserved memory from the node
   $nodeResources->{'memory'} -= $main::config->{'node reserved memory'};
   $nodeResources->{'cpu_count'} -= $main::config->{'node reserved vcpu'} if $main::config->{'node reserved vcpu'};
   # note: stops at the first empty/false name in the list
   while ( my $domain = shift ) {
      $nodeResources->{'memory'} -= ( $main::statusDB->{'virt'}->{$domain}->{'memory'} || 0 );
      $nodeResources->{'cpu_count'} -= ( $main::statusDB->{'virt'}->{$domain}->{'vcpu'} || 0 );
   }
   print "In havirt.pm:validateResources, $node will have $nodeResources->{memory} memory and $nodeResources->{cpu_count} vcpu's after task\n"
      if ( $main::config->{'flags'}->{'debug'} > 1 );

   push @return, "This action would result in memory of $nodeResources->{memory}" if $nodeResources->{'memory'} <= 0;
   # 'node reserved vcpu' is a top level config entry, not one of the flags
   push @return, "This action would result in virtual cpu count of $nodeResources->{cpu_count}" if $nodeResources->{'cpu_count'} <= 0 && $main::config->{'node reserved vcpu'};
   return @return ? join( "\n", @return ) . "\n" : 0;
}
422
 
423
# migrate domain $virt from the node it is currently on to node $target
# $node is the node the domain is on now; if it is not passed in, it is
# looked up with findDomain (which re-reads the status DB)
# dies if the domain can not be found, if the domain or the target node is
# flagged as in maintenance, or if the domain is already on the target
# if the 'dryrun' flag is set, nothing is run and the command which would
# have been used is returned. Otherwise a live migration is run on the
# source node and we wait for the domain to disappear from it, returning
# 'Success' or 'Time Out waiting for shutdown'
# the returned string always ends in a newline
sub migrate {
   my ( $virt, $target, $node ) = @_;
   my $return;
   $node  = &main::findDomain( $virt ) unless $node;
   print Dumper( $main::statusDB->{'nodePopulation'} ) if $main::config->{'flags'}->{'debug'} > 2;
   die "I can not find $virt on any node\n" unless $node;
   die "Domain $virt in maintenance mode, can not migrate it\n" if $main::statusDB->{'virt'}->{$virt}->{'maintenance'};
   die "Node $target in maintenance mode, can not migrate anything to it\n" if $main::statusDB->{'node'}->{$target}->{'maintenance'};
   die "$virt already on $target\n" if $target eq $node;
   my $command = &main::makeCommand( $node, "virsh migrate --live --persistent --verbose  $virt qemu+ssh://$target/system" );
   if ( $main::config->{'flags'}->{'dryrun'} ) { # dry run, so only show what we would have done
      $return = $command;
   } else { # they want us to actually do it
      print "Migrating $virt from $node to $target\n" if $main::config->{'flags'}->{'verbose'};
      # condition 0 = wait until the domain is no longer listed on the source node
      $return = ( &main::executeAndWait( $command, $node, $virt, 0 ) ? 'Success' : 'Time Out waiting for shutdown');
      #&main::forceScan(); Removed since we're doing it at a higher level
   }
   return "$return\n";
}
443