Subversion Repositories computer_asset_manager_v1

Rev

Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

<?php

   /**
    * Reads a delimited text (CSV) file row by row.
    *
    * The field delimiter may be given explicitly or, when an empty string is
    * passed, detected by sampling the start of the file (see analyse_file()).
    * When $parse_header is true the first row is consumed as column headings
    * and every returned row is an associative array keyed by those headings;
    * otherwise rows are returned as plain indexed arrays.
    */
   class CsvImporter
   {
       // open file handle, or false when the file could not be opened
       private $fp;
       // whether the first row of the file is treated as column headings
       private $parse_header;
       // list of column headings (empty unless $parse_header is true)
       private $header;
       // single character separating the fields
       private $delimiter;
       // maximum line length handed to fgetcsv()
       private $length;
       // result of analyse_file() when the delimiter was auto-detected
       private $analysis;
       // running total of data rows returned by get() so far
       public $lines;



       //--------------------------------------------------------------------
       /**
        * Opens the file and, if requested, reads the header row.
        *
        * If the file cannot be opened, fopen() raises its usual warning and
        * the importer simply yields no rows.
        *
        * @param string $file_name    path of the file to read
        * @param bool   $parse_header true to use the first row as headings
        * @param string $delimiter    field delimiter; '' means auto-detect
        * @param int    $length       maximum line length passed to fgetcsv()
        */
       public function __construct($file_name, $parse_header=false, $delimiter='', $length=8000)
       {
           // auto detect mac file endings
           // NOTE: this ini setting is deprecated as of PHP 8.1; it is kept
           // so that files using bare "\r" line endings can still be read.
           ini_set('auto_detect_line_endings',TRUE);

           $this->parse_header = $parse_header;
           $this->length = $length;
           $this->lines = 0;
           $this->header = array();

           if ( $delimiter === '' ) { # do an auto-detect on the file
              $this->analysis = $this->analyse_file( $file_name );
              $delimiter = $this->analysis['delimiter']['value'];
           }
           $this->delimiter = $delimiter;

           $this->fp = fopen($file_name, "r");

           if ($this->fp && $this->parse_header)
           {
               // an empty file yields false here; keep the empty heading list
               $first_row = $this->read_row();
               if (is_array($first_row))
               {
                   $this->header = $first_row;
               }
           }

       }
       //--------------------------------------------------------------------
       /**
        * Closes the file handle if one was opened.
        */
       public function __destruct()
       {
           if ($this->fp)
           {
               fclose($this->fp);
           }
       }
       //--------------------------------------------------------------------
       /**
        * Reads rows from the current file position.
        *
        * Each call continues where the previous one stopped. The public
        * $lines property is increased by the number of rows returned.
        *
        * @param int $max_lines maximum number of rows to read; 0 reads all
        *                       remaining rows (a negative value reads none)
        * @return array list of rows; each row is keyed by heading when the
        *               header is parsed (missing fields are null), otherwise
        *               it is the indexed array produced by fgetcsv()
        */
       public function get($max_lines=0)
       {
           $data = array();
           $rows_read = 0;
           $unlimited = ($max_lines == 0);

           // the limit is tested first so no row is consumed once it is reached
           while (($unlimited || $rows_read < $max_lines) && is_array($row = $this->read_row()))
           {
               if ($this->parse_header)
               {
                   // start from a clean array so nothing leaks between rows
                   $row_new = array();
                   foreach ($this->header as $i => $heading_i)
                   {
                       // short or blank rows have fewer fields than headings
                       $row_new[$heading_i] = isset($row[$i]) ? $row[$i] : null;
                   }
                   $data[] = $row_new;
               }
               else
               {
                   $data[] = $row;
               }

               $rows_read++;
           }

           // count what was actually read, whether or not a limit was given
           $this->lines += $rows_read;
           return $data;
       }

       //--------------------------------------------------------------------
       /**
        * Reads the next raw row from the file.
        *
        * The enclosure and escape characters are passed explicitly (they are
        * fgetcsv()'s historical defaults) so behaviour does not depend on
        * implicit defaults.
        *
        * @return array|false the parsed fields, or false at end of file or
        *                     when no file is open
        */
       private function read_row()
       {
           if (!$this->fp)
           {
               return false;
           }
           return fgetcsv($this->fp, $this->length, $this->delimiter, '"', '\\');
       }

       //--------------------------------------------------------------------
       /**
        * Returns the key holding the highest count.
        *
        * On a tie the key that appears first wins, which makes the result
        * independent of the sort algorithm.
        *
        * @param array $counts map of key to integer count
        * @return mixed the winning key, or null for an empty map
        */
       private function most_frequent_key($counts)
       {
           $best_key = null;
           $best_count = -1;
           foreach ($counts as $key => $count) {
               if ($count > $best_count) {
                   $best_count = $count;
                   $best_key = $key;
               }
           }
           return $best_key;
       }

      /*
       *  taken from http://php.net/manual/en/function.fgetcsv.php
       * opens $file and reads up to $capture_limit_in_kb kilobytes
       * then analyzes and attempts to determine line endings and delimiters
       * returns results as an array with keys
       *    peak_mem     (start, end)
       *    read_kb
       *    line_ending  (results, count, key, value)
       *    lines        (count, length)
       *    delimiter    (results, count, key, value)
       *
       * Candidates are listed in order of preference: when several have the
       * same count the first one listed is chosen, so a CRLF file is reported
       * as "\r\n" and a file without any delimiter falls back to a comma.
       * The 'results' arrays are sorted ascending by count.
       * If the file cannot be opened the analysis runs on an empty sample.
       */
      public function analyse_file($file, $capture_limit_in_kb = 10) {
          $output = array();

          // capture starting memory usage
          $output['peak_mem']['start']    = memory_get_peak_usage(true);

          // log the limit how much of the file was sampled (in Kb)
          $output['read_kb']                 = $capture_limit_in_kb;

          // read in the sample; fread() needs a positive length
          $limit_bytes = (int) ($capture_limit_in_kb * 1024);
          $contents = '';
          $fh = fopen($file, 'r');
          if ($fh) {
              if ($limit_bytes > 0) {
                  $chunk = fread($fh, $limit_bytes);
                  if ($chunk !== false) {
                      $contents = $chunk;
                  }
              }
              fclose($fh);
          }
          // a full buffer means the sample may stop in the middle of a line
          $truncated = ($limit_bytes > 0 && strlen($contents) >= $limit_bytes);

          // specify allowed field delimiters
          $delimiters = array(
              'comma'     => ',',
              'semicolon' => ';',
              'tab'         => "\t",
              'pipe'         => '|',
              'colon'     => ':'
          );

          // specify allowed line endings
          $line_endings = array(
              'rn'         => "\r\n",
              'n'         => "\n",
              'r'         => "\r",
              'nr'         => "\n\r"
          );

          // loop and count each line ending instance
          $line_result = array();
          foreach ($line_endings as $key => $value) {
              $line_result[$key] = substr_count($contents, $value);
          }

          // pick the winner before sorting so ties follow the declared order
          $line_key = $this->most_frequent_key($line_result);

          // sort by largest array value
          asort($line_result);

          // log to output array
          $output['line_ending']['results']     = $line_result;
          $output['line_ending']['count']     = $line_result[$line_key];
          $output['line_ending']['key']         = $line_key;
          $output['line_ending']['value']     = $line_endings[$line_key];
          $lines = explode($output['line_ending']['value'], $contents);

          // drop the last piece when it is the empty remainder after a final
          // line ending, or when the sample was cut off and it may be incomplete
          if ($truncated || end($lines) === '') {
              array_pop($lines);
          }

          // create a string from the legal lines
          $complete_lines = implode(' ', $lines);

          // log statistics to output array
          $output['lines']['count']     = count($lines);
          $output['lines']['length']     = strlen($complete_lines);

          // loop and count each delimiter instance
          $delimiter_result = array();
          foreach ($delimiters as $delimiter_key => $delimiter) {
              $delimiter_result[$delimiter_key] = substr_count($complete_lines, $delimiter);
          }

          // pick the winner before sorting so ties follow the declared order
          $best_delimiter = $this->most_frequent_key($delimiter_result);

          // sort by largest array value
          asort($delimiter_result);

          // log statistics to output array with largest counts as the value
          $output['delimiter']['results']     = $delimiter_result;
          $output['delimiter']['count']         = $delimiter_result[$best_delimiter];
          $output['delimiter']['key']         = $best_delimiter;
          $output['delimiter']['value']         = $delimiters[$best_delimiter];

          // capture ending memory usage
          $output['peak_mem']['end'] = memory_get_peak_usage(true);
          return $output;
      }

   } // class CsvImporter


?>