| 41 | rodolico | 1 | <?php
 | 
        
           |  |  | 2 |   | 
        
           /**
            * Reads a delimited text file (CSV, TSV, ...) in chunks of rows.
            *
            * The field delimiter can be given explicitly or auto-detected by
            * sampling the start of the file (see analyse_file()).  When header
            * parsing is enabled, the first line of the file is consumed as column
            * names and every row returned by get() is keyed by those names.
            */
           class CsvImporter
           {
               /** @var resource|null open read handle on the file being imported */
               private $fp;
               /** @var bool whether the first line of the file holds column names */
               private $parse_header;
               /** @var array column names read from the first line (only when $parse_header) */
               private $header;
               /** @var string field delimiter handed to fgetcsv() */
               private $delimiter;
               /** @var string field enclosure character handed to fgetcsv() */
               private $enclosure;
               /** @var string escape character handed to fgetcsv() */
               private $escape;
               /** @var int maximum line length handed to fgetcsv() */
               private $length;
               /** @var array|null result of analyse_file(); only set when the delimiter was auto-detected */
               private $analysis;
               /** @var int running total of data rows returned by get() */
               public $lines;

               //--------------------------------------------------------------------
               /**
                * Opens the file and, optionally, reads its header line.
                *
                * @param string $file_name    path of the file to read
                * @param bool   $parse_header true to treat the first line as column names
                * @param string $delimiter    field delimiter; '' or 'auto' auto-detects it,
                *                             'tab' selects a tab character
                * @param string $enclosure    enclosure character; 'auto' or any empty value
                *                             falls back to a double quote
                * @param string $escape       escape character; an empty value falls back
                *                             to a backslash
                * @param int    $length       maximum line length passed to fgetcsv()
                *
                * @throws RuntimeException when the file cannot be opened
                */
               public function __construct($file_name, $parse_header = false, $delimiter = '', $enclosure = '"', $escape = '', $length = 8000)
               {
                   // auto detect mac file endings
                   // NOTE: this ini setting is deprecated as of PHP 8.1; it is kept so
                   // files using bare "\r" line endings continue to be read correctly.
                   ini_set('auto_detect_line_endings', true);

                   $this->parse_header = $parse_header;

                   if ($delimiter === '' || $delimiter === null || $delimiter === 'auto') {
                       // do an auto-detect on the file
                       $this->analysis  = $this->analyse_file($file_name);
                       $this->delimiter = $this->analysis['delimiter']['value'];
                   } elseif ($delimiter === 'tab') {
                       $this->delimiter = "\t";
                   } else {
                       $this->delimiter = $delimiter;
                   }

                   // there is no enclosure detection: 'auto' simply means the default
                   $this->enclosure = ($enclosure === 'auto' || !$enclosure) ? '"' : $enclosure;
                   $this->escape    = $escape ? $escape : '\\';
                   $this->length    = $length;
                   $this->lines     = 0;
                   $this->header    = array();

                   $this->fp = fopen($file_name, 'r');
                   if ($this->fp === false) {
                       // fail here instead of letting every later fgetcsv() call misbehave
                       throw new RuntimeException("Unable to open file '" . $file_name . "' for reading");
                   }

                   if ($this->parse_header) {
                       $header = fgetcsv($this->fp, $this->length, $this->delimiter, $this->enclosure, $this->escape);
                       // fgetcsv() returns false on an empty file; keep $header iterable
                       $this->header = is_array($header) ? $header : array();
                   }
               }

               //--------------------------------------------------------------------
               /**
                * Closes the file handle if one is open.
                */
               public function __destruct()
               {
                   if ($this->fp) {
                       fclose($this->fp);
                       $this->fp = null;
                   }
               }

               //--------------------------------------------------------------------
               /**
                * Reads the next batch of rows from the file.
                *
                * Successive calls continue where the previous call stopped.  The number
                * of rows returned is added to the public $lines counter.
                *
                * @param int $max_lines maximum number of rows to read; 0 (or any value
                *                       below 1) reads every remaining row
                *
                * @return array list of rows; each row is a numerically indexed array,
                *               or an array keyed by column name when header parsing is
                *               enabled (columns missing from a short row are null)
                */
               public function get($max_lines = 0)
               {
                   $data    = array();
                   $limited = $max_lines > 0;

                   // the limit is tested first so that no row is consumed and then dropped
                   while ((!$limited || count($data) < $max_lines)
                       && ($row = fgetcsv($this->fp, $this->length, $this->delimiter, $this->enclosure, $this->escape)) !== false
                   ) {
                       if ($this->parse_header) {
                           $record = array();
                           foreach ($this->header as $i => $heading) {
                               // short or blank lines do not carry every column
                               $record[$heading] = isset($row[$i]) ? $row[$i] : null;
                           }
                           $data[] = $record;
                       } else {
                           $data[] = $row;
                       }
                   }

                   // count what was actually read, whether or not a limit was given
                   $this->lines += count($data);
                   return $data;
               }

               /**
                * Samples the start of a file and guesses its line ending and delimiter.
                *
                * Adapted from the user notes at http://php.net/manual/en/function.fgetcsv.php
                * Reads up to $capture_limit_in_kb kilobytes, counts the occurrences of each
                * candidate line ending and each candidate delimiter, and reports the most
                * frequent one of each.  On equal counts the candidate listed first wins, so
                * "\r\n" is preferred over "\n" / "\r" and the comma is the fallback delimiter.
                *
                * @param string $file                path of the file to sample
                * @param int    $capture_limit_in_kb how much of the file to read, in KB
                *
                * @return array with the keys
                *    peak_mem    => array('start' => int, 'end' => int)
                *    read_kb     => the sample limit that was requested
                *    line_ending => array('results', 'count', 'key', 'value')
                *    lines       => array('count', 'length')
                *    delimiter   => array('results', 'count', 'key', 'value')
                *
                * @throws RuntimeException when the file cannot be opened
                */
               public function analyse_file($file, $capture_limit_in_kb = 10)
               {
                   $output = array();

                   // capture starting memory usage
                   $output['peak_mem']['start'] = memory_get_peak_usage(true);

                   // log the limit how much of the file was sampled (in Kb)
                   $output['read_kb'] = $capture_limit_in_kb;

                   // read in the sample
                   $limit_bytes = (int) ($capture_limit_in_kb * 1024);
                   $fh = fopen($file, 'r');
                   if ($fh === false) {
                       throw new RuntimeException("Unable to open file '" . $file . "' for analysis");
                   }
                   $contents = $limit_bytes > 0 ? fread($fh, $limit_bytes) : '';
                   fclose($fh);
                   if ($contents === false) {
                       $contents = '';
                   }

                   // allowed field delimiters, in order of preference on equal counts
                   $delimiters = array(
                       'comma'     => ',',
                       'semicolon' => ';',
                       'tab'       => "\t",
                       'pipe'      => '|',
                       'colon'     => ':'
                   );

                   // allowed line endings, in order of preference on equal counts
                   $line_endings = array(
                       'rn' => "\r\n",
                       'n'  => "\n",
                       'r'  => "\r",
                       'nr' => "\n\r"
                   );

                   // count each line ending instance
                   $line_result = array();
                   foreach ($line_endings as $key => $value) {
                       $line_result[$key] = substr_count($contents, $value);
                   }
                   // choose before sorting: the sort order of equal counts must not decide
                   $line_key = $this->most_frequent_key($line_result);
                   asort($line_result);

                   // log to output array
                   $output['line_ending']['results'] = $line_result;
                   $output['line_ending']['count']   = $line_result[$line_key];
                   $output['line_ending']['key']     = $line_key;
                   $output['line_ending']['value']   = $line_endings[$line_key];

                   $lines = explode($output['line_ending']['value'], $contents);

                   // the last piece is dropped when the sample was cut off at the limit
                   // (it may be an incomplete line) or when it is just the empty string
                   // that follows a trailing line ending
                   $truncated = strlen($contents) >= $limit_bytes;
                   if ($truncated || end($lines) === '') {
                       array_pop($lines);
                   }

                   // create a string from the legal lines
                   $complete_lines = implode(' ', $lines);

                   // log statistics to output array
                   $output['lines']['count']  = count($lines);
                   $output['lines']['length'] = strlen($complete_lines);

                   // count each delimiter instance
                   $delimiter_result = array();
                   foreach ($delimiters as $delimiter_key => $delimiter) {
                       $delimiter_result[$delimiter_key] = substr_count($complete_lines, $delimiter);
                   }
                   $delimiter_choice = $this->most_frequent_key($delimiter_result);
                   asort($delimiter_result);

                   // log statistics to output array with largest counts as the value
                   $output['delimiter']['results'] = $delimiter_result;
                   $output['delimiter']['count']   = $delimiter_result[$delimiter_choice];
                   $output['delimiter']['key']     = $delimiter_choice;
                   $output['delimiter']['value']   = $delimiters[$delimiter_choice];

                   // capture ending memory usage
                   $output['peak_mem']['end'] = memory_get_peak_usage(true);
                   return $output;
               }

               /**
                * Returns the key holding the highest count; the first such key wins a tie.
                *
                * @param array $counts map of candidate key => occurrence count
                *
                * @return string|null the winning key, or null when $counts is empty
                */
               private function most_frequent_key($counts)
               {
                   $best_key   = null;
                   $best_count = -1;
                   foreach ($counts as $key => $count) {
                       if ($count > $best_count) {
                           $best_key   = $key;
                           $best_count = $count;
                       }
                   }
                   return $best_key;
               }

           } // class CsvImporter
 | 
        
           |  |  | 175 |   | 
        
           |  |  | 176 |   | 
        
           |  |  | 177 | ?>
 |