Jtm.php

<?php

namespace Tlf;

/**
 * Json To Mysql class
 */
class Jtm {

    /**
     * Set `true` to disable foreign key checks during INSERTs
     */
    public bool $disable_foreign_key_checks = false;

    /**
     * Convert a json line into an array
     *
     * @param $line json that ends with `},` (bc there is another row
     */
    public function json_line_to_array(string $line): ?array {
        if (trim($line)=='[')return null;
        if (trim($line)==']')return null;
        $line = substr($line,0,strrpos($line,'}')+1);
        $row = json_decode($line, true);
        return $row;
    }

    /**
     * check if a value is a date
     * @param $value a value to check for a date format
     */
    public function is_date($value): bool{
        if (!is_string($value))return false;
        return preg_match('/[0-9]{4}\-[0-9]{2}\-[0-9]{2}/', $value);
    }

    /**
     *
     * convert column names to camel_case
     *
     * @param $dirty_schema, as generated by `generate_schema()`
     * 
     */
    public function clean_schema(array $dirty_schema): array {
        // print_r($dirty_schema);
        $clean_schema = [];
        foreach ($dirty_schema as $col=>$info){

            $json_key = $col;
            $dirty = $col;

            $dirty = preg_replace('/[^a-zA-Z_]/','_', $dirty); //replace bad chars with underscore
            $dirty = preg_replace('/([A-Z]+)/','_$1', $dirty); // replace uppercase letters with `_LETTER`
            $dirty = preg_replace('/_+/', '_', $dirty); // replace multiple underscores
            if (substr($dirty,0,1)=='_')$dirty = substr($dirty,1); // remove first char if underscore
            $dirty = strtolower($dirty); // lowercase string

            $col_name = $dirty;

            $info['name'] = $col_name;
            $info['json_key'] = $col;

            $clean_schema[$col_name] = $info;

        }
// echo "\n\n\n-----------\n\n";
// echo "\n\n\n-----------\n\n";
        // print_r($clean_schema);
        // exit;


        return $clean_schema;
    }

    /**
     * @param $prefix the file prefix as passed to @see(generate_sql_insert())
     * @param $pdo a pdo instance to use to execute the sql
     *
     * @return total number of rows inserted
     */
    public function execute_insert_files(string $prefix, \PDO $pdo): int{

        $dir = dirname($prefix);
        $name = basename($prefix);
        $insert_prefix = $name.'-insert';
        $prefix_len = strlen($insert_prefix);
        $len = strlen($name);
        $files = [];
        foreach (scandir($dir) as $file){
            if (substr($file,0,$prefix_len)!=$insert_prefix)continue;
            $end_pos = strrpos('-', $file);
            // file is like $prefix-insert-NUMBER.sql
            // trim off .sql & everything before NUMBER
            $index = substr($file, $end_pos);
            $index = substr($file,0,-4); 
            $index = (int)$index;
            $files[$index] = $file;
        }
        ksort($files);
        $total_rows = 0;
        foreach ($files as $f){
            $sql = file_get_contents($dir.'/'.$f);
// echo "\n\n\n-----------\n\n";
            // echo $sql;
// echo "\n\n\n-----------\n\n";
            // $stmt = $pdo->prepare($sql);
            // $num_rows = $stmt->execute();
            $num_rows = $pdo->exec(trim($sql));
            $total_rows += $num_rows;
        }

        return $total_rows;
    }

    /**
     *
     * @param $source_file the json file to parse for a schema
     */
    public function generate_schema(string $source_file, array $schema_info): array {
        
        $schema = [];
        
        foreach ($schema_info as $col => $data){
            $maxLen = $data['maxlen'];
            if ($maxLen > 1000){
                $type = "TEXT";
            } else if ($data['is_date']>0){
                $type = 'DATE';
            } else if ($data['is_string']>0
                &&$data['is_string']>$data['is_numeric']
                || $data['is_array'] > 0 
                && $data['is_array'] > $data['is_numeric']
            ) {
                $len = $maxLen + 50;
                $type = "VARCHAR({$len})"; //or varchar
            } else if ($data['is_float']>0 
                && $data['is_float'] > $data['is_int']
            ){
                $type  = "FLOAT";
            } else if ($data['is_bool']>0){
                $type = "BOOL";
            } else {
                $len = $maxLen;
                if ($len < 10)$len = 10;
                $type = "int({$len})";
            }
            
            $schema[$col]  = [
                'name'=> $col,
                'type'=> $type,
            ];
        }
        
        return $schema;
    }


    public function generate_schema_info($source_file){
        $schemaData = [];
        $sampleData = [
            'maxlen'=> 0,
            'minlen'=> false,
            'count' => 0,
            'is_string' => 0,
            'is_numeric' => 0,
            'is_int' => 0,
            'is_float'=> 0,
            'is_bool' => 0,
            'is_array'=> 0,

            'is_date' => 0,
            'is_json' => 0,
            
            'sampleValue' => '',
        ];
        
        //loop over every row
        //modify keys of schemaDataArray
        // finally json_encode & output the schema data to an info file <- For the developer to inspect their input data
        // json_encode & output an actual schema-file that details how to create the table.
        
        $reader = fopen($source_file, 'r');
        
        while ($line = fgets($reader)) {
            $row = $this->json_line_to_array($line);
            if ($row===null)continue;
            foreach ($row as $column => $value){
                $data = $schemaData[$column] ?? $sampleData;
                $data['count']++;
                $valueForLen = is_array($value) ? json_encode($value) : $value;
                $len = strlen($valueForLen??'');
                
                if ($len===false || $data['maxlen'] > $len){
                    $data['maxlen'] = $data['maxlen'];
                } else {
                    $data['maxlen'] = $len;
                    $data['sampleValue'] = substr($valueForLen??'', 0, 1000);
                }
                $data['minlen'] = ($len == false || $data['minlen'] < $len) && $data['minlen'] !== false ? $data['minlen'] : $len;
                
                $data['is_string'] += is_string($value) ? 1 : 0;
                $data['is_bool'] += is_bool($value) ? 1 : 0;
                $data['is_array'] += is_array($value) ? 1 : 0;
                $data['is_date'] += $this->is_date($value) ? 1 : 0;

                if (is_numeric($value)){
                    $data['is_numeric'] += 1;
                    $data['is_int'] += is_int($value) || (int)$value==$value ? 1 : 0;
                    $data['is_float'] += is_float($value) || (float)$value==$value ? 1 : 0;
                }
                
                if (is_string($value)){
                    $json = json_decode($value, true);
                    $data['is_json'] += ($json===null) ? 0 : 1;
                }
                
                $schemaData[$column] = $data;
            }
        }
        
        fclose($reader);
        
        return $schemaData;
    }



    /**
     * @param $tableName name of table to create
     * @param $schema schema ... @see(generate_schema())
     */
    public function generate_sql_create(string $tableName, array $schema, bool $dropIfExists=true){
        $colStatements = [];
        foreach ($schema as $col => $info){
            $statement = '`'.$info['name'].'` '.$info['type'];
            $colStatements[] = $statement;
        }
        $colsSql = implode(", ", $colStatements);
        $drop = $dropIfExists ? "DROP TABLE IF EXISTS `{$tableName}`;\n" : '';
        $sql =
        <<<SQL
            {$drop}
            CREATE TABLE `{$tableName}`
            (
            {$colsSql}
            )
            ;
            
        SQL;
            
        return $sql;
    }


    /**
     *
     * Generate sql for INSERTs and write it to disk 
     *
     * @param $source_file path to the source json file
     * @param $target_dir the directory to write the sql-insert files to
     * @param $tableName name of table to insert into
     * @param $schema the schema generated by @see(generate_schema())
     * @param $chunkByLength Maximum `strlen()` of any output sql
     * @param ?$rowModifier a function that accepts `array $row` and returns (optionally modified) `array` $row
     */
    public function generate_sql_insert(string $source_file, string $out_prefix, string $tableName, array $schema, $chunkByLength, ?callable $rowModifier = null){
        $pdo = new \PDO('sqlite::memory:');
         
        $reader = fopen($source_file, 'r');

        /** @key should be to access it in the json. @value should be the database column name */
        $columns = [];
        foreach ($schema as $col_name=>$info){
            $json_key = $info['json_key'] ?? $col_name;
            $columns[$json_key] = $col_name;
        }
        // $cols = array_combine(array_keys($schema), array_keys($schema));
        // $colsSql = '`'.implode('`,`', $cols).'`';
        $colsSql = '`'.implode('`,`', $columns).'`';
        $fk = $this->disable_foreign_key_checks 
            ? "SET FOREIGN_KEY_CHECKS=0;"
            : "";
        $insert = 
        <<<SQL
            $fk
            INSERT INTO `{$tableName}` 
                ( {$colsSql} )
            VALUES
        SQL;
        
        //goto here
        $rowCount = 0;
        $fileIndex = 0;
        $writer = null;
        $rows = [];
        $len = 0;
        while ($line = fgets($reader)) {
            $row = $this->json_line_to_array($line);
            if ($row==false)continue;
            $rowCount++;
            $row = array_filter($row,
                // function($value, $key) use ($cols){
                function($value, $key) use ($columns){
                    return isset($columns[$key]);
                }, ARRAY_FILTER_USE_BOTH
            );
            // var_dump($row);
            // exit;
            if ($rowModifier!=null){
                $row = $rowModifier($row);
            }
            $row = array_map(
                function($value) use ($pdo){
                    if (is_array($value)){
                        $value = json_encode($value);
                    }
                    if (is_string($value))$value = $pdo->quote($value); 
                    else if (strlen($value??'')==0)$value = 'NULL';
//                     $value = '"'.substr($value,1,-1).'"';
//                     return "{$value}";// Why am I returning it like that?
                    return $value;
                }, $row
            );
            $data = [];
            // var_dump($schema);
            // exit;
            foreach ($schema as $info){
                $key = $info['json_key'] ?? $info['name'];
                $data[] = $row[$key] ?? '0';
            }
            // echo "\n\n\n-----------\n\n";
            // var_dump($data);
            // echo "\n\n\n-----------\n\n";
            // var_dump($row);
            // echo "\n\n\n-----------\n\n";
            // exit;
            // $leadComma = $rowCount===1 ? '' : ',';
            $rowSql = "\n".'( '. implode(', ',$data).')';

            // echo "\n$rowCount";

            if ($writer === null || $chunkByLength!==false && $len + strlen($rowSql) > $chunkByLength){
                $len = 0;
                // finish what we have. This line gets written to next file
                // file name increments by 1
                if ($writer!==null){
                    fwrite($writer, "\n;\n");
                    fclose($writer);
                }
                if (substr($out_prefix,-1)!='/')$path_dir = dirname($out_prefix);
                else $path_dir = $out_prefix;

                if (!is_dir($path_dir))mkdir($path_dir,0751, true);

                $writer = fopen($out_prefix."-insert-{$fileIndex}.sql", 'w');
                fwrite($writer, $insert);
                $fileIndex++;
                $rowCount = 1;
                $len = strlen($insert);
            }
            if ($rowCount!==1){
                $rowSql = ",".$rowSql;
            }
            
            fwrite($writer,$rowSql);
            $len += strlen($rowSql);
            
        }
        // $writer will be null if the source json file contains no rows of data.
        if ($writer!==null){
            fwrite($writer, "\n;\n");
            if ($this->disable_foreign_key_checks){
                fwrite($writer, "SET FOREIGN_KEY_CHECKS=0;\n");
            }
            fclose($writer);
        }
        fclose($reader);
        
    }
}