<?php
namespace Tlf;
class DataConverter {
protected array $sources = [];
public function __construct(){}
public function addSource($source_dir){
if (!is_dir($source_dir)){
throw new \Exception("Directory '$source_dir' does not exist or is not a directory");
}
$this->sources[] = $source_dir;
}
public function convert($options=[]){
$options = array_merge($this->defaults(), $options);
extract($options);
foreach ($this->sources as $dir){
$input = $dir.'/source.ods'; //later make this accept any source name
if ($gen_csv)$this->convert_to_csv($dir.'/source.ods');
if ($gen_json)$this->convert_to_json($dir.'/out.csv');
if ($gen_sql)$this->convert_to_sql($dir.'/out.json');
if ($gen_sqlite)$this->convert_to_sqlite($dir.'/out.sql');
}
}
/**
* Convert a .ods file to .csv using `libreoffice` system command
*/
public function convert_to_csv(string $source_ods){
// old command that did not produce valid utf8
// $cmd = "libreoffice --headless --convert-to csv \"$odsDir\"/* --outdir \"$csvDir\"";
//--infilter is to force utf8 & comes from https://unix.stackexchange.com/questions/259361/specify-encoding-with-libreoffice-convert-to-csv
// I have no idea how it works
$out_dir = dirname($source_ods).'/out/';
$cmd = "libreoffice --headless --convert-to csv --infilter=CSV:44,34,76,1 \"$source_ods\"/* --outdir \"$out_dir\"";
//there is an alternate solution requiring an additional step for file format conversion available at: https://superuser.com/questions/199799/vim-shows-strange-characters-91-92#
system($cmd);
$this->clean_csv($out_dir.'/source.csv', dirname($source_ods).'/out.csv');
}
public function clean_csv($dirtyCsvFile, $write_to){
$csv = \League\Csv\Reader::createFromPath($dirtyCsvFile, 'r');
// row 9 has db column names
// row 10 has spreadsheet column names (human friendly)
// row 11 (offset 10) has first row of data
$csv->setHeaderOffset(8);
$header = $csv->getHeader(); //returns the CSV header record
$cols = [];
foreach ($header as $index=>$colName){
if (trim($colName)==''||substr($colName,0,2)=='--')break;
$cols[] = $colName;
}
$f = fopen($write_to, 'w');
if (!$f){
throw new \Exception("Could not open ".$write_to);
}
fputcsv($f, $cols);
$has_written = false;
foreach ($csv->getRecords($cols) as $offset => $record){
if ($offset<10)continue;
$record = $this->clean_row($cols,$record);
if ($record == null)continue;
fputcsv($f, $record);
$has_written = true;
}
fclose($f);
}
/**
* CSV file must be clean & contain headers at row 0 & first row of data at row 1
* @param $source_csv the path to the source csv file
* @param $out_file path to the output file or null to use out.json in $source_csv's directory
*/
public function convert_to_json(string $source_csv, $out_file=null){
$out_file = $out_file ?? dirname($source_csv).'/out.json';
$csv = \League\Csv\Reader::createFromPath($source_csv, 'r');
$csv->setHeaderOffset(0);
$header = $csv->getHeader(); //returns the CSV header record
$f = fopen($out_file, 'w');
if (!$f){
throw new \Exception("Could not open ".$jsonPath);
}
fwrite($f, "[\n");
$has_written = false;
foreach ($csv->getRecords($header) as $offset => $record){
if ($has_written) fwrite($f,','."\n");
fwrite($f,json_encode($record));
$has_written = true;
}
fwrite($f, "\n]");
fclose($f);
}
/**
*
* @see clean_value()
* @return an array with cleaned-up data
*/
public function clean_row($cols, $row){
if (trim($row[$cols[0]])==''
||trim($row[$cols[1]])=='')return null;
$out = [];
foreach ($row as $key=>$value){
$value = $this->clean_value($key,$value);
$out[$key] = $value;
}
return $out;
}
/**
* - Rounds percents to 2 decimal places & casts to float
* - converts 'na', 'n/a', and empty string to null
*
* @param $key the key for the column
* @param $value the value to modify
*
* @return a cleaned up version of the value (sometimes null)
*/
public function clean_value($key, $value){
if (substr($value,-1)=='%'){
$value = (float)substr($value,0,-1);
$value = round($value,2);
// 2 decimal places & remove the %
} else if ($value=='na'||$value=='n/a'){
$value = null;
} else if ($value === ''){
$value = null;
}
return $value;
}
public function defaults(){
$defaults = [
// bugfix: Fileformat encoding leaves some characters incompatible with utf8, so apply a fix with libreoffice cli
'use_filetype_conversion'=>true,
// WARNING: OVERWRITES FILES if true
// none of the files should be edited, generally. They're meant to be built from .ods spreadsheets
// Good to set them false if testing some kind of error
//
'gen_csv'=>true,
'gen_json'=>true,
'gen_sql'=>true,
'gen_sqlite'=>true,
];
return $defaults;
}
}