#!/usr/bin/env php
<?php

// The whole feed is held in memory (SimpleXML tree, JSON string and the decoded
// array), so the limit is raised far above the PHP default.
ini_set('memory_limit', '32G');

// Source XML feed, expected next to this script.
$nameInputFile = __DIR__ . "/autoshopper.xml";
// Alternative input for local test runs:
//$nameInputFile = __DIR__ . "/test.xml";
// sprintf() mask for the output files; %08d receives the number of vehicles
// processed at the moment the file is created.
$outputFileMask = 'autoshopper_%08d.csv';
// Max number of lines before the output is being split into multiple CSV files
$filePortionLines = 25000;


// Repairing input before the conversion (required for Wayne Reeves)
// ** IMPORTANT ** the "tidy" app must be installed on the server where
// this script is supposed to be run!
echo 'Trying to repair the input file: ' . $nameInputFile . ' ...';
$nameRepairedFile = $nameInputFile . '.repaired';
// Both paths are shell-escaped so that a directory containing spaces or shell
// metacharacters cannot break (or alter) the command line.
exec(
    'tidy -xml -utf8 ' . escapeshellarg($nameInputFile)
    . ' > ' . escapeshellarg($nameRepairedFile),
    $tidyOutput,
    $tidyExitCode
);
if ($tidyExitCode > 1) {
    // tidy exits with 0 (clean) or 1 (warnings only); anything above that means
    // the repair itself failed (2) or tidy could not be started at all.
    // The script continues: the XML parse check further down decides whether
    // the repaired file is usable.
    echo "\n\033[0;31m[warning] tidy exited with status {$tidyExitCode}; "
        . "the repaired file may be incomplete\e[0m\n";
}
$nameInputFile = $nameRepairedFile;
echo "Finished\n";


// TODO: refactor this into a class

$xml = loadSourceAsSimpleXmlObject($nameInputFile);
if ($xml === false) {
    echo "\n\033[0;31m[FATAL ERROR] Failed to parse the input XML file!\e[0m\n";
    // Non-zero status so that a calling shell / cron job can detect the failure.
    exit(1);
}
echo "Ok\n";

$vehiclesDetailsArray = convertSimpleXmlObjectToArray($xml);
// The parsed document is no longer needed; drop this scope's reference to it
// so the memory can be reclaimed before the conversion loop starts.
unset($xml);

if (!is_array($vehiclesDetailsArray) || count($vehiclesDetailsArray) === 0) {
    // Without at least one record there is nothing to derive the header from.
    echo "\n\033[0;31m[FATAL ERROR] No vehicle records found in the input XML file!\e[0m\n";
    exit(1);
}

// Picking out the header
$arrayHeader = generateOutputHeader($vehiclesDetailsArray);
$headerLength = count($arrayHeader);

// Overall vehicle counter
$vehicleCountProcessed = 0;
$vehicleCountOverall = count($vehiclesDetailsArray);

// An explicit iterator (instead of foreach) keeps the position between the
// inner-loop runs, so each new output file continues where the last stopped.
$vehicleDetailsIterator = new ArrayIterator($vehiclesDetailsArray);

echo "\nConverting data..\n";
$handleCsv = null;

// Each pass of the outer loop produces one output CSV file.
for (;;) {

    $handleCsv = beginNewOutput(
        $handleCsv, $vehicleCountProcessed, $filePortionLines, $outputFileMask,
        $arrayHeader);

    // processing vehicle data
    while ($vehicleDetailsIterator->valid()) {
        $_v = $vehicleDetailsIterator->current();
        // Normalising the record; cleanupSubArrays() runs last because it
        // flattens whatever nested values the previous steps left behind.
        fixGenericFields($_v);
        mapEquipment($_v);
        unwrapPickUpFields($_v);
        cleanupSubArrays($_v);

        // filtering bad VINs
        if (isVinValid($_v)) {
            // The counter is still zero-based here: it is passed as the
            // record's position for the length-mismatch warning.
            outputDestinationRecord(
                $_v,
                $headerLength,
                $vehicleCountProcessed,
                $handleCsv
            );
        } else {
            echo "\n\033[0;31m[warning] skipping bad VIN: {$_v['Vin']}\e[0m\n";
        }
        // Count first, then report, so the progress line ends at "N of N".
        $vehicleCountProcessed++;
        echo "\r";
        echo "Total vehicles processed: {$vehicleCountProcessed} of {$vehicleCountOverall}";
        // Pointing next item to pick it upon creating a new file
        $vehicleDetailsIterator->next();
        //breaking out to create next output file
        if ($vehicleCountProcessed % $filePortionLines === 0) {
            break;
        }
    }
    fclose($handleCsv);
    // Forget the closed handle: handing a closed resource to fclose() again
    // raises a TypeError on PHP 8.
    $handleCsv = null;

    if ($vehicleCountProcessed >= $vehicleCountOverall) {
        // We are done
        break;
    }
}

echo "\n__________________________\n\nDONE ("
    . $vehicleCountProcessed
    . ' records converted)';



// ======================== Subroutines ========================================

/**
 * Announces the load on stdout and parses the given XML file with SimpleXML.
 *
 * @param string $nameInputFile Path of the XML file to parse
 *
 * @return SimpleXMLElement|false The parsed document, or false when
 *                                simplexml_load_file() fails
 */
function loadSourceAsSimpleXmlObject($nameInputFile)
{
    echo "Loading the input source data from: {$nameInputFile} ...";

    // PARSEHUGE lifts libxml's built-in size limits, NOBLANKS drops blank
    // nodes and NOERROR silences libxml's error reports.
    $libxmlOptions = LIBXML_PARSEHUGE
        | LIBXML_NOBLANKS
        | LIBXML_NOERROR
        | LIBXML_ERR_NONE;

    return simplexml_load_file($nameInputFile, 'SimpleXMLElement', $libxmlOptions);
}

/**
 * Converts the parsed feed into a plain PHP list of vehicle records by
 * round-tripping it through JSON, reporting progress on stdout.
 *
 * Only the 'VehicleDetails' entry of the decoded document is returned.
 *
 * @param mixed $xml Parsed feed (anything json_encode() accepts)
 *
 * @return array List of vehicle records (each an associative array); empty
 *               when the document has no usable 'VehicleDetails' entry
 */
function convertSimpleXmlObjectToArray($xml)
{
    echo "Encoding into JSON..";
    $enc = json_encode($xml);
    // Dropping the local reference as early as possible to keep the peak low
    $xml = null;
    echo "OK\n";
    echo "Decoding back to obtain an array..";
    // The second argument is the boolean "$associative" switch, not a flag mask
    $json = json_decode($enc, true);
    $enc = null;

    $vehiclesDetailsArray = [];
    if (is_array($json)
        && isset($json['VehicleDetails'])
        && is_array($json['VehicleDetails'])
    ) {
        $vehiclesDetailsArray = $json['VehicleDetails'];
    }
    $json = null;

    // Repeated elements decode to a 0-based list, but a single <VehicleDetails>
    // element decodes straight to that vehicle's field map. Wrap it so callers
    // always iterate over vehicles rather than over one vehicle's fields.
    if ($vehiclesDetailsArray !== []
        && !array_key_exists(0, $vehiclesDetailsArray)
    ) {
        $vehiclesDetailsArray = [$vehiclesDetailsArray];
    }
    echo "OK\n";

    return $vehiclesDetailsArray;
}

/**
 * Starts a new output CSV file when the processed-vehicle counter sits on a
 * portion boundary (zero, or a multiple of $filePortionLines).
 *
 * On a boundary the previous handle is closed if it is still open, a new file
 * named after $outputFileMask is created next to this script, and the header
 * row is written to it. Off a boundary the given handle is returned untouched.
 *
 * @param resource|null $handleCsv              Handle of the previous output file, if any
 * @param int           $vehicleCountProcessed  Vehicles processed so far; also fills the file mask
 * @param int           $filePortionLines       Number of vehicles per output file
 * @param string        $outputFileMask         sprintf() mask of the output file name
 * @param array         $arrayHeader            Column names for the header row
 *
 * @return resource|null The handle to write records to
 *
 * @throws RuntimeException When the new output file cannot be created
 */
function beginNewOutput(
    $handleCsv,
    $vehicleCountProcessed,
    $filePortionLines,
    $outputFileMask,
    $arrayHeader
) {
    $isPortionBoundary = $vehicleCountProcessed === 0
        || ($vehicleCountProcessed % $filePortionLines === 0);

    if (!$isPortionBoundary) {
        // Nothing to rotate: keep writing to the current file.
        return $handleCsv;
    }

    // is_resource() is false for a handle the caller has already closed;
    // closing such a handle again would raise a TypeError on PHP 8.
    if (is_resource($handleCsv)) {
        fclose($handleCsv);
    }

    $fileNameCurrent = __DIR__ . "/" . sprintf(
            $outputFileMask,
            $vehicleCountProcessed
        );

    echo "\nCreating a new output CSV ({$fileNameCurrent})..\n";
    $handleCsv = fopen($fileNameCurrent, 'w+');
    if ($handleCsv === false) {
        throw new RuntimeException(
            "Unable to create the output CSV file: {$fileNameCurrent}"
        );
    }
    // put header
    fputs($handleCsv, '"' . join('","', $arrayHeader) . '"' . "\r\n");

    return $handleCsv;
}

/**
 * Derives the CSV column names from the first vehicle record.
 *
 * The record's own keys come first (without 'PickUpInfo'), followed by the
 * keys of its 'PickUpInfo' entry, mirroring how unwrapPickUpFields() lays out
 * the data rows.
 *
 * Side effect: reset() rewinds the internal pointer of the passed array.
 *
 * @param array $vehiclesDetailsArray List of vehicle records (taken by reference)
 *
 * @return array List of column names; empty when there is no usable first record
 */
function generateOutputHeader(& $vehiclesDetailsArray)
{
    $firstEntry = reset($vehiclesDetailsArray);
    if (!is_array($firstEntry)) {
        // Empty list (reset() gave false) or a malformed first record:
        // there is nothing to build a header from.
        return [];
    }

    $pib = isset($firstEntry['PickUpInfo'])
        ? (array)$firstEntry['PickUpInfo']
        : [];
    unset($firstEntry['PickUpInfo']);

    // unwrapping the pickup info fields into a plain list at the very end
    return array_keys(array_merge($firstEntry, $pib));
}

/**
 * Tells whether the record's VIN, once upper-cased and trimmed, is exactly
 * 17 bytes long.
 *
 * The record is received by value, so the normalised VIN is used for the
 * check only and never reaches the caller.
 *
 * @param array $currentVehicleRecord Vehicle record holding a 'Vin' entry
 *
 * @return bool
 */
function isVinValid($currentVehicleRecord)
{
    $normalizedVin = trim(mb_strtoupper($currentVehicleRecord['Vin']));

    if (!$normalizedVin) {
        return false;
    }

    return strlen($normalizedVin) === 17;
}

/**
 * Writes one vehicle record as a double-quoted, comma-separated CSV line
 * terminated by CRLF.
 *
 * A record whose field count differs from the header's is not written; a
 * warning naming the record position and both lengths is echoed instead.
 * Values are written as they are, without any escaping.
 *
 * @param array    $_v                     Flat vehicle record
 * @param int      $headerLength           Number of columns in the header
 * @param int      $vehicleCountProcessed  Position of the record, used in the warning only
 * @param resource $handleCsv              Open handle of the current output file
 */
function outputDestinationRecord(
    $_v,
    $headerLength,
    $vehicleCountProcessed,
    $handleCsv
) {
    $lengthRecord = count($_v);

    if ($headerLength !== $lengthRecord) {
        // Kept on a single line: a wrapped string literal would leak its line
        // break and indentation into the console message.
        echo "\n\033[0;31mLine {$vehicleCountProcessed}: record length is "
            . "{$lengthRecord} vs header's {$headerLength}:\n";
        echo "\033[0m\n";

        return;
    }

    fputs($handleCsv, '"' . join('","', $_v) . '"' . "\r\n");
}

/**
 * Promotes the entries of the nested 'PickUpInfo' array to top-level fields
 * of the record and drops the 'PickUpInfo' entry itself.
 *
 * A 'PickUpInfo' value that is not an array is simply removed.
 *
 * @param array $currentVehicleRecord Vehicle record, modified in place
 */
function unwrapPickUpFields(&$currentVehicleRecord)
{
    $nestedInfo = $currentVehicleRecord['PickUpInfo'];

    if (is_array($nestedInfo)) {
        // array_merge() appends the new fields after the existing ones
        $currentVehicleRecord = array_merge($currentVehicleRecord, $nestedInfo);
    }

    // The wrapper goes away in every case, so the data columns stay aligned
    // with the header
    unset($currentVehicleRecord['PickUpInfo']);
}

/**
 * Collapses the record's equipment list into one comma-separated string.
 *
 * - 'Equipment' is not an array, or has no 'EquipmentName' key: set to ''.
 * - 'EquipmentName' is an array: its items (nested arrays are joined first)
 *   are joined with ','.
 * - 'EquipmentName' is not an array: the record is left untouched.
 *
 * @param array $currentVehicleRecord Vehicle record, modified in place
 */
function mapEquipment(&$currentVehicleRecord)
{
    $equipment = $currentVehicleRecord['Equipment'];

    if (!is_array($equipment) || !array_key_exists('EquipmentName', $equipment)) {
        $currentVehicleRecord['Equipment'] = '';

        return;
    }

    $names = $equipment['EquipmentName'];
    if (!is_array($names)) {
        // A single name stays nested as it is
        return;
    }

    $flatNames = [];
    foreach ($names as $entry) {
        $flatNames[] = is_array($entry) ? join(',', $entry) : $entry;
    }

    $currentVehicleRecord['Equipment'] = join(',', $flatNames);
}

/**
 * Normalises the image list and the company name of a vehicle record.
 *
 * - 'VehicleImages': an array of image URLs under 'ImageUrl' becomes one
 *   ';'-separated string; a non-array 'ImageUrl' is left as is; anything
 *   without an 'ImageUrl' key becomes ''.
 * - 'CompanyName': when the name (or the first element of a name array) is
 *   empty after trimming, a warning is echoed and a placeholder name is set.
 *
 * @param array $currentVehicleRecord Vehicle record, modified in place
 */
function fixGenericFields(&$currentVehicleRecord)
{
    $images = $currentVehicleRecord['VehicleImages'];

    if (is_array($images) && array_key_exists('ImageUrl', $images)) {
        if (is_array($images['ImageUrl'])) {
            $currentVehicleRecord['VehicleImages'] = join(';', $images['ImageUrl']);
        }
    } else {
        $currentVehicleRecord['VehicleImages'] = '';
    }

    // fixing company names
    $companyName = $currentVehicleRecord['CompanyName'];
    if (is_array($companyName)) {
        // Only the first element decides whether the name counts as empty
        $companyName = reset($companyName);
    }

    if (strlen(trim($companyName)) !== 0) {
        return;
    }

    echo "\n\033[0;31m[warning] empty Company Name for VIN: {$currentVehicleRecord['Vin']}\e[0m\n";
    $currentVehicleRecord['CompanyName'] = 'Untitled Company Name';
}

/**
 * Flattens every field of the record to a trimmed, CSV-safe string.
 *
 * A nested array is replaced by its first element, however deep the nesting
 * goes; an empty array becomes ''. Carriage returns, line feeds, NUL bytes and
 * double quotes are then stripped, because the CSV writer does no escaping.
 *
 * @param array $currentVehicleRecord Vehicle record, modified in place
 */
function cleanupSubArrays(&$currentVehicleRecord)
{
    $strippedCharacters = ["\r" => '', "\n" => '', "\0" => '', '"' => ''];

    $currentVehicleRecord = array_map(
        function ($valueOfAttribute) use ($strippedCharacters) {
            // Descend until a scalar is reached; reset() yields false on an
            // empty array, which the string cast below turns into ''.
            while (is_array($valueOfAttribute)) {
                $valueOfAttribute = reset($valueOfAttribute);
            }

            // The explicit cast keeps null/false from reaching strtr() as
            // non-strings.
            return trim(strtr((string)$valueOfAttribute, $strippedCharacters));
        },
        $currentVehicleRecord
    );
}
