array( # entries without citation
        'Schmidbaur2002a',
        'Anderson2011',
        'Cadierno2012',
        'Selander2017a'
    ),
    'keys' => array( # entries without file
        'Finkelstein1910',
        'Williamson1851',
        'Ullmann1901',
        'Ullmann1904',        
        'Saito1973',
        'Lee1991',
        'Hayashi2012',
        'Rix1956',
        'Sheldrick2008',        
        'Smykalla1991',
        'Kuzmina1994',
        'Tanisaki1973',
        'Hayton2010',
        'Cadierno2013',
        'Gutmann1975',
        'Schwerdtfeger1993'
    ),
    'type' => array(
        'online',
        'book',
        'inbook',
        'incollection',
        'phdthesis'));

/* Load dependencies */
set_include_path(implode(PATH_SEPARATOR, array(get_include_path(), '../../')));
require_once 'vendor/autoload.php';
require_once 'explore.php';
require_once 'pdf2text.php';

use ZendPdf\PdfDocument;
use ZendPdf\Exception\CorruptedPdfException;
use ZendPdf\Exception\NotImplementedException;

// DOIs collected during the scan for PDF files that have no database entry.
$aDois = [];

// Capture the "@<type>{<key>," header of every entry in $sDatabase
// (presumably the raw text of the bibliography database - confirm where it
// is loaded): group 1 is the entry type, group 2 the citation key.
preg_match_all('~@([^{]+){([^,]+),~i', $sDatabase, $aMatches);

// Map citation key => entry type. Keys are removed from this map as matching
// files are found, so whatever is left afterwards has no file.
$aEntries = array_combine($aMatches[2], $aMatches[1]);

// The heading is printed up front; the file names follow while scanning.
echo "No entry found for:\n";

// NOTE(review): explore() appears to fill $aFound (by reference) with the
// paths matching the pattern below $sFiles - confirm in explore.php.
explore($sFiles, '\*.pdf', $aFound);

// Base names of all accepted, non-ignored PDF files. Collected so that names
// occurring more than once can be reported after the scan.
$aKeys = [];
foreach ($aFound as $sFile) {
    $sBase = basename($sFile, '.pdf');

    // Only handle files whose base name starts with an upper-case letter,
    // whose second-to-last character is not a dot, which contain no space
    // and which are not listed in $aMissing['file'].
    $bCandidate = ctype_upper(substr($sBase, 0, 1))
        && substr($sBase, -2, 1) !== '.'
        && strpos($sBase, ' ') === false
        && !in_array($sBase, $aMissing['file'], true);
    if (!$bCandidate) {
        continue;
    }

    // A file is ignored when its path contains "<name>\" for any name in
    // $aIgnore (backslash-separated paths are assumed here).
    $bIgnore = false;
    foreach ($aIgnore as $sIgnore) {
        if (strpos($sFile, $sIgnore . '\\') !== false) {
            $bIgnore = true;
            break;
        }
    }
    if (!$bIgnore) {
        $aKeys[] = $sBase;
    }

    // Ignored files and files with a database entry are done: drop the key
    // so it is not reported as "file missing" later on.
    if ($bIgnore || isset($aEntries[$sBase])) {
        unset($aEntries[$sBase]);
        continue;
    }

    // From here on: a file without a database entry.
    if ($bDois) {
        // First choice: the DOI stored in the PDF's document properties.
        $sDoi = null;
        try {
            $oPdf = new PdfDocument($sFile, 0, true);
            if (isset($oPdf->properties['WPS-ARTICLEDOI'])) {
                $sDoi = $oPdf->properties['WPS-ARTICLEDOI'];
            }
        } catch (CorruptedPdfException $e) {
            // Best effort: the PDF could not be parsed, so there is no
            // property DOI; fall through to the text search below.
        } catch (NotImplementedException $e) {
            // Best effort: the PDF uses a feature the parser lacks; treated
            // the same as a file without a property DOI.
        }

        if (isset($sDoi)) {
            $aDois[] = $sDoi;
        } elseif ($bText) {
            // Second choice: search the extracted text, from the strictest
            // pattern to the loosest one.
            $sText = pdf2text($sFile);
            if (preg_match('~(dx\.doi\.org/|doi:\s?)(\d+\.\d+/(?:\w+\.)?\w+)~i', $sText, $aMatch)) {
                // DOI introduced by "dx.doi.org/" or "doi:".
                $aDois[] = $aMatch[2];
            } elseif (preg_match('~\s(\d+\.\d+/(?:\w+\.)?\w+)CCC~i', $sText, $aMatch)) {
                // DOI-shaped token directly followed by "CCC".
                $aDois[] = $aMatch[1];
            } elseif ($bLoose && preg_match('~(\d{2}\.\d+/\s*(?:\w+\.)?\w+)~i', $sText, $aMatch)) {
                // Loose match: the pattern tolerates any whitespace after the
                // slash, so strip all of it (not only spaces) - a DOI broken
                // across lines would otherwise keep its line break.
                $aDois[] = preg_replace('~\s+~', '', $aMatch[0]);
            }
        }
    }

    // Report the file; with $bLink set the full path is printed as well.
    if ($bLink) {
        printf("%-20s (%s)\n", $sBase, $sFile);
    } else {
        printf("%s\n", $sBase);
    }
}

// Remove the remaining entries that should not be reported as lacking a
// file: those whose (case-insensitive) type is listed in $aMissing['type']
// and those whose key starts with "ange_".
foreach ($aEntries as $sKey => $sType) {
    $bExemptType = in_array(strtolower($sType), $aMissing['type']);
    $bAngePrefix = strpos($sKey, 'ange_') === 0;

    if ($bExemptType || $bAngePrefix) {
        unset($aEntries[$sKey]);
    }
}

// DOIs collected for files that have no database entry, one per line.
echo "\nDOIs:\n";
if (count($aDois) > 0) {
    echo implode("\n", $aDois) . "\n";
}

// Entries still left in $aEntries had no matching file; keys listed in
// $aMissing['keys'] are known to have none and are not reported.
echo "\nFile missing for:\n";
$aWithoutFile = array_diff(array_keys($aEntries), $aMissing['keys']);
echo implode("\n", $aWithoutFile), "\n";

// array_unique() keeps the first occurrence of each name, so the positional
// difference leaves exactly the repeated occurrences.
echo "\nDuplicate files:\n";
$aRepeated = array_diff_assoc($aKeys, array_unique($aKeys));
echo implode("\n", $aRepeated);