array( # entries without citation
'Schmidbaur2002a',
'Anderson2011',
'Cadierno2012',
'Selander2017a'
),
'keys' => array( # entries without file
'Finkelstein1910',
'Williamson1851',
'Ullmann1901',
'Ullmann1904',
'Saito1973',
'Lee1991',
'Hayashi2012',
'Rix1956',
'Sheldrick2008',
'Smykalla1991',
'Kuzmina1994',
'Tanisaki1973',
'Hayton2010',
'Cadierno2013',
'Gutmann1975',
'Schwerdtfeger1993'
),
'type' => array(
'online',
'book',
'inbook',
'incollection',
'phdthesis'));
/* Load dependencies */
set_include_path(implode(PATH_SEPARATOR, array(get_include_path(), '../../')));
require_once 'vendor/autoload.php';
require_once 'explore.php';
require_once 'pdf2text.php';
use ZendPdf\PdfDocument;
use ZendPdf\Exception\CorruptedPdfException;
use ZendPdf\Exception\NotImplementedException;
/*
 * Index the bibliography: every "@type{key," header found in $sDatabase
 * yields one element of $aEntries, mapping the citation key to its entry type.
 */
preg_match_all('~@([^{]+){([^,]+),~i', $sDatabase, $aMatches);
list(, $aEntryTypes, $aCitationKeys) = $aMatches;
$aEntries = array_combine($aCitationKeys, $aEntryTypes);

/* DOIs collected for files that have no bibliography entry. */
$aDois = array();

/*
 * The heading is written before the files are collected; the lines that
 * belong to it are printed while the files are processed further down.
 * explore() is defined in explore.php (not shown here); it is expected to
 * leave the matching paths in $aFound -- presumably by reference, TODO confirm.
 */
echo "No entry found for:\n";
explore($sFiles, '\*.pdf', $aFound);

/* Base names of the accepted files, needed for the duplicate report. */
$aKeys = array();
/*
 * Walk over every file in $aFound and reconcile it with the bibliography.
 *
 * A file takes part when its base name (without ".pdf") starts with an
 * upper-case letter, does not have a dot as second-to-last character,
 * contains no space and is not listed in $aMissing['file']. For such a file:
 *  - unless its path contains one of the $aIgnore names followed by a
 *    backslash, the base name is appended to $aKeys;
 *  - if the file is ignored or an entry with that key exists, the entry is
 *    removed from $aEntries, so that only entries without a file remain
 *    after the loop;
 *  - otherwise the base name is printed (together with the path when $bLink
 *    is set) and, when $bDois is set, a DOI is looked up in the PDF
 *    properties or -- with $bText -- in the text extracted from the PDF.
 */

// Keys whose entry has already been claimed by an earlier file. Without this,
// a second file with the same base name would no longer find its entry in
// $aEntries (it has been unset) and would wrongly be reported as "no entry".
$aConsumed = array();

foreach ($aFound as $sFile) {
    $sBase = basename($sFile, '.pdf');

    if (!ctype_upper(substr($sBase, 0, 1))
        || substr($sBase, -2, 1) === '.'
        || strpos($sBase, ' ') !== false
        || in_array($sBase, $aMissing['file'], true)
    ) {
        continue;
    }

    // The ignore names are matched with a trailing backslash, i.e. against
    // Windows-style directory separators in $sFile.
    $bIgnore = false;
    foreach ($aIgnore as $sIgnore) {
        if (strpos($sFile, $sIgnore . '\\') !== false) {
            $bIgnore = true;
            break;
        }
    }
    if (!$bIgnore) {
        $aKeys[] = $sBase;
    }

    $bEntry = isset($aEntries[$sBase]) || isset($aConsumed[$sBase]);
    if ($bIgnore || $bEntry) {
        if ($bEntry) {
            $aConsumed[$sBase] = true;
        }
        unset($aEntries[$sBase]);
        continue;
    }

    // From here on: a file that is not ignored and has no entry.
    if ($bDois) {
        $sDoi = null;
        try {
            $oPdf = new PdfDocument($sFile, 0, true);
            if (isset($oPdf->properties['WPS-ARTICLEDOI'])) {
                $sDoi = $oPdf->properties['WPS-ARTICLEDOI'];
            }
        } catch (CorruptedPdfException $e) {
            // Deliberately ignored: the DOI lookup is best effort and the
            // text search below may still succeed.
        } catch (NotImplementedException $e) {
            // Deliberately ignored, see above.
        }

        if (isset($sDoi)) {
            $aDois[] = $sDoi;
        } elseif ($bText) {
            $sText = pdf2text($sFile);
            if (preg_match('~(dx\.doi\.org/|doi:\s?)(\d+\.\d+/(?:\w+\.)?\w+)~i', $sText, $aMatch)) {
                // DOI introduced by "dx.doi.org/" or "doi:".
                $aDois[] = $aMatch[2];
            } elseif (preg_match('~\s(\d+\.\d+/(?:\w+\.)?\w+)CCC~i', $sText, $aMatch)) {
                // DOI-shaped token directly followed by "CCC".
                $aDois[] = $aMatch[1];
            } elseif ($bLoose && preg_match('~(\d{2}\.\d+/\s*(?:\w+\.)?\w+)~i', $sText, $aMatch)) {
                // The loose pattern tolerates any whitespace after the slash,
                // so all of it (not only plain spaces) has to be removed.
                $aDois[] = preg_replace('~\s+~', '', $aMatch[0]);
            }
        }
    }

    if ($bLink) {
        printf("%-20s (%s)\n", $sBase, $sFile);
    } else {
        printf("%s\n", $sBase);
    }
}
/*
 * Drop the entries that are not reported as lacking a file: those whose
 * (lower-cased) entry type is listed in $aMissing['type'] and those whose
 * key starts with "ange_".
 */
foreach ($aEntries as $sBase => $sType) {
    $bExemptType = in_array(strtolower($sType), $aMissing['type']);
    $bExemptKey = strpos($sBase, 'ange_') === 0;
    if ($bExemptType || $bExemptKey) {
        unset($aEntries[$sBase]);
    }
}
/*
 * Final report, in three sections: the collected DOIs, the entries without a
 * file and the base names that occur more than once.
 */
echo "\nDOIs:\n";
if (count($aDois) > 0) {
    echo implode("\n", $aDois), "\n";
}

// Whatever is still left in $aEntries has no file; keys listed in
// $aMissing['keys'] are left out of the report.
$aWithoutFile = array_diff(array_keys($aEntries), $aMissing['keys']);
echo "\nFile missing for:\n", implode("\n", $aWithoutFile), "\n";

// array_unique() keeps the first occurrence of every name, so the index-wise
// difference consists of each further occurrence. No newline is written
// after this last section.
$aRepeated = array_diff_assoc($aKeys, array_unique($aKeys));
echo "\nDuplicate files:\n", implode("\n", $aRepeated);