Untitled
unknown
plain_text
3 years ago
4.3 kB
9
Indexable
<?php
// ---------------------------------------------------------------------------
// Entry point.  Usage: php <this-script> <file.pdf>
//
// 1. prepare() converts the PDF into the intermediate text file pdfText2.out.
// 2. That file is read line by line; a line that is exactly one of the known
//    section headers switches the "current section", every other line is
//    appended to the current section.
// 3. The collected sections are printed with print_r() and the intermediate
//    files are removed.
// ---------------------------------------------------------------------------

if (!isset($argv[1]) || $argv[1] === '') {
    // Without a path there is nothing to hand to pdfgrep; fail early with a
    // usage hint instead of running the pipeline on an undefined value.
    fwrite(STDERR, 'Usage: php ' . (isset($argv[0]) ? $argv[0] : 'script.php') . ' <file.pdf>' . PHP_EOL);
    exit(1);
}

$filename = $argv[1];
prepare($filename);

// Read the prepared text into an array of lines.  The file can be missing
// (for example when no page was flushed), so fall back to "no lines" rather
// than iterating over `false`.
$fileLines = is_file('pdfText2.out') ? file('pdfText2.out') : false;
if ($fileLines === false) {
    fwrite(STDERR, 'Warning: pdfText2.out could not be read; all sections will be empty.' . PHP_EOL);
    $fileLines = [];
}

// Header line (exact match after trimming) => key used in the result array.
// The order here is also the order of the keys in the printed result.
$sectionHeaders = [
    'Contact'        => 'contact',
    'Headline:'      => 'headline',
    'Summary'        => 'summary',
    'Top Skills'     => 'topSkills',
    'Languages'      => 'languages',
    'Honors-Awards'  => 'honorsAwards',
    'Certifications' => 'certifications',
    'Publications'   => 'publications',
    'Experience'     => 'experience',
    'Education'      => 'education',
];

// Every section starts out as an empty string, so appending with `.=` never
// touches an undefined variable (the headline section included).
$sections = array_fill_keys(array_values($sectionHeaders), '');

// Key of the section currently being filled; null until the first header.
$currentSection = null;

foreach ($fileLines as $line) {
    // Echo of the raw line, kept so the script's standard output is unchanged.
    echo $line . "\r\n";

    // Strip leading Unicode separators and tabs, then ordinary whitespace on
    // both ends.  preg_replace() returns null when the line is not valid
    // UTF-8; in that case keep the raw line instead of silently blanking it.
    $stripped = preg_replace('/^[\p{Z}\t]+/u', '', $line);
    $line = trim($stripped !== null ? $stripped : $line);

    // Page footers ("Page 3 of 7") carry no CV content.
    if (preg_match('/^Page\s+\d+\s+of\s+\d+$/', $line)) {
        continue;
    }

    // A header line only switches the section; it is not stored itself.
    if (isset($sectionHeaders[$line])) {
        $currentSection = $sectionHeaders[$line];
        continue;
    }

    // Lines seen before the first header belong to no section and are dropped.
    if ($currentSection !== null) {
        $sections[$currentSection] .= $line . PHP_EOL;
    }
}

// Collapse every run of CR/LF characters into a single "\n".
$cvObj = [];
foreach ($sections as $key => $text) {
    $collapsed = preg_replace("/[\r\n]+/", "\n", $text);
    $cvObj[$key] = $collapsed !== null ? $collapsed : '';
}

// Remove the intermediate files; the existence check avoids a warning when
// one of them was never created.
foreach (['pdfText.out', 'pdfText2.out'] as $tempFile) {
    if (is_file($tempFile)) {
        unlink($tempFile);
    }
}

print_r($cvObj);
/**
 * Dumps the text of a PDF with `pdfgrep` and rewrites it into `pdfText2.out`
 * so that the two-column first page reads as a single column.
 *
 * Steps:
 *  - `pdfgrep "." <file>` writes every line matching the regex `.` to
 *    `pdfText.out`.
 *  - The column boundary is taken from the first line that starts with at
 *    least six blanks: the width of that leading run is the split offset.
 *  - On page 1 every line is cut at that offset into a left and a right part.
 *    When a "Page N of M" line is reached the page is flushed to
 *    `pdfText2.out` as: left column, the marker line "Headline:", right column.
 *  - On later pages lines are copied through unchanged (page markers removed).
 *
 * Both files are created in the current working directory; `pdfText2.out`
 * is truncated first so a leftover file from an earlier run is not extended.
 *
 * @param string $filename Path of the PDF to convert.
 * @return void
 * @throws RuntimeException If `pdfText.out` cannot be read after running pdfgrep.
 */
function prepare ($filename)
{
    $rawFile = 'pdfText.out';
    $outFile = 'pdfText2.out';

    // The path comes from the command line: quote it with escapeshellarg()
    // so quotes or shell metacharacters in it cannot alter the command.
    exec('pdfgrep "." ' . escapeshellarg((string) $filename) . ' > ' . $rawFile);

    $fileLines = file($rawFile); // Read the file into an array
    if ($fileLines === false) {
        throw new RuntimeException("Unable to read {$rawFile} after running pdfgrep");
    }

    // Always (re)create the output file, even when nothing gets flushed below.
    file_put_contents($outFile, '');

    // Split offset = number of leading blanks (line terminators excluded) on
    // the first line indented by six or more.  Stays null when no such line
    // exists, in which case page 1 is treated as a single column.
    $indentation = null;
    foreach ($fileLines as $line) {
        if (preg_match('/^[^\S\r\n]{6,}/', $line, $match)) {
            $indentation = strlen($match[0]);
            break;
        }
    }

    $page = 1;
    $headline = '';
    $leftSection = $rightSection = '';

    foreach ($fileLines as $line) {
        if (preg_match('/Page\s+\d+\s+of\s+\d+\s*$/', $line)) {
            // End of a page: flush what was collected and start the next one.
            file_put_contents($outFile, $leftSection . $headline . $rightSection, FILE_APPEND);
            $leftSection = $rightSection = $headline = '';
            $page++;
            continue;
        }

        if ($page === 1) {
            $headline = "Headline:\r\n";

            if ($indentation === null) {
                $left = $line;
                $right = '';
            } else {
                $left = substr($line, 0, $indentation);
                // substr() yields false on PHP < 8 when the line is shorter
                // than the offset; normalise that to an empty string.
                $right = (string) substr($line, $indentation);
            }

            // On a two-column line the newline ends up in the right part, so
            // the left part must get its own terminator; otherwise successive
            // left-column entries are glued together on one line.
            $leftSection .= rtrim($left) . "\n";
            $rightSection .= $right;
        } else {
            $leftSection .= $line;
        }
    }

    // Text after the last page marker (or a document without any marker)
    // would otherwise be lost.
    if ($leftSection !== '' || $rightSection !== '') {
        file_put_contents($outFile, $leftSection . $headline . $rightSection, FILE_APPEND);
    }
}
// Final cleanup of the intermediate files.  The main part of the script has
// normally removed them already, so only unlink what is still present;
// an unconditional unlink() on a missing file raises a warning.
foreach (['pdfText.out', 'pdfText2.out'] as $tempFile) {
    if (is_file($tempFile)) {
        unlink($tempFile);
    }
}
// Editor is loading...