Untitled
unknown
plain_text
2 years ago
4.3 kB
7
Indexable
<?php
// Command-line entry point.
//
// Takes the path of a PDF as the first argument, lets prepare() write the
// extracted text to pdfText2.out, then splits that text into named sections
// (contact, headline, summary, ...) and prints the result with print_r().
if (!isset($argv[1]) || $argv[1] === '') {
    fwrite(STDERR, 'Usage: php ' . (isset($argv[0]) ? $argv[0] : 'script.php') . " <file.pdf>\n");
    exit(1);
}
$filename = $argv[1];
prepare($filename);

// Read the prepared text; fall back to "no lines" instead of iterating over false.
$fileLines = is_readable('pdfText2.out') ? file('pdfText2.out') : false;
if ($fileLines === false) {
    $fileLines = [];
}

// Heading line (exactly as it appears in the text) => key in the result array.
// The order here is the order of the keys in $cvObj.
$sectionKeys = [
    'Contact'        => 'contact',
    'Headline:'      => 'headline',
    'Summary'        => 'summary',
    'Top Skills'     => 'topSkills',
    'Languages'      => 'languages',
    'Honors-Awards'  => 'honorsAwards',
    'Certifications' => 'certifications',
    'Publications'   => 'publications',
    'Experience'     => 'experience',
    'Education'      => 'education',
];

// Every section starts out as an empty string, so a section that never
// appears in the input (including the headline) is still a defined key.
$cvObj = array_fill_keys(array_values($sectionKeys), '');

// Key of the section currently being filled; null until the first heading.
$currentSection = null;

foreach ($fileLines as $line) {
    // NOTE(review): echoing every raw input line looks like leftover debug
    // output; it is kept so the script's output stays unchanged.
    echo $line . "\r\n";

    // Strip leading Unicode separators/tabs. With the /u modifier preg_replace()
    // returns null on invalid UTF-8; keep the raw line in that case instead of
    // silently losing it.
    $stripped = preg_replace('/^[\p{Z}\t]+/u', '', $line);
    $line = trim($stripped !== null ? $stripped : $line);

    // Page footers ("Page 2 of 5") carry no content.
    if (preg_match('/^Page\s+\d+\s+of\s+\d+$/', $line)) {
        continue;
    }

    // A heading line switches the current section and is not stored itself.
    if (isset($sectionKeys[$line])) {
        $currentSection = $sectionKeys[$line];
        continue;
    }

    // Lines seen before the first heading belong to no section and are dropped.
    if ($currentSection !== null) {
        $cvObj[$currentSection] .= $line . PHP_EOL;
    }
}

// Collapse every run of CR/LF characters into a single "\n".
foreach ($cvObj as $sectionKey => $sectionText) {
    $normalised = preg_replace("/[\r\n]+/", "\n", $sectionText);
    $cvObj[$sectionKey] = $normalised !== null ? $normalised : '';
}

// Remove the intermediate files; the guard avoids a warning when one is missing.
foreach (['pdfText.out', 'pdfText2.out'] as $tempFile) {
    if (is_file($tempFile)) {
        unlink($tempFile);
    }
}

print_r($cvObj);
/**
 * Dump the text of a PDF with pdfgrep and rewrite it into pdfText2.out.
 *
 * The raw pdfgrep output goes to pdfText.out. Lines of the first page are
 * split at a fixed column: everything left of it is written first, then a
 * "Headline:" marker line, then everything right of it. Later pages are
 * copied unchanged. A block is written each time a "Page N of M" line is
 * reached, and once more at the end for any remaining text.
 *
 * The split column is the leading-whitespace width of the first line that
 * starts with at least six whitespace characters. When no such line exists,
 * first-page lines are not split.
 *
 * NOTE(review): the split uses byte offsets (strlen/substr); this presumably
 * assumes single-byte text left of the split column - confirm for non-ASCII
 * input.
 *
 * @param string $filename Path of the PDF to read.
 * @return void
 */
function prepare ($filename)
{
    // escapeshellarg() keeps quotes or shell metacharacters in the path from
    // being interpreted by the shell.
    exec('pdfgrep "." ' . escapeshellarg($filename) . '  > pdfText.out');

    $fileLines = is_readable('pdfText.out') ? file('pdfText.out') : false;
    if ($fileLines === false) {
        $fileLines = [];
    }

    // Output is appended block by block, so start from an empty file; leftovers
    // from an aborted earlier run must not leak into this one.
    file_put_contents('pdfText2.out', '');

    // Find the split column: leading whitespace of the first indented line,
    // not counting the line terminator.
    $indentation = null;
    foreach ($fileLines as $line) {
        if (preg_match('/^\s{6,}/', $line)) {
            $leading = (string) preg_replace('/[^\s].*$/', '', $line);
            $indentation = strlen(rtrim($leading, "\r\n"));
            break;
        }
    }

    $page = 1;
    $leftSection = '';
    $rightSection = '';
    $headline = '';

    foreach ($fileLines as $line) {
        if (preg_match('/Page\s+\d+\s+of\s+\d+$/', $line)) {
            // End of a page: drop the footer itself and write what was collected.
            $line = '';
            $page++;
            file_put_contents('pdfText2.out', $leftSection . $headline . $rightSection, FILE_APPEND);
            $leftSection = '';
            $rightSection = '';
        }

        if ($page === 1) {
            $headline = "Headline:\r\n";
            if ($indentation === null) {
                // No split column detected: keep the whole line on the left.
                $leftSection .= $line;
                continue;
            }
            $left = (string) substr($line, 0, $indentation);
            // A line that continues past the split column is cut mid-line, so
            // the left fragment has no terminator; add one so consecutive
            // left-column lines stay on separate lines.
            if ($left !== '' && substr($left, -1) !== "\n") {
                $left .= "\n";
            }
            $leftSection .= $left;
            $rightSection .= (string) substr($line, $indentation);
        } else {
            $headline = '';
            $leftSection .= $line;
        }
    }

    // Text after the last page footer (or a document without footers) would
    // otherwise be lost.
    if ($leftSection !== '' || $rightSection !== '') {
        file_put_contents('pdfText2.out', $leftSection . $headline . $rightSection, FILE_APPEND);
    }
}
// Final cleanup of the intermediate files. The script already unlinks them
// before printing its result, so only delete what still exists; a bare
// unlink() on a missing file raises a warning.
foreach (['pdfText.out', 'pdfText2.out'] as $tempFile) {
    if (is_file($tempFile)) {
        unlink($tempFile);
    }
}
Editor is loading...