Untitled
unknown
plain_text
2 years ago
4.3 kB
5
Indexable
<?php

/**
 * Command-line script: extracts the text of a PDF with pdfgrep, separates the
 * two text columns of its first page and prints the text grouped by section
 * (Contact, Headline, Summary, Top Skills, Languages, Honors-Awards,
 * Certifications, Publications, Experience, Education).
 *
 * Usage: php <this-script> <file.pdf>
 *
 * Two scratch files are created in the current working directory while the
 * script runs; both are removed before it exits.
 */

/** Scratch file that receives the raw pdfgrep output. */
const RAW_TEXT_FILE = 'pdfText.out';

/** Scratch file that receives the text once the first page's columns are separated. */
const COLUMN_TEXT_FILE = 'pdfText2.out';

/**
 * Dumps the text of a PDF with pdfgrep and rewrites it so that, for the first
 * page, the left-hand column comes first, followed by a "Headline:" marker
 * line and then the right-hand column. Later pages are copied through as-is.
 * "Page X of Y" lines are treated as page breaks and dropped.
 *
 * The result is written to COLUMN_TEXT_FILE, replacing any previous content.
 *
 * @param string $filename Path of the PDF to read.
 *
 * @throws RuntimeException When pdfgrep fails or a scratch file cannot be read or written.
 */
function prepare($filename)
{
    // escapeshellarg() keeps quotes or shell metacharacters in the path from
    // being interpreted by the shell.
    $command = 'pdfgrep "." ' . escapeshellarg((string) $filename)
        . ' > ' . escapeshellarg(RAW_TEXT_FILE);
    exec($command, $ignoredOutput, $status);

    // Like grep, pdfgrep reports "no match" with status 1; anything above is a
    // real failure (unreadable file, pdfgrep not installed, ...).
    if ($status > 1) {
        throw new RuntimeException("pdfgrep failed with exit status {$status} for '{$filename}'");
    }

    $fileLines = file(RAW_TEXT_FILE);
    if ($fileLines === false) {
        throw new RuntimeException('Unable to read ' . RAW_TEXT_FILE);
    }

    $indentation = detectColumnOffset($fileLines);

    $page = 1;
    $leftSection = '';
    $rightSection = '';
    $headline = '';
    $output = '';

    foreach ($fileLines as $line) {
        if (preg_match('/Page\s+\d+\s+of\s+\d+$/', $line)) {
            // Page footer: emit what was collected for the finished page and
            // start the next one. The footer text itself is discarded.
            $line = '';
            $page++;
            $output .= $leftSection . $headline . $rightSection;
            $leftSection = '';
            $rightSection = '';
        }

        if ($page === 1) {
            $headline = "Headline:\r\n";

            // A line that reaches into the right-hand column has its newline in
            // the right-hand part, so the left-hand part is terminated
            // explicitly; otherwise consecutive left-column lines run together.
            // NOTE(review): substr() counts bytes; multibyte characters in the
            // left column would shift the split point - confirm whether a
            // character-based split is needed for your PDFs.
            $leftSection .= rtrim((string) substr($line, 0, $indentation), "\r\n") . "\n";
            $rightSection .= (string) substr($line, $indentation);
        } else {
            $headline = '';
            $leftSection .= $line;
        }
    }

    // Text after the last page footer (or the whole document when it has no
    // footers at all) would otherwise be lost.
    if (trim($leftSection . $rightSection, " \t\n\r\0\x0B\f") !== '') {
        $output .= $leftSection . $headline . $rightSection;
    }

    if (file_put_contents(COLUMN_TEXT_FILE, $output) === false) {
        throw new RuntimeException('Unable to write ' . COLUMN_TEXT_FILE);
    }
}

/**
 * Finds the offset at which the right-hand column starts: the amount of
 * leading whitespace of the first line that begins with at least six
 * whitespace characters followed by visible text.
 *
 * @param string[] $lines Raw text lines.
 *
 * @return int Offset of the right-hand column, or 0 when no such line exists
 *             (every line is then treated as right-hand column text).
 */
function detectColumnOffset(array $lines): int
{
    foreach ($lines as $line) {
        if (preg_match('/^(\s{6,})\S/', $line, $match)) {
            return strlen($match[1]);
        }
    }

    return 0;
}

/**
 * Groups text lines by the section heading that precedes them.
 *
 * A line that exactly equals one of the known headings (after trimming)
 * switches the current section; every other line is appended to the current
 * section. Lines before the first heading and "Page X of Y" lines are skipped.
 *
 * @param string[] $lines Text lines in reading order.
 *
 * @return array<string, string> Section text keyed by contact, headline,
 *                               summary, topSkills, languages, honorsAwards,
 *                               certifications, publications, experience and
 *                               education; runs of line breaks are collapsed
 *                               to a single "\n".
 */
function parseSections(array $lines): array
{
    // Heading text as it appears in the input => key used in the result.
    $headings = [
        'Contact' => 'contact',
        'Headline:' => 'headline',
        'Summary' => 'summary',
        'Top Skills' => 'topSkills',
        'Languages' => 'languages',
        'Honors-Awards' => 'honorsAwards',
        'Certifications' => 'certifications',
        'Publications' => 'publications',
        'Experience' => 'experience',
        'Education' => 'education',
    ];

    $sections = array_fill_keys(array_values($headings), '');
    $current = '';

    foreach ($lines as $line) {
        // Strip Unicode space separators and tabs that trim() does not know.
        // preg_replace() yields null for invalid UTF-8; keep the raw line then.
        $stripped = preg_replace('/^[\p{Z}\t]+/u', '', $line);
        $line = trim($stripped === null ? $line : $stripped);

        if (preg_match('/^Page\s+\d+\s+of\s+\d+$/', $line)) {
            continue;
        }

        if (isset($headings[$line])) {
            $current = $headings[$line];
            continue;
        }

        if ($current !== '') {
            $sections[$current] .= $line . PHP_EOL;
        }
    }

    foreach ($sections as $key => $text) {
        $normalised = preg_replace("/[\r\n]+/", "\n", $text);
        $sections[$key] = $normalised === null ? '' : $normalised;
    }

    return $sections;
}

/**
 * Deletes the scratch files if they exist.
 */
function removeScratchFiles()
{
    foreach ([RAW_TEXT_FILE, COLUMN_TEXT_FILE] as $path) {
        if (is_file($path)) {
            unlink($path);
        }
    }
}

if (!isset($argv[1]) || $argv[1] === '') {
    fwrite(STDERR, 'Usage: php ' . ($argv[0] ?? 'script.php') . " <file.pdf>\n");
    exit(1);
}

$filename = $argv[1];
$exitCode = 0;

try {
    prepare($filename);

    $fileLines = file(COLUMN_TEXT_FILE);
    if ($fileLines === false) {
        throw new RuntimeException('Unable to read ' . COLUMN_TEXT_FILE);
    }

    // The raw lines are echoed ahead of the parsed result, which keeps the
    // script's output the same as before.
    foreach ($fileLines as $line) {
        echo $line . "\r\n";
    }

    $cvObj = parseSections($fileLines);
    print_r($cvObj);
} catch (RuntimeException $e) {
    fwrite(STDERR, $e->getMessage() . "\n");
    $exitCode = 1;
} finally {
    // Runs on success and on failure, so no scratch file is left behind.
    removeScratchFiles();
}

exit($exitCode);
Editor is loading...