Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
4.3 kB
2
Indexable
Never
<?php

// Command-line entry point: convert the PDF named by the first argument to
// text via prepare(), split that text into named CV sections, print the
// result with print_r() and remove the temporary files.
$filename = $argv[1] ?? null;
if ($filename === null || $filename === '') {
    fwrite(STDERR, 'Usage: php ' . ($argv[0] ?? 'script.php') . ' <file.pdf>' . PHP_EOL);
    exit(1);
}

prepare($filename);

// prepare() leaves the re-ordered text in pdfText2.out; file() returns false
// when it cannot be read, which must not be iterated as if it were an array.
$fileLines = file('pdfText2.out');
if ($fileLines === false) {
    fwrite(STDERR, 'Could not read pdfText2.out' . PHP_EOL);
    $fileLines = [];
}

// Heading text (exactly as it appears on a line of its own) => output key.
// The order of this table is also the order of the keys in the printed result.
$sectionHeadings = [
    'Contact'        => 'contact',
    'Headline:'      => 'headline',
    'Summary'        => 'summary',
    'Top Skills'     => 'topSkills',
    'Languages'      => 'languages',
    'Honors-Awards'  => 'honorsAwards',
    'Certifications' => 'certifications',
    'Publications'   => 'publications',
    'Experience'     => 'experience',
    'Education'      => 'education',
];

// Every section starts out as an empty string, including "headline", so a
// document that lacks a section never triggers an undefined-variable warning.
$cvObj = array_fill_keys(array_values($sectionHeadings), '');

// Key of the section currently being filled; null until the first heading.
$currentSection = null;

foreach ($fileLines as $line) {
    // Print the raw input line (kept so the script's output is unchanged).
    echo $line . "\r\n";

    // Strip leading Unicode separators and tabs. With the /u modifier
    // preg_replace() returns null on invalid UTF-8; fall back to the
    // unmodified line instead of silently discarding its content.
    $stripped = preg_replace('/^[\p{Z}\t]+/u', '', $line);
    $line = trim($stripped ?? $line);

    // Page footers ("Page 3 of 7") carry no CV content.
    if (preg_match('/^Page\s+\d+\s+of\s+\d+$/', $line)) {
        continue;
    }

    // A heading line switches the active section and is not stored itself.
    if (isset($sectionHeadings[$line])) {
        $currentSection = $sectionHeadings[$line];
        continue;
    }

    // Text that appears before the first heading belongs to no section.
    if ($currentSection !== null) {
        $cvObj[$currentSection] .= $line . PHP_EOL;
    }
}

// Collapse every run of CR/LF characters into a single "\n".
foreach ($cvObj as $sectionKey => $sectionText) {
    $cvObj[$sectionKey] = preg_replace("/[\r\n]+/", "\n", $sectionText) ?? '';
}

// Remove the temporary files; the existence check avoids a warning when one
// of them was never created.
foreach (['pdfText.out', 'pdfText2.out'] as $tempFile) {
    if (is_file($tempFile)) {
        unlink($tempFile);
    }
}

print_r($cvObj);

/**
 * Dump the text of a PDF with pdfgrep and rewrite it so that the two columns
 * of the first page follow each other instead of being interleaved.
 *
 * Steps:
 *  1. `pdfgrep "." <file>` is run with its output redirected to pdfText.out.
 *  2. The split column is taken from the first line that starts with at least
 *     six whitespace characters followed by visible text.
 *  3. Each line of page 1 is cut at that column into a left and a right part.
 *     Lines of later pages are copied unchanged.
 *  4. Every "Page N of M" line ends a page: the collected left text, a
 *     "Headline:" marker line (page 1 only) and the collected right text are
 *     emitted in that order.
 *  5. The result is written to pdfText2.out in the current directory.
 *
 * @param string $filename Path of the PDF file to read.
 * @return void
 */
function prepare ($filename)
{
    // escapeshellarg() quotes the path safely; hand-written single quotes
    // break (and allow command injection) as soon as the name contains a '.
    exec('pdfgrep "." ' . escapeshellarg($filename) . ' > pdfText.out');

    $fileLines = file('pdfText.out'); // Read the file into an array
    if ($fileLines === false) {
        $fileLines = [];
    }

    // Column at which the right-hand section starts. It stays null when no
    // indented line exists, in which case page 1 is treated as one column.
    // Requiring a non-whitespace character keeps whitespace-only lines from
    // defining the column.
    $indentation = null;
    foreach ($fileLines as $line) {
        if (preg_match('/^(\s{6,})\S/', $line, $indentMatch)) {
            $indentation = strlen($indentMatch[1]);
            break;
        }
    }

    $page = 1;
    $leftSection = '';
    $rightSection = '';
    $headline = '';
    $output = '';

    foreach ($fileLines as $line) {
        if (preg_match('/Page\s+\d+\s+of\s+\d+$/', $line)) {
            // End of a page: drop the footer line and emit what was collected.
            $line = '';
            $page++;
            $output .= $leftSection . $headline . $rightSection;
            $leftSection = '';
            $rightSection = '';
        }

        if ($page === 1) {
            // Marker placed between the left and right text of page 1.
            $headline = "Headline:\r\n";

            if ($indentation === null) {
                // No split column known: the whole line is left-hand text.
                $leftSection .= $line;
            } else {
                // The cut removes the line break from the left fragment
                // whenever the line continues into the right column, so
                // re-terminate it; otherwise consecutive left-column rows
                // fuse into a single line.
                $leftPart = rtrim((string) substr($line, 0, $indentation));
                if ($leftPart !== '') {
                    $leftSection .= $leftPart . "\n";
                }
                // substr() yields "" (false before PHP 8) for short lines.
                $rightSection .= (string) substr($line, $indentation);
            }
        } else {
            $headline = '';
            $leftSection .= $line;
        }
    }

    // Emit text that follows the last page marker, or the whole document
    // when it contains no marker at all.
    if ($leftSection !== '' || $rightSection !== '') {
        $output .= $leftSection . $headline . $rightSection;
    }

    // One overwrite instead of repeated appends, so a leftover pdfText2.out
    // from an aborted run cannot leak into this result.
    file_put_contents('pdfText2.out', $output);
}

// Final cleanup of the temporary extraction files. The script already deletes
// both files before printing its result, so an unconditional unlink() here
// would always emit a "No such file or directory" warning; only remove what
// is actually still present.
foreach (['pdfText.out', 'pdfText2.out'] as $leftoverFile) {
    if (is_file($leftoverFile)) {
        unlink($leftoverFile);
    }
}