Untitled
unknown
csharp
4 months ago
22 kB
3
Indexable
// Phone pattern used when scanning VietnamWorks notification lines. Hoisted so it is
// built once instead of on every call; pattern and options are unchanged.
private static readonly Regex VnwPhoneRegex =
    new Regex(@"(\(?[0-9]{3}\)?)?\-?[0-9]{3}\-?[0-9]{5}", RegexOptions.IgnoreCase);

/// <summary>
/// Splits <paramref name="text"/> on <paramref name="separator"/> and drops empty pieces.
/// </summary>
private static List<string> SplitNonEmpty(string text, string separator)
{
    return text.Split(new[] { separator }, StringSplitOptions.None)
               .Where(t => !string.IsNullOrEmpty(t))
               .ToList();
}

/// <summary>
/// Extracts candidate details (name, e-mail, phone, job title, source) from a
/// job-board notification e-mail.
/// </summary>
/// <remarks>
/// The job board is resolved with <c>FindProvider(headers, body_text)</c> and each board
/// has its own parsing branch. Unknown providers yield a result with only the common
/// post-processing applied. All match labels are lower-case, so callers presumably pass
/// lower-cased text - NOTE(review): confirm against the caller.
/// </remarks>
/// <param name="subject">E-mail subject; used mainly to derive the job title.</param>
/// <param name="headers">Raw header text; used for provider detection and, for jobstreet.com, the sender.</param>
/// <param name="body_text">Plain-text body; the main parsing input for most providers.</param>
/// <param name="body_html">HTML body; used by the jobstreet and vietnamworks branches.</param>
/// <returns>
/// A populated <see cref="CandidateParserResult"/>. <c>Name</c> is never null on return
/// (it falls back to an empty string); other fields stay null when not found.
/// </returns>
public static CandidateParserResult ParsingFromEmail(string subject, string headers, string body_text, string body_html)
{
    var candidateParsing = new CandidateParserResult();

    // Provider detection still receives the caller's original arguments.
    string provider = FindProvider(headers, body_text);

    // Guard against null inputs so the branches below cannot throw on .Contains/.Replace.
    subject = subject ?? string.Empty;
    string fullText = body_text ?? string.Empty;

    switch (provider)
    {
        case "itviec.com":
            {
                candidateParsing.Source = "itviec";
                fullText = fullText.Replace("*", "").Replace("\r", "");
                candidateParsing.Name = StringHelper.GetValueFromHeadersSendGrid(fullText, "name:");
                candidateParsing.Email = StringHelper.GetValueFromHeadersSendGrid(fullText, "email:");
                // NOTE(review): "anwser:" is kept exactly as in the original; confirm whether the
                // e-mail template really uses this spelling.
                candidateParsing.Note = StringHelper.GetValueFromHeadersSendGrid(fullText, "question:")
                                        + StringHelper.GetValueFromHeadersSendGrid(fullText, "anwser:");

                if (subject.Contains("applies for"))
                {
                    candidateParsing.JobName = StringHelper.GetStringBetweenTwoCharLast(subject, "applies for", "- itviec");
                    if (!string.IsNullOrEmpty(candidateParsing.JobName))
                    {
                        candidateParsing.JobName = candidateParsing.JobName.Trim();
                    }
                }
                break;
            }

        case "vieclam24h.vn":
            {
                candidateParsing.Source = "vieclam24h";
                fullText = fullText.Replace("*", "").Replace("\r", "");
                candidateParsing.Name = StringHelper.GetValueFromHeadersSendGrid(fullText, "họ và tên");
                if (string.IsNullOrEmpty(candidateParsing.Name))
                {
                    // Update 15/08/2022: retry for forwarded mails with a different Unicode encoding.
                    // NOTE(review): this literal looks identical to the one above; it presumably
                    // differs only in Unicode composition - verify the bytes and do not merge them.
                    candidateParsing.Name = StringHelper.GetValueFromHeadersSendGrid(fullText, "họ và tên");
                }
                candidateParsing.Email = StringHelper.GetValueFromHeadersSendGrid(fullText, "email:");
                candidateParsing.Phone = StringHelper.GetValueFromHeadersSendGrid(fullText, "số điện thoại:");

                if (candidateParsing.Name != null && candidateParsing.Name.Contains(":"))
                {
                    // Fix: strings are immutable, so the result must be assigned back.
                    candidateParsing.Name = candidateParsing.Name.Replace(":", "");
                }

                // Job title comes from the subject.
                if (subject.Contains("vị trí"))
                {
                    candidateParsing.JobName = StringHelper.GetStringFromTextToLast(subject, "vị trí");
                }
                break;
            }

        case "timviecnhanh.com":
            {
                candidateParsing.Source = "timviecnhanh";
                fullText = fullText.Replace("*", "").Replace("\r", "");
                candidateParsing.Name = StringHelper.GetValueFromHeadersSendGrid(fullText, "họ tên: ");
                candidateParsing.Email = StringHelper.GetValueFromHeadersSendGrid(fullText, "email: ");
                candidateParsing.Phone = StringHelper.GetValueFromHeadersSendGrid(fullText, "số điện thoại: ");

                // Job title comes from the subject.
                if (subject.Contains("vị trí"))
                {
                    candidateParsing.JobName = StringHelper.GetStringFromTextToLast(subject, "vị trí");
                }
                break;
            }

        case "careerbuilder.vn":
            {
                // The notification body does not contain the candidate's e-mail.
                candidateParsing.Source = "careerbuilder";
                // Fix: the original used @"\r" (a literal backslash + 'r'), which never matched a
                // carriage return; every other branch strips "\r".
                fullText = fullText.Replace("*", "").Replace("\r", "");

                // English vs. Vietnamese template.
                if (subject.Contains("applied for"))
                {
                    candidateParsing.Name = StringHelper.GetValueFromHeadersSendGrid(fullText, "candidate name:");
                    candidateParsing.JobName = StringHelper.GetStringFromTextToLast(subject, "applied for ");
                }
                else
                {
                    candidateParsing.Name = StringHelper.GetValueFromHeadersSendGrid(fullText, "họ và tên ứng viên:");
                    candidateParsing.JobName = StringHelper.GetStringFromTextToLast(subject, "vừa ứng tuyển vị trí ");
                }

                // NOTE(review): the original computed EmailHelper.ExtractSingleEmail(headers) here
                // ("Get email from Reply-To") but never stored the result, so Email stays unset for
                // this provider. The dead computation was removed; before assigning it, confirm
                // that extracting from the full header block cannot pick the sender's address.
                break;
            }

        case "topdev.vn":
            {
                fullText = fullText.Replace(">", "");
                var bodyCandidate = StringHelper.GetStringBetweenTwoCharLast(fullText, "application for", "customercare@topdev.vn");
                candidateParsing.Source = "topdev";
                candidateParsing.Name = StringHelper.GetStringBetweenTwoCharLast(bodyCandidate, "fullname:", "email:");
                candidateParsing.Email = StringHelper.GetStringBetweenTwoCharLast(bodyCandidate, "email:", "mobile:");
                candidateParsing.Phone = StringHelper.GetStringBetweenTwoCharLast(bodyCandidate, "mobile:", "linkcv:");
                // Fix: only strip dashes when a phone was actually found (avoids a null dereference).
                if (!string.IsNullOrEmpty(candidateParsing.Phone))
                {
                    candidateParsing.Phone = candidateParsing.Phone.Replace("-", "");
                }
                candidateParsing.JobName = StringHelper.GetStringBetweenTwoCharLast(fullText, "application for", "fullname");
                break;
            }

        case "vietnamworks.com":
            {
                // Fix: fall back to an empty string so the Split calls below cannot throw
                // when the marker text is missing.
                var bodyCandidate = StringHelper.GetStringBetweenTwoCharLast(fullText, "vietnamworks:", "vietnamworks.com")
                                    ?? string.Empty;

                // Try progressively different separators until at least four fields are found.
                var listInfo = SplitNonEmpty(bodyCandidate, "\n");
                if (listInfo.Count < 4)
                {
                    listInfo = SplitNonEmpty(bodyCandidate, "\n>\n>");
                    if (listInfo.Count < 4)
                    {
                        listInfo = SplitNonEmpty(bodyCandidate, "\n\n");
                    }
                    if (listInfo.Count < 4)
                    {
                        bodyCandidate = bodyCandidate.Replace("\t", "");
                        listInfo = SplitNonEmpty(bodyCandidate, " ");
                    }
                }

                var cleanlistInfo = listInfo
                    .Where(l => !l.Contains("https")
                                && l != "\r"
                                && l != " \r"
                                && !string.IsNullOrEmpty(l)
                                && l != " ")
                    .ToList();

                // The candidate name is assumed to be the first remaining row.
                if (cleanlistInfo.Any() && !string.IsNullOrEmpty(cleanlistInfo[0]))
                {
                    candidateParsing.Name = cleanlistInfo[0].Trim();
                }

                // Phone: first short row (< 18 chars after trimming) that matches the phone pattern.
                foreach (var info in listInfo)
                {
                    if (VnwPhoneRegex.IsMatch(info) && info.Trim().Length < 18)
                    {
                        candidateParsing.Phone = info.Trim();
                        break;
                    }
                }

                // Job title (and name fallback) from the subject: English vs. Vietnamese template.
                if (subject.Contains("applied to"))
                {
                    candidateParsing.JobName = StringHelper.GetStringBetweenTwoCharLast(subject, "applied to", " via vietnamworks");
                    // Applied via the mobile app.
                    if (string.IsNullOrEmpty(candidateParsing.JobName))
                    {
                        candidateParsing.JobName = StringHelper.GetStringBetweenTwoCharLast(subject, "applied to", " via ");
                    }
                    if (string.IsNullOrEmpty(candidateParsing.Name))
                    {
                        candidateParsing.Name = StringHelper.GetStringFromBeginToText(subject, "has applied to");
                        // Fix: assign the Replace result (it was discarded) and guard against null.
                        if (candidateParsing.Name != null)
                        {
                            candidateParsing.Name = candidateParsing.Name.Replace("📱", "");
                        }
                    }
                }
                else
                {
                    candidateParsing.JobName = StringHelper.GetStringBetweenTwoCharLast(subject, "vị trí", " thông qua vietnamworks");
                    // Applied via the mobile app.
                    if (string.IsNullOrEmpty(candidateParsing.JobName))
                    {
                        candidateParsing.JobName = StringHelper.GetStringBetweenTwoCharLast(subject, "vị trí", " thông qua ứng dụng");
                    }
                    if (string.IsNullOrEmpty(candidateParsing.Name))
                    {
                        candidateParsing.Name = StringHelper.GetStringFromBeginToText(subject, "đã ứng tuyển");
                        // Fix: assign the Replace result (it was discarded) and guard against null.
                        if (candidateParsing.Name != null)
                        {
                            candidateParsing.Name = candidateParsing.Name.Replace(" 📱 ", "");
                        }
                    }
                }

                candidateParsing.Source = "vietnamworks";

                // Remove the forward prefix. Fix: the name may still be null at this point.
                if (candidateParsing.Name != null)
                {
                    candidateParsing.Name = candidateParsing.Name.Replace("FW: ", "");
                }

                // Job URL and application URL from the HTML body's links (best effort).
                if (!string.IsNullOrEmpty(body_html))
                {
                    try
                    {
                        var htmlDoc = new HtmlAgilityPack.HtmlDocument();
                        htmlDoc.LoadHtml(body_html);

                        // Fix: SelectNodes returns null when nothing matches, so check before iterating.
                        var anchors = htmlDoc.DocumentNode.SelectNodes("//a[@href]");
                        if (anchors != null)
                        {
                            foreach (HtmlNode link in anchors)
                            {
                                string hrefValue = link.GetAttributeValue("href", string.Empty);
                                if (string.IsNullOrEmpty(hrefValue))
                                {
                                    continue;
                                }
                                if (hrefValue.Contains("jobId"))
                                {
                                    candidateParsing.VNW_Job_URL = hrefValue;
                                }
                                if (hrefValue.Contains("application/detail"))
                                {
                                    candidateParsing.VNW_Application_URL = hrefValue;
                                }
                            }
                        }
                    }
                    catch (Exception)
                    {
                        // Deliberately best-effort: a malformed HTML body must not fail the whole
                        // parse. NOTE(review): the original had its logging commented out here;
                        // consider restoring it once a logger is available in this scope.
                    }
                }

                // VietnamWorks mails carry no e-mail address; generate a unique placeholder.
                if (string.IsNullOrEmpty(candidateParsing.Email))
                {
                    candidateParsing.Email = "email_empty_" + DateTime.Now.Ticks + "@empty.talent.vn";
                }
                break;
            }

        case "jobstreet.com":
            {
                candidateParsing.Source = "jobstreet.com";
                // Parse from the HTML body to be safe.
                fullText = StringHelper.StripHtml(body_html).ToLower();
                fullText = fullText.Replace("*", "").Replace("\r", "");
                candidateParsing.Name = StringHelper.GetStringBetweenTwoCharLast(fullText, "candidate name:", "position applied:");
                candidateParsing.JobName = StringHelper.GetStringBetweenTwoCharLast(fullText, "position applied:", "date applied:");

                // First try: the "(on behalf of ...)" part of the headers; second try: the body.
                var parsedEmail = StringHelper.GetStringBetweenTwoCharLast(headers, "(on behalf of", ")");
                if (string.IsNullOrEmpty(parsedEmail))
                {
                    parsedEmail = StringHelper.GetStringBetweenTwoCharLast(fullText, "(on behalf of", ")");
                }
                candidateParsing.Email = EmailHelper.ExtractSingleEmail(parsedEmail);
                break;
            }

        case "jobstreet.vn":
            {
                candidateParsing.Source = "jobstreet.vn";
                // Parse from the HTML body to be safe.
                // Fix: fall back to an empty string so Split cannot throw when the markers are missing.
                var bodyCandidate = StringHelper.GetStringBetweenTwoCharLast(body_html, "xem chi tiết công việc", "(đính kèm)")
                                    ?? string.Empty;

                var listInfo = bodyCandidate
                    .Split(new string[] { "</div>" }, StringSplitOptions.None)
                    .Select(piece => StringHelper.StripHtml(piece))
                    .Where(t => !string.IsNullOrEmpty(t))
                    .ToList();

                // Rows are read positionally: 0 = name, 1 = e-mail, 2 = phone.
                if (listInfo.Count >= 4)
                {
                    candidateParsing.Name = listInfo[0];
                    candidateParsing.Phone = listInfo[2];
                    candidateParsing.Email = EmailHelper.ExtractSingleEmail(listInfo[1]);
                }

                candidateParsing.JobName = StringHelper.GetStringBetweenTwoCharLast(fullText, "cho vị trí:", "xem chi tiết");
                candidateParsing.JobName = StringHelper.GetStringFromBeginToText(candidateParsing.JobName, "-");
                break;
            }

        case "jobsgo.vn":
            {
                candidateParsing.Source = "jobsgo";
                fullText = fullText.Replace("*", "").Replace("\r", "");
                candidateParsing.Name = StringHelper.GetValueFromHeadersSendGrid(fullText, "họ tên: ");
                candidateParsing.Email = StringHelper.GetValueFromHeadersSendGrid(fullText, "email: ");
                candidateParsing.Phone = StringHelper.GetValueFromHeadersSendGrid(fullText, "số điện thoại: ");
                candidateParsing.JobName = StringHelper.GetStringFromTextToLast(subject, "ứng tuyển công việc");
                break;
            }

        case "topcv.vn":
            {
                candidateParsing.Source = "topcv";
                fullText = fullText.Replace("*", "").Replace("\r", "");
                candidateParsing.Name = StringHelper.GetValueFromHeadersSendGrid(fullText, "tên ứng viên:");
                candidateParsing.Email = StringHelper.GetValueFromHeadersSendGrid(fullText, "email ứng viên:");
                candidateParsing.Phone = StringHelper.GetStringFromTextToLast(fullText, "sđt ứng viên:");
                candidateParsing.JobName = StringHelper.GetValueFromHeadersSendGrid(fullText, "tiêu đề tin:");
                break;
            }

        case "talentsolution":
            {
                candidateParsing.Source = "Talent Solution";
                fullText = fullText.Replace("*", "").Replace("\r", "");

                // English vs. Vietnamese template.
                if (subject.Contains("applied for"))
                {
                    fullText = StringHelper.GetStringBetweenTwoCharLast(fullText, "dear", "best regards,");
                    candidateParsing.Name = StringHelper.GetStringBetweenTwoCharLast(fullText, "full name", "address");
                    candidateParsing.Email = EmailHelper.ExtractSingleEmail(
                        StringHelper.GetStringBetweenTwoCharLast(fullText, "email", "mobile phone"));
                    candidateParsing.JobName = StringHelper.GetStringFromTextToLast(subject, "applied for");
                    candidateParsing.Phone = StringHelper.GetStringBetweenTwoCharLast(fullText, "mobile phone", "please give");
                    candidateParsing.Location = StringHelper.GetStringBetweenTwoCharLast(fullText, "city", "email");
                    candidateParsing.Address = StringHelper.GetStringBetweenTwoCharLast(fullText, "address", "city");
                }
                else
                {
                    fullText = StringHelper.GetStringBetweenTwoCharLast(fullText, "kính chào", "trân trọng");
                    candidateParsing.Name = StringHelper.GetStringBetweenTwoCharLast(fullText, "họ và tên ứng viên", "địa chỉ");
                    candidateParsing.Email = EmailHelper.ExtractSingleEmail(
                        StringHelper.GetStringBetweenTwoCharLast(fullText, "email", "số điện thoại"));
                    candidateParsing.JobName = StringHelper.GetStringFromTextToLast(subject, "cho vị trí");
                    candidateParsing.Phone = StringHelper.GetStringBetweenTwoCharLast(fullText, "số điện thoại", "vui lòng");
                    candidateParsing.Location = StringHelper.GetStringBetweenTwoCharLast(fullText, "thành phố", "email");
                    candidateParsing.Address = StringHelper.GetStringBetweenTwoCharLast(fullText, "địa chỉ", "thành phố");
                    // The CV itself requires a login to download.
                }
                break;
            }

        case "indeed":
            {
                candidateParsing.Source = "indeed.com";
                candidateParsing.Name = StringHelper.GetStringBetweenTwoCharLast(subject, "ứng viên -", "đã nộp đơn ");
                candidateParsing.JobName = StringHelper.GetStringFromBeginToText(subject, "ứng viên");
                // The e-mail address is only available in the attached file, which is not parsed here.
                break;
            }

        case "itnavi.com.vn":
            {
                candidateParsing.Source = "itnavi.com.vn";
                candidateParsing.Name = StringHelper.GetValueFromHeadersSendGrid(fullText, "tên ứng viên:");
                candidateParsing.Email = StringHelper.GetValueFromHeadersSendGrid(fullText, "email ứng viên:");
                candidateParsing.Phone = StringHelper.GetValueFromHeadersSendGrid(fullText, "sdt ứng viên:");
                candidateParsing.JobName = StringHelper.GetValueFromHeadersSendGrid(fullText, "tiêu đề tin:");
                break;
            }

        case "ACME":
        default:
            // Nothing provider-specific to parse.
            break;
    }

    // Common clean-up of the extracted values.
    if (!string.IsNullOrEmpty(candidateParsing.Name))
    {
        // NOTE(review): the literal is reproduced as it appears in this copy of the source.
        // If it was originally a non-breaking or double space, restore that character.
        candidateParsing.Name = candidateParsing.Name.Replace(" ", "").Trim();
    }
    else
    {
        candidateParsing.Name = "";
    }

    if (!string.IsNullOrEmpty(candidateParsing.Email))
    {
        candidateParsing.Email = EmailHelper.ExtractSingleEmail(candidateParsing.Email);
        // Fix: only clean the value when extraction produced one (avoids a null dereference).
        if (candidateParsing.Email != null)
        {
            candidateParsing.Email = candidateParsing.Email.Replace(" ", "").Trim();
        }
    }

    if (!string.IsNullOrEmpty(candidateParsing.Phone))
    {
        candidateParsing.Phone = candidateParsing.Phone.Replace("_", "").Trim();
    }

    return candidateParsing;
}

/// <summary>
/// Candidate details extracted from a job-board notification e-mail by
/// <c>ParsingFromEmail</c>. Fields that could not be found are left null,
/// except <see cref="Name"/>, which the parser sets to an empty string.
/// </summary>
public class CandidateParserResult
{
    /// <summary>Candidate's name.</summary>
    public string Name { get; set; }

    /// <summary>Candidate's e-mail address (a generated placeholder for VietnamWorks mails without one).</summary>
    public string Email { get; set; }

    /// <summary>Candidate's phone number.</summary>
    public string Phone { get; set; }

    /// <summary>Title of the job the candidate applied for.</summary>
    public string JobName { get; set; }

    /// <summary>Label of the job board the e-mail came from.</summary>
    public string Source { get; set; }

    /// <summary>Free-text note; filled only by the itviec branch (question + answer).</summary>
    public string Note { get; set; }

    /// <summary>Not set by the parser in this file.</summary>
    public string WorkspaceEmail { get; set; }

    /// <summary>City; filled only by the Talent Solution branch.</summary>
    public string Location { get; set; }

    /// <summary>Street address; filled only by the Talent Solution branch.</summary>
    public string Address { get; set; }

    /// <summary>VietnamWorks job link (last href containing "jobId").</summary>
    public string VNW_Job_URL { get; set; }

    /// <summary>VietnamWorks application link (last href containing "application/detail").</summary>
    public string VNW_Application_URL { get; set; }
}
Editor is loading...
Leave a Comment