Untitled

 avatar
unknown
plain_text
a month ago
3.9 kB
7
Indexable
/*
  Customer states: Application generates a CSV export of personnel data;
  upon attempting to import this data to Microsoft SQL Server, data is
  corrupted; please diagnose and advise.

  CSV is formatted exactly as table is defined: (varchar, integer, varchar, varchar).
*/

import fs from "node:fs";

function parseCSV(filePath: string): string[][] {
	const fileContent = fs.readFileSync(filePath, "utf-8");
	const lines = fileContent.split("\n");
	const parsedCSV: string[][] = [];

	for (const line of lines) {
		const values = line.split(",").map((value) => value.replace(/"/g, ""));
		parsedCSV.push(values);
	}

	return parsedCSV;
}

function validateCSV(
	filePath: string,
):
	| { result: true; parsedValues: string[][] }
	| { result: false; errorMessages: string[] } {
	const fileContent = fs.readFileSync(filePath, "utf-8");
	const lines = fileContent.split("\n");

	if (fileContent.length === 0) {
		return { result: false, errorMessages: ["file is empty."] };
	}

	const parsedValues: string[][] = [];

	type State =
		| "initial"
		| "outsideElement"
		| "atSeparator"
		| "insideElement"
		| "error"
		| "endOfLine";

	function parseCharacter(
		currentState: State,
		character: string,
		isLastCharacter: boolean,
	): State {
		switch (currentState) {
			case "initial": {
				if (character === '"') {
					return "insideElement";
				}
				return "error";
			}
			case "insideElement": {
				if (character === '"') {
					return "outsideElement";
				}
				return "insideElement";
			}
			case "outsideElement": {
				if (isLastCharacter) {
					return "endOfLine";
				}
				if (character === ",") {
					return "atSeparator";
				}
				return "error";
			}
			case "atSeparator": {
				if (character === '"') {
					return "insideElement";
				}
				return "error";
			}
			default: {
				return "error";
			}
		}
	}

	for (const line of lines) {
		if (line.length === 0) {
			continue;
		}

		const values: string[] = [];
		let currentState: State = "initial";
		let characterPositionIndex = 0;
		let currentValue = "";

		for (const character of line) {
			console.info(
				currentState,
				character,
				characterPositionIndex === line.length - 1,
			);

			currentState = parseCharacter(
				currentState,
				character,
				characterPositionIndex === line.length - 1,
			);

			if (currentState === "insideElement") {
				if (character !== '"') {
					currentValue += character;
				}
			} else if (
				currentState === "atSeparator" ||
				currentState === "endOfLine"
			) {
				if (currentValue.length > 0) {
					values.push(currentValue);
					currentValue = "";
				}
			}

			if (currentState === "error") {
				return { result: false, errorMessages: ["parsing error"] };
			}
			characterPositionIndex += 1;
			if (characterPositionIndex === line.length - 1) {
				if (currentValue.length > 0) {
					values.push(currentValue);
				}
			}
		}
		parsedValues.push(values);
	}

	if (parsedValues.length === 0) {
		return { result: false, errorMessages: ["No values parsed"] };
	}

	const expectedLength = parsedValues[0].length;
	const invalidLengthElement = parsedValues.find(
		(e) => e.length !== expectedLength,
	);
	if (invalidLengthElement) {
		return {
			result: false,
			errorMessages: [
				`[${invalidLengthElement}] => invalid length; expected ${expectedLength}`,
			],
		};
	}



	const invalidTypeElement = parsedValues.find(
		(e) => isNaN(Number(e[1])),
	);
	if (invalidTypeElement) {
		return {
			result: false,
			errorMessages: [
				`[${invalidTypeElement}] => invalid type; expected number`,
			],
		};
	}


	return { result: true, parsedValues: parsedValues };
}

const filePath = "archivocsv2.csv";
const validationResult = validateCSV(filePath);
console.info(validationResult);

if (validationResult.result) {
	const _parsedData = parseCSV(filePath);
	//console.log(parsedData);
} else {
	console.error(validationResult.errorMessages);
}
Leave a Comment