diff --git a/.gitignore b/.gitignore index d84d6e7..b5245e0 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,10 @@ react-chrome-extension/build npm-debug.log* yarn-debug.log* yarn-error.log* + +# PDF Parse +PDFParse/* +!PDFParse/pdfparse.ts +!PDFParse/pdfparse.js +!PDFParse/coverParse.ts +!PDFParse/coverParse.js \ No newline at end of file diff --git a/PDFParse/Instructions.txt b/PDFParse/Instructions.txt new file mode 100644 index 0000000..76a4d47 --- /dev/null +++ b/PDFParse/Instructions.txt @@ -0,0 +1,11 @@ +INSTRUCTIONS + +1) Compile using tsc to turn the ts files into js + +2) Once compiled, run the program starting with: + +node pdfparse.js + +and write the path to your file + +3) It will produce a json and print all the results \ No newline at end of file diff --git a/PDFParse/coverParse.js b/PDFParse/coverParse.js new file mode 100644 index 0000000..057acdb --- /dev/null +++ b/PDFParse/coverParse.js @@ -0,0 +1,154 @@ +var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { + function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } + return new (P || (P = Promise))(function (resolve, reject) { + function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } + function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } + function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } + step((generator = generator.apply(thisArg, _arguments || [])).next()); + }); +}; +var __generator = (this && this.__generator) || function (thisArg, body) { + var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g; + return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g; + function verb(n) { return function (v) { return step([n, v]); }; } + function step(op) { + if (f) throw new TypeError("Generator is already executing."); + while (_) try { + if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t; + if (y = 0, t) op = [op[0] & 2, t.value]; + switch (op[0]) { + case 0: case 1: t = op; break; + case 4: _.label++; return { value: op[1], done: false }; + case 5: _.label++; y = op[1]; op = [0]; continue; + case 7: op = _.ops.pop(); _.trys.pop(); continue; + default: + if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; } + if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; } + if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; } + if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; } + if (t[2]) _.ops.pop(); + _.trys.pop(); continue; + } + op = body.call(thisArg, _); + } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; } + if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true }; + } +}; +var _this = this; +var fs = require('fs'); +var pdf = require('pdf-parse'); +var readline = require('readline'); +var rl = readline.createInterface({ + input: process.stdin, + output: process.stdout +}); +var openDB = function () { + return new Promise(function (resolve, reject) { + var request = window.indexedDB.open('FilesDB', 1); + request.onerror = function (event) { + reject('Error opening database'); + }; + request.onsuccess = function (event) { + var target = event.target; + var db = target.result; + if (db) { + resolve(db); + } + else { + reject('Failed to open database'); + } + }; + request.onupgradeneeded = function (event) { + var target = event.target; + var db = target.result; + db.createObjectStore('files', { keyPath: 'id' }); + }; + }); +}; +var parseCoverLetterFromPdf = function (pdfPath) { return __awaiter(_this, void 0, void 0, function () { + var dataBuffer, data, pdfText, regex, match, name_1, address, body, outputJson, error_1; + return __generator(this, function (_a) { + switch (_a.label) { + case 0: + _a.trys.push([0, 2, , 3]); + dataBuffer = fs.readFileSync(pdfPath); + return [4 /*yield*/, pdf(dataBuffer)]; + case 1: + data = _a.sent(); + pdfText = data.text; + regex = /([^]+?)(\b\d{1,5}(?:[-\s]\d{1,5})?(?:[A-Za-z]+\b|\b)[^]+?)([\s\S]*)/; + match = pdfText.match(regex); + if (!match) { + console.error('Unable to extract name, address, and body from PDF.'); + return [2 /*return*/]; + } + name_1 = match[1].replace(/[\n\r]/g, ' ').trim(); + address = match[2].replace(/[\n\r]/g, ' ').trim(); + body = match[3].trim(); + outputJson = { name: name_1, address: address, body: body }; + // Save the parsed data to IndexedDB + //await savePdfDataToDB('coverLetter', Buffer.from(JSON.stringify(outputJson))); + console.log('Cover letter data saved to IndexedDB.'); + return [3 /*break*/, 3]; + case 2: + error_1 = _a.sent(); + console.error('Error parsing PDF:', error_1.message); + return [3 /*break*/, 3]; + case 3: return [2 /*return*/]; + } + }); +}); }; +function getPdfDataFromDB() { + return __awaiter(this, void 0, void 0, function () { + var db, transaction, objectStore, request_1, error_2; + return __generator(this, function (_a) { + switch (_a.label) { + case 0: + _a.trys.push([0, 2, , 3]); + return [4 /*yield*/, openDB()]; + case 1: + db = _a.sent(); + transaction = db.transaction('files', 'readonly'); + objectStore = transaction.objectStore('files'); + request_1 = objectStore.get('coverLetter'); + return [2 /*return*/, new Promise(function (resolve, reject) { + request_1.onsuccess = function () { + var result = request_1.result; + if (result && result.data instanceof Uint8Array) { + resolve(result.data); + } + else { + resolve(null); + } + }; + request_1.onerror = function () { + console.error('Error fetching data from database'); + resolve(null); + }; + })]; + case 2: + error_2 = _a.sent(); + console.error('Error opening database:', error_2.message); + return [2 /*return*/, null]; + case 3: return [2 /*return*/]; + } + }); + }); +} +// Ask the user for the PDF file path +rl.question('Enter the path to the PDF file: ', function (pdfPath) { + // Parse cover letter from PDF + parseCoverLetterFromPdf(pdfPath) + .then(function () { + // Retrieve cover letter data from IndexedDB + return getPdfDataFromDB(); + }) + .then(function (outputJson) { + // Output the result + console.log('Retrieved data from IndexedDB:', outputJson); + rl.close(); + })["catch"](function (error) { + console.error(error); + rl.close(); + }); +}); diff --git a/PDFParse/coverParse.ts b/PDFParse/coverParse.ts new file mode 100644 index 0000000..a09c396 --- /dev/null +++ b/PDFParse/coverParse.ts @@ -0,0 +1,153 @@ +const fs = require('fs'); +const pdf = require('pdf-parse'); +const readline = require('readline'); + +//create a readline interface for user input +const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout +}); + +//structure of the parsed cover letter +interface ParsedCoverLetter { + name: string; + address: string; + body: string; +} + +//open IndexedDB or create it if it doesn't exist +const openDB = (): Promise => { + return new Promise((resolve, reject) => { + const request = window.indexedDB.open('FilesDB', 1); + + //error when database is not found + request.onerror = (event: Event) => { + reject(new Error('Error opening database')); + }; + + request.onsuccess = (event: Event) => { + const target = event.target as IDBOpenDBRequest; + const db = target.result as IDBDatabase; + + if (db) { + resolve(db); + } else { + reject(new Error('Failed to open database')); + } + }; + + request.onupgradeneeded = (event: Event) => { + const target = event.target as IDBOpenDBRequest; + const db = target.result as IDBDatabase; + + //stores the files in this object + db.createObjectStore('files', { keyPath: 'id' }); + }; + }); +}; + +//data from the parsed pdf is saved to IndexDB +const savePdfDataToDB = async (id: string, data: Uint8Array): Promise => { + try { + const db = await openDB(); + const transaction = db.transaction('files', 'readwrite'); + const objectStore = transaction.objectStore('files'); + + const request = objectStore.put({ id, data }); + + return new Promise((resolve, reject) => { + request.onsuccess = () => { + resolve(); + }; + //error message when request fails + request.onerror = () => { + console.error('Error saving data to database'); + reject(new Error('Error saving data to database')); + }; + }); + } catch (error) { + console.error('Error opening database:', error.message); + throw error; + } +}; + +//This function parses the cover letter into the name, address, and the body message +const parseCoverLetterFromPdf = async (pdfPath: string): Promise => { + try { + //reads the pdf + const dataBuffer = fs.readFileSync(pdfPath); + const data = await pdf(dataBuffer); + + const pdfText = data.text; + const regex = /([^]+?)(\b\d{1,5}(?:[-\s]\d{1,5})?(?:[A-Za-z]+\b|\b)[^]+?)([\s\S]*)/; + const match = pdfText.match(regex); + + if (!match) { + console.error('Unable to extract name, address, and body from PDF.'); + return; + } + + const name = match[1].replace(/[\n\r]/g, ' ').trim(); + const address = match[2].replace(/[\n\r]/g, ' ').trim(); + const body = match[3].trim(); + + const outputJson: ParsedCoverLetter = { name, address, body }; + + //calls the function to save data into IndexDB + await savePdfDataToDB('coverLetter', Buffer.from(JSON.stringify(outputJson))); + + console.log('Cover letter data saved to IndexedDB.'); + } catch (error) { + console.error('Error parsing PDF:', error.message); + } +}; + +//this function retrieves data from the database +async function getPdfDataFromDB(): Promise { + try { + //opens the database and retrieves the cover letter data and saves it into the request + const db = await openDB(); + const transaction = db.transaction('files', 'readonly'); + const objectStore = transaction.objectStore('files'); + const request = objectStore.get('coverLetter'); + + return new Promise((resolve, reject) => { + request.onsuccess = () => { + const result = request.result; + + if (result && result.data instanceof Uint8Array) { + resolve(result.data); + } else { + resolve(null); + } + }; + + request.onerror = () => { + console.error('Error fetching data from database'); + resolve(null); + }; + }); + } catch (error) { + console.error('Error opening database:', error.message); + return null; + } +} + +// Ask the user for the PDF file path +rl.question('Enter the path to the PDF file: ', (pdfPath) => { + // Parse cover letter from PDF + parseCoverLetterFromPdf(pdfPath) + .then(() => { + // Retrieve cover letter data from IndexedDB + return getPdfDataFromDB(); + }) + .then((outputJson) => { + // Output the result + console.log('Retrieved data from IndexedDB:', outputJson); + rl.close(); + }) + .catch((error) => { + console.error(error); + rl.close(); + }); +}); diff --git a/PDFParse/pdfparse.js b/PDFParse/pdfparse.js new file mode 100644 index 0000000..d0a0c71 --- /dev/null +++ b/PDFParse/pdfparse.js @@ -0,0 +1,249 @@ +"use strict"; +var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { + function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } + return new (P || (P = Promise))(function (resolve, reject) { + function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } + function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } + function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } + step((generator = generator.apply(thisArg, _arguments || [])).next()); + }); +}; +var __generator = (this && this.__generator) || function (thisArg, body) { + var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g; + return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g; + function verb(n) { return function (v) { return step([n, v]); }; } + function step(op) { + if (f) throw new TypeError("Generator is already executing."); + while (_) try { + if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t; + if (y = 0, t) op = [op[0] & 2, t.value]; + switch (op[0]) { + case 0: case 1: t = op; break; + case 4: _.label++; return { value: op[1], done: false }; + case 5: _.label++; y = op[1]; op = [0]; continue; + case 7: op = _.ops.pop(); _.trys.pop(); continue; + default: + if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; } + if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; } + if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; } + if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; } + if (t[2]) _.ops.pop(); + _.trys.pop(); continue; + } + op = body.call(thisArg, _); + } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; } + if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true }; + } +}; +exports.__esModule = true; +var fs = require("fs"); +var path = require("path"); +var pdf = require("pdf-parse"); +var openDB = function () { + // Return a Promise that wraps the logic for opening or upgrading the IndexedDB database + return new Promise(function (resolve, reject) { + // Use window.indexedDB to open the 'FilesDB' database with version 1 + var request = window.indexedDB.open('FilesDB', 1); + // Handle errors that may occur during the attempt to open the database + request.onerror = function (event) { + reject('Error opening database'); + }; + // Handle successful opening of the database + request.onsuccess = function (event) { + // Extract the result from the event and cast it to an IDBDatabase instance + var target = event.target; + var db = target.result; + // Check if the database instance is valid and resolve the Promise with it + if (db) { + resolve(db); + } + else { + // Reject the Promise if the database instance is not valid + reject('Failed to open database'); + } + }; + // Handle the case where the database version needs an upgrade + request.onupgradeneeded = function (event) { + // Extract the result from the event and cast it to an IDBDatabase instance + var target = event.target; + var db = target.result; + // Create an object store named 'files' with 'id' as the key path + db.createObjectStore('files', { keyPath: 'id' }); + }; + }); +}; +var savePdfDataToDB = function (id, data) { return __awaiter(void 0, void 0, void 0, function () { + var db, transaction, objectStore, request_1, error_1; + return __generator(this, function (_a) { + switch (_a.label) { + case 0: + _a.trys.push([0, 2, , 3]); + return [4 /*yield*/, openDB()]; + case 1: + db = _a.sent(); + transaction = db.transaction('files', 'readwrite'); + objectStore = transaction.objectStore('files'); + request_1 = objectStore.put({ id: id, data: data }); + return [2 /*return*/, new Promise(function (resolve, reject) { + request_1.onsuccess = function () { + resolve(); + }; + request_1.onerror = function () { + console.error('Error saving data to database'); + reject('Error saving data to database'); + }; + })]; + case 2: + error_1 = _a.sent(); + console.error('Error opening database:', error_1.message); + throw error_1; + case 3: return [2 /*return*/]; + } + }); +}); }; +function getPdfDataFromDB() { + return __awaiter(this, void 0, void 0, function () { + var db, transaction, objectStore, request_2, error_2; + return __generator(this, function (_a) { + switch (_a.label) { + case 0: + _a.trys.push([0, 2, , 3]); + return [4 /*yield*/, openDB()]; + case 1: + db = _a.sent(); + transaction = db.transaction('files', 'readonly'); + objectStore = transaction.objectStore('files'); + request_2 = objectStore.get('resume'); + return [2 /*return*/, new Promise(function (resolve, reject) { + // When the file is found + request_2.onsuccess = function () { + var result = request_2.result; + if (result && result.data instanceof Uint8Array) { + resolve(result.data); + } + else { + resolve(null); + } + }; + // When the file is not found + request_2.onerror = function () { + console.error('Error fetching data'); + resolve(null); + }; + })]; + case 2: + error_2 = _a.sent(); + console.error('Error opening database:', error_2.message); + return [2 /*return*/, null]; + case 3: return [2 /*return*/]; + } + }); + }); +} +function convertPdfToJson() { + return __awaiter(this, void 0, void 0, function () { + var pdfData, data, textContent_1, emailRegex, phoneRegex, addressRegex, emails, phoneNumbers, addresses, keywords, jsonPath, parsedInfo, outputJsonPath, error_3; + return __generator(this, function (_a) { + switch (_a.label) { + case 0: + _a.trys.push([0, 3, , 4]); + return [4 /*yield*/, getPdfDataFromDB()]; + case 1: + pdfData = _a.sent(); + // Check if the file exists + if (!pdfData) { + console.error('Error: The specified file does not exist in the database.'); + return [2 /*return*/]; + } + return [4 /*yield*/, pdf(pdfData)]; + case 2: + data = _a.sent(); + textContent_1 = data.text; + emailRegex = /\b[\w\.-]+@[\w\.-]+\.\w+\b/g; + phoneRegex = /\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b/g; + addressRegex = /\b\d+\s[\w\s]+,\s[\w\s]+,\s[\w\s\d]+\b/g; + emails = textContent_1.match(emailRegex); + if (emails) { + textContent_1 = textContent_1.replace(emailRegex, ''); + textContent_1 += '\nEmail Addresses:\n' + emails.join('\n') + '\n'; + } + phoneNumbers = textContent_1.match(phoneRegex); + if (phoneNumbers) { + textContent_1 = textContent_1.replace(phoneRegex, ''); + textContent_1 += '\nPhone Numbers:\n' + phoneNumbers.join('\n') + '\n'; + } + addresses = textContent_1.match(addressRegex); + if (addresses) { + textContent_1 = textContent_1.replace(addressRegex, ''); + textContent_1 += '\nAddresses:\n' + addresses.join('\n') + '\n'; + } + keywords = ['experience', 'skills', 'education', 'projects', 'achievements', 'interests']; + // Highlight keywords in uppercase + keywords.forEach(function (keyword) { + var regex = new RegExp("\\b".concat(keyword, "\\b"), 'gi'); // Using RegExp for whole word matching + textContent_1 = textContent_1.replace(regex, function (match) { return match.toUpperCase(); }); + }); + // Handle multi-word categories + textContent_1 = textContent_1.replace(/\b(Skills & Interests|Projects & Activities)\b/gi, function (match) { return match.toUpperCase(); }); + jsonPath = path.join(__dirname, 'output.json'); + // Write modified text content to a JSON file + fs.writeFileSync(jsonPath, textContent_1); + console.log('Conversion successful! Text saved to output.json.'); + parsedInfo = parseResume(jsonPath); + outputJsonPath = path.join(__dirname, 'parsed_output.json'); + fs.writeFileSync(outputJsonPath, JSON.stringify(parsedInfo, null, 2)); + console.log('Parsed Information:'); + console.log(parsedInfo); + console.log("Parsed information has been written to ".concat(outputJsonPath)); + return [3 /*break*/, 4]; + case 3: + error_3 = _a.sent(); + console.error('Error converting PDF to JSON:', error_3.message); + return [3 /*break*/, 4]; + case 4: return [2 /*return*/]; + } + }); + }); +} +function isName(line, isFirstNonEmptyLine) { + var trimmedLine = line.trim(); + return isFirstNonEmptyLine && trimmedLine.length > 0 ? trimmedLine : null; +} +function parseResume(jsonPath) { + var lines = fs.readFileSync(jsonPath, 'utf-8').split('\n'); + var parsedInfo = []; + var currentCategory = null; + var isFirstNonEmptyLine = true; + lines.forEach(function (line) { + // Skip empty lines + if (!line.trim()) { + return; + } + // Check if the line is the name + var name = isName(line, isFirstNonEmptyLine); + if (name) { + parsedInfo.push({ category: 'Name', content: [name] }); + isFirstNonEmptyLine = false; // Set to false after finding the first non-empty line + return; + } + // Identify section headers and set the current category + var sectionMatch = line.match(/^[A-Z\s]+$/); + if (sectionMatch) { + currentCategory = sectionMatch[0].trim(); + return; + } + // Add content to the current category + if (currentCategory !== null) { + var index = parsedInfo.findIndex(function (info) { return info.category === currentCategory; }); + if (index === -1) { + parsedInfo.push({ category: currentCategory, content: [line.trim()] }); + } + else { + parsedInfo[index].content.push(line.trim()); + } + } + }); + return parsedInfo; +} +// Call the function to start the conversion +convertPdfToJson(); diff --git a/PDFParse/pdfparse.ts b/PDFParse/pdfparse.ts new file mode 100644 index 0000000..8950f44 --- /dev/null +++ b/PDFParse/pdfparse.ts @@ -0,0 +1,236 @@ +import * as fs from 'fs'; +import * as path from 'path'; +import * as pdf from 'pdf-parse'; + +const openDB = (): Promise => { + // Return a Promise that wraps the logic for opening or upgrading the IndexedDB database + return new Promise((resolve, reject) => { + // Use window.indexedDB to open the 'FilesDB' database with version 1 + const request = window.indexedDB.open('FilesDB', 1); + + // Handle errors that may occur during the attempt to open the database + request.onerror = (event: Event) => { + reject('Error opening database'); + }; + + // Handle successful opening of the database + request.onsuccess = (event: Event) => { + // Extract the result from the event and cast it to an IDBDatabase instance + const target = event.target as IDBOpenDBRequest; + const db = target.result as IDBDatabase; + + // Check if the database instance is valid and resolve the Promise with it + if (db) { + resolve(db); + } else { + // Reject the Promise if the database instance is not valid + reject('Failed to open database'); + } + }; + + // Handle the case where the database version needs an upgrade + request.onupgradeneeded = (event: Event) => { + // Extract the result from the event and cast it to an IDBDatabase instance + const target = event.target as IDBOpenDBRequest; + const db = target.result as IDBDatabase; + + // Create an object store named 'files' with 'id' as the key path + db.createObjectStore('files', { keyPath: 'id' }); + }; + }); +}; + +const savePdfDataToDB = async (id: string, data: Uint8Array): Promise => { + try { + const db = await openDB(); + const transaction = db.transaction('files', 'readwrite'); + const objectStore = transaction.objectStore('files'); + + const request = objectStore.put({ id, data }); + + return new Promise((resolve, reject) => { + request.onsuccess = () => { + resolve(); + }; + + request.onerror = () => { + console.error('Error saving data to database'); + reject('Error saving data to database'); + }; + }); + } catch (error) { + console.error('Error opening database:', error.message); + throw error; + } +}; + +interface ParsedInfo { + category: string; + content: string[]; +} + +async function getPdfDataFromDB(): Promise { + try { + // Opens the IndexedDB + const db = await openDB(); + + // Start a 'readonly' transaction on the 'files' object + const transaction = db.transaction('files', 'readonly'); + + // Get the object store within the transaction + const objectStore = transaction.objectStore('files'); + + // Gets file based on id + const request = objectStore.get('resume'); + + return new Promise((resolve, reject) => { + // When the file is found + request.onsuccess = () => { + const result = request.result; + if (result && result.data instanceof Uint8Array) { + resolve(result.data); + } else { + resolve(null); + } + }; + + // When the file is not found + request.onerror = () => { + console.error('Error fetching data'); + resolve(null); + }; + }); + } catch (error) { + console.error('Error opening database:', error.message); + return null; + } +} + +async function convertPdfToJson(): Promise { + try { + // Fetch PDF data from IndexedDB + const pdfData = await getPdfDataFromDB(); + + // Check if the file exists + if (!pdfData) { + console.error('Error: The specified file does not exist in the database.'); + return; + } + + // Parse PDF + const data = await pdf(pdfData); + + // Extract text content + let textContent = data.text; + + // Define regular expressions to match email addresses, phone numbers, and house addresses + const emailRegex = /\b[\w\.-]+@[\w\.-]+\.\w+\b/g; + const phoneRegex = /\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b/g; + const addressRegex = /\b\d+\s[\w\s]+,\s[\w\s]+,\s[\w\s\d]+\b/g; + + // Match and categorize email addresses + const emails = textContent.match(emailRegex); + if (emails) { + textContent = textContent.replace(emailRegex, ''); + textContent += '\nEmail Addresses:\n' + emails.join('\n') + '\n'; + } + + // Match and categorize phone numbers + const phoneNumbers = textContent.match(phoneRegex); + if (phoneNumbers) { + textContent = textContent.replace(phoneRegex, ''); + textContent += '\nPhone Numbers:\n' + phoneNumbers.join('\n') + '\n'; + } + + // Match and categorize house addresses + const addresses = textContent.match(addressRegex); + if (addresses) { + textContent = textContent.replace(addressRegex, ''); + textContent += '\nAddresses:\n' + addresses.join('\n') + '\n'; + } + + // Array of important keywords + const keywords = ['experience', 'skills', 'education', 'projects', 'achievements', 'interests']; + + // Highlight keywords in uppercase + keywords.forEach(keyword => { + const regex = new RegExp(`\\b${keyword}\\b`, 'gi'); // Using RegExp for whole word matching + textContent = textContent.replace(regex, match => match.toUpperCase()); + }); + + // Handle multi-word categories + textContent = textContent.replace(/\b(Skills & Interests|Projects & Activities)\b/gi, match => match.toUpperCase()); + + // Create output file path + const jsonPath = path.join(__dirname, 'output.json'); + + // Write modified text content to a JSON file + fs.writeFileSync(jsonPath, textContent); + + console.log('Conversion successful! Text saved to output.json.'); + + // Example usage of parseResume function + const parsedInfo = parseResume(jsonPath); + + // Write the parsed information to a JSON file + const outputJsonPath = path.join(__dirname, 'parsed_output.json'); + fs.writeFileSync(outputJsonPath, JSON.stringify(parsedInfo, null, 2)); + + console.log('Parsed Information:'); + console.log(parsedInfo); + console.log(`Parsed information has been written to ${outputJsonPath}`); + } catch (error) { + console.error('Error converting PDF to JSON:', error.message); + } +} + +function isName(line: string, isFirstNonEmptyLine: boolean): string | null { + const trimmedLine = line.trim(); + return isFirstNonEmptyLine && trimmedLine.length > 0 ? trimmedLine : null; +} + +function parseResume(jsonPath: string): ParsedInfo[] { + const lines = fs.readFileSync(jsonPath, 'utf-8').split('\n'); + + const parsedInfo: ParsedInfo[] = []; + let currentCategory: string | null = null; + let isFirstNonEmptyLine = true; + + lines.forEach(line => { + // Skip empty lines + if (!line.trim()) { + return; + } + + // Check if the line is the name + const name = isName(line, isFirstNonEmptyLine); + if (name) { + parsedInfo.push({ category: 'Name', content: [name] }); + isFirstNonEmptyLine = false; // Set to false after finding the first non-empty line + return; + } + + // Identify section headers and set the current category + const sectionMatch = line.match(/^[A-Z\s]+$/); + if (sectionMatch) { + currentCategory = sectionMatch[0].trim(); + return; + } + + // Add content to the current category + if (currentCategory !== null) { + const index = parsedInfo.findIndex(info => info.category === currentCategory); + + if (index === -1) { + parsedInfo.push({ category: currentCategory, content: [line.trim()] }); + } else { + parsedInfo[index].content.push(line.trim()); + } + } + }); + + return parsedInfo; +} + +// Call the function to start the conversion +convertPdfToJson(); diff --git a/package-lock.json b/package-lock.json index 5f02a38..c36a894 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16,11 +16,13 @@ "@types/react": "^18.2.22", "@types/react-dom": "^18.2.7", "bootstrap": "^5.3.2", + "pdf-parse": "^1.1.1", "react": "^18.2.0", "react-bootstrap": "^2.8.0", "react-dom": "^18.2.0", "react-hook-form": "^7.46.2", "react-scripts": "5.0.1", + "readline-sync": "^1.4.10", "typescript": "^4.9.5", "web-vitals": "^3.4.0" }, @@ -16204,6 +16206,11 @@ "tslib": "^2.0.3" } }, + "node_modules/node-ensure": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/node-ensure/-/node-ensure-0.0.0.tgz", + "integrity": "sha512-DRI60hzo2oKN1ma0ckc6nQWlHU69RH6xN0sjQTjMpChPfTYvKZdcQFfdYK2RWbJcKyUizSIy/l8OTGxMAM1QDw==" + }, "node_modules/node-forge": { "version": "1.3.1", "resolved": "https://registry.npmjs.org/node-forge/-/node-forge-1.3.1.tgz", @@ -16653,6 +16660,26 @@ "node": ">=8" } }, + "node_modules/pdf-parse": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/pdf-parse/-/pdf-parse-1.1.1.tgz", + "integrity": "sha512-v6ZJ/efsBpGrGGknjtq9J/oC8tZWq0KWL5vQrk2GlzLEQPUDB1ex+13Rmidl1neNN358Jn9EHZw5y07FFtaC7A==", + "dependencies": { + "debug": "^3.1.0", + "node-ensure": "^0.0.0" + }, + "engines": { + "node": ">=6.8.1" + } + }, + "node_modules/pdf-parse/node_modules/debug": { + "version": "3.2.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz", + "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==", + "dependencies": { + "ms": "^2.1.1" + } + }, "node_modules/performance-now": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz", @@ -19645,6 +19672,14 @@ "node": ">=8.10.0" } }, + "node_modules/readline-sync": { + "version": "1.4.10", + "resolved": "https://registry.npmjs.org/readline-sync/-/readline-sync-1.4.10.tgz", + "integrity": "sha512-gNva8/6UAe8QYepIQH/jQ2qn91Qj0B9sYjMBBs3QOB8F2CXcKgLxQaJRP76sWVRQt+QU+8fAkCbCvjjMFu7Ycw==", + "engines": { + "node": ">= 0.8.0" + } + }, "node_modules/rechoir": { "version": "0.8.0", "resolved": "https://registry.npmjs.org/rechoir/-/rechoir-0.8.0.tgz", diff --git a/package.json b/package.json index ae7c5e1..2e65312 100644 --- a/package.json +++ b/package.json @@ -11,11 +11,13 @@ "@types/react": "^18.2.22", "@types/react-dom": "^18.2.7", "bootstrap": "^5.3.2", + "pdf-parse": "^1.1.1", "react": "^18.2.0", "react-bootstrap": "^2.8.0", "react-dom": "^18.2.0", "react-hook-form": "^7.46.2", "react-scripts": "5.0.1", + "readline-sync": "^1.4.10", "typescript": "^4.9.5", "web-vitals": "^3.4.0" }, diff --git a/react-chrome-extension/src/options/App.tsx b/react-chrome-extension/src/options/App.tsx index 32d36a6..c1cbdae 100644 --- a/react-chrome-extension/src/options/App.tsx +++ b/react-chrome-extension/src/options/App.tsx @@ -32,6 +32,51 @@ function EasyAppOptions() { + + Veteran Status (Optional) + + + + + + + + + + Disability Status (Optional) + + + + + + + + + + Gender (Optional) + + + + + + + + + + Ethnicity (Optional) + + + + + + + + + + + + + Upload Resume: