/*
 * main.js -- console-only frontend that pulls bill metadata out of an uploaded PDF.
 *
 * Provenance: reconstructed from git patch
 * b8e65edccd9d09fb7ae6112fd62eb2d31eb4746e (author: stupidcomputer,
 * Thu, 9 May 2024 00:50:11 -0500, subject: "add a simple frontend that works
 * via console.log"). That patch also creates index.html (11 lines) and deletes
 * package.json and shell.nix. The deleted package.json described the project
 * as a "parser and explorer for YMCA CCE legislative materials" (name "yig",
 * license GPL-3.0).
 */
'use strict';

// Kept as `var` on purpose: in a classic script this aliases the global
// property itself, so it still works if pdf.js is attached to globalThis after
// this file has been evaluated. A `const` would snapshot the value at load
// time. onFileUpload re-reads globalThis.pdfjsLib for the same reason.
var { pdfjsLib } = globalThis;

// Positions of the wanted fields inside one bill's run of text items.
// NOTE(review): these offsets presumably match the layout of the specific PDF
// this tool was written for -- confirm before using it on other documents.
const BILL_FIELD_INDEX = Object.freeze({
  subcommittee: 31,
  sponsors: 33,
  school: 35,
  billcode: 5,
});

/**
 * Joins the `str` of every text item into one string, with no separator.
 *
 * @param {Array<{str: *}>} lines - text items of a single page.
 * @returns {string} the concatenated text ("" for an empty list).
 */
function concatLines(lines) {
  // String concatenation (not Array#join) so that a missing `str` is rendered
  // the same way it always was.
  return lines.reduce((text, item) => text + item.str, "");
}

/**
 * Lists the page indices that lie between the first and the last entry of
 * `pages` and are NOT themselves listed in `pages`.
 *
 * Despite the name, the listed pages are excluded from the result: only the
 * gaps between them are returned. The last entry acts as an exclusive bound.
 *
 * @param {Array<number|null>} pages - marker page indices, ascending.
 * @returns {number[]} indices from pages[0] up to (not including) the last
 *   entry that do not appear in `pages`.
 */
function fillInCommitteePages(pages) {
  if (pages.length === 0) {
    return [];
  }

  const listed = new Set(pages); // O(1) membership instead of Array#includes
  const first = pages[0];
  const last = pages[pages.length - 1];

  const gaps = [];
  for (let page = first; page < last; page++) {
    if (!listed.has(page)) {
      gaps.push(page);
    }
  }
  return gaps;
}

/**
 * Flattens the text items of the selected pages into one list, in the order
 * the pages are given.
 *
 * @param {number[]} committeePages - indices into `allLines`.
 * @param {Array<Array<object>>} allLines - text items, one array per page.
 * @returns {object[]} the items of all selected pages, concatenated.
 */
function concatAllCommitteePages(committeePages, allLines) {
  return committeePages.flatMap((pageIndex) => allLines[pageIndex]);
}

/**
 * Splits a flat list of text items into per-bill chunks, using items whose
 * `str` equals `billHeader` as delimiters.
 *
 * A chunk is emitted only when the NEXT header is reached, so items after the
 * final header are dropped unless the caller appends a closing header item.
 * Everything before the first header is discarded as well.
 *
 * @param {Array<{str: *}>} committeeLines - flat list of text items.
 * @param {string} billHeader - text that marks the start of a bill.
 * @returns {Array<Array<object>>} one array of items per bill (headers removed).
 */
function splitByBillHeader(committeeLines, billHeader) {
  const chunks = [];
  let current = [];

  for (const item of committeeLines) {
    if (item.str === billHeader) {
      chunks.push(current);
      current = [];
    } else {
      current.push(item);
    }
  }

  chunks.shift(); // drop whatever preceded the first header
  return chunks;
}

/**
 * Picks the subcommittee, sponsors, school and bill code out of each bill
 * chunk by fixed position (see BILL_FIELD_INDEX).
 *
 * Each chunk is also written to the console, as before, to help with
 * inspecting the raw items.
 *
 * @param {Array<Array<{str: *}>>} splittedByBillHeader - per-bill item lists.
 * @returns {Array<{subcommittee: *, sponsors: *, school: *, billcode: *}>}
 * @throws {TypeError} if a chunk is too short to contain every field.
 */
function extractBillInformation(splittedByBillHeader) {
  const highestIndex = Math.max(...Object.values(BILL_FIELD_INDEX));

  return splittedByBillHeader.map((billItems, billNumber) => {
    console.log(billItems);

    for (const [field, index] of Object.entries(BILL_FIELD_INDEX)) {
      if (billItems[index] == null) {
        throw new TypeError(
          `bill #${billNumber} has ${billItems.length} text items; ` +
            `"${field}" is expected at index ${index} ` +
            `(at least ${highestIndex + 1} items are required)`
        );
      }
    }

    return {
      subcommittee: billItems[BILL_FIELD_INDEX.subcommittee].str,
      sponsors: billItems[BILL_FIELD_INDEX.sponsors].str,
      school: billItems[BILL_FIELD_INDEX.school].str,
      billcode: billItems[BILL_FIELD_INDEX.billcode].str,
    };
  });
}

/**
 * Locates the bill pages of a document, extracts each bill's metadata and
 * prints the result with console.log.
 *
 * A page whose text contains both "COMMITTEE" and "GOVERNMENT" is a marker
 * page; the last other page containing "ABCs" closes the range. The pages in
 * between that are not marker pages are parsed, and the first text item on
 * the first of them is taken to be the bill header.
 *
 * @param {Array<Array<{str: *}>>} lines - text items, one array per page.
 * @returns {Array<object>} the extracted bill records (also logged). Empty
 *   when no bill pages are found.
 */
function processLines(lines) {
  const committeePages = [];
  let endPage = null;

  for (const [pageIndex, pageItems] of lines.entries()) {
    const pageText = concatLines(pageItems);
    if (pageText.includes("COMMITTEE") && pageText.includes("GOVERNMENT")) {
      committeePages.push(pageIndex); // we have a committee page
    } else if (pageText.includes("ABCs")) {
      endPage = pageIndex;
    }
  }
  committeePages.push(endPage);

  const committeeLines = concatAllCommitteePages(
    fillInCommitteePages(committeePages),
    lines
  );

  // No markers, no end page, or nothing in between: report an empty result
  // instead of failing on committeeLines[0].
  if (committeeLines.length === 0) {
    const nothing = [];
    console.log(nothing);
    return nothing;
  }

  const billHeader = committeeLines[0].str;
  // The trailing header is a sentinel so the final bill gets flushed.
  const chunks = splitByBillHeader(
    [...committeeLines, { str: billHeader }],
    billHeader
  );

  const billInfo = extractBillInformation(chunks);
  console.log(billInfo);
  return billInfo;
}

/**
 * Reads the file chosen in the #fileupload input, extracts the text items of
 * every page with pdf.js and hands them to processLines.
 *
 * Failures are reported with console.error rather than left as unhandled
 * promise rejections.
 *
 * @returns {Promise<Array<object>|undefined>} the bill records, or undefined
 *   when there was nothing to process or processing failed.
 */
async function onFileUpload() {
  const file = document.getElementById("fileupload")?.files?.[0];
  if (!file) {
    console.error("onFileUpload: no file selected");
    return undefined;
  }

  const pdfjs = globalThis.pdfjsLib;
  if (!pdfjs) {
    console.error("onFileUpload: pdfjsLib is not loaded");
    return undefined;
  }

  try {
    pdfjs.GlobalWorkerOptions.workerSrc = '//mozilla.github.io/pdf.js/build/pdf.worker.mjs';

    // Typed-array input replaces the deprecated FileReader.readAsBinaryString.
    const data = new Uint8Array(await file.arrayBuffer());
    const pdf = await pdfjs.getDocument({ data }).promise;

    const readPageItems = async (pageNumber) => {
      const page = await pdf.getPage(pageNumber);
      const text = await page.getTextContent();
      return text.items;
    };

    // pdf.js page numbers start at 1.
    const lines = await Promise.all(
      Array.from({ length: pdf.numPages }, (_, i) => readPageItems(i + 1))
    );

    return processLines(lines);
  } catch (err) {
    console.error("onFileUpload: failed to process PDF", err);
    return undefined;
  }
}