From 5c11ff43712682c545c78d38ba5d2dbc12781551 Mon Sep 17 00:00:00 2001 From: stupidcomputer Date: Sun, 19 May 2024 16:02:33 -0500 Subject: [PATCH] parser object overhauls make conference names part of the parser object in preparation for the implementation of billdb; refactor class structure for parsers --- analyser.py | 5 ++++- common.py | 2 ++ parsers.py | 29 +++++++++++++++++++++++++---- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/analyser.py b/analyser.py index 08b5eb4..726c4a2 100644 --- a/analyser.py +++ b/analyser.py @@ -1,5 +1,8 @@ import leglib -parser = leglib.parsers.HSYIGPdfParser.from_filename("YIGVolunteerBook2024.pdf") +parser = leglib.parsers.HSYIGPdfParser.from_filename( + filename="YIGVolunteerBook2024.pdf", + confname="YIGVolunteer" +) parser.parse() print([i.bill_text for i in parser.bills]) diff --git a/common.py b/common.py index 2382ecc..24f4f79 100644 --- a/common.py +++ b/common.py @@ -49,6 +49,8 @@ class BillCode: str(self.docketplacement) ) + self.conference: None | str = None # to be filled in with BillDB + def __str__(self): return "{} {} - {}-{}-{}".format( self.color, diff --git a/parsers.py b/parsers.py index e83151d..326e0d4 100644 --- a/parsers.py +++ b/parsers.py @@ -1,13 +1,31 @@ import fitz -from typing import Any +from typing import Any, Self, ClassVar +from itertools import groupby +from dataclasses import dataclass from lib import FitzBlockWrapper from common import Bill -class HSYIGPdfParser: - def __init__(self, document: fitz.Document): - self.document = document +@dataclass +class BookParser: + # class variables + humanname: ClassVar[str] = "Generic BookParser parent class." + description: ClassVar[str] = """ + A generic description of the abilities of this BookParser. + """ + # everything else + document: fitz.Document + confname: str + + @classmethod + def from_filename(cls, filename: str, confname: str): + return cls( + document=fitz.open(filename), + confname=confname + ) + +class HSYIGPdfParser(BookParser): @staticmethod def _words_in_superstring(words: list[str], superstring: str) -> bool: for word in words: @@ -142,6 +160,9 @@ class HSYIGPdfParser: title=pretty_printed["title"] )) + for bill in bills: # add the conference name to each + bill.code.conference = self.confname + self.bills = bills @staticmethod