parser object overhauls

Make conference names part of the parser object in preparation for the
implementation of BillDB; refactor the class structure for parsers.
This commit is contained in:
stupidcomputer 2024-05-19 16:02:33 -05:00
parent dbd9632e16
commit 5c11ff4371
3 changed files with 31 additions and 5 deletions

View File

@ -1,5 +1,8 @@
import leglib import leglib
parser = leglib.parsers.HSYIGPdfParser.from_filename("YIGVolunteerBook2024.pdf") parser = leglib.parsers.HSYIGPdfParser.from_filename(
filename="YIGVolunteerBook2024.pdf",
confname="YIGVolunteer"
)
parser.parse() parser.parse()
print([i.bill_text for i in parser.bills]) print([i.bill_text for i in parser.bills])

View File

@ -49,6 +49,8 @@ class BillCode:
str(self.docketplacement) str(self.docketplacement)
) )
self.conference: None | str = None # to be filled in with BillDB
def __str__(self): def __str__(self):
return "{} {} - {}-{}-{}".format( return "{} {} - {}-{}-{}".format(
self.color, self.color,

View File

@ -1,13 +1,31 @@
import fitz import fitz
from typing import Any from typing import Any, Self, ClassVar
from itertools import groupby
from dataclasses import dataclass
from lib import FitzBlockWrapper from lib import FitzBlockWrapper
from common import Bill from common import Bill
class HSYIGPdfParser: @dataclass
def __init__(self, document: fitz.Document): class BookParser:
self.document = document # class variables
humanname: ClassVar[str] = "Generic BookParser parent class."
description: ClassVar[str] = """
A generic description of the abilities of this BookParser.
"""
# everything else
document: fitz.Document
confname: str
@classmethod
def from_filename(cls, filename: str, confname: str):
return cls(
document=fitz.open(filename),
confname=confname
)
class HSYIGPdfParser(BookParser):
@staticmethod @staticmethod
def _words_in_superstring(words: list[str], superstring: str) -> bool: def _words_in_superstring(words: list[str], superstring: str) -> bool:
for word in words: for word in words:
@ -142,6 +160,9 @@ class HSYIGPdfParser:
title=pretty_printed["title"] title=pretty_printed["title"]
)) ))
for bill in bills: # add the conference name to each
bill.code.conference = self.confname
self.bills = bills self.bills = bills
@staticmethod @staticmethod