parser object overhauls

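Make the conference name part of the parser object, in preparation for the
implementation of BillDB. Refactor the parser class structure: the shared
fields and the `from_filename` constructor move into a generic `BookParser`
base class, which `HSYIGPdfParser` now inherits from.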
This commit is contained in:
stupidcomputer 2024-05-19 16:02:33 -05:00
parent dbd9632e16
commit 5c11ff4371
3 changed files with 31 additions and 5 deletions

View File

@ -1,5 +1,8 @@
import leglib
parser = leglib.parsers.HSYIGPdfParser.from_filename("YIGVolunteerBook2024.pdf")
parser = leglib.parsers.HSYIGPdfParser.from_filename(
filename="YIGVolunteerBook2024.pdf",
confname="YIGVolunteer"
)
parser.parse()
print([i.bill_text for i in parser.bills])

View File

@ -49,6 +49,8 @@ class BillCode:
str(self.docketplacement)
)
self.conference: None | str = None # to be filled in with BillDB
def __str__(self):
return "{} {} - {}-{}-{}".format(
self.color,

View File

@ -1,13 +1,31 @@
import fitz
from typing import Any
from typing import Any, Self, ClassVar
from itertools import groupby
from dataclasses import dataclass
from lib import FitzBlockWrapper
from common import Bill
class HSYIGPdfParser:
def __init__(self, document: fitz.Document):
self.document = document
@dataclass
class BookParser:
    """Generic parent class for conference-book parsers.

    Bundles an opened document with the name of the conference it belongs
    to. Concrete parsers (e.g. ``HSYIGPdfParser``) inherit from this class.
    """

    # Class variables: ``ClassVar`` keeps these out of the generated
    # ``__init__`` so they describe the parser type, not an instance.
    humanname: ClassVar[str] = "Generic BookParser parent class."
    # The leading and trailing newlines are part of the value.
    description: ClassVar[str] = (
        "\nA generic description of the abilities of this BookParser.\n"
    )

    # Instance fields (constructor arguments, in this order).
    document: fitz.Document  # the opened document to be parsed
    confname: str  # conference name; HSYIGPdfParser copies it onto each bill

    @classmethod
    def from_filename(cls, filename: str, confname: str) -> Self:
        """Open ``filename`` with ``fitz.open`` and wrap it in a parser.

        Args:
            filename: Path of the document to open.
            confname: Conference name to store on the parser.

        Returns:
            A new instance of ``cls``, so calling this on a subclass yields
            an instance of that subclass.
        """
        return cls(
            document=fitz.open(filename),
            confname=confname,
        )
class HSYIGPdfParser(BookParser):
@staticmethod
def _words_in_superstring(words: list[str], superstring: str) -> bool:
for word in words:
@ -142,6 +160,9 @@ class HSYIGPdfParser:
title=pretty_printed["title"]
))
for bill in bills: # add the conference name to each
bill.code.conference = self.confname
self.bills = bills
@staticmethod