parser object overhauls
make conference names part of the parser object in preparation for the implementation of billdb; refactor class structure for parsers
This commit is contained in:
parent
dbd9632e16
commit
5c11ff4371
|
@ -1,5 +1,8 @@
|
|||
import leglib

# Build the parser from the book PDF. `confname` is a required argument of
# `from_filename`, so both arguments are passed by keyword; the earlier
# single-argument call would raise TypeError and has been dropped.
parser = leglib.parsers.HSYIGPdfParser.from_filename(
    filename="YIGVolunteerBook2024.pdf",
    confname="YIGVolunteer",
)

# parse() populates `parser.bills`; dump the text of every parsed bill.
parser.parse()
print([bill.bill_text for bill in parser.bills])
|
||||
|
|
|
@ -49,6 +49,8 @@ class BillCode:
|
|||
str(self.docketplacement)
|
||||
)
|
||||
|
||||
self.conference: None | str = None # to be filled in with BillDB
|
||||
|
||||
def __str__(self):
|
||||
return "{} {} - {}-{}-{}".format(
|
||||
self.color,
|
||||
|
|
29
parsers.py
29
parsers.py
|
@ -1,13 +1,31 @@
|
|||
import fitz
|
||||
from typing import Any
|
||||
from typing import Any, Self, ClassVar
|
||||
from itertools import groupby
|
||||
from dataclasses import dataclass
|
||||
|
||||
from lib import FitzBlockWrapper
|
||||
from common import Bill
|
||||
|
||||
class HSYIGPdfParser:
|
||||
def __init__(self, document: fitz.Document):
|
||||
self.document = document
|
||||
@dataclass
class BookParser:
    """Common base for book parsers.

    Bundles an opened PDF document with the name of the conference the
    book belongs to. Concrete parsers subclass this and are expected to
    override the ``humanname`` / ``description`` class variables
    (presumably for display purposes; confirm against callers).
    """

    # class variables (ClassVar fields are excluded from the dataclass
    # __init__ and stay shared across instances)
    humanname: ClassVar[str] = "Generic BookParser parent class."
    description: ClassVar[str] = """
    A generic description of the abilities of this BookParser.
    """

    # everything else
    document: fitz.Document  # the opened PDF
    confname: str  # conference name associated with this book

    @classmethod
    def from_filename(cls, filename: str, confname: str) -> Self:
        """Open the PDF at *filename* and wrap it in a parser instance.

        Args:
            filename: Path of the PDF file, passed straight to ``fitz.open``.
            confname: Name of the conference this book belongs to.

        Returns:
            An instance of ``cls`` (so calling this on a subclass yields
            that subclass) holding the opened document and ``confname``.
        """
        return cls(
            document=fitz.open(filename),
            confname=confname,
        )
|
||||
|
||||
class HSYIGPdfParser(BookParser):
|
||||
@staticmethod
|
||||
def _words_in_superstring(words: list[str], superstring: str) -> bool:
|
||||
for word in words:
|
||||
|
@ -142,6 +160,9 @@ class HSYIGPdfParser:
|
|||
title=pretty_printed["title"]
|
||||
))
|
||||
|
||||
for bill in bills: # add the conference name to each
|
||||
bill.code.conference = self.confname
|
||||
|
||||
self.bills = bills
|
||||
|
||||
@staticmethod
|
||||
|
|
Loading…
Reference in New Issue