parser object overhauls
Make conference names part of the parser object in preparation for the implementation of BillDB; refactor the class structure for parsers.
This commit is contained in:
parent
dbd9632e16
commit
5c11ff4371
|
@ -1,5 +1,8 @@
|
||||||
import leglib
|
import leglib
|
||||||
|
|
||||||
parser = leglib.parsers.HSYIGPdfParser.from_filename("YIGVolunteerBook2024.pdf")
|
parser = leglib.parsers.HSYIGPdfParser.from_filename(
|
||||||
|
filename="YIGVolunteerBook2024.pdf",
|
||||||
|
confname="YIGVolunteer"
|
||||||
|
)
|
||||||
parser.parse()
|
parser.parse()
|
||||||
print([i.bill_text for i in parser.bills])
|
print([i.bill_text for i in parser.bills])
|
||||||
|
|
|
@ -49,6 +49,8 @@ class BillCode:
|
||||||
str(self.docketplacement)
|
str(self.docketplacement)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.conference: None | str = None # to be filled in with BillDB
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "{} {} - {}-{}-{}".format(
|
return "{} {} - {}-{}-{}".format(
|
||||||
self.color,
|
self.color,
|
||||||
|
|
29
parsers.py
29
parsers.py
|
@ -1,13 +1,31 @@
|
||||||
import fitz
|
import fitz
|
||||||
from typing import Any
|
from typing import Any, Self, ClassVar
|
||||||
|
from itertools import groupby
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
from lib import FitzBlockWrapper
|
from lib import FitzBlockWrapper
|
||||||
from common import Bill
|
from common import Bill
|
||||||
|
|
||||||
class HSYIGPdfParser:
|
@dataclass
|
||||||
def __init__(self, document: fitz.Document):
|
class BookParser:
|
||||||
self.document = document
|
# class variables
|
||||||
|
humanname: ClassVar[str] = "Generic BookParser parent class."
|
||||||
|
description: ClassVar[str] = """
|
||||||
|
A generic description of the abilities of this BookParser.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# everything else
|
||||||
|
document: fitz.Document
|
||||||
|
confname: str
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_filename(cls, filename: str, confname: str):
|
||||||
|
return cls(
|
||||||
|
document=fitz.open(filename),
|
||||||
|
confname=confname
|
||||||
|
)
|
||||||
|
|
||||||
|
class HSYIGPdfParser(BookParser):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _words_in_superstring(words: list[str], superstring: str) -> bool:
|
def _words_in_superstring(words: list[str], superstring: str) -> bool:
|
||||||
for word in words:
|
for word in words:
|
||||||
|
@ -142,6 +160,9 @@ class HSYIGPdfParser:
|
||||||
title=pretty_printed["title"]
|
title=pretty_printed["title"]
|
||||||
))
|
))
|
||||||
|
|
||||||
|
for bill in bills: # add the conference name to each
|
||||||
|
bill.code.conference = self.confname
|
||||||
|
|
||||||
self.bills = bills
|
self.bills = bills
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
Loading…
Reference in New Issue