add search to the setup
This commit is contained in:
parent
5c11ff4371
commit
9ba154f654
26
analyser.py
26
analyser.py
@ -1,8 +1,28 @@
|
||||
import leglib
|
||||
import leglib #billdb import BillDB, BillQuery, QueryField, QueryAll
|
||||
|
||||
# Demo script: parse a conference book PDF, load its bills into a BillDB,
# and print bill counts for a handful of sample queries.
parser = leglib.parsers.HSYIGPdfParser.from_filename(
|
||||
filename="YIGVolunteerBook2024.pdf",
|
||||
# NOTE(review): two confname= values are listed below; this looks like the
# pre- and post-change lines of a diff shown together -- confirm which one
# is current, since a call cannot take the same keyword twice.
confname="YIGVolunteer"
|
||||
confname="HSVolunteer"
|
||||
)
|
||||
parser.parse()
|
||||
print([i.bill_text for i in parser.bills])
|
||||
|
||||
print(len(parser.bills))
|
||||
|
||||
# Load every parsed bill into a fresh database.
db = leglib.billdb.BillDB()
|
||||
db.add_conference(parser=parser)
|
||||
|
||||
# QueryAll is passed as the class object itself and selects every bill.
allbills = len(db.search(query=leglib.billdb.QueryAll))
|
||||
|
||||
# Count bills per color.
bluelen = len(db.search(query=leglib.billdb.BillQuery(color=leglib.billdb.QueryField.Colors.Blue)))
|
||||
whitelen = len(db.search(query=leglib.billdb.BillQuery(color=leglib.billdb.QueryField.Colors.White)))
|
||||
redlen = len(db.search(query=leglib.billdb.BillQuery(color=leglib.billdb.QueryField.Colors.Red)))
|
||||
|
||||
# Count bills per assembly.
senatelen = len(db.search(query=leglib.billdb.BillQuery(assembly=leglib.billdb.QueryField.Assemblies.Senate)))
|
||||
houselen = len(db.search(query=leglib.billdb.BillQuery(assembly=leglib.billdb.QueryField.Assemblies.House)))
|
||||
|
||||
# String fields such as school are matched as case-insensitive substrings
# (see BillDB.string_match).
franklincount = len(db.search(query=leglib.billdb.BillQuery(school="Franklin")))
|
||||
|
||||
# Print the overall count, then the per-color and per-assembly counts, each
# followed by its sum so the totals can be compared by eye.
print(allbills)
|
||||
print(redlen, whitelen, bluelen, redlen + whitelen + bluelen)
|
||||
print(senatelen, houselen, senatelen + houselen)
|
||||
print(franklincount)
|
||||
|
122
billdb.py
Normal file
122
billdb.py
Normal file
@ -0,0 +1,122 @@
|
||||
from common import Bill, CCEColors, CCEAssemblies
|
||||
from parsers import BookParser
|
||||
|
||||
from typing import Type, Self
|
||||
from dataclasses import dataclass
|
||||
|
||||
class QueryAny:
    """Marker type that stands for an "Any" match.

    Meant for attributes whose own type does not provide an ``Any`` member.
    """
|
||||
|
||||
class SearchNotSatisified(Exception):
    """Signal that a bill failed one criterion of a query.

    Raised by the ``BillDB`` match helpers and caught inside
    ``BillDB.search`` to skip the current bill.

    Derives from ``Exception`` rather than ``BaseException``: the latter is
    reserved for interpreter-level signals such as ``KeyboardInterrupt`` and
    ``SystemExit``, and user-defined exceptions are expected to subclass
    ``Exception``. Code that catches ``SearchNotSatisified`` by name is
    unaffected.
    """
|
||||
|
||||
class QueryAll:
    """Sentinel query that selects every bill in the database.

    Pass the class object itself as the query, e.g.
    ``db.search(query=QueryAll)``.
    """
|
||||
|
||||
class QueryField:
    """Namespace of values that can be used as ``BillQuery`` criteria."""

    # Re-exported enums, so callers can write QueryField.Colors.Blue or
    # QueryField.Assemblies.Senate without importing them separately.
    Assemblies = CCEAssemblies
    Colors = CCEColors

    # Unique sentinel object; it is the default of every BillQuery field and
    # means "do not filter on this field".
    Any = object()
|
||||
|
||||
@dataclass
class BillQuery:
    """
    Holds a query for the BillDB.

    Every field defaults to ``QueryField.Any``, meaning "do not filter on
    this field"; set only the fields that should constrain the search.
    """
    # NOTE: QueryField.Any is a plain sentinel object, so the "| QueryField"
    # part of these annotations is documentation rather than an exact type.
    color: CCEColors | QueryField = QueryField.Any
    assembly: CCEAssemblies | QueryField = QueryField.Any
    committee: int | QueryField = QueryField.Any
    year: int | QueryField = QueryField.Any
    subcommittee: str | QueryField = QueryField.Any
    sponsors: str | QueryField = QueryField.Any
    school: str | QueryField = QueryField.Any
    bill_text: str | QueryField = QueryField.Any
    title: str | QueryField = QueryField.Any

    @property
    def bill_text_concat(self):
        """Return ``bill_text`` under the attribute name the search uses.

        ``BillDB.search`` looks up ``bill_text_concat`` on both the bill and
        the query. Exposing it as a property (instead of copying the value
        once at construction time) keeps it in sync if ``bill_text`` is
        reassigned after the query has been created.
        """
        return self.bill_text
|
||||
|
||||
class BillDB:
    """In-memory list of bills that can be filtered with a ``BillQuery``."""

    def __init__(self):
        self.bills: list[Bill] = []
        # Declared only; nothing in this class assigns or reads the cache yet.
        self.cache: dict[Bill]

    @staticmethod
    def code_enum_match(bill: Bill, query: BillQuery, attr: str) -> None:
        """
        Check one enum-valued field of ``bill.code`` against the query.

        This is probably very slow. Maybe replace this with a better solution?

        This helper replaces repetitive per-field code like this:

            if query.color is not QueryField.Any and query.color != CCEColors.Any:
                if bill.code.color != query.color:
                    raise SearchNotSatisified()

        with this:

            self.code_enum_match(bill, query, "color")

        ``string_match`` follows the same pattern for string fields.

        :param bill: bill whose ``code.<attr>`` value is tested.
        :param query: query whose ``<attr>`` value is the criterion.
        :param attr: name of the attribute to compare.
        :raises SearchNotSatisified: if the query names a concrete value and
            the bill's value differs from it.
        """
        wanted = getattr(query, attr)
        if wanted is QueryField.Any:
            # Field left at its default: no constraint.
            return

        actual = getattr(bill.code, attr)
        # The enum's own ``Any`` member is also a wildcard.
        if wanted == type(actual).Any:
            return

        if actual != wanted:
            raise SearchNotSatisified()

    @staticmethod
    def string_match(bill: Bill, query: BillQuery, attr: str) -> None:
        """
        Check one string field of ``bill`` against the query.

        The query value matches when it occurs, case-insensitively, anywhere
        inside the bill's value.

        :param bill: bill whose ``<attr>`` value is searched.
        :param query: query whose ``<attr>`` value is the search text.
        :param attr: name of the attribute to compare.
        :raises SearchNotSatisified: if the query text is not contained in
            the bill's value.
        """
        needle = getattr(query, attr)
        if needle is QueryField.Any:
            return

        if needle.lower() not in getattr(bill, attr).lower():
            raise SearchNotSatisified()

    @staticmethod
    def _exact_match(wanted, actual) -> None:
        """Raise SearchNotSatisified unless ``wanted`` is Any or equals ``actual``."""
        if wanted is not QueryField.Any and wanted != actual:
            raise SearchNotSatisified()

    def add_conference(self: Self, parser: BookParser) -> None:
        """
        Append all bills held by ``parser`` to this database.

        :param parser: a parser object exposing a ``bills`` sequence
            (presumably an instance of a ``BookParser`` subclass that has
            already been parsed -- verify against callers).
        """
        # this works because each BookParser must insert its self.confname into its self.bills[i].code.conference field.
        self.bills.extend(parser.bills)

    def search(self: Self, query: BillQuery | QueryAll) -> list[Bill]:
        """
        Return the bills that satisfy every criterion in ``query``.

        :param query: a ``BillQuery``, or ``QueryAll`` (the class itself or
            an instance of it) to select everything.
        :returns: a new list of matching bills, in insertion order.
        """
        if query is QueryAll or isinstance(query, QueryAll):
            # Return a copy so callers cannot mutate the database through
            # the result list (the filtered path also returns a new list).
            return list(self.bills)

        results = []
        for bill in self.bills:
            try:
                self.code_enum_match(bill, query, "color")
                self.code_enum_match(bill, query, "assembly")

                self._exact_match(query.committee, bill.code.committee)
                # The year filter must be guarded by query.year itself;
                # guarding it with query.committee ignored year-only queries
                # and rejected every bill for committee-only queries.
                self._exact_match(query.year, bill.code.year)

                self.string_match(bill, query, "subcommittee")
                self.string_match(bill, query, "sponsors")
                self.string_match(bill, query, "school")
                self.string_match(bill, query, "bill_text_concat")
                self.string_match(bill, query, "title")
            except SearchNotSatisified:
                # At least one criterion failed; skip this bill.
                continue
            results.append(bill)

        return results
|
18
common.py
18
common.py
@ -3,12 +3,15 @@ from enum import StrEnum, auto
|
||||
class CCEColors(StrEnum):
|
||||
Red = "Red"
|
||||
White = "White",
|
||||
Blue = "Blue"
|
||||
Blue = "Blue",
|
||||
Undefined = "Undefined", # some conferences don't have assemblies
|
||||
Any = "Any" # for searching purposes
|
||||
|
||||
class CCEAssemblies(StrEnum):
|
||||
Senate = "Senate",
|
||||
House = "House",
|
||||
GeneralAssembly = "GeneralAssembly"
|
||||
GeneralAssembly = "GeneralAssembly",
|
||||
Any = "Any" # for searching purposes
|
||||
|
||||
class BillCode:
|
||||
def __init__(self, text: str):
|
||||
@ -37,19 +40,20 @@ class BillCode:
|
||||
elif assemblydivision == "G":
|
||||
self.assembly = CCEAssemblies.GeneralAssembly
|
||||
|
||||
self.year = int(dashsplit[0])
|
||||
# reverse y2k problem; but conference years are stored in YY, not YYYY form
|
||||
self.year = int(dashsplit[0]) + 2000
|
||||
self.committee = int(dashsplit[1])
|
||||
self.docketplacement = int(dashsplit[2])
|
||||
|
||||
self.stringrep = self.color[0].upper() + \
|
||||
self.assembly[0].upper() + \
|
||||
"B/{}-{}-{}".format(
|
||||
str(self.year),
|
||||
str(self.year - 2000),
|
||||
str(self.committee),
|
||||
str(self.docketplacement)
|
||||
)
|
||||
|
||||
self.conference: None | str = None # to be filled in with BillDB
|
||||
self.conference: None | str = None # to be filled in with BookParser and friends
|
||||
|
||||
def __str__(self):
|
||||
return "{} {} - {}-{}-{}".format(
|
||||
@ -79,3 +83,7 @@ class Bill:
|
||||
self.school = school.rstrip()
|
||||
self.bill_text = bill_text
|
||||
self.title = title
|
||||
|
||||
@property
|
||||
def bill_text_concat(self):
|
||||
return ''.join(self.bill_text)
|
||||
|
@ -1,5 +1,2 @@
|
||||
import fitz
|
||||
import math
|
||||
|
||||
from typing import Any
|
||||
import parsers
|
||||
import billdb
|
||||
|
Loading…
Reference in New Issue
Block a user