add search to the setup
This commit is contained in:
parent
5c11ff4371
commit
9ba154f654
26
analyser.py
26
analyser.py
|
@ -1,8 +1,28 @@
|
||||||
import leglib
|
import leglib #billdb import BillDB, BillQuery, QueryField, QueryAll
|
||||||
|
|
||||||
parser = leglib.parsers.HSYIGPdfParser.from_filename(
|
parser = leglib.parsers.HSYIGPdfParser.from_filename(
|
||||||
filename="YIGVolunteerBook2024.pdf",
|
filename="YIGVolunteerBook2024.pdf",
|
||||||
confname="YIGVolunteer"
|
confname="HSVolunteer"
|
||||||
)
|
)
|
||||||
parser.parse()
|
parser.parse()
|
||||||
print([i.bill_text for i in parser.bills])
|
|
||||||
|
print(len(parser.bills))
|
||||||
|
|
||||||
|
db = leglib.billdb.BillDB()
|
||||||
|
db.add_conference(parser=parser)
|
||||||
|
|
||||||
|
allbills = len(db.search(query=leglib.billdb.QueryAll))
|
||||||
|
|
||||||
|
bluelen = len(db.search(query=leglib.billdb.BillQuery(color=leglib.billdb.QueryField.Colors.Blue)))
|
||||||
|
whitelen = len(db.search(query=leglib.billdb.BillQuery(color=leglib.billdb.QueryField.Colors.White)))
|
||||||
|
redlen = len(db.search(query=leglib.billdb.BillQuery(color=leglib.billdb.QueryField.Colors.Red)))
|
||||||
|
|
||||||
|
senatelen = len(db.search(query=leglib.billdb.BillQuery(assembly=leglib.billdb.QueryField.Assemblies.Senate)))
|
||||||
|
houselen = len(db.search(query=leglib.billdb.BillQuery(assembly=leglib.billdb.QueryField.Assemblies.House)))
|
||||||
|
|
||||||
|
franklincount = len(db.search(query=leglib.billdb.BillQuery(school="Franklin")))
|
||||||
|
|
||||||
|
print(allbills)
|
||||||
|
print(redlen, whitelen, bluelen, redlen + whitelen + bluelen)
|
||||||
|
print(senatelen, houselen, senatelen + houselen)
|
||||||
|
print(franklincount)
|
||||||
|
|
|
@ -0,0 +1,122 @@
|
||||||
|
from common import Bill, CCEColors, CCEAssemblies
|
||||||
|
from parsers import BookParser
|
||||||
|
|
||||||
|
from typing import Type, Self
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
class QueryAny:
    """
    Marker class indicating an "Any" match for attributes whose own type
    has no Any member.

    It has no behaviour of its own and is not referenced elsewhere in the
    visible part of this module.
    """
|
||||||
|
|
||||||
|
class SearchNotSatisified(Exception):
    """
    Raised by the BillDB match helpers when a bill fails one query field.

    Derives from Exception rather than BaseException: BaseException is meant
    for interpreter-level exits (KeyboardInterrupt, SystemExit) and would slip
    past ordinary ``except Exception`` handlers.  The (misspelled) class name
    is kept because callers catch it by this name.
    """
|
||||||
|
|
||||||
|
class QueryAll:
    """
    Sentinel query meaning "return every bill, unfiltered".

    Pass the class object itself (``QueryAll``, not ``QueryAll()``) as the
    ``query`` argument of BillDB.search.
    """
|
||||||
|
|
||||||
|
class QueryField:
    """
    Namespace of the values a caller can put into a BillQuery field.
    """

    # Unique sentinel meaning "do not filter on this field"; it is the
    # default of every BillQuery field.
    Any = object()

    # Aliases so callers can write QueryField.Colors.Blue or
    # QueryField.Assemblies.Senate without importing the enums from common.
    Colors = CCEColors
    Assemblies = CCEAssemblies
|
||||||
|
|
||||||
|
@dataclass
class BillQuery:
    """
    Holds a query for the BillDB.

    Every field defaults to QueryField.Any, which means "do not filter on
    this field".  How a non-default field is compared against a bill is
    decided by BillDB.search.
    """

    color: CCEColors | QueryField = QueryField.Any
    assembly: CCEAssemblies | QueryField = QueryField.Any
    committee: int | QueryField = QueryField.Any
    year: int | QueryField = QueryField.Any
    subcommittee: str | QueryField = QueryField.Any
    sponsors: str | QueryField = QueryField.Any
    school: str | QueryField = QueryField.Any
    bill_text: str | QueryField = QueryField.Any
    title: str | QueryField = QueryField.Any

    def __post_init__(self):
        # BillDB.search looks the attribute "bill_text_concat" up on both the
        # bill and the query, so mirror bill_text under that name.
        # NOTE: this is a snapshot taken at construction time; reassigning
        # bill_text afterwards does not update it.
        self.bill_text_concat = self.bill_text  # for search compat reasons
|
||||||
|
|
||||||
|
class BillDB:
    """
    In-memory collection of bills gathered from one or more parsed books.

    Bills are added with add_conference() and filtered with search().
    """

    def __init__(self):
        self.bills: list[Bill] = []
        # Not read anywhere in this class yet.  It used to be annotated but
        # never assigned, so any access raised AttributeError; start it out
        # as an empty dict instead.
        self.cache: dict = {}

    @staticmethod
    def code_enum_match(bill: Bill, query: BillQuery, attr: str) -> None:
        """
        Check one enum-valued field of ``bill.code`` against the query field
        of the same name.

        The check passes (returns None) when the query field is
        QueryField.Any, when it equals the ``Any`` member of the enum class
        of the bill's value, or when it equals the bill's value.

        This is probably slow (attribute lookup by name for every bill);
        it exists to replace one hand-written if-block per field with e.g.

            self.code_enum_match(bill, query, "color")

        Raises:
            SearchNotSatisified: the query names a specific value and the
                bill's value differs.
        """
        wanted = getattr(query, attr)
        if wanted is QueryField.Any:
            return

        actual = getattr(bill.code, attr)
        # the enum classes carry their own wildcard member called Any
        if wanted == type(actual).Any:
            return

        if actual != wanted:
            raise SearchNotSatisified()
        # if we do match, no exception

    @staticmethod
    def _exact_match(bill: Bill, query: BillQuery, attr: str) -> None:
        """
        Check one field of ``bill.code`` for equality with the query field of
        the same name; QueryField.Any on the query side always passes.

        Raises:
            SearchNotSatisified: the two values are not equal.
        """
        wanted = getattr(query, attr)
        if wanted is QueryField.Any:
            return

        if wanted != getattr(bill.code, attr):
            raise SearchNotSatisified()

    @staticmethod
    def string_match(bill: Bill, query: BillQuery, attr: str) -> None:
        """
        Check that the query's ``attr`` string occurs, ignoring case, inside
        the bill's ``attr`` string; QueryField.Any always passes.

        Raises:
            SearchNotSatisified: the query string is not a substring.
        """
        wanted = getattr(query, attr)
        if wanted is QueryField.Any:
            return

        if wanted.lower() not in getattr(bill, attr).lower():
            raise SearchNotSatisified()

    def add_conference(self: Self, parser: BookParser) -> None:
        """
        Append every bill of an already-parsed book to this database.

        ``parser`` is an instance of any BookParser subclass; only its
        ``bills`` list is read.
        """
        # this works because each BookParser must insert its self.confname
        # into its self.bills[i].code.conference field.
        self.bills += parser.bills

    def search(self: Self, query: BillQuery | QueryAll | type[QueryAll]) -> list[Bill]:
        """
        Return the bills that satisfy every non-Any field of ``query``.

        Passing QueryAll (the class itself, or an instance) returns all bills.
        The returned list is always a new list, so callers may modify it
        without affecting the database.
        """
        if query is QueryAll or isinstance(query, QueryAll):
            return list(self.bills)

        results = []
        for bill in self.bills:
            try:
                self.code_enum_match(bill, query, "color")
                self.code_enum_match(bill, query, "assembly")

                # Each numeric field is guarded by its own Any check.  The
                # year check used to be guarded by query.committee, which
                # ignored year-only queries and rejected every bill for
                # committee-only queries.
                self._exact_match(bill, query, "committee")
                self._exact_match(bill, query, "year")

                for attr in ("subcommittee", "sponsors", "school",
                             "bill_text_concat", "title"):
                    self.string_match(bill, query, attr)
            except SearchNotSatisified:
                continue
            results.append(bill)

        return results
|
18
common.py
18
common.py
|
@ -3,12 +3,15 @@ from enum import StrEnum, auto
|
||||||
class CCEColors(StrEnum):
|
class CCEColors(StrEnum):
|
||||||
Red = "Red"
|
Red = "Red"
|
||||||
White = "White",
|
White = "White",
|
||||||
Blue = "Blue"
|
Blue = "Blue",
|
||||||
|
Undefined = "Undefined", # some conferences don't have assemblies
|
||||||
|
Any = "Any" # for searching purposes
|
||||||
|
|
||||||
class CCEAssemblies(StrEnum):
|
class CCEAssemblies(StrEnum):
|
||||||
Senate = "Senate",
|
Senate = "Senate",
|
||||||
House = "House",
|
House = "House",
|
||||||
GeneralAssembly = "GeneralAssembly"
|
GeneralAssembly = "GeneralAssembly",
|
||||||
|
Any = "Any" # for searching purposes
|
||||||
|
|
||||||
class BillCode:
|
class BillCode:
|
||||||
def __init__(self, text: str):
|
def __init__(self, text: str):
|
||||||
|
@ -37,19 +40,20 @@ class BillCode:
|
||||||
elif assemblydivision == "G":
|
elif assemblydivision == "G":
|
||||||
self.assembly = CCEAssemblies.GeneralAssembly
|
self.assembly = CCEAssemblies.GeneralAssembly
|
||||||
|
|
||||||
self.year = int(dashsplit[0])
|
# reverse y2k problem; but conference years are stored in YY, not YYYY form
|
||||||
|
self.year = int(dashsplit[0]) + 2000
|
||||||
self.committee = int(dashsplit[1])
|
self.committee = int(dashsplit[1])
|
||||||
self.docketplacement = int(dashsplit[2])
|
self.docketplacement = int(dashsplit[2])
|
||||||
|
|
||||||
self.stringrep = self.color[0].upper() + \
|
self.stringrep = self.color[0].upper() + \
|
||||||
self.assembly[0].upper() + \
|
self.assembly[0].upper() + \
|
||||||
"B/{}-{}-{}".format(
|
"B/{}-{}-{}".format(
|
||||||
str(self.year),
|
str(self.year - 2000),
|
||||||
str(self.committee),
|
str(self.committee),
|
||||||
str(self.docketplacement)
|
str(self.docketplacement)
|
||||||
)
|
)
|
||||||
|
|
||||||
self.conference: None | str = None # to be filled in with BillDB
|
self.conference: None | str = None # to be filled in with BookParser and friends
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "{} {} - {}-{}-{}".format(
|
return "{} {} - {}-{}-{}".format(
|
||||||
|
@ -79,3 +83,7 @@ class Bill:
|
||||||
self.school = school.rstrip()
|
self.school = school.rstrip()
|
||||||
self.bill_text = bill_text
|
self.bill_text = bill_text
|
||||||
self.title = title
|
self.title = title
|
||||||
|
|
||||||
|
@property
def bill_text_concat(self) -> str:
    """
    The bill text as a single string: the items of self.bill_text joined
    with no separator.
    """
    return ''.join(self.bill_text)
|
||||||
|
|
|
@ -1,5 +1,2 @@
|
||||||
import fitz
|
|
||||||
import math
|
|
||||||
|
|
||||||
from typing import Any
|
|
||||||
import parsers
|
import parsers
|
||||||
|
import billdb
|
||||||
|
|
|
@ -3,5 +3,6 @@
|
||||||
# nativeBuildInputs is usually what you want -- tools you need to run
|
# nativeBuildInputs is usually what you want -- tools you need to run
|
||||||
nativeBuildInputs = with pkgs; [
|
nativeBuildInputs = with pkgs; [
|
||||||
buildPackages.python311Packages.pymupdf
|
buildPackages.python311Packages.pymupdf
|
||||||
|
buildPackages.python311Packages.flask
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue