Compare commits
9 Commits
10dd6418b7
...
3848e1f777
Author | SHA1 | Date |
---|---|---|
stupidcomputer | 3848e1f777 | |
stupidcomputer | 9680a416da | |
stupidcomputer | 9ba154f654 | |
stupidcomputer | 5c11ff4371 | |
stupidcomputer | dbd9632e16 | |
stupidcomputer | 11fbcb474a | |
stupidcomputer | eabe1c98a0 | |
stupidcomputer | a62453bdea | |
stupidcomputer | 875890a83a |
|
@ -0,0 +1,2 @@
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
Binary file not shown.
|
@ -0,0 +1,99 @@
|
||||||
|
import secrets
|
||||||
|
|
||||||
|
from flask import Flask
|
||||||
|
from flask import render_template
|
||||||
|
from flask_bootstrap import Bootstrap
|
||||||
|
|
||||||
|
from .leglib.billdb import BillDB, BillQuery, QueryField, QueryAll
|
||||||
|
from .leglib.parsers import HSYIGPdfParser
|
||||||
|
|
||||||
|
# Runs once, when this module is imported: parse the book and load its bills
# into the database object that the routes in create_app() query.
parser = HSYIGPdfParser.from_filename(filename="YIGVolunteerBook2024.pdf", confname="HSVolunteer")
parser.parse()

db = BillDB()
db.add_conference(parser=parser)
|
||||||
|
|
||||||
|
def create_app(test_config=None):
    """
    Application factory: build the Flask app and register every route.

    Parameters:
        test_config: optional mapping of configuration values. When given it
            is applied on top of the defaults set here.

    Returns:
        The configured Flask application.

    The routes query the module-level ``db``. Numeric URL segments use
    Flask's ``int`` converter, so a non-numeric year, committee or order is
    answered with a 404 instead of an unhandled ValueError from ``int()``.
    """
    # local import: the module's import block only brings in Flask and render_template
    from flask import abort

    app = Flask(__name__, instance_relative_config=True)
    app.config.from_mapping(
        # 32 random bytes, hex encoded. The key is regenerated every time the
        # app is created, so anything signed with it does not survive a restart.
        SECRET_KEY=secrets.token_hex(32)
    )
    if test_config is not None:
        app.config.from_mapping(test_config)

    Bootstrap(app)

    def render_bill_list(template, bills):
        # the listing templates read bills[0] for their heading, so an empty
        # result has nothing to render
        if not bills:
            abort(404)
        return render_template(template, bills=bills)

    @app.route('/')
    def index():
        bills = db.search(query=QueryAll)
        # count the conferences actually present instead of hard-coding a number
        conferences = {bill.code.conference for bill in bills}
        return render_template(
            'index.html',
            number_bills=len(bills),
            number_conferences=len(conferences),
            bills=bills,
        )

    @app.route('/legislation/<conference>/<int:year>')
    def show_conference(conference=QueryField.Any, year=QueryField.Any):
        # the rule supplies <year> as a keyword argument, so the view must accept it
        return conference

    @app.route('/legislation/<conference>/<color>/<int:year>')
    def show_color(
        conference=QueryField.Any,
        year=QueryField.Any,
        color=QueryField.Any,
    ):
        bills = db.search(query=BillQuery(
            color=color,
            year=year,
        ))
        return render_bill_list('color.html', bills)

    @app.route('/legislation/<conference>/<color>/<assembly>/<int:year>')
    def show_assembly(
        conference=QueryField.Any,
        assembly=QueryField.Any,
        color=QueryField.Any,
        year=QueryField.Any,
    ):
        bills = db.search(query=BillQuery(
            color=color,
            assembly=assembly,
            year=year,
        ))
        return render_bill_list('assembly.html', bills)

    @app.route('/legislation/<conference>/<color>/<assembly>/<int:year>/<int:committee>')
    def show_committee(
        conference=QueryField.Any,
        assembly=QueryField.Any,
        color=QueryField.Any,
        year=QueryField.Any,
        committee=QueryField.Any,
    ):
        bills = db.search(query=BillQuery(
            # NOTE(review): the <color> segment is deliberately not applied here,
            # as in the original; confirm a committee page should span all colors.
            color=QueryField.Any,
            assembly=assembly,
            year=year,
            committee=committee,
        ))
        return render_bill_list('committee.html', bills)

    @app.route('/legislation/<conference>/<color>/<assembly>/<int:year>/<int:committee>/<int:order>')
    def show_bill(
        conference=QueryField.Any,
        assembly=QueryField.Any,
        color=QueryField.Any,
        year=QueryField.Any,
        committee=QueryField.Any,
        order=QueryField.Any,
    ):
        bills = db.search(query=BillQuery(
            color=color,
            assembly=assembly,
            year=year,
            committee=committee,
            order=order,
        ))
        # an unknown bill is a 404, not an IndexError on bills[0]
        if not bills:
            abort(404)

        return render_template("bill.html", bill=bills[0])

    return app
|
|
@ -0,0 +1,29 @@
|
||||||
|
from leglib.billdb import BillDB, BillQuery, QueryField, QueryAll
|
||||||
|
from leglib.parsers import HSYIGPdfParser
|
||||||
|
|
||||||
|
# Smoke-test script: parse the book, load it into a BillDB, and print a few
# counts so the per-color and per-assembly totals can be compared by eye.
parser = HSYIGPdfParser.from_filename(
    filename="YIGVolunteerBook2024.pdf",
    confname="HSVolunteer",
)
parser.parse()

print(len(parser.bills))

db = BillDB()
db.add_conference(parser=parser)

allbills = len(db.search(query=QueryAll))

# one count per color grouping
redlen, whitelen, bluelen = (
    len(db.search(query=BillQuery(color=shade)))
    for shade in (
        QueryField.Colors.Red,
        QueryField.Colors.White,
        QueryField.Colors.Blue,
    )
)

# one count per assembly
senatelen, houselen = (
    len(db.search(query=BillQuery(assembly=chamber)))
    for chamber in (QueryField.Assemblies.Senate, QueryField.Assemblies.House)
)

# substring match on the school field
franklincount = len(db.search(query=BillQuery(school="Franklin")))

print(allbills)
print(redlen, whitelen, bluelen, redlen + whitelen + bluelen)
print(senatelen, houselen, senatelen + houselen)
print(franklincount)
|
|
@ -0,0 +1,128 @@
|
||||||
|
from .common import Bill, CCEColors, CCEAssemblies
|
||||||
|
from .parsers import BookParser
|
||||||
|
|
||||||
|
from typing import Type, Self
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
class QueryAny:
    """
    Marker class standing for an "Any" match on attributes that have no Any member of their own.
    """
|
||||||
|
|
||||||
|
class SearchNotSatisified(Exception):
    """
    Raised when a bill fails one criterion of a query.

    Derives from Exception, not BaseException: BaseException is reserved for
    exits such as KeyboardInterrupt and SystemExit, and handlers written as
    ``except Exception`` would otherwise never see this error.
    """
|
||||||
|
|
||||||
|
class QueryAll:
    """
    Marker class used in place of a BillQuery to ask for every bill.
    """
|
||||||
|
|
||||||
|
class QueryField:
    """
    Namespace for the special values a query field can hold.
    """
    # unique sentinel object: "no constraint on this field"
    Any = object()

    # aliases for the enums, so callers can write QueryField.Colors.Red
    # or QueryField.Assemblies.Senate
    Colors = CCEColors
    Assemblies = CCEAssemblies
|
||||||
|
|
||||||
|
@dataclass
class BillQuery:
    """
    Search criteria for the BillDB.

    Every field defaults to QueryField.Any, i.e. no constraint on that field.
    """
    # fields describing the bill's code
    color: str | CCEColors | QueryField = QueryField.Any
    assembly: str | CCEAssemblies | QueryField = QueryField.Any
    committee: int | QueryField = QueryField.Any
    year: int | QueryField = QueryField.Any
    order: int | QueryField = QueryField.Any

    # free-text fields
    subcommittee: str | QueryField = QueryField.Any
    sponsors: str | QueryField = QueryField.Any
    school: str | QueryField = QueryField.Any
    bill_text: str | QueryField = QueryField.Any
    title: str | QueryField = QueryField.Any

    def __post_init__(self):
        # expose the bill_text criterion under the name "bill_text_concat" as
        # well, so the same attribute name can be read from a query and a bill
        self.bill_text_concat = self.bill_text
|
||||||
|
|
||||||
|
class BillDB:
    """
    An in-memory list of bills that can be filtered with a BillQuery.
    """

    def __init__(self):
        self.bills: list[Bill] = []
        # not read or written by any method of this class yet
        self.cache: dict = {}

    @staticmethod
    def code_enum_match(bill: Bill, query: BillQuery, attr: str) -> None:
        """
        Check one enum-valued field of bill.code against the query.

        Returns None when the field is acceptable and raises
        SearchNotSatisified when it is not, so search() can chain several
        checks inside a single try block.

        The field is acceptable when the query leaves it at QueryField.Any,
        when the query holds the enum's own ``Any`` member, or when the two
        values compare equal.
        """
        wanted = getattr(query, attr)
        if wanted is QueryField.Any:
            return

        actual = getattr(bill.code, attr)

        # the enum's own Any member matches everything as well; a plain value
        # whose type has no Any member simply skips this shortcut
        enum_any = getattr(type(actual), "Any", None)
        if enum_any is not None and wanted == enum_any:
            return

        if actual != wanted:
            raise SearchNotSatisified()

    @staticmethod
    def string_match(bill: Bill, query: BillQuery, attr: str) -> None:
        """
        Check one text field of the bill against the query.

        The query value must occur, case-insensitively, somewhere inside the
        bill's value. Raises SearchNotSatisified otherwise; see
        code_enum_match for the calling convention.
        """
        needle = getattr(query, attr)
        if needle is QueryField.Any:
            return

        if needle.lower() not in getattr(bill, attr).lower():
            raise SearchNotSatisified()

    @staticmethod
    def _exact_match(wanted, actual) -> None:
        """Raise SearchNotSatisified unless wanted is QueryField.Any or equals actual."""
        if wanted is not QueryField.Any and wanted != actual:
            raise SearchNotSatisified()

    def add_conference(self: Self, parser: BookParser) -> None:
        """
        Append every bill of an already-parsed BookParser (any subclass) to the database.
        """
        # this works because each BookParser must insert its self.confname
        # into its self.bills[i].code.conference field.
        self.bills += parser.bills

    def search(self: Self, query: BillQuery | QueryAll) -> list[Bill]:
        """
        Return the bills that satisfy every criterion of the query.

        Passing QueryAll (the class itself or an instance) returns all bills.
        The result is always a new list, so callers may modify it freely.
        """
        if query is QueryAll or isinstance(query, QueryAll):
            return list(self.bills)

        results = []
        for bill in self.bills:
            try:
                self.code_enum_match(bill, query, "color")
                self.code_enum_match(bill, query, "assembly")

                self._exact_match(query.committee, bill.code.committee)
                self._exact_match(query.order, bill.code.docketplacement)
                # the year check used to be guarded by query.committee, which
                # skipped it for year-only queries and rejected every bill
                # for committee-only queries
                self._exact_match(query.year, bill.code.year)

                self.string_match(bill, query, "subcommittee")
                self.string_match(bill, query, "sponsors")
                self.string_match(bill, query, "school")
                self.string_match(bill, query, "bill_text_concat")
                self.string_match(bill, query, "title")
            except SearchNotSatisified:
                continue
            results.append(bill)

        return results
|
|
@ -0,0 +1,161 @@
|
||||||
|
from enum import StrEnum, auto
|
||||||
|
|
||||||
|
class CCEColors(StrEnum):
|
||||||
|
Red = "Red"
|
||||||
|
White = "White",
|
||||||
|
Blue = "Blue",
|
||||||
|
Undefined = "Undefined", # some conferences don't have assemblies
|
||||||
|
Any = "Any" # for searching purposes
|
||||||
|
|
||||||
|
class CCEAssemblies(StrEnum):
|
||||||
|
Senate = "Senate",
|
||||||
|
House = "House",
|
||||||
|
GeneralAssembly = "GeneralAssembly",
|
||||||
|
Any = "Any" # for searching purposes
|
||||||
|
|
||||||
|
class BillCode:
|
||||||
|
def __init__(self, text: str):
|
||||||
|
# try to parse
|
||||||
|
# codes are in this rough format: "RSB/yy-c(c)-n(n)"
|
||||||
|
|
||||||
|
text = text.rstrip()
|
||||||
|
slashsplit = text.split('/')
|
||||||
|
dashsplit = slashsplit[1].split('-')
|
||||||
|
|
||||||
|
assemblycode = slashsplit[0]
|
||||||
|
|
||||||
|
self.color = assemblycode[0]
|
||||||
|
if self.color == "R":
|
||||||
|
self.color = CCEColors.Red
|
||||||
|
elif self.color == "W":
|
||||||
|
self.color = CCEColors.White
|
||||||
|
elif self.color == "B":
|
||||||
|
self.color = CCEColors.Blue
|
||||||
|
|
||||||
|
assemblydivision = assemblycode[1]
|
||||||
|
if assemblydivision == "S":
|
||||||
|
self.assembly = CCEAssemblies.Senate
|
||||||
|
elif assemblydivision == "H":
|
||||||
|
self.assembly = CCEAssemblies.House
|
||||||
|
elif assemblydivision == "G":
|
||||||
|
self.assembly = CCEAssemblies.GeneralAssembly
|
||||||
|
|
||||||
|
# reverse y2k problem; but conference years are stored in YY, not YYYY form
|
||||||
|
self.year = int(dashsplit[0]) + 2000
|
||||||
|
self.committee = int(dashsplit[1])
|
||||||
|
self.docketplacement = int(dashsplit[2])
|
||||||
|
|
||||||
|
self.stringrep = self.color[0].upper() + \
|
||||||
|
self.assembly[0].upper() + \
|
||||||
|
"B/{}-{}-{}".format(
|
||||||
|
str(self.year - 2000),
|
||||||
|
str(self.committee),
|
||||||
|
str(self.docketplacement)
|
||||||
|
)
|
||||||
|
|
||||||
|
self.conference: None | str = None # to be filled in with BookParser and friends
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return "{} {} - {}-{}-{}".format(
|
||||||
|
self.color,
|
||||||
|
self.assembly,
|
||||||
|
str(self.year),
|
||||||
|
str(self.committee),
|
||||||
|
str(self.docketplacement)
|
||||||
|
)
|
||||||
|
|
||||||
|
class Bill:
    """
    One bill: its code, its authorship details, its title and its text lines.

    The *_url properties build site paths of the form
    /legislation/<conference>/..., using "defaultconf" when the code has no
    conference set.
    """

    def __init__(self,
        code: str | BillCode,
        sponsors: str,
        subcommittee: str,
        school: str,
        bill_text: list[str],
        title: str
    ):
        # accept either a ready-made BillCode or the raw code text
        self.code = BillCode(code) if isinstance(code, str) else code

        self.sponsors = sponsors.rstrip()
        self.subcommittee = subcommittee.rstrip()
        self.school = school.rstrip()
        self.bill_text = bill_text
        self.title = title

    def _legislation_url(self, *segments):
        # shared by every *_url property: prefix, conference (or the
        # placeholder), then the given segments
        conference = self.code.conference if self.code.conference else "defaultconf"
        return "/legislation/" + '/'.join([conference, *segments])

    @property
    def bill_text_concat(self):
        """All text lines joined together with no separator."""
        return ''.join(self.bill_text)

    @property
    def url(self):
        """Path of this bill's own page."""
        return self._legislation_url(
            self.code.color,
            self.code.assembly,
            str(self.code.year),
            str(self.code.committee),
            str(self.code.docketplacement),
        )

    @property
    def committee_url(self):
        """Path of the page for this bill's committee."""
        return self._legislation_url(
            self.code.color,
            self.code.assembly,
            str(self.code.year),
            str(self.code.committee),
        )

    @property
    def assembly_url(self):
        """Path of the page for this bill's assembly."""
        return self._legislation_url(
            self.code.color,
            self.code.assembly,
            str(self.code.year),
        )

    @property
    def color_url(self):
        """Path of the page for this bill's color grouping."""
        return self._legislation_url(
            self.code.color,
            str(self.code.year),
        )
|
|
@ -0,0 +1,20 @@
|
||||||
|
class FitzBlockWrapper:
    """
    Named-attribute view of one 7-item block tuple.

    The tuple is unpacked, in order, into x0, y0, x1, y1, text, block_number
    and block_type. Everything except text is converted with int(), so
    fractional coordinates are truncated.
    """

    def __init__(self, block):
        x0, y0, x1, y1, self.text, block_number, block_type = block

        self.x0 = int(x0)
        self.y0 = int(y0)
        self.x1 = int(x1)
        self.y1 = int(y1)
        self.block_number = int(block_number)
        self.block_type = int(block_type)

    def __str__(self):
        return str((
            self.x0, self.y0, self.x1, self.y1, self.text
        ))

    def __repr__(self):
        # was spelled __repl__, which Python never calls; with the correct
        # name, repr() and printed lists of blocks show the same text as str()
        return self.__str__()
|
|
@ -0,0 +1,202 @@
|
||||||
|
import fitz
|
||||||
|
from typing import Any, Self, ClassVar
|
||||||
|
from itertools import groupby
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from .lib import FitzBlockWrapper
|
||||||
|
from .common import Bill
|
||||||
|
|
||||||
|
@dataclass
class BookParser:
    """
    Base class for book parsers: holds the opened document and the conference name.
    """
    # class variables (shared by all instances, not dataclass fields)
    humanname: ClassVar[str] = "Generic BookParser parent class."
    description: ClassVar[str] = "\nA generic description of the abilities of this BookParser.\n"

    # instance fields
    document: fitz.Document
    confname: str

    @classmethod
    def from_filename(cls, filename: str, confname: str):
        """Alternate constructor: open the file with fitz and wrap the resulting document."""
        opened = fitz.open(filename)
        return cls(document=opened, confname=confname)
|
||||||
|
|
||||||
|
class HSYIGPdfParser(BookParser):
    """
    BookParser that extracts bills from a PDF book.

    parse() locates the section divider pages, collects the text blocks of the
    pages between them, splits that stream into one chunk per bill and stores
    the resulting Bill objects in self.bills.
    """

    @staticmethod
    def _words_in_superstring(words: list[str], superstring: str) -> bool:
        """Return True when every word occurs, case-insensitively, in superstring."""
        haystack = str(superstring).lower()
        return all(str(word).lower() in haystack for word in words)

    def _generate_legislative_pages_list(self, sections: list[int]) -> list[int]:
        """
        sections is an array of section pages plus the last page.

        Returns the page numbers strictly between each pair of consecutive
        entries, i.e. everything except the section pages themselves.
        """
        legislative_pages: list[int] = []
        for start, end in zip(sections, sections[1:]):
            legislative_pages.extend(range(start + 1, end))

        return legislative_pages

    def _generate_section_markers(self, document: fitz.Document) -> list[int]:
        """
        Return the page numbers of the section pages, plus any later page that
        looks like the end of the legislative part.
        """
        section_pages = []
        for page in document:
            # match against the text itself; encoding it first made the search
            # run over the repr of a bytes object
            text = page.get_text()
            is_section_page = self._words_in_superstring(
                words=["Committee", "YMCA", "Tennessee", "Youth", "in"],
                superstring=text
            )
            is_last_page = self._words_in_superstring(
                words=["ABCs"],
                superstring=text
            )

            # section pages carry the marker words and exactly three images
            if is_section_page and len(page.get_images()) == 3:
                section_pages.append(page.number)

            # the end marker only counts once more than two sections were seen
            if is_last_page and len(section_pages) > 2:
                section_pages.append(page.number)

        return section_pages

    def _get_block_info_from_page(self, page: fitz.Page):
        """Wrap every block of the page's "blocks" text extraction."""
        return [FitzBlockWrapper(i) for i in page.get_text("blocks")]

    @staticmethod
    def _remove_image_blocks(blocks: list[FitzBlockWrapper]) -> list[FitzBlockWrapper]:
        """Keep only the blocks whose block_type is 0."""
        return [block for block in blocks if block.block_type == 0]

    @staticmethod
    def _remove_coordinate_information(blocks: list[FitzBlockWrapper]) -> list[str]:
        """Reduce each block to its text."""
        return [block.text for block in blocks]

    @staticmethod
    def _get_info_from_block(block, lat: int):
        """Return the items of block whose first element, floored, equals lat."""
        # imported here because the module's import block does not provide math
        import math

        return [i for i in block if math.floor(i[0]) == lat]

    @staticmethod
    def _split_list_by_element(arr: list[Any], pivot: Any):
        """
        Split arr at every element equal to pivot.

        The pivots themselves are dropped; the result always holds one more
        list than there are pivots (some of them possibly empty).
        """
        output = []
        current = []
        for i in arr:
            if i == pivot:
                output.append(current)
                current = []
            else:
                current.append(i)

        output.append(current)
        return output

    def parse(self):
        """
        Extract every bill from self.document into self.bills.

        Chunks with fewer than six text blocks are skipped. Each bill's
        code.conference is set to self.confname.
        """
        section_pages = self._generate_section_markers(self.document)
        legislative_pages = self._generate_legislative_pages_list(section_pages)

        joined_blocks: list[FitzBlockWrapper] = []
        for page_number in legislative_pages:
            page = self.document.load_page(page_number)
            block_info = self._get_block_info_from_page(page)

            joined_blocks += block_info[:-1]  # remove the page number at the end of every page

        text_blocks = self._remove_coordinate_information(
            self._remove_image_blocks(joined_blocks)
        )

        # a book without legislative pages has no bills (and no header to split on)
        if not text_blocks:
            self.bills = []
            return

        # the very first block is taken as the header that starts each bill
        bill_header = text_blocks[0]
        splitted = self._split_list_by_element(text_blocks, bill_header)

        bills: list[Bill] = []
        for splitted_item in splitted:
            try:
                bill_code, _, _, subcommittee, sponsors, school, *bill_text = splitted_item
            except ValueError:
                # too few blocks to be a bill
                continue

            pretty_printed = self._pretty_print_bill_text(' '.join(bill_text))
            bill = Bill(
                code=bill_code,
                subcommittee=subcommittee,
                sponsors=sponsors,
                school=school,
                bill_text=pretty_printed["bill_array"],
                title=pretty_printed["title"]
            )
            bill.code.conference = self.confname  # add the conference name to each
            bills.append(bill)

        self.bills = bills

    @staticmethod
    def _find_first_line_number(bill_arrays) -> int | None:
        """
        Return the index of the first entry that is a canonical integer
        string (str(int(entry)) == entry), or None when there is none.
        """
        for index, entry in enumerate(bill_arrays):
            try:
                if str(int(entry)) == entry:
                    return index
            except ValueError:
                pass

        return None

    def _pretty_print_bill_text(self, bill_text: str):
        """
        Split raw bill text into its title and its body lines.

        Returns a dict with "title" (str) and "bill_array" (list of str).

        Raises:
            ValueError: if the text contains no line number to split on.
        """
        # NOTE(review): '<EFBFBD>' looks like a rendering of the U+FFFD
        # replacement character -- confirm what the literal should be
        replaced = [
            line.replace('<EFBFBD>', ' ').strip()
            for line in bill_text.split('\n')
        ]

        first_line_number = self._find_first_line_number(replaced)
        if first_line_number is None:
            raise ValueError("no line number found in bill text")

        title = ' '.join(replaced[:(first_line_number - 1)])
        title = ' '.join(title.split())  # remove double spaces

        # from the first line number on, take every second entry
        rebuilt = replaced[first_line_number:][1::2]
        # remove the last line number, it doesn't have a corresponding space at the end
        rebuilt = rebuilt[:-1]

        # remove the first line, as it's the whitespace between the title and the bill text
        rebuilt = rebuilt[1:]

        return {
            "title": title.lstrip(),
            "bill_array": rebuilt
        }
|
|
@ -0,0 +1,14 @@
|
||||||
|
{% extends "base.html" %}
|
||||||
|
|
||||||
|
{% block title %} testing title {% endblock %}
|
||||||
|
{% block defcontent %}
|
||||||
|
<h1>{{ bills[0].code.color }} {{ bills[0].code.assembly }}</h1>
|
||||||
|
|
||||||
|
{% for bill in bills %}
|
||||||
|
<div class="container border-black">
|
||||||
|
<a href="{{ bill.url }}">({{bill.code.assembly[0]}}{{bill.code.committee}}/{{bill.code.docketplacement}}) {{ bill.title }}</a>
|
||||||
|
<p>Sponsors: {{ bill.sponsors }}</p>
|
||||||
|
<p>School: {{ bill.school }}</p>
|
||||||
|
</div>
|
||||||
|
{% endfor %}
|
||||||
|
{% endblock %}
|
|
@ -0,0 +1,32 @@
|
||||||
|
{% extends "bootstrap/base.html" %}
|
||||||
|
|
||||||
|
{% block navbar %}
|
||||||
|
<nav class="navbar navbar-default">
|
||||||
|
<a class="navbar-brand" href="/">cceexplorer</a>
|
||||||
|
<div class="navbar-nav" id="navbarNav">
|
||||||
|
<ul class="nav navbar-nav">
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link" href="/">Home</a>
|
||||||
|
</li>
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link" href="/search">Search</a>
|
||||||
|
</li>
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link" href="/statistics">Statistics</a>
|
||||||
|
</li>
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link" href="/conferences">Conferences</a>
|
||||||
|
</li>
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link" href="/scores">Scores</a>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
</nav>
|
||||||
|
{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<div class="container-fluid">
|
||||||
|
{% block defcontent %}{% endblock %}
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
|
@ -0,0 +1,28 @@
|
||||||
|
{% extends "base.html" %}
|
||||||
|
|
||||||
|
{% block title %} cceexplorer - {{ bill.title }} {% endblock %}
|
||||||
|
{% block defcontent %}
|
||||||
|
<div class="container">
|
||||||
|
<div class="row">
|
||||||
|
<div class="col-xs-3 border border-dark rounded">
|
||||||
|
<h1>{{ bill.code.stringrep }}</h1>
|
||||||
|
<p><i>{{ bill.title }}</i></p>
|
||||||
|
<p>Introduced by {{ bill.sponsors }} (of {{ bill.school }}) within the {{ bill.subcommittee }} subcommittee</p>
|
||||||
|
<hr>
|
||||||
|
<ul>
|
||||||
|
<li><a href="{{ bill.committee_url }}">Go to this bill's committee</a></li>
|
||||||
|
<li><a href="{{ bill.assembly_url }}">Go to this bill's assembly</a></li>
|
||||||
|
<li><a href="{{ bill.color_url }}">Go to this bill's color grouping</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div class="col-xs-7 border border-dark rounded">
|
||||||
|
<br>
|
||||||
|
{% for line in bill.bill_text %}
|
||||||
|
{% if line == "" %}
|
||||||
|
{% endif %}
|
||||||
|
<p>{{ line }}</p>
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
|
@ -0,0 +1,14 @@
|
||||||
|
{% extends "base.html" %}
|
||||||
|
|
||||||
|
{% block title %} testing title {% endblock %}
|
||||||
|
{% block defcontent %}
|
||||||
|
<h1>All {{bills[0].code.color}} Legislation</h1>
|
||||||
|
|
||||||
|
{% for bill in bills %}
|
||||||
|
<div class="container border-black">
|
||||||
|
<a href="{{ bill.url }}">({{bill.code.committee}}/{{bill.code.docketplacement}}) {{ bill.title }}</a>
|
||||||
|
<p>Sponsors: {{ bill.sponsors }}</p>
|
||||||
|
<p>School: {{ bill.school }}</p>
|
||||||
|
</div>
|
||||||
|
{% endfor %}
|
||||||
|
{% endblock %}
|
|
@ -0,0 +1,14 @@
|
||||||
|
{% extends "base.html" %}
|
||||||
|
|
||||||
|
{% block title %} testing title {% endblock %}
|
||||||
|
{% block defcontent %}
|
||||||
|
<h1>{{ bills[0].code.assembly }} Committee {{ bills[0].code.committee }}</h1>
|
||||||
|
|
||||||
|
{% for bill in bills %}
|
||||||
|
<div class="container border-black">
|
||||||
|
<a href="{{ bill.url }}">({{bill.code.color}}) {{ bill.title }}</a>
|
||||||
|
<p>Sponsors: {{ bill.sponsors }}</p>
|
||||||
|
<p>School: {{ bill.school }}</p>
|
||||||
|
</div>
|
||||||
|
{% endfor %}
|
||||||
|
{% endblock %}
|
|
@ -0,0 +1,15 @@
|
||||||
|
{% extends "base.html" %}
|
||||||
|
|
||||||
|
{% block title %} testing title {% endblock %}
|
||||||
|
{% block defcontent %}
|
||||||
|
<h1>{{ bills[0].code.color }} {{ bills[0].code.assembly }}</h1>
|
||||||
|
|
||||||
|
{% for bill in bills %}
|
||||||
|
<div class="container border-black">
|
||||||
|
<a href="{{ bill.url }}">({{bill.code.assembly[0]}}{{bill.code.committee}}/{{bill.code.docketplacement}}) {{ bill.title }}</a>
|
||||||
|
<p>Sponsors: {{ bill.sponsors }}</p>
|
||||||
|
<p>School: {{ bill.school }}</p>
|
||||||
|
</div>
|
||||||
|
{% endfor %}
|
||||||
|
{% endblock %}
|
||||||
|
|
|
@ -0,0 +1,15 @@
|
||||||
|
{% extends "base.html" %}
|
||||||
|
|
||||||
|
{% block title %} testing title {% endblock %}
|
||||||
|
{% block defcontent %}
|
||||||
|
<h1>Welcome to cceexplorer</h1>
|
||||||
|
<p><i>an interactive database with {{ number_bills }} bills and {{ number_conferences }} conferences</i></p>
|
||||||
|
<p>here's all of them, down here!</p>
|
||||||
|
<ul>
|
||||||
|
{% for bill in bills %}
|
||||||
|
<li>
|
||||||
|
<a href="{{ bill.url }}">{{ bill.title }}</a>
|
||||||
|
</li>
|
||||||
|
{% endfor %}
|
||||||
|
</ul>
|
||||||
|
{% endblock %}
|
|
@ -0,0 +1,6 @@
|
||||||
|
{% extends "base.html" %}
|
||||||
|
|
||||||
|
{% block title %} testing title {% endblock %}
|
||||||
|
{% block defcontent %}
|
||||||
|
<h1>testing</h1>
|
||||||
|
{% endblock %}
|
|
@ -0,0 +1,9 @@
|
||||||
|
{ pkgs ? import <nixpkgs> {} }:
|
||||||
|
pkgs.mkShell {
|
||||||
|
# nativeBuildInputs is usually what you want -- tools you need to run
|
||||||
|
nativeBuildInputs = with pkgs; [
|
||||||
|
buildPackages.python311Packages.pymupdf
|
||||||
|
buildPackages.python311Packages.flask
|
||||||
|
buildPackages.python311Packages.flask-bootstrap
|
||||||
|
];
|
||||||
|
}
|
Loading…
Reference in New Issue