Compare commits
No commits in common. "3848e1f777de966ae319978b9f70e5d5ae7faec8" and "10dd6418b704c7f80c2059ec1586bfbd81c06df9" have entirely different histories.
3848e1f777
...
10dd6418b7
|
@ -1,2 +0,0 @@
|
||||||
__pycache__/
|
|
||||||
*.pyc
|
|
Binary file not shown.
|
@ -1,99 +0,0 @@
|
||||||
import secrets
|
|
||||||
|
|
||||||
from flask import Flask
|
|
||||||
from flask import render_template
|
|
||||||
from flask_bootstrap import Bootstrap
|
|
||||||
|
|
||||||
from .leglib.billdb import BillDB, BillQuery, QueryField, QueryAll
|
|
||||||
from .leglib.parsers import HSYIGPdfParser
|
|
||||||
|
|
||||||
# Build the bill database once, at import time; the views registered by
# create_app() below all read from this module-level ``db``.
parser = HSYIGPdfParser.from_filename(filename="YIGVolunteerBook2024.pdf", confname="HSVolunteer")
parser.parse()  # fills parser.bills

db = BillDB()
db.add_conference(parser=parser)
|
|
||||||
|
|
||||||
def create_app(test_config=None):
    """Create and configure the Flask application.

    Args:
        test_config: Optional mapping of configuration values. When given it
            is applied on top of the defaults, so callers (typically tests)
            can override settings such as ``SECRET_KEY``.

    Returns:
        The configured ``Flask`` application with every legislation route
        registered.
    """
    # Imported locally so this function stays self-contained.
    from flask import abort

    app = Flask(__name__, instance_relative_config=True)
    app.config.from_mapping(
        # 256 bits of randomness; the previous randbelow(100000000) value had
        # fewer than 27 bits and could be brute-forced.
        # NOTE(review): the key is regenerated on every start, so signed
        # sessions do not survive a restart -- confirm that is intended.
        SECRET_KEY=secrets.token_hex(32),
    )
    if test_config is not None:
        app.config.from_mapping(test_config)

    Bootstrap(app)

    def bills_or_404(**criteria):
        """Search ``db`` with a BillQuery built from ``criteria``; 404 if empty.

        The list templates read ``bills[0]`` for their heading, so an empty
        result must not reach them.
        """
        bills = db.search(query=BillQuery(**criteria))
        if not bills:
            abort(404)
        return bills

    @app.route('/')
    def index():
        bills = db.search(query=QueryAll)
        # NOTE(review): number_conferences is hard-coded to 2 -- confirm
        # whether it should be derived from the loaded conferences.
        return render_template('index.html', number_bills=len(bills), number_conferences=2, bills=bills)

    @app.route('/legislation/<conference>/<year>')
    def show_conference(conference=QueryField.Any, year=QueryField.Any):
        # ``year`` must be accepted because the route supplies it; without
        # the parameter Flask's call raised TypeError on every request.
        return conference

    # The ``int`` converters below make Flask answer 404 for non-numeric
    # segments instead of letting int() raise ValueError (HTTP 500).
    # NOTE(review): ``conference`` is not part of any query below; BillQuery
    # has no conference field.

    @app.route('/legislation/<conference>/<color>/<int:year>')
    def show_color(
        conference=QueryField.Any,
        year=QueryField.Any,
        color=QueryField.Any,
    ):
        bills = bills_or_404(color=color, year=year)
        return render_template('color.html', bills=bills)

    @app.route('/legislation/<conference>/<color>/<assembly>/<int:year>')
    def show_assembly(
        conference=QueryField.Any,
        assembly=QueryField.Any,
        color=QueryField.Any,
        year=QueryField.Any,
    ):
        bills = bills_or_404(color=color, assembly=assembly, year=year)
        return render_template('assembly.html', bills=bills)

    @app.route('/legislation/<conference>/<color>/<assembly>/<int:year>/<int:committee>')
    def show_committee(
        conference=QueryField.Any,
        assembly=QueryField.Any,
        color=QueryField.Any,
        year=QueryField.Any,
        committee=QueryField.Any,
    ):
        # NOTE(review): the colour from the URL is deliberately not applied
        # here (unchanged from the original); committee.html prints each
        # bill's colour, which suggests all colours are listed -- confirm.
        bills = bills_or_404(
            color=QueryField.Any,
            assembly=assembly,
            year=year,
            committee=committee,
        )
        return render_template('committee.html', bills=bills)

    @app.route('/legislation/<conference>/<color>/<assembly>/<int:year>/<int:committee>/<int:order>')
    def show_bill(
        conference=QueryField.Any,
        assembly=QueryField.Any,
        color=QueryField.Any,
        year=QueryField.Any,
        committee=QueryField.Any,
        order=QueryField.Any,
    ):
        bills = bills_or_404(
            color=color,
            assembly=assembly,
            year=year,
            committee=committee,
            order=order,
        )
        return render_template("bill.html", bill=bills[0])

    return app
|
|
|
@ -1,29 +0,0 @@
|
||||||
from leglib.billdb import BillDB, BillQuery, QueryField, QueryAll
|
|
||||||
from leglib.parsers import HSYIGPdfParser
|
|
||||||
|
|
||||||
# Smoke test: parse the volunteer book, load it into a BillDB and print a few
# counts that can be checked by eye.
parser = HSYIGPdfParser.from_filename(filename="YIGVolunteerBook2024.pdf", confname="HSVolunteer")
parser.parse()

print(len(parser.bills))

db = BillDB()
db.add_conference(parser=parser)


def _count(query):
    """Return how many bills in ``db`` satisfy ``query``."""
    return len(db.search(query=query))


allbills = _count(QueryAll)

# Per-colour totals.
bluelen = _count(BillQuery(color=QueryField.Colors.Blue))
whitelen = _count(BillQuery(color=QueryField.Colors.White))
redlen = _count(BillQuery(color=QueryField.Colors.Red))

# Per-assembly totals.
senatelen = _count(BillQuery(assembly=QueryField.Assemblies.Senate))
houselen = _count(BillQuery(assembly=QueryField.Assemblies.House))

# Substring match on the school name.
franklincount = _count(BillQuery(school="Franklin"))

print(allbills)
print(redlen, whitelen, bluelen, redlen + whitelen + bluelen)
print(senatelen, houselen, senatelen + houselen)
print(franklincount)
|
|
|
@ -1,128 +0,0 @@
|
||||||
from .common import Bill, CCEColors, CCEAssemblies
|
|
||||||
from .parsers import BookParser
|
|
||||||
|
|
||||||
from typing import Type, Self
|
|
||||||
from dataclasses import dataclass
|
|
||||||
|
|
||||||
class QueryAny:
    """Marker for an "Any" match on attributes that have no ``Any`` member of their own."""
|
|
||||||
|
|
||||||
class SearchNotSatisified(Exception):
    """
    Raised by the BillDB match helpers when a bill fails one query criterion.

    Derives from Exception rather than BaseException: BaseException is meant
    for interpreter-level exits such as KeyboardInterrupt, and ordinary
    ``except Exception`` handlers would not see a BaseException subclass.
    """
|
|
||||||
|
|
||||||
class QueryAll:
    """Sentinel query: BillDB.search returns every stored bill when given this class."""
|
|
||||||
|
|
||||||
class QueryField:
    """Namespace for the special values a BillQuery field may hold."""

    # Unique sentinel object; a query field equal to it places no constraint
    # on the corresponding bill attribute.
    Any = object()

    # Aliases for the code enums, so callers can write
    # QueryField.Colors.Red / QueryField.Assemblies.Senate.
    Assemblies = CCEAssemblies
    Colors = CCEColors
|
|
||||||
|
|
||||||
@dataclass
class BillQuery:
    """
    Search criteria accepted by BillDB.search.

    Every field defaults to QueryField.Any; a field left at that value is
    skipped by the matchers, so only explicitly set fields filter the result.
    """

    # Compared against the bill's code enums.
    color: str | CCEColors | QueryField = QueryField.Any
    assembly: str | CCEAssemblies | QueryField = QueryField.Any

    # Compared for exact equality against the bill's code
    # (``order`` is checked against ``code.docketplacement``).
    committee: int | QueryField = QueryField.Any
    year: int | QueryField = QueryField.Any
    order: int | QueryField = QueryField.Any

    # Case-insensitive substring matches against the bill's own attributes.
    subcommittee: str | QueryField = QueryField.Any
    sponsors: str | QueryField = QueryField.Any
    school: str | QueryField = QueryField.Any
    bill_text: str | QueryField = QueryField.Any
    title: str | QueryField = QueryField.Any

    def __post_init__(self):
        # Bill exposes its joined text as ``bill_text_concat``; mirroring the
        # name here lets the string matcher read one attribute name from both
        # the bill and the query.
        self.bill_text_concat = self.bill_text
|
|
||||||
|
|
||||||
class BillDB:
    """In-memory collection of Bill objects that can be filtered with a BillQuery."""

    # (query attribute, bill.code attribute) pairs compared for exact equality.
    _EXACT_CODE_FIELDS = (
        ("committee", "committee"),
        ("order", "docketplacement"),
        ("year", "year"),
    )

    # Attributes compared with a case-insensitive substring test; each name
    # exists on both Bill and BillQuery.
    _STRING_FIELDS = ("subcommittee", "sponsors", "school", "bill_text_concat", "title")

    def __init__(self):
        self.bills: list[Bill] = []
        # Previously only annotated, so reading ``self.cache`` raised
        # AttributeError. Nothing in this class uses it yet.
        self.cache: dict = {}

    @staticmethod
    def code_enum_match(bill: Bill, query: BillQuery, attr: str) -> None:
        """
        Check one enum-valued field of ``bill.code`` against ``query``.

        Returns silently when the query field is QueryField.Any, when it is
        the enum's own ``Any`` member, or when it equals the bill's value.

        Raises:
            SearchNotSatisified: the query names a specific value and the
                bill's code holds a different one.
        """
        wanted = getattr(query, attr)
        if wanted == QueryField.Any:
            return

        actual = getattr(bill.code, attr)

        # Each code enum carries its own ``Any`` member for searching. Fall
        # back to the generic sentinel when the stored value is not such an
        # enum, instead of failing with AttributeError.
        enum_any = getattr(type(actual), "Any", QueryField.Any)
        if wanted == enum_any:
            return

        if actual != wanted:
            raise SearchNotSatisified()

    @staticmethod
    def string_match(bill: Bill, query: BillQuery, attr: str) -> None:
        """
        Check that the query's ``attr`` occurs, ignoring case, in the bill's ``attr``.

        Raises:
            SearchNotSatisified: the query text is not a substring of the
                bill's text.
        """
        wanted = getattr(query, attr)
        if wanted == QueryField.Any:
            return

        if wanted.lower() not in getattr(bill, attr).lower():
            raise SearchNotSatisified()

    @staticmethod
    def _code_exact_match(bill: Bill, query: BillQuery, query_attr: str, code_attr: str) -> None:
        """Raise SearchNotSatisified unless ``query_attr`` is Any or equals ``bill.code.code_attr``."""
        wanted = getattr(query, query_attr)
        if wanted == QueryField.Any:
            return

        if wanted != getattr(bill.code, code_attr):
            raise SearchNotSatisified()

    def _matches(self, bill: Bill, query: BillQuery) -> bool:
        """Return True when ``bill`` satisfies every constrained field of ``query``."""
        try:
            self.code_enum_match(bill, query, "color")
            self.code_enum_match(bill, query, "assembly")

            # The year test used to be guarded by ``query.committee``: a
            # year-only query ignored the year, and a committee-only query
            # rejected every bill. Each field is now guarded by itself.
            for query_attr, code_attr in self._EXACT_CODE_FIELDS:
                self._code_exact_match(bill, query, query_attr, code_attr)

            for attr in self._STRING_FIELDS:
                self.string_match(bill, query, attr)
        except SearchNotSatisified:
            return False
        return True

    def add_conference(self: Self, parser: BookParser) -> None:
        """
        Append every bill of an already-parsed BookParser (or subclass) instance.

        This relies on each parser having written its ``confname`` into
        ``bill.code.conference`` for the bills it produced.
        """
        self.bills.extend(parser.bills)

    def search(self: Self, query: BillQuery | QueryAll) -> list[Bill]:
        """
        Return the bills that satisfy ``query``.

        Args:
            query: a BillQuery, or QueryAll (the class or an instance of it)
                to get every bill.

        Returns:
            A new list; changing it does not affect the database.
        """
        if query is QueryAll or isinstance(query, QueryAll):
            return list(self.bills)

        return [bill for bill in self.bills if self._matches(bill, query)]
|
|
|
@ -1,161 +0,0 @@
|
||||||
from enum import StrEnum, auto
|
|
||||||
|
|
||||||
class CCEColors(StrEnum):
|
|
||||||
Red = "Red"
|
|
||||||
White = "White",
|
|
||||||
Blue = "Blue",
|
|
||||||
Undefined = "Undefined", # some conferences don't have assemblies
|
|
||||||
Any = "Any" # for searching purposes
|
|
||||||
|
|
||||||
class CCEAssemblies(StrEnum):
|
|
||||||
Senate = "Senate",
|
|
||||||
House = "House",
|
|
||||||
GeneralAssembly = "GeneralAssembly",
|
|
||||||
Any = "Any" # for searching purposes
|
|
||||||
|
|
||||||
class BillCode:
    """
    Parsed form of a bill code written roughly as ``"RSB/yy-c(c)-n(n)"``.

    Attributes set by the constructor:
        color: CCEColors member chosen by the first letter (R, W or B);
            CCEColors.Undefined for any other letter.
        assembly: CCEAssemblies member chosen by the second letter (S, H, G).
        year: four-digit year (the two-digit value from the code plus 2000).
        committee: committee number.
        docketplacement: position of the bill on the docket.
        stringrep: the code rebuilt in its short textual form.
        conference: None until a parser fills in the conference name.
    """

    _COLOR_BY_LETTER = {
        "R": CCEColors.Red,
        "W": CCEColors.White,
        "B": CCEColors.Blue,
    }

    _ASSEMBLY_BY_LETTER = {
        "S": CCEAssemblies.Senate,
        "H": CCEAssemblies.House,
        "G": CCEAssemblies.GeneralAssembly,
    }

    def __init__(self, text: str):
        """
        Parse ``text`` into its components.

        Raises:
            ValueError: the text does not have the ``XYB/yy-c-n`` shape, a
                numeric part is not an integer, or the assembly letter is
                not one of S, H, G.
        """
        text = text.strip()
        slashsplit = text.split('/')
        if len(slashsplit) < 2 or len(slashsplit[0]) < 2:
            raise ValueError("malformed bill code: {!r}".format(text))

        dashsplit = slashsplit[1].split('-')
        if len(dashsplit) < 3:
            raise ValueError("malformed bill code: {!r}".format(text))

        assemblycode = slashsplit[0]
        color_letter = assemblycode[0]
        assembly_letter = assemblycode[1]

        # An unrecognised letter used to leave a bare one-character str in
        # self.color, which the enum-based search could not handle.
        self.color = self._COLOR_BY_LETTER.get(color_letter, CCEColors.Undefined)

        # An unrecognised letter used to leave self.assembly unset and fail
        # a few lines later with AttributeError; fail here with a clear
        # message instead.
        if assembly_letter not in self._ASSEMBLY_BY_LETTER:
            raise ValueError(
                "unknown assembly letter {!r} in bill code {!r}".format(assembly_letter, text)
            )
        self.assembly = self._ASSEMBLY_BY_LETTER[assembly_letter]

        # reverse y2k problem; conference years are stored in YY, not YYYY form
        self.year = int(dashsplit[0]) + 2000
        self.committee = int(dashsplit[1])
        self.docketplacement = int(dashsplit[2])

        # Built from the original colour letter so an unrecognised letter
        # still round-trips; for R/W/B this equals the enum's first letter.
        self.stringrep = "{}{}B/{}-{}-{}".format(
            color_letter.upper(),
            self.assembly[0].upper(),
            self.year - 2000,
            self.committee,
            self.docketplacement,
        )

        self.conference: None | str = None  # to be filled in with BookParser and friends

    def __str__(self):
        return "{} {} - {}-{}-{}".format(
            self.color,
            self.assembly,
            self.year,
            self.committee,
            self.docketplacement,
        )
|
|
||||||
|
|
||||||
class Bill:
    """
    One bill: its code, who wrote it, and its text.

    Attributes:
        code: BillCode (built from the string when a str is passed in).
        sponsors, subcommittee, school: stored with trailing whitespace removed.
        bill_text: list of text lines, stored as given.
        title: stored as given.
    """

    def __init__(self,
                 code: "str | BillCode",
                 sponsors: str,
                 subcommittee: str,
                 school: str,
                 bill_text: list[str],
                 title: str
                 ):
        self.code = BillCode(code) if isinstance(code, str) else code

        self.sponsors = sponsors.rstrip()
        self.subcommittee = subcommittee.rstrip()
        self.school = school.rstrip()
        self.bill_text = bill_text
        self.title = title

    def _legislation_url(self, *parts) -> str:
        """Join ``parts`` under /legislation/<conference>/, using "defaultconf" when no conference is set."""
        conference = self.code.conference or "defaultconf"
        return "/legislation/" + '/'.join([conference, *(str(part) for part in parts)])

    @property
    def bill_text_concat(self):
        """All text lines joined without a separator (used for text search)."""
        return ''.join(self.bill_text)

    @property
    def url(self):
        """Path of this bill's own page."""
        return self._legislation_url(
            self.code.color,
            self.code.assembly,
            self.code.year,
            self.code.committee,
            self.code.docketplacement,
        )

    @property
    def committee_url(self):
        """Path of the page listing this bill's committee."""
        return self._legislation_url(
            self.code.color,
            self.code.assembly,
            self.code.year,
            self.code.committee,
        )

    @property
    def assembly_url(self):
        """Path of the page listing this bill's assembly."""
        return self._legislation_url(
            self.code.color,
            self.code.assembly,
            self.code.year,
        )

    @property
    def color_url(self):
        """Path of the page listing this bill's colour grouping."""
        return self._legislation_url(
            self.code.color,
            self.code.year,
        )
|
|
|
@ -1,20 +0,0 @@
|
||||||
class FitzBlockWrapper:
    """
    Attribute-style view of one text block tuple.

    ``block`` is unpacked as exactly seven items:
    ``(x0, y0, x1, y1, text, block_number, block_type)``. All items except
    ``text`` are converted with ``int()``, which truncates fractional
    coordinates.
    """

    def __init__(self, block):
        x0, y0, x1, y1, text, block_number, block_type = block

        self.x0 = int(x0)
        self.y0 = int(y0)
        self.x1 = int(x1)
        self.y1 = int(y1)
        self.text = text
        self.block_number = int(block_number)
        self.block_type = int(block_type)

    def __str__(self):
        return str((self.x0, self.y0, self.x1, self.y1, self.text))

    def __repr__(self):
        # The method was spelled ``__repl__``, which Python never calls, so
        # repr() (and printing a list of blocks) fell back to the default
        # ``<... object at 0x...>`` form.
        return self.__str__()

    # Old misspelled name kept so any existing explicit call keeps working.
    __repl__ = __repr__
|
|
|
@ -1,202 +0,0 @@
|
||||||
import fitz
|
|
||||||
from typing import Any, Self, ClassVar
|
|
||||||
from itertools import groupby
|
|
||||||
from dataclasses import dataclass
|
|
||||||
|
|
||||||
from .lib import FitzBlockWrapper
|
|
||||||
from .common import Bill
|
|
||||||
|
|
||||||
@dataclass
class BookParser:
    """
    Base class for parsers that read bills out of a conference book.

    Instances hold the opened document and the conference name; subclasses
    provide the actual parsing.
    """

    # class variables
    humanname: ClassVar[str] = "Generic BookParser parent class."
    description: ClassVar[str] = """
    A generic description of the abilities of this BookParser.
    """

    # instance fields
    document: fitz.Document
    confname: str

    @classmethod
    def from_filename(cls, filename: str, confname: str):
        """Open ``filename`` with fitz and return a parser of this class for it."""
        opened = fitz.open(filename)
        return cls(document=opened, confname=confname)
|
|
||||||
|
|
||||||
class HSYIGPdfParser(BookParser):
    """
    BookParser that extracts Bill objects from a PDF bill book.

    Call parse() after construction; the result is stored in ``self.bills``.
    """

    @staticmethod
    def _words_in_superstring(words: list[str], superstring: str) -> bool:
        """Return True when every word occurs, ignoring case, in ``superstring``."""
        haystack = str(superstring).lower()
        return all(str(word).lower() in haystack for word in words)

    def _generate_legislative_pages_list(self, sections: list[int]) -> list[int]:
        """
        Return the page numbers lying strictly between consecutive markers.

        ``sections`` is an array of section pages plus the last page.
        """
        legislative_pages: list[int] = []
        for start, end in zip(sections, sections[1:]):
            legislative_pages.extend(range(start + 1, end))
        return legislative_pages

    def _generate_section_markers(self, document: fitz.Document) -> list[int]:
        """
        Return the page numbers of section title pages, plus the closing page.

        A page counts as a section page when it contains all of the marker
        words and has exactly three images. A page containing "ABCs" is
        appended as the closing marker once more than two markers exist.
        """
        section_pages = []
        for page in document:
            # Search the text itself. It used to be encoded to bytes first,
            # and str() of bytes is its ``b'...'`` repr with escapes, which
            # only matched by accident.
            text = page.get_text()
            is_section_page = self._words_in_superstring(
                words=["Committee", "YMCA", "Tennessee", "Youth", "in"],
                superstring=text
            )
            is_last_page = self._words_in_superstring(
                words=["ABCs"],
                superstring=text
            )

            if is_section_page and len(page.get_images()) == 3:
                section_pages.append(page.number)

            if is_last_page and len(section_pages) > 2:
                section_pages.append(page.number)

        return section_pages

    def _get_block_info_from_page(self, page: fitz.Page):
        """Wrap every block of ``page.get_text("blocks")`` in a FitzBlockWrapper."""
        return [FitzBlockWrapper(i) for i in page.get_text("blocks")]

    @staticmethod
    def _remove_image_blocks(blocks: list[FitzBlockWrapper]) -> list[FitzBlockWrapper]:
        """Keep only the blocks whose ``block_type`` is 0."""
        return [block for block in blocks if block.block_type == 0]

    @staticmethod
    def _remove_coordinate_information(blocks: list[FitzBlockWrapper]) -> list[str]:
        """Return just the text of each block."""
        return [block.text for block in blocks]

    @staticmethod
    def _get_info_from_block(block, lat: int):
        """Return the items of ``block`` whose first element, floored, equals ``lat``."""
        # ``math`` was never imported at module level, so this method raised
        # NameError; import it here to keep the method self-contained.
        import math

        return [i for i in block if math.floor(i[0]) == lat]

    @staticmethod
    def _split_list_by_element(arr: list[Any], pivot: Any):
        """
        Split ``arr`` into sub-lists at every element equal to ``pivot``.

        The pivot itself is dropped. The result always has one more sub-list
        than there are pivots, so it may contain empty lists.
        """
        output = []
        current = []
        for i in arr:
            if i == pivot:
                output.append(current)
                current = []
            else:
                current.append(i)

        output.append(current)
        return output

    def parse(self):
        """Read every legislative page and store the resulting bills in ``self.bills``."""
        section_pages = self._generate_section_markers(self.document)
        legislative_pages = self._generate_legislative_pages_list(section_pages)

        joined_blocks: list[FitzBlockWrapper] = []
        for page_number in legislative_pages:
            page = self.document.load_page(page_number)
            block_info = self._get_block_info_from_page(page)

            joined_blocks += block_info[:-1]  # remove the page number at the end of every page

        joined_blocks = self._remove_image_blocks(joined_blocks)
        block_texts = self._remove_coordinate_information(joined_blocks)

        if not block_texts:
            # No legislative text found; indexing [0] below would raise.
            self.bills = []
            return

        # The first block is used as the separator between bills.
        bill_header = block_texts[0]
        splitted = self._split_list_by_element(block_texts, bill_header)

        bills: list[Bill] = []
        for splitted_item in splitted:
            try:
                bill_code, _, _, subcommittee, sponsors, school, *bill_text = splitted_item
            except ValueError:
                # Fewer than six blocks: not a complete bill, skip it.
                continue

            pretty_printed = self._pretty_print_bill_text(' '.join(bill_text))
            bill = Bill(
                code=bill_code,
                subcommittee=subcommittee,
                sponsors=sponsors,
                school=school,
                bill_text=pretty_printed["bill_array"],
                title=pretty_printed["title"]
            )
            bill.code.conference = self.confname  # add the conference name to each
            bills.append(bill)

        self.bills = bills

    @staticmethod
    def _find_first_line_number(bill_arrays):
        """Return the index of the first entry that is a plain integer string, or None."""
        for index, entry in enumerate(bill_arrays):
            try:
                if str(int(entry)) == entry:
                    return index
            except ValueError:
                pass
        return None

    def _pretty_print_bill_text(self, bill_text: str):
        """
        Split raw bill text into its title and its body lines.

        Returns:
            A dict with ``"title"`` (str) and ``"bill_array"`` (list of body
            lines). When no line number is found, the whole text becomes the
            title and ``"bill_array"`` is empty.
        """
        # NOTE(review): the '<EFBFBD>' literal is reproduced exactly as it
        # appears in this copy of the source; it presumably stands for U+FFFD
        # (UTF-8 bytes EF BF BD) -- confirm against the real file.
        # The earlier replace-with-newline statement was overwritten on the
        # very next line, so it never had any effect and is dropped.
        replaced = [
            line.replace('<EFBFBD>', ' ').strip()
            for line in bill_text.split('\n')
        ]

        first_line_number = self._find_first_line_number(replaced)
        if first_line_number is None:
            # ``None - 1`` used to raise TypeError here.
            return {
                "title": ' '.join(' '.join(replaced).split()),
                "bill_array": []
            }

        # The entry just before the first line number is left out of the
        # title. Clamp so that index 0 yields an empty title rather than
        # the ``[:-1]`` slice.
        title = ' '.join(replaced[:max(first_line_number - 1, 0)])
        title = ' '.join(title.split())  # remove double spaces

        # Every second entry after the first line number -- presumably the
        # text that follows each line number.
        rebuilt = replaced[first_line_number:][1::2]
        # remove the last line number, it doesn't have a corresponding space at the end
        rebuilt = rebuilt[:-1]

        # remove the first line, as it's the whitespace between the title and the bill text
        rebuilt = rebuilt[1:]

        return {
            "title": title.lstrip(),
            "bill_array": rebuilt
        }
|
|
|
@ -1,14 +0,0 @@
|
||||||
{% extends "base.html" %}
|
|
||||||
|
|
||||||
{% block title %} testing title {% endblock %}
|
|
||||||
{% block defcontent %}
|
|
||||||
<h1>{{ bills[0].code.color }} {{ bills[0].code.assembly }}</h1>
|
|
||||||
|
|
||||||
{% for bill in bills %}
|
|
||||||
<div class="container border-black">
|
|
||||||
<a href="{{ bill.url }}">({{bill.code.assembly[0]}}{{bill.code.committee}}/{{bill.code.docketplacement}}) {{ bill.title }}</a>
|
|
||||||
<p>Sponsors: {{ bill.sponsors }}</p>
|
|
||||||
<p>School: {{ bill.school }}</p>
|
|
||||||
</div>
|
|
||||||
{% endfor %}
|
|
||||||
{% endblock %}
|
|
|
@ -1,32 +0,0 @@
|
||||||
{% extends "bootstrap/base.html" %}
|
|
||||||
|
|
||||||
{% block navbar %}
|
|
||||||
<nav class="navbar navbar-default">
|
|
||||||
<a class="navbar-brand" href="/">cceexplorer</a>
|
|
||||||
<div class="navbar-nav" id="navbarNav">
|
|
||||||
<ul class="nav navbar-nav">
|
|
||||||
<li class="nav-item">
|
|
||||||
<a class="nav-link" href="/">Home</a>
|
|
||||||
</li>
|
|
||||||
<li class="nav-item">
|
|
||||||
<a class="nav-link" href="/search">Search</a>
|
|
||||||
</li>
|
|
||||||
<li class="nav-item">
|
|
||||||
<a class="nav-link" href="/statistics">Statistics</a>
|
|
||||||
</li>
|
|
||||||
<li class="nav-item">
|
|
||||||
<a class="nav-link" href="/conferences">Conferences</a>
|
|
||||||
</li>
|
|
||||||
<li class="nav-item">
|
|
||||||
<a class="nav-link" href="/scores">Scores</a>
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
{% endblock %}
|
|
||||||
|
|
||||||
{% block content %}
|
|
||||||
<div class="container-fluid">
|
|
||||||
{% block defcontent %}{% endblock %}
|
|
||||||
</div>
|
|
||||||
{% endblock %}
|
|
|
@ -1,28 +0,0 @@
|
||||||
{% extends "base.html" %}
|
|
||||||
|
|
||||||
{% block title %} cceexplorer - {{ bill.title }} {% endblock %}
|
|
||||||
{% block defcontent %}
|
|
||||||
<div class="container">
|
|
||||||
<div class="row">
|
|
||||||
<div class="col-xs-3 border border-dark rounded">
|
|
||||||
<h1>{{ bill.code.stringrep }}</h1>
|
|
||||||
<p><i>{{ bill.title }}</i></p>
|
|
||||||
<p>Introduced by {{ bill.sponsors }} (of {{ bill.school }}) within the {{ bill.subcommittee }} subcommittee</p>
|
|
||||||
<hr>
|
|
||||||
<ul>
|
|
||||||
<li><a href="{{ bill.committee_url }}">Go to this bill's committee</a></li>
|
|
||||||
<li><a href="{{ bill.assembly_url }}">Go to this bill's assembly</a></li>
|
|
||||||
<li><a href="{{ bill.color_url }}">Go to this bill's color grouping</a></li>
|
|
||||||
</ul>
|
|
||||||
</div>
|
|
||||||
<div class="col-xs-7 border border-dark rounded">
|
|
||||||
<br>
|
|
||||||
{% for line in bill.bill_text %}
|
|
||||||
{% if line == "" %}
|
|
||||||
{% endif %}
|
|
||||||
<p>{{ line }}</p>
|
|
||||||
{% endfor %}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
{% endblock %}
|
|
|
@ -1,14 +0,0 @@
|
||||||
{% extends "base.html" %}
|
|
||||||
|
|
||||||
{% block title %} testing title {% endblock %}
|
|
||||||
{% block defcontent %}
|
|
||||||
<h1>All {{bills[0].code.color}} Legislation</h1>
|
|
||||||
|
|
||||||
{% for bill in bills %}
|
|
||||||
<div class="container border-black">
|
|
||||||
<a href="{{ bill.url }}">({{bill.code.committee}}/{{bill.code.docketplacement}}) {{ bill.title }}</a>
|
|
||||||
<p>Sponsors: {{ bill.sponsors }}</p>
|
|
||||||
<p>School: {{ bill.school }}</p>
|
|
||||||
</div>
|
|
||||||
{% endfor %}
|
|
||||||
{% endblock %}
|
|
|
@ -1,14 +0,0 @@
|
||||||
{% extends "base.html" %}
|
|
||||||
|
|
||||||
{% block title %} testing title {% endblock %}
|
|
||||||
{% block defcontent %}
|
|
||||||
<h1>{{ bills[0].code.assembly }} Committee {{ bills[0].code.committee }}</h1>
|
|
||||||
|
|
||||||
{% for bill in bills %}
|
|
||||||
<div class="container border-black">
|
|
||||||
<a href="{{ bill.url }}">({{bill.code.color}}) {{ bill.title }}</a>
|
|
||||||
<p>Sponsors: {{ bill.sponsors }}</p>
|
|
||||||
<p>School: {{ bill.school }}</p>
|
|
||||||
</div>
|
|
||||||
{% endfor %}
|
|
||||||
{% endblock %}
|
|
|
@ -1,15 +0,0 @@
|
||||||
{% extends "base.html" %}
|
|
||||||
|
|
||||||
{% block title %} testing title {% endblock %}
|
|
||||||
{% block defcontent %}
|
|
||||||
<h1>{{ bills[0].code.color }} {{ bills[0].code.assembly }}</h1>
|
|
||||||
|
|
||||||
{% for bill in bills %}
|
|
||||||
<div class="container border-black">
|
|
||||||
<a href="{{ bill.url }}">({{bill.code.assembly[0]}}{{bill.code.committee}}/{{bill.code.docketplacement}}) {{ bill.title }}</a>
|
|
||||||
<p>Sponsors: {{ bill.sponsors }}</p>
|
|
||||||
<p>School: {{ bill.school }}</p>
|
|
||||||
</div>
|
|
||||||
{% endfor %}
|
|
||||||
{% endblock %}
|
|
||||||
|
|
|
@ -1,15 +0,0 @@
|
||||||
{% extends "base.html" %}
|
|
||||||
|
|
||||||
{% block title %} testing title {% endblock %}
|
|
||||||
{% block defcontent %}
|
|
||||||
<h1>Welcome to cceexplorer</h1>
|
|
||||||
<p><i>an interactive database with {{ number_bills }} bills and {{ number_conferences }} conferences</i></p>
|
|
||||||
<p>here's all of them, down here!</p>
|
|
||||||
<ul>
|
|
||||||
{% for bill in bills %}
|
|
||||||
<li>
|
|
||||||
<a href="{{ bill.url }}">{{ bill.title }}</a>
|
|
||||||
</li>
|
|
||||||
{% endfor %}
|
|
||||||
</ul>
|
|
||||||
{% endblock %}
|
|
|
@ -1,6 +0,0 @@
|
||||||
{% extends "base.html" %}
|
|
||||||
|
|
||||||
{% block title %} testing title {% endblock %}
|
|
||||||
{% block defcontent %}
|
|
||||||
<h1>testing</h1>
|
|
||||||
{% endblock %}
|
|
|
@ -1,9 +0,0 @@
|
||||||
{ pkgs ? import <nixpkgs> {} }:
|
|
||||||
pkgs.mkShell {
|
|
||||||
# nativeBuildInputs is usually what you want -- tools you need to run
|
|
||||||
nativeBuildInputs = with pkgs; [
|
|
||||||
buildPackages.python311Packages.pymupdf
|
|
||||||
buildPackages.python311Packages.flask
|
|
||||||
buildPackages.python311Packages.flask-bootstrap
|
|
||||||
];
|
|
||||||
}
|
|
Loading…
Reference in New Issue