add country and category fields; simplify serialization into legislation objects
This commit is contained in:
parent
a1c722295b
commit
761b00eb49
|
@ -163,45 +163,63 @@ class CCEParserBase():
|
||||||
for legislative_text in splitted:
|
for legislative_text in splitted:
|
||||||
# there are some blocks that contain just one value
|
# there are some blocks that contain just one value
|
||||||
# and are aligned to some x value on the pdf
|
# and are aligned to some x value on the pdf
|
||||||
|
|
||||||
# it's an easy way to extract stuff
|
# it's an easy way to extract stuff
|
||||||
|
|
||||||
|
try:
|
||||||
|
country = get_block_by_x_value(legislative_text, 139).text.rstrip()
|
||||||
|
country = country.replace("Sponsor: ", "").lstrip()
|
||||||
|
except AttributeError:
|
||||||
|
country = None # this is a yig bill
|
||||||
|
|
||||||
|
try:
|
||||||
|
category = get_block_by_x_value(legislative_text, 151).text.rstrip().lstrip()
|
||||||
|
except AttributeError:
|
||||||
|
try:
|
||||||
|
category = get_block_by_x_value(legislative_text, 153).text.rstrip().lstrip()
|
||||||
|
except AttributeError:
|
||||||
|
print([(i.text, i.x0) for i in legislative_text])
|
||||||
|
|
||||||
leg_code = get_block_by_x_value(legislative_text, 88).text.rstrip()
|
leg_code = get_block_by_x_value(legislative_text, 88).text.rstrip()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
school = get_block_by_x_value(legislative_text, 177).text.rstrip()
|
school = get_block_by_x_value(legislative_text, 177).text.rstrip().lstrip()
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
try:
|
try:
|
||||||
school = get_block_by_x_value(legislative_text, 186).text.rstrip()
|
school = get_block_by_x_value(legislative_text, 186).text.rstrip().lstrip()
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
school = "you tell me, man"
|
school = "you tell me, man"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
sponsors = get_block_by_x_value(legislative_text, 163).text.rstrip()
|
sponsors = get_block_by_x_value(legislative_text, 163).text.rstrip().lstrip()
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
try:
|
try:
|
||||||
sponsors = get_block_by_x_value(legislative_text, 166).text.rstrip()
|
sponsors = get_block_by_x_value(legislative_text, 166).text.rstrip().lstrip()
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
sponsors = "you tell me, man"
|
sponsors = "you tell me, man"
|
||||||
try:
|
|
||||||
subcommittee = get_block_by_x_value(legislative_text, 151).text.rstrip()
|
|
||||||
except AttributeError:
|
|
||||||
try:
|
|
||||||
subcommittee = get_block_by_x_value(legislative_text, 153).text.rstrip()
|
|
||||||
except AttributeError:
|
|
||||||
subcommittee = "you tell me, man"
|
|
||||||
the_rest = ''.join([i.text for i in legislative_text[12:]])
|
the_rest = ''.join([i.text for i in legislative_text[12:]])
|
||||||
print([i.text for i in legislative_text[12:]])
|
|
||||||
handled = self.handle_the_rest(the_rest)
|
handled = self.handle_the_rest(the_rest)
|
||||||
title = handled["title"]
|
title = handled["title"]
|
||||||
bill_text = handled["bill_text"]
|
bill_text = handled["bill_text"]
|
||||||
|
|
||||||
|
codesplit = leg_code.split('/')
|
||||||
|
assembly = codesplit[0]
|
||||||
|
dashsplit = codesplit[1].split('-')
|
||||||
|
year = 2000 + int(dashsplit[0])
|
||||||
|
committee = int(dashsplit[1])
|
||||||
|
docket_order = int(dashsplit[2])
|
||||||
|
|
||||||
output.append({
|
output.append({
|
||||||
"code": leg_code,
|
"assembly": assembly,
|
||||||
|
"year": year,
|
||||||
|
"committee": committee,
|
||||||
|
"docket_order": docket_order,
|
||||||
|
"category": category,
|
||||||
|
"country": country,
|
||||||
"school": school,
|
"school": school,
|
||||||
"sponsors": sponsors,
|
"sponsors": sponsors,
|
||||||
"subcommittee": subcommittee,
|
"legislation_title": title,
|
||||||
"title": title,
|
"text": bill_text
|
||||||
"bill_text": bill_text
|
|
||||||
})
|
})
|
||||||
|
|
||||||
self.output = output
|
self.output = output
|
||||||
|
@ -255,22 +273,33 @@ class HSYIG24(CCEParserBase):
|
||||||
|
|
||||||
# it's an easy way to extract stuff
|
# it's an easy way to extract stuff
|
||||||
legislative_text = remove_block_by_x_value(legislative_text, 565) # remove page numbers
|
legislative_text = remove_block_by_x_value(legislative_text, 565) # remove page numbers
|
||||||
|
category = get_block_by_x_value(legislative_text, 139).text.rstrip().lstrip()
|
||||||
leg_code = get_block_by_x_value(legislative_text, 88).text.rstrip()
|
leg_code = get_block_by_x_value(legislative_text, 88).text.rstrip()
|
||||||
school = get_block_by_x_value(legislative_text, 163).text.rstrip()
|
school = get_block_by_x_value(legislative_text, 163).text.rstrip().lstrip()
|
||||||
sponsors = get_block_by_x_value(legislative_text, 152).text.rstrip()
|
sponsors = get_block_by_x_value(legislative_text, 152).text.rstrip().lstrip()
|
||||||
subcommittee = get_block_by_x_value(legislative_text, 139).text.rstrip()
|
|
||||||
the_rest = ''.join([i.text for i in legislative_text[6:]])
|
the_rest = ''.join([i.text for i in legislative_text[6:]])
|
||||||
handled = self.handle_the_rest(the_rest)
|
handled = self.handle_the_rest(the_rest)
|
||||||
title = handled["title"]
|
title = handled["title"]
|
||||||
bill_text = handled["bill_text"]
|
bill_text = handled["bill_text"]
|
||||||
|
|
||||||
|
codesplit = leg_code.split('/')
|
||||||
|
assembly = codesplit[0]
|
||||||
|
dashsplit = codesplit[1].split('-')
|
||||||
|
year = 2000 + int(dashsplit[0])
|
||||||
|
committee = int(dashsplit[1])
|
||||||
|
docket_order = int(dashsplit[2])
|
||||||
|
|
||||||
output.append({
|
output.append({
|
||||||
"code": leg_code,
|
"assembly": assembly,
|
||||||
|
"year": year,
|
||||||
|
"committee": committee,
|
||||||
|
"docket_order": docket_order,
|
||||||
|
"category": category,
|
||||||
|
"country": None, # this is a yig bill
|
||||||
"school": school,
|
"school": school,
|
||||||
"sponsors": sponsors,
|
"sponsors": sponsors,
|
||||||
"subcommittee": subcommittee,
|
"legislation_title": title,
|
||||||
"title": title,
|
"text": bill_text
|
||||||
"bill_text": bill_text
|
|
||||||
})
|
})
|
||||||
|
|
||||||
self.output = output
|
self.output = output
|
||||||
|
@ -287,8 +316,9 @@ def main():
|
||||||
return
|
return
|
||||||
|
|
||||||
for text in doc.output:
|
for text in doc.output:
|
||||||
print("{} ---------------------------- {}".format(
|
print("{} {} {} ---------------------------- {}".format(
|
||||||
text["title"], text["bill_text"]
|
text["country"], text["category"],
|
||||||
|
text["legislation_title"], text["text"]
|
||||||
))
|
))
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
# Generated by Django 4.2.12 on 2024-06-30 03:14
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('explorer', '0006_remove_legislationclassification_obvious_change'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='legislativetext',
|
||||||
|
name='category',
|
||||||
|
field=models.CharField(default='', max_length=256),
|
||||||
|
preserve_default=False,
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='legislativetext',
|
||||||
|
name='country',
|
||||||
|
field=models.CharField(blank=True, max_length=512, null=True),
|
||||||
|
),
|
||||||
|
]
|
|
@ -47,24 +47,7 @@ class LegislationBook(models.Model):
|
||||||
return
|
return
|
||||||
|
|
||||||
for text in parsed.output:
|
for text in parsed.output:
|
||||||
print(text["code"])
|
text = LegislativeText(**text, from_book=self)
|
||||||
codesplit = text["code"].split('/')
|
|
||||||
assembly = codesplit[0]
|
|
||||||
dashsplit = codesplit[1].split('-')
|
|
||||||
year = 2000 + int(dashsplit[0])
|
|
||||||
committee = int(dashsplit[1])
|
|
||||||
docket_order = int(dashsplit[2])
|
|
||||||
text = LegislativeText(
|
|
||||||
assembly=assembly,
|
|
||||||
year=year,
|
|
||||||
committee=committee,
|
|
||||||
docket_order=docket_order,
|
|
||||||
school=text["school"],
|
|
||||||
sponsors=text["sponsors"],
|
|
||||||
legislation_title=text["title"],
|
|
||||||
text=text["bill_text"],
|
|
||||||
from_book=self
|
|
||||||
)
|
|
||||||
text.save()
|
text.save()
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
|
@ -93,6 +76,7 @@ class LegislativeText(models.Model):
|
||||||
text = models.TextField()
|
text = models.TextField()
|
||||||
year = models.IntegerField()
|
year = models.IntegerField()
|
||||||
committee = models.IntegerField()
|
committee = models.IntegerField()
|
||||||
|
category = models.CharField(max_length=256)
|
||||||
docket_order = models.IntegerField()
|
docket_order = models.IntegerField()
|
||||||
school = models.CharField(max_length=256)
|
school = models.CharField(max_length=256)
|
||||||
sponsors = models.CharField(max_length=256)
|
sponsors = models.CharField(max_length=256)
|
||||||
|
|
Loading…
Reference in New Issue