demographic-manager-sanitized/main.py

330 lines
11 KiB
Python
Raw Normal View History

2024-06-24 04:57:26 -05:00
import json
import common
2024-06-24 04:57:53 -05:00
import interndata
2024-06-24 04:57:26 -05:00
from openpyxl import Workbook
from openpyxl.utils import get_column_letter
from openpyxl.styles import Font
2024-06-24 04:57:57 -05:00
from openpyxl.chart import (
PieChart,
ProjectedPieChart,
Reference
)
from openpyxl.chart.series import DataPoint
2024-06-24 04:57:43 -05:00
def convert_x_y_to_col_id(x, y):
    """Return a cell reference string for column number ``x`` and row ``y``.

    ``x`` is turned into column letters by openpyxl's ``get_column_letter``;
    ``y`` is stringified and appended to those letters.
    """
    column_letters = get_column_letter(x)
    row_label = str(y)
    return column_letters + row_label
def convert_x_y_range_to_cols(x1, y1, x2, y2):
    """Return a ``"<start>:<end>"`` range string for two column/row pairs.

    Each corner is converted with ``convert_x_y_to_col_id``; the first pair
    becomes the left-hand side of the colon and the second the right-hand side.
    """
    start_cell = convert_x_y_to_col_id(x1, y1)
    end_cell = convert_x_y_to_col_id(x2, y2)
    return start_cell + ":" + end_cell
2024-06-24 04:57:26 -05:00
def mapper(string):
    """Translate a stored code into its human-readable label.

    The stringified argument is looked up in ``common.mapper_data``. When the
    lookup raises ``KeyError`` the function returns ``"WARNING: <value>"``
    instead, so unknown codes remain visible in the output.
    """
    try:
        label = common.mapper_data[str(string)]
    except KeyError:
        # Unknown code: return a flagged placeholder rather than failing.
        label = "WARNING: {}".format(string)
    return label
2024-06-24 04:57:53 -05:00
def aggregate_demographic_totals(payload, adultonly, childonly, internonly, withoutinterns):
    """Count how often each value occurs for every tracked demographic field.

    Args:
        payload: iterable of records; each record is indexed by ``"age"``,
            ``"gender"``, ``"ethnicity"`` and ``"race"``.
        adultonly: when truthy, keep only records whose age is ``"a"``.
        childonly: when truthy, keep only records whose age is ``"c"``.
        internonly: when truthy, keep only records whose age is ``"i"``.
        withoutinterns: when truthy, drop records whose age is ``"i"``.

    Returns:
        A dict with the keys ``"age"``, ``"gender"``, ``"ethnicity"`` and
        ``"race"``, each mapping an observed value to the number of kept
        records carrying it.

    Raises:
        KeyError: if a kept record lacks one of the tracked fields.
    """
    results = {"age": {}, "gender": {}, "ethnicity": {}, "race": {}}
    for entry in payload:
        age = entry["age"]
        # The filters are cumulative: a record must satisfy every active one.
        if adultonly and age != "a":
            continue
        if childonly and age != "c":
            continue
        if internonly and age != "i":
            continue
        if withoutinterns and age == "i":
            continue
        for field, counts in results.items():
            value = entry[field]
            counts[value] = counts.get(value, 0) + 1
    return results
2024-06-24 04:57:53 -05:00
def get_totals_for_specific_attribute_on_specific_site(site, adultonly=False, childonly=False, internonly=False, withoutinterns=False):
    """Load one site's records plus its present interns and aggregate them.

    The site is looked up by name in ``data/manifest.json``. Its
    ``"usedrecord"`` entry names the JSON file under ``data/`` whose
    ``"payload"`` holds the records. Intern records for the names listed
    under ``"present"`` are fetched through ``interndata`` and appended
    before aggregation.

    Args:
        site: value compared against each manifest site's ``"name"``.
        adultonly, childonly, internonly, withoutinterns: filter flags passed
            straight through to ``aggregate_demographic_totals``.

    Returns:
        The dict produced by ``aggregate_demographic_totals``, or a dict of
        empty tallies when the site's ``"usedrecord"`` is falsy (placeholder
        site).

    Raises:
        TypeError: if no manifest site has the requested name.
    """
    # Context managers close the files even when parsing fails.
    with open("data/manifest.json") as manifest:
        manifest_data = json.load(manifest)

    site_object = None
    for candidate in manifest_data["sites"]:
        if candidate["name"] == site:
            # No early exit: with duplicate names the last entry wins.
            site_object = candidate
    if site_object is None:
        raise TypeError("site argument is not valid")

    used_filename = site_object["usedrecord"]
    if not used_filename:  # it's just a placeholder, so we should return no records
        return {"age": {}, "gender": {}, "ethnicity": {}, "race": {}}

    present_interns = site_object["present"]
    intern_serialized = interndata.get_staff_records()
    intern_data = interndata.return_intern_data_from_present_array(present_interns, intern_serialized)

    with open("data/" + used_filename) as site_specific_data_file:
        demographic_data_json = json.load(site_specific_data_file)
    payload = demographic_data_json["payload"]

    # NOTE(review): the concatenation assumes both operands are lists - confirm
    # against interndata.return_intern_data_from_present_array.
    return aggregate_demographic_totals(payload + intern_data, adultonly, childonly, internonly, withoutinterns)
2024-06-24 04:57:26 -05:00
def generate_table_for_attr(attr, totals):
    """Build a two-column table (header row first) for one attribute.

    The header is ``[mapper(attr), "Count"]``. Every ``(value, count)`` pair
    in ``totals[attr]`` becomes a ``[mapper(value), count]`` row, and the
    body rows are sorted before the header is placed on top.
    """
    header_row = [mapper(attr), "Count"]
    body_rows = sorted([mapper(code), tally] for code, tally in totals[attr].items())
    return [header_row] + body_rows
2024-06-24 04:57:26 -05:00
def write_dataarray_to_specific_cell(x, y, dataarray, worksheet):
    """Write a 2-D array of values into ``worksheet`` at a given offset.

    Despite the names, ``x`` offsets the *row* and ``y`` offsets the
    *column*: element ``dataarray[r][c]`` is written to row ``r + 1 + x``,
    column ``c + 1 + y`` through ``worksheet.cell``.

    Args:
        x: number of rows to skip before the first written row.
        y: number of columns to skip before the first written column.
        dataarray: iterable of rows, each an iterable of cell values.
        worksheet: object exposing ``cell(row=..., column=..., value=...)``.
    """
    for row_index, row_values in enumerate(dataarray):
        for column_index, value in enumerate(row_values):
            worksheet.cell(row=row_index + 1 + x, column=column_index + 1 + y, value=value)
def generate_provider_string(providers):
    """Join names into an English list, sorted alphabetically.

    Args:
        providers: iterable of name strings.

    Returns:
        ``"an Unknown Person"`` for no names, the single name for one,
        ``"A and B"`` for two, and ``"A, B, and C"`` (Oxford comma) for
        three or more.
    """
    used_providers = sorted(providers)
    if not used_providers:
        # there's no documented communications coordinator or whatever
        return "an Unknown Person"
    if len(used_providers) == 1:
        return used_providers[0]

    before_and = ', '.join(used_providers[:-1])
    # Only lists of three or more take the comma before "and".
    if len(used_providers) > 2:
        and_string = ', and {}'.format(used_providers[-1])
    else:
        and_string = ' and {}'.format(used_providers[-1])
    return before_and + and_string
2024-06-24 04:57:26 -05:00
def handle_writing_of_attrs(worksheet, totals, offset):
    """Write one table per attribute in ``totals`` side by side.

    Each attribute's table is generated with ``generate_table_for_attr`` and
    written with ``write_dataarray_to_specific_cell`` using ``offset`` as the
    row offset. Successive tables are shifted three columns to the right.

    Returns:
        The length (header included) of the longest table written, or 0 when
        ``totals`` is empty.
    """
    maxlen = 0
    for position, attr in enumerate(totals):
        table = generate_table_for_attr(attr, totals)
        maxlen = max(maxlen, len(table))
        # Each table starts three columns after the previous one.
        write_dataarray_to_specific_cell(offset, position * 3, table, worksheet)
    return maxlen
2024-06-24 04:57:26 -05:00
def adjust_column_width(worksheet, times):
    """Set the width of every third column, starting at column 1, to 20.

    ``times`` is the number of columns adjusted: columns 1, 4, 7, ... up to
    ``(times - 1) * 3 + 1``.
    """
    for column_number in range(1, times * 3 + 1, 3):
        letter = get_column_letter(column_number)
        worksheet.column_dimensions[letter].width = 20
def write_information_to_spreadsheet(totals, worksheet, offset):
    """Write the tables for ``totals`` into ``worksheet`` at row offset ``offset``.

    Delegates to ``handle_writing_of_attrs`` (which takes the worksheet first)
    and returns its result unchanged.
    """
    tallest_table = handle_writing_of_attrs(worksheet=worksheet, totals=totals, offset=offset)
    return tallest_table
2024-06-24 04:57:55 -05:00
def plural(arr):
    """Return ``"s"`` when ``arr`` holds more than one item, else ``""``."""
    return "s" if len(arr) > 1 else ""
2024-06-24 04:57:53 -05:00
def handle_spreadsheet_decoration(ws, providers, persons, commleads, onsiteleads, time, date):
    """Fill the six banner rows at the top of a site worksheet and merge them.

    Rows 1-6 of column A receive, in order: the title line (built from
    ``ws.title``, ``time`` and ``date``), the "assume 0" note, the data
    submitters, the interns present, the communication lead(s) and the
    on-site lead(s). Name lists are rendered with ``generate_provider_string``
    and the lead labels are pluralised with ``plural``. Rows 1-2 are then
    merged across A:E and rows 3-6 across A:H.
    """
    title_line = "Information for {} site attendance at {} on {}".format(ws.title, time, date)
    ws["A1"].value = title_line
    ws["A2"].value = "Fields not present should be assumed 0."

    submitter_names = generate_provider_string(providers)
    ws["A3"].value = "Data submitted by {} in person after conclusion of site operations.".format(submitter_names)

    intern_names = generate_provider_string(persons)
    ws["A4"].value = "Interns present: {}".format(intern_names)

    comm_suffix = plural(commleads)
    ws["A5"].value = "Communication Lead{}: {}".format(comm_suffix, generate_provider_string(commleads))

    onsite_suffix = plural(onsiteleads)
    ws["A6"].value = "On Site Lead{}: {}".format(onsite_suffix, generate_provider_string(onsiteleads))

    # Merge each banner row so the text can spill over the neighbouring columns.
    for span in ("A1:E1", "A2:E2", "A3:H3", "A4:H4", "A5:H5", "A6:H6"):
        ws.merge_cells(span)
2024-06-24 04:57:26 -05:00
2024-06-24 04:57:57 -05:00
def get_all_data_points_from_all_sites(manifest):
    """Concatenate the ``"payload"`` records of every site in the manifest.

    Args:
        manifest: dict with a ``"sites"`` list; each site's ``"usedrecord"``
            names a JSON file under ``data/``.

    Returns:
        A single list holding the payload entries of all sites, in manifest
        order. Sites whose ``"usedrecord"`` is falsy are placeholders and
        contribute nothing, matching how the per-site loader treats them.
    """
    record_objects = []
    for site in manifest["sites"]:
        filename = site["usedrecord"]
        if not filename:
            # Placeholder site: there is no record file to open.
            continue
        # The context manager closes each file as soon as it has been parsed.
        with open("data/" + filename, "r") as handle:
            handle_data = json.load(handle)
        record_objects += handle_data["payload"]
    return record_objects
def dict_to_2d_array(dictionary, keylambda=lambda x: x):
    """Turn a dict into a list of ``(key, value)`` tuples.

    ``keylambda`` is applied to every key before it is stored; by default the
    key is kept as-is. The tuples follow the dict's iteration order.
    """
    pairs = []
    for key, value in dictionary.items():
        pairs.append((keylambda(key), value))
    return pairs
def correct_dict_with_none_values(dictionary):
    """Fold the count stored under the ``None`` key into the ``"unk"`` bucket.

    The dict is modified in place and also returned. When there is no
    ``None`` key nothing changes. When ``None`` is present but ``"unk"`` is
    not, ``"unk"`` is created, so the ``None`` bucket is never left behind.
    """
    if None in dictionary:
        dictionary["unk"] = dictionary.get("unk", 0) + dictionary.pop(None)
    return dictionary
def _format_percentage(part, whole):
    """Return ``part / whole`` as a percent string cut to five characters, or "N/A" for a zero whole."""
    if not whole:
        return "N/A"
    return str(part / whole * 100)[:5] + "%"


def _count_at(rows, index):
    """Return the count column of ``rows[index]`` as an int, or 0 when that row does not exist."""
    if index >= len(rows):
        return 0
    return int(rows[index][1])


def _add_pie_chart(ws, title, first_row, last_row, anchor):
    """Add a pie chart of column B labelled by column A over the given rows; skip empty ranges."""
    if last_row < first_row:
        return
    chart = PieChart()
    chart.add_data(Reference(ws, min_col=2, min_row=first_row, max_row=last_row))
    chart.set_categories(Reference(ws, min_col=1, min_row=first_row, max_row=last_row))
    chart.title = title
    ws.add_chart(chart, anchor)


def generate_final_report_data(ws):
    """Append the cross-site children's report and its pie charts to ``ws``.

    Reads ``data/manifest.json``, gathers every site's records with
    ``get_all_data_points_from_all_sites``, keeps the records whose age is
    ``"c"`` and appends gender, race and ethnicity tables followed by totals,
    a per-site average and a few percentages. Three pie charts (gender,
    race, ethnicity) are anchored at D1, D17 and D33.

    The chart ranges are computed on the assumption that ``ws`` is empty, so
    the first appended row lands on row 1.

    Args:
        ws: worksheet exposing ``append``, ``add_chart`` and
            ``column_dimensions``.
    """
    with open("data/manifest.json", "r") as fd:
        manifest = json.load(fd)
    global_data_points = get_all_data_points_from_all_sites(manifest)
    total_sites = len(manifest["sites"])

    # childonly=True keeps exactly the records whose age is "c".
    children_aggregation = aggregate_demographic_totals(global_data_points, False, True, False, False)
    # .get(..., 0): a category nobody belongs to is simply absent from the tally.
    total_children = children_aggregation["age"].get("c", 0)
    total_girls = children_aggregation["gender"].get("f", 0)
    total_boys = children_aggregation["gender"].get("m", 0)
    children_served = total_girls + total_boys

    race_breakdown = sorted(dict_to_2d_array(correct_dict_with_none_values(children_aggregation["race"]), mapper))
    ethnicity_breakdown = sorted(dict_to_2d_array(correct_dict_with_none_values(children_aggregation["ethnicity"]), mapper))
    gender_breakdown = [
        ["Gender", "Count"],
        ["Boys", total_boys],
        ["Girls", total_girls],
    ]
    race_breakdown.insert(0, ["Race", "Count"])

    # Build the rows section by section, recording the 1-based sheet rows of
    # each table's data so the charts follow the tables whatever their size.
    rows = [
        ["Final Report", ""],
        ["", ""],
        ["Gender Breakdown", ""],
    ]
    gender_first_row = len(rows) + 2  # +1 for 1-based rows, +1 to skip the header
    rows.extend(gender_breakdown)
    gender_last_row = len(rows)

    rows += [["", ""], ["Race Breakdown", ""]]
    race_first_row = len(rows) + 2  # skip the ["Race", "Count"] header
    rows.extend(race_breakdown)
    race_last_row = len(rows)

    rows += [["", ""], ["Ethnicity Breakdown", ""]]
    ethnicity_first_row = len(rows) + 1  # this table has no header row
    rows.extend(ethnicity_breakdown)
    ethnicity_last_row = len(rows)

    average_children = children_served / total_sites if total_sites else 0
    # NOTE(review): the two positional lookups below assume the sorted labels
    # put the Hispanic row first in the ethnicity table and the intended
    # "People of Color" row at index 3 of the race table (header included).
    # Confirm against common.mapper_data.
    rows += [
        ["", ""],
        ["Total Children Served", ""],
        [children_served, ""],
        ["", ""],
        ["Number of Sites", ""],
        [total_sites, ""],
        ["", ""],
        ["Average Children per Site", ""],
        [average_children, ""],
        ["", ""],
        ["Percentages of Relevant Demographics", ""],
        ["Girls", _format_percentage(total_girls, total_children)],
        ["Hispanic", _format_percentage(_count_at(ethnicity_breakdown, 0), total_children)],
        ["People of Color", _format_percentage(_count_at(race_breakdown, 3), total_children)],
    ]

    for row in rows:
        ws.append(row)

    _add_pie_chart(ws, "Gender Breakdown", gender_first_row, gender_last_row, "D1")
    _add_pie_chart(ws, "Race Breakdown", race_first_row, race_last_row, "D17")
    _add_pie_chart(ws, "Ethnicity Breakdown", ethnicity_first_row, ethnicity_last_row, "D33")
    ws.column_dimensions["A"].width = 27
2024-06-24 04:57:26 -05:00
# Script body: build one worksheet per manifest site, add the final report
# sheet, and save the workbook as test.xlsx.
with open("data/manifest.json", "r") as fd:
    json_data = json.load(fd)
wb = Workbook()

# (section heading, filter keyword arguments, blank rows left after the table)
sections = (
    ("Combined Totals", {}, 2),
    ("Combined Totals (without interns)", {"withoutinterns": True}, 2),
    ("Adults", {"adultonly": True}, 3),
    ("Children", {"childonly": True}, 3),
    ("Interns", {"internonly": True}, 3),
)

for site in json_data["sites"]:
    print(site["name"])
    providers = [record['submitter'] for record in site["records"]]
    persons = site["present"]
    commleads = site["commleads"]
    onsiteleads = site["onsiteleads"]
    time = site["time"]
    date = site["date"]
    # Rows 1-6 are filled by handle_spreadsheet_decoration; the first section
    # heading goes on row 8.
    length = 8
    ws = wb.create_sheet(site["name"])
    for heading, filters, gap in sections:
        ws.cell(row=length, column=1, value=heading)
        ws.merge_cells(convert_x_y_range_to_cols(1, length, 11, length))
        ws[convert_x_y_to_col_id(1, length)].font = Font(italic=True)
        totals = get_totals_for_specific_attribute_on_specific_site(site["name"], **filters)
        # Advance past the tallest table just written plus the section's gap.
        length += write_information_to_spreadsheet(totals, ws, length) + gap
    adjust_column_width(ws, 4)
    handle_spreadsheet_decoration(ws, providers, persons, commleads, onsiteleads, time, date)

ws = wb.create_sheet("Final Report")
generate_final_report_data(ws)
# Drop the default sheet that Workbook() creates.
del wb["Sheet"]
wb.save("test.xlsx")