diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7a60b85
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+__pycache__/
+*.pyc
diff --git a/YIGVolunteerBook2024.pdf b/YIGVolunteerBook2024.pdf
new file mode 100644
index 0000000..acbc262
Binary files /dev/null and b/YIGVolunteerBook2024.pdf differ
diff --git a/analyser.py b/analyser.py
new file mode 100644
index 0000000..6c8ac0c
--- /dev/null
+++ b/analyser.py
@@ -0,0 +1,4 @@
+import leglib
+import fitz
+with fitz.open("YIGVolunteerBook2024.pdf") as document:  # close the PDF once parsing ends
+    leglib.PdfParser(document).parse()
diff --git a/leglib.py b/leglib.py
new file mode 100644
index 0000000..a5d5a25
--- /dev/null
+++ b/leglib.py
@@ -0,0 +1,202 @@
+import fitz
+import math
+
+from typing import Any
+
+class FitzBlockWrapper:
+    """Attribute-style view of one 7-item block tuple.
+
+    ``block`` unpacks positionally as (x0, y0, x1, y1, text, block_number,
+    block_type); every field except ``text`` is cast to ``int``, which
+    truncates fractional coordinates.
+    """
+
+    def __init__(self, block):
+        x0, y0, x1, y1, self.text, block_number, block_type = block
+        self.x0, self.y0 = int(x0), int(y0)
+        self.x1, self.y1 = int(x1), int(y1)
+        self.block_number = int(block_number)
+        self.block_type = int(block_type)
+
+    def __str__(self):
+        return str((self.x0, self.y0, self.x1, self.y1, self.text))
+
+    # The original spelled this ``__repl__``, so repr() never reached it.
+    __repr__ = __str__
+
+class BillCode:
+    """Parsed bill code of the rough form ``"RSB/yy-c(c)-n(n)"``.
+
+    The first letter of the prefix selects ``color`` (R/W/B) and the second
+    selects ``assembly`` (S/H); an unrecognised letter is stored unchanged.
+    The three dash-separated numbers after the slash become ``year``,
+    ``committee`` and ``docketplacement``.
+
+    Raises:
+        IndexError: if the slash, a prefix letter or a dash part is missing.
+        ValueError: if one of the three numeric parts is not an integer.
+    """
+
+    _COLORS = {"R": "red", "W": "white", "B": "blue"}
+    _ASSEMBLIES = {"S": "senate", "H": "house"}
+
+    def __init__(self, text: str):
+        # e.g. "RSB/24-3-12" -> prefix "RSB", numbers ["24", "3", "12"]; only
+        # trailing whitespace is stripped before splitting.
+        slashsplit = text.rstrip().split("/")
+        dashsplit = slashsplit[1].split("-")
+        assemblycode = slashsplit[0]
+
+        color_letter, division_letter = assemblycode[0], assemblycode[1]
+        self.color = self._COLORS.get(color_letter, color_letter)
+        # Fall back to the raw letter (as color already did) so ``assembly``
+        # is always set and __str__ cannot fail with AttributeError.
+        self.assembly = self._ASSEMBLIES.get(division_letter, division_letter)
+
+        self.year = int(dashsplit[0])
+        self.committee = int(dashsplit[1])
+        self.docketplacement = int(dashsplit[2])
+
+    def __str__(self):
+        """Return ``"<color> <assembly> - <year>-<committee>-<docket>"``."""
+        return (f"{self.color} {self.assembly} - "
+                f"{self.year}-{self.committee}-{self.docketplacement}")
+
+class Bill:
+    """One bill: its code plus sponsor, subcommittee and school text."""
+
+    def __init__(self,
+        code: str | BillCode,
+        sponsors: str,
+        subcommittee: str,
+        school: str
+    ):
+        # A raw code string is parsed into a BillCode; anything else is kept.
+        self.code = BillCode(code) if isinstance(code, str) else code
+        # Only trailing whitespace is removed from the free-text fields.
+        self.sponsors = sponsors.rstrip()
+        self.subcommittee = subcommittee.rstrip()
+        self.school = school.rstrip()
+
+class PdfParser:
+    """Find the legislative pages of a PDF and group their text blocks.
+
+    Section divider pages are detected by keywords and image count; the
+    pages strictly between consecutive markers are the legislative pages.
+    """
+
+    def __init__(self, document: fitz.Document):
+        self.document = document
+
+    @staticmethod
+    def _words_in_superstring(words: list[str], superstring: str) -> bool:
+        """Return True if every word occurs in superstring, ignoring case."""
+        haystack = str(superstring).lower()  # lower-case once, not per word
+        return all(str(word).lower() in haystack for word in words)
+
+    def _generate_legislative_pages_list(self, sections: list[int]) -> list[int]:
+        """Return the page numbers strictly between consecutive markers.
+
+        sections is an array of section pages plus the last page.
+        """
+        legislative_pages: list[int] = []
+        # Pair each marker with its successor rather than indexing until an
+        # IndexError; fewer than two markers simply yields an empty list.
+        for start, end in zip(sections, sections[1:]):
+            legislative_pages.extend(range(start + 1, end))
+        return legislative_pages
+
+    def _generate_section_markers(self, document: fitz.Document) -> list[int]:
+        """Return the page numbers that delimit the legislative sections.
+
+        A page is a section marker when its text contains all the title
+        keywords and it has exactly three images.  A page mentioning "ABCs"
+        is also appended once more than two markers have been collected.
+        """
+        section_pages = []
+        for page in document:
+            # Match against the text itself; the original encoded it to bytes,
+            # so the later str() call searched the b'...' repr instead.
+            text = page.get_text()
+            images = page.get_images()  # fetched once and reused below
+            has_three_images = len(images) == 3
+            is_section_page = self._words_in_superstring(
+                words=["Committee", "YMCA", "Tennessee", "Youth", "in"],
+                superstring=text
+            )
+            is_last_page = self._words_in_superstring(words=["ABCs"], superstring=text)
+            print("page number {} contains sentintal? {}".format(page.number, is_section_page))
+            if has_three_images:
+                print("page {} has one image!".format(page.number))
+                print(images)
+
+            if is_section_page and has_three_images:
+                section_pages.append(page.number)
+
+            if is_last_page and len(section_pages) > 2:
+                section_pages.append(page.number)
+
+        return section_pages
+
+    def _get_block_info_from_page(self, page: fitz.Page):
+        """Wrap each block from ``page.get_text("blocks")``."""
+        return [FitzBlockWrapper(block) for block in page.get_text("blocks")]
+
+    @staticmethod
+    def _remove_image_blocks(blocks: list[FitzBlockWrapper]) -> list[FitzBlockWrapper]:
+        """Keep only the blocks whose block_type is 0."""
+        return [block for block in blocks if block.block_type == 0]
+
+    @staticmethod
+    def _remove_coordinate_information(blocks: list[FitzBlockWrapper]) -> list[str]:
+        """Return only the text of each block, in order."""
+        return [block.text for block in blocks]
+
+    @staticmethod
+    def _get_info_from_block(block, lat: int):
+        """Return the items of block whose first element floors to lat."""
+        return [item for item in block if math.floor(item[0]) == lat]
+
+    @staticmethod
+    def _split_list_by_element(arr: list[Any], pivot: Any):
+        """Split arr into sub-lists at every element equal to pivot.
+
+        Pivot elements are dropped; n pivots always produce n + 1 groups, so
+        leading, trailing or adjacent pivots give empty groups.
+        """
+        output = []
+        current = []
+        for item in arr:
+            if item == pivot:
+                output.append(current)
+                current = []
+            else:
+                current.append(item)
+        output.append(current)
+        return output
+
+    def parse(self):
+        """Print up to the first 20 groups of legislative text blocks.
+
+        The first text block is used as the bill header: the block texts are
+        split at every entry equal to it, so the first group is always empty.
+        Returns without printing groups when no text blocks are found.
+        """
+        section_pages = self._generate_section_markers(self.document)
+        legislative_pages = self._generate_legislative_pages_list(section_pages)
+        joined_blocks: list[FitzBlockWrapper] = []
+        for page_number in legislative_pages:
+            page = self.document.load_page(page_number)
+            joined_blocks += self._get_block_info_from_page(page)
+
+        block_texts = self._remove_coordinate_information(
+            self._remove_image_blocks(joined_blocks)
+        )
+        if not block_texts:
+            # Previously joined_blocks[0] raised IndexError on an empty list.
+            return
+
+        bill_header = block_texts[0]
+        splitted = self._split_list_by_element(block_texts, bill_header)
+        for group in splitted[:20]:
+            print(group)
diff --git a/shell.nix b/shell.nix
new file mode 100644
index 0000000..2c827b4
--- /dev/null
+++ b/shell.nix
@@ -0,0 +1,7 @@
+{ pkgs ? import <nixpkgs> {} }:
+  pkgs.mkShell {
+    # nativeBuildInputs: tools needed to run the scripts; PyMuPDF supplies the fitz module that leglib.py and analyser.py import.
+    nativeBuildInputs = with pkgs; [
+        buildPackages.python311Packages.pymupdf
+    ];
+}