diff --git a/.gitignore b/.gitignore
index d0c63a6..6035ca2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,9 @@
 *.pyc
 *.pyo
 *.egg-info
+hocr.egg-info
+dist/
+build/
 
 # Git files
 *.orig
diff --git a/build/lib/hocr/__init__.py b/build/lib/hocr/__init__.py
new file mode 100644
index 0000000..d878826
--- /dev/null
+++ b/build/lib/hocr/__init__.py
@@ -0,0 +1,8 @@
+# -*- coding: utf-8 -*-
+# Package metadata: title, version, copyright, description and author.
+__title__ = 'hocr'
+__version__ = '0.2.12'
+__copyright__ = 'Copyright 2017 Vic.ai'
+__description__ = 'HOCR manipulation and utility library, based on @jsfenfen and @concordusapps'
+__author__ = 'rune@vic.ai'
+# __all__ = ['parse', ]  # noqa
diff --git a/build/lib/hocr/page.py b/build/lib/hocr/page.py
new file mode 100644
index 0000000..cb0d486
--- /dev/null
+++ b/build/lib/hocr/page.py
@@ -0,0 +1,167 @@
+import re
+import six
+
+
+class Box(object):
+    """Axis-aligned bounding box given by left/top/right/bottom edges."""
+
+    def __init__(self, text=None, left=0, right=0, top=0, bottom=0):
+        # A text form ("left top right bottom") overrides the keywords.
+        if text is not None:
+            left, top, right, bottom = [int(part) for part in text.split()]
+
+        self.left, self.top = left, top
+        self.right, self.bottom = right, bottom
+
+    @property
+    def width(self):
+        """Horizontal extent: right edge minus left edge."""
+        return self.right - self.left
+
+    @property
+    def height(self):
+        """Vertical extent: bottom edge minus top edge."""
+        return self.bottom - self.top
+
+    def __repr__(self):
+        edges = (self.left, self.top, self.right, self.bottom)
+        return '<Box(%r, %r, %r, %r)>' % edges
+
+
+class Base(object):
+    """Common wrapper for an HOCR node: title properties plus lazy children."""
+
+    _allowed_ocr_classes = {}
+    _dir_methods = []
+
+    def __init__(self, element):  # noqa
+        """
+        @param[in] element
+            XML node for the OCR element.
+        """
+        # Store the element for later reference.
+        self._element = element
+
+        # Create an element cache.
+        self._cache = {}
+
+        # Parse the ';'-separated "name value" pairs of the title attribute.
+        for prop in element.get('title', '').split(';'):
+            # Skip chunks that are not a name/value pair (missing title,
+            # trailing ';') instead of failing on the unpacking below.
+            parts = prop.strip().split(None, 1)
+            if len(parts) != 2:
+                continue
+            name, value = parts
+
+            if name == 'bbox':
+                self.box = Box(value)
+            elif name == 'image':
+                self.image = value.strip('" ')
+            elif name == 'x_wconf':
+                self.wconf = int(value)
+            elif name == 'textangle':
+                self.textangle = int(value)
+                if value == '90':
+                    self.vertical = True
+            elif name == 'x_size':
+                self.size = value
+            elif name == 'x_ascenders':
+                self.ascenders = float(value)
+            elif name == 'x_descenders':
+                self.descenders = float(value)
+            elif name == 'ppageno':
+                self.ppageno = int(value)
+
+    def __dir__(self):
+        """List attribute names, including the lazy OCR collections."""
+        if six.PY3:
+            return super().__dir__() + list(self._allowed_ocr_classes)
+        # Python 2's object has no __dir__ to extend, so report the lazy
+        # collections plus the names each subclass declares.
+        return list(self._allowed_ocr_classes) + getattr(
+            self, '_dir_methods', [])
+
+    def __getattr__(self, name):
+        """Resolve lazy OCR collections; only called when lookup fails."""
+        # Private names are never lazy. Bailing out here also keeps the
+        # `self._cache` access below from recursing without bound when
+        # `_cache` itself is missing (instance built without __init__).
+        if name.startswith('_'):
+            raise AttributeError(name)
+
+        # Return the cached version if present.
+        if name in self._cache:
+            return self._cache[name]
+
+        # Parse the named OCR elements.
+        if name in self._allowed_ocr_classes:
+            ref = OCR_CLASSES[name]
+            nodes = self._element.find_all(class_=re.compile(ref['name']))
+            self._cache[name] = elements = list(map(ref['class'], nodes))
+            return elements
+
+        # Attribute is not present.
+        raise AttributeError(name)
+
+
+class Word(Base):
+    """A single recognised word: text, language and emphasis flags."""
+
+    _allowed_ocr_classes = {}
+    _dir_methods = ['box', 'bold', 'italic', 'lang', 'wconf']
+
+    def __init__(self, element):
+        """
+        @param[in] element
+            XML node for the word.
+        """
+        # The two-argument form of super() is valid on Python 2 and 3 alike.
+        super(Word, self).__init__(element)
+
+        # Emphasis is encoded by wrapping the text node:
+        # <strong/> marks the word bold, <em/> marks it italic.
+        self.bold = bool(element.find('strong'))
+        self.italic = bool(element.find('em'))
+
+        # Text content of the node.
+        self.text = element.text
+
+        # Language attribute; empty string when the node has none.
+        self.lang = element.get("lang", '')
+
+    def __str__(self):
+        return '<Word(%r, %r)>' % (self.text, self.box)
+
+
+class Line(Base):
+    _allowed_ocr_classes = {'words'}
+    _dir_methods = ['box', 'text', 'vertical', 'textangle']
+    textangle = 0  # class-level defaults for titles without a textangle
+    vertical = False
+
+    @property
+    def text(self):
+        return ' '.join(word.text for word in self.words)
+
+
+class Paragraph(Base):  # wraps nodes matched by OCR_CLASSES['paragraphs']
+    _allowed_ocr_classes = {'lines', 'words'}  # lazily resolved attributes
+
+
+class Block(Base):  # wraps nodes matched by OCR_CLASSES['blocks']
+    _allowed_ocr_classes = {'paragraphs', 'lines', 'words'}
+    _dir_methods = ['box', ]  # extra names reported by __dir__ on Python 2
+
+
+class Page(Base):  # hocr.parser.parse builds one per 'ocr_page' node
+    _allowed_ocr_classes = {'blocks', 'paragraphs', 'lines', 'words'}
+    _dir_methods = ['image', ]  # extra names reported by __dir__ on Python 2
+
+
+OCR_CLASSES = {  # lazy attribute name -> class-name regex and wrapper type
+    'words': {'name': 'ocr.?_word', 'class': Word},  # ocr_word, ocrx_word
+    'lines': {'name': 'ocr_line', 'class': Line},
+    'paragraphs': {'name': 'ocr_par', 'class': Paragraph},
+    'blocks': {'name': 'ocr_carea', 'class': Block}
+}
diff --git a/build/lib/hocr/parser.py b/build/lib/hocr/parser.py
new file mode 100644
index 0000000..5e361b2
--- /dev/null
+++ b/build/lib/hocr/parser.py
@@ -0,0 +1,29 @@
+import six
+import re
+from bs4 import UnicodeDammit, BeautifulSoup
+from .page import Page
+
+kill_html_closing_tags = re.compile(r'\<\/\s*html', re.I)  # not used in this module
+
+
+def parse(source):
+    """Parse a HOCR stream into page elements.
+
+    @param[in] source
+        Either a file-like object or a filename of the HOCR text.
+    """
+    # A string is taken to be a path; anything else must offer read().
+    if not isinstance(source, six.string_types):
+        raw = source.read()
+    else:
+        with open(source, 'rb') as handle:
+            raw = handle.read()
+
+    # Let UnicodeDammit turn the raw content into unicode markup.
+    markup = UnicodeDammit(raw, is_html=True).unicode_markup
+
+    # This step is slow for large inputs (e.g. a 500 page document).
+    document = BeautifulSoup(markup, 'lxml')
+
+    # Wrap every node carrying the 'ocr_page' class in a Page.
+    return [Page(node) for node in document.find_all(class_='ocr_page')]
diff --git a/dist/hocr-0.2.12-py2.7.egg b/dist/hocr-0.2.12-py2.7.egg
new file mode 100644
index 0000000..89083bb
Binary files /dev/null and b/dist/hocr-0.2.12-py2.7.egg differ
diff --git a/hocr/__init__.py b/hocr/__init__.py
index e69de29..d878826 100644
--- a/hocr/__init__.py
+++ b/hocr/__init__.py
@@ -0,0 +1,8 @@
+# -*- coding: utf-8 -*-
+# Package metadata; setup.py reads __version__ and __description__ from here.
+__title__ = 'hocr'
+__version__ = '0.2.12'
+__copyright__ = 'Copyright 2017 Vic.ai'
+__description__ = 'HOCR manipulation and utility library, based on @jsfenfen and @concordusapps'
+__author__ = 'rune@vic.ai'
+# __all__ = ['parse', ]  # noqa
diff --git a/hocr/page.py b/hocr/page.py
index 6f701f7..f30fa31 100644
--- a/hocr/page.py
+++ b/hocr/page.py
@@ -31,13 +31,13 @@ def __repr__(self):
 class Base(object):
 
     _allowed_ocr_classes = {}
+    _dir_methods = []
 
-    def __init__(self, element):
+    def __init__(self, element):  # noqa
         """
         @param[in] element
             XML node for the OCR element.
         """
-
         # Store the element for later reference.
         self._element = element
 
@@ -60,11 +60,35 @@ def __init__(self, element):
             elif name == 'image':
                 self.image = value.strip('" ')
 
+            elif name == 'x_wconf':
+                self.wconf = int(value)
+
+            elif name == 'textangle':
+                self.textangle = int(value)
+                if value == '90':
+                    self.vertical = True
+
+            elif name == 'x_size':
+                self.size = value
+
+            elif name == 'x_ascenders':
+                self.ascenders = float(value)
+
+            elif name == 'x_descenders':
+                self.descenders = float(value)
+
+            elif name == 'ppageno':
+                self.ppageno = int(value)
+
+        if element.get('lang', None):
+            self.lang = element.get('lang', None)
+
     def __dir__(self):
 
         if six.PY3:
             return super().__dir__() + list(self._allowed_ocr_classes)
         else:
+            return list(self._allowed_ocr_classes) + getattr(self, '_dir_methods', [])
             return super(
                 Base, self).__dir__() + list(self._allowed_ocr_classes)
 
@@ -87,6 +111,7 @@ def __getattr__(self, name):
 class Word(Base):
 
     _allowed_ocr_classes = {}
+    _dir_methods = ['box', 'bold', 'italic', 'lang', 'wconf']
 
     def __init__(self, element):
         # Initialize the base.
@@ -106,7 +131,7 @@ def __init__(self, element):
         # Find the text node.
         self.text = element.text
 
-        self.lang = element.get("lang",'')
+        self.lang = element.get("lang", '')
 
     def __str__(self):
         return '<Word(%r, %r)>' % (self.text, self.box)
@@ -114,18 +139,28 @@ def __str__(self):
 
 class Line(Base):
     _allowed_ocr_classes = {'words'}
+    _dir_methods = ['box', 'text', 'vertical', 'textangle']
+    vertical = False
+    textangle = 0
+
+    @property
+    def text(self):
+        return ' '.join([w.text for w in self.words])
 
 
 class Paragraph(Base):
     _allowed_ocr_classes = {'lines', 'words'}
+    _dir_methods = ['lang', ]
 
 
 class Block(Base):
     _allowed_ocr_classes = {'paragraphs', 'lines', 'words'}
+    _dir_methods = ['box', ]
 
 
 class Page(Base):
     _allowed_ocr_classes = {'blocks', 'paragraphs', 'lines', 'words'}
+    _dir_methods = ['image', ]
 
 
 OCR_CLASSES = {
diff --git a/requirements_dev.txt b/requirements.txt
similarity index 100%
rename from requirements_dev.txt
rename to requirements.txt
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..4b586ae
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,29 @@
+#
+# Copyright 2017 Vic.ai - Rune Loyning
+#
+# https://github.com/Vic-ai/python-hocr
+#
+
+import re
+
+from setuptools import find_packages, setup
+
+# Read the metadata straight from the source text: distutils and the
+# importer find_module() API were both removed in Python 3.12.
+# Relies on the `__name__ = '...'` single-quoted lines in hocr/__init__.py.
+with open('hocr/__init__.py') as stream:
+    meta = dict(re.findall(r"^__(\w+)__ = '([^']*)'", stream.read(), re.M))
+
+setup(
+    name="hocr",
+    version=meta['version'],
+    description=meta['description'],
+    author='Vic.ai',
+    author_email='rune@vic.ai',
+    url='https://github.com/Vic-ai/python-hocr',
+    keywords='hocr',
+    classifiers=[],
+    packages=find_packages(),
+    include_package_data=True,
+    install_requires=['beautifulsoup4', 'six', 'lxml'],
+)