From a7898accff342f94ea9a6cb1df1ce26367a9568f Mon Sep 17 00:00:00 2001 From: Michael Aquilina Date: Fri, 1 Aug 2014 21:44:08 +0200 Subject: [PATCH 001/100] Fix minor spelling mistake --- goose/outputformatters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/goose/outputformatters.py b/goose/outputformatters.py index ae42457b..df6741d7 100644 --- a/goose/outputformatters.py +++ b/goose/outputformatters.py @@ -47,7 +47,7 @@ def get_language(self): Returns the language is by the article or the configuration language """ - # we don't want to force the target laguage + # we don't want to force the target language # so we use the article.meta_lang if self.config.use_meta_language == True: if self.article.meta_lang: From b6a54f9047ee9bdfb2daa702cb54ad1cde5388d9 Mon Sep 17 00:00:00 2001 From: Michael Aquilina Date: Fri, 1 Aug 2014 21:44:23 +0200 Subject: [PATCH 002/100] Use PEP8 convention for boolean statements --- goose/outputformatters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/goose/outputformatters.py b/goose/outputformatters.py index df6741d7..1f8ba4bd 100644 --- a/goose/outputformatters.py +++ b/goose/outputformatters.py @@ -49,7 +49,7 @@ def get_language(self): """ # we don't want to force the target language # so we use the article.meta_lang - if self.config.use_meta_language == True: + if self.config.use_meta_language: if self.article.meta_lang: return self.article.meta_lang[:2] return self.config.target_language From 2916126acbac794066677e4559a622f5c8e3a395 Mon Sep 17 00:00:00 2001 From: Michael Aquilina Date: Fri, 1 Aug 2014 22:15:30 +0200 Subject: [PATCH 003/100] Do not fail when stopword list is not available for a certain language --- goose/text.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/goose/text.py b/goose/text.py index 4008d62b..dd4fb701 100644 --- a/goose/text.py +++ b/goose/text.py @@ -95,7 +95,12 @@ def __init__(self, language='en'): # to generate dynamic path for file to load if not language in self._cached_stop_words: path = os.path.join('text', 'stopwords-%s.txt' % language) - self._cached_stop_words[language] = set(FileHelper.loadResourceFile(path).splitlines()) + try: + content = FileHelper.loadResourceFile(path) + word_list = content.splitlines() + except IOError: + word_list = [] + self._cached_stop_words[language] = set(word_list) self.STOP_WORDS = self._cached_stop_words[language] def remove_punctuation(self, content): From b9330040eb7b6cb7bef2dbc4dcf2365a9fdfa4b5 Mon Sep 17 00:00:00 2001 From: Michael Aquilina Date: Sun, 3 Aug 2014 20:09:08 +0200 Subject: [PATCH 004/100] Fix minor spelling error "handling" --- goose/crawler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/goose/crawler.py b/goose/crawler.py index 211d410e..77dcb535 100644 --- a/goose/crawler.py +++ b/goose/crawler.py @@ -117,10 +117,10 @@ def crawl(self, crawl_candidate): # let's process it if self.article.top_node is not None: - # video handeling + # video handling self.video_extractor.get_videos() - # image handeling + # image handling if self.config.enable_image_fetching: self.get_image() From c31c9c4d992561f882f019d20ea790594aea3a67 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Sun, 28 Dec 2014 23:30:42 +0100 Subject: [PATCH 005/100] #157 - add test case files --- tests/data/extractors/test_opengraphcontent.html | 15 +++++++++++++++ tests/data/extractors/test_opengraphcontent.json | 6 ++++++ 2 files changed, 21 insertions(+) create mode 100644 
tests/data/extractors/test_opengraphcontent.html create mode 100644 tests/data/extractors/test_opengraphcontent.json diff --git a/tests/data/extractors/test_opengraphcontent.html b/tests/data/extractors/test_opengraphcontent.html new file mode 100644 index 00000000..46e5c9de --- /dev/null +++ b/tests/data/extractors/test_opengraphcontent.html @@ -0,0 +1,15 @@ + + +
+

+ Not an Actual Content + TextNode 1 - The Scala supported IDE is one of the few pain points of developers who want to start using Scala in their Java project. On existing long term project developed by a team its hard to step in and introduce a new language that is not supported by the existing IDE. On way to go about it is to hid the fact that you use Scala from the Java world by using one way dependency injection. Still, if you wish to truly absorb Scala into your existing java environment then you'll soon introduced cross language dependencies. +

+
+
+

+ Search-and-rescue teams were mobilized from across Southeast Asia on Sunday after a commercial airliner with 162 people on board lost contact with ground controllers off the coast of Borneo, a search effort that evoked a distressingly familiar mix of grief and mystery nine months after a Malaysia Airlines jetliner disappeared over the Indian Ocean. +

+
+ + diff --git a/tests/data/extractors/test_opengraphcontent.json b/tests/data/extractors/test_opengraphcontent.json new file mode 100644 index 00000000..a775091d --- /dev/null +++ b/tests/data/extractors/test_opengraphcontent.json @@ -0,0 +1,6 @@ +{ + "url": "http://exemple.com/test_opengraphcontent", + "expected": { + "cleaned_text": "Search-and-rescue teams were mobilized from across Southeast Asia on Sunday after a commercial airliner with 162 people on board lost contact with ground controllers off the coast of Borneo, a search effort that evoked a distressingly familiar mix of grief and mystery nine months after a Malaysia Airlines jetliner disappeared over the Indian Ocean." + } +} From 5ac4a32e010308d209b8534fed824ae7d5683e98 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Sun, 28 Dec 2014 23:31:26 +0100 Subject: [PATCH 006/100] #157 - remove childnode one by one to keep parent node --- goose/cleaners.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/goose/cleaners.py b/goose/cleaners.py index 2e8bc87a..c1384ee0 100644 --- a/goose/cleaners.py +++ b/goose/cleaners.py @@ -246,7 +246,8 @@ def div_to_para(self, doc, dom_type): bad_divs += 1 elif div is not None: replaceNodes = self.get_replacement_nodes(doc, div) - div.clear() + for child in self.parser.childNodes(div): + div.remove(child) for c, n in enumerate(replaceNodes): div.insert(c, n) From 71f1deccfa836c6494ae7b35569dd86e4a77eff9 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Sun, 28 Dec 2014 23:32:42 +0100 Subject: [PATCH 007/100] #157 - hanbdle schema.org microdata --- goose/extractors.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/goose/extractors.py b/goose/extractors.py index 1c8a37f1..44db6b5e 100644 --- a/goose/extractors.py +++ b/goose/extractors.py @@ -41,6 +41,11 @@ RE_LANG = r'^[A-Za-z]{2}$' +KNOWN_CONTENT_TAGS = [ + {'attribute': 'itemprop', 'value': 'articleBody'} +] + + class ContentExtractor(object): def __init__(self, config, article): @@ -231,6 +236,11 @@ def extract_tags(self): return set(tags) def calculate_best_node(self): + + top_node_from_known_tags = self.get_top_node_from_known_tags() + if top_node_from_known_tags is not None: + return top_node_from_known_tags + doc = self.article.doc top_node = None nodes_to_check = self.nodes_to_check(doc) @@ -303,6 +313,22 @@ def calculate_best_node(self): return top_node + def is_known_tags_element(self, node): + for tag in KNOWN_CONTENT_TAGS: + if self.parser.getAttribute(node, tag['attribute']) == tag['value']: + return True + return False + + def get_top_node_from_known_tags(self): + for known_content_tag in KNOWN_CONTENT_TAGS: + content_tags = self.parser.getElementsByTag(self.article.doc, + attr=known_content_tag['attribute'], + value=known_content_tag['value']) + if len(content_tags): + top_node = content_tags[0] + self.parser.setAttribute(top_node, "extraction", "microDataExtration") + return content_tags[0] + def is_boostable(self, node): """\ alot of times the first paragraph might be the caption under an image @@ -341,8 +367,13 @@ def walk_siblings(self, node): return b def add_siblings(self, top_node): + # in case the extraction used known attributes + # we don't want to add sibilings + if self.is_known_tags_element(top_node): + return top_node baselinescore_siblings_para = self.get_siblings_score(top_node) results = self.walk_siblings(top_node) + print results for current_node in results: ps = self.get_siblings_content(current_node, baselinescore_siblings_para) for p in ps: From 
6215fface3064248a6b3eaf5a2cd9835e818fac6 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Sun, 28 Dec 2014 23:34:28 +0100 Subject: [PATCH 008/100] #157 - add test case --- tests/extractors.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/extractors.py b/tests/extractors.py index 84ba0502..a3e0dccb 100644 --- a/tests/extractors.py +++ b/tests/extractors.py @@ -355,6 +355,11 @@ def test_okaymarketing(self): fields = ['cleaned_text'] self.runArticleAssertions(article=article, fields=fields) + def test_opengraphcontent(self): + article = self.getArticle() + fields = ['cleaned_text'] + self.runArticleAssertions(article=article, fields=fields) + class TestExtractWithUrl(TestExtractionBase): From b8991df12db180d1b46919caab2eea643670b232 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Sun, 28 Dec 2014 23:35:40 +0100 Subject: [PATCH 009/100] #157 - remove print --- goose/extractors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/goose/extractors.py b/goose/extractors.py index 44db6b5e..33cdf0b1 100644 --- a/goose/extractors.py +++ b/goose/extractors.py @@ -373,7 +373,6 @@ def add_siblings(self, top_node): return top_node baselinescore_siblings_para = self.get_siblings_score(top_node) results = self.walk_siblings(top_node) - print results for current_node in results: ps = self.get_siblings_content(current_node, baselinescore_siblings_para) for p in ps: From 9379cd83c1cdfe0468b792c3ba09c22e15c2af93 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Mon, 29 Dec 2014 00:17:30 +0100 Subject: [PATCH 010/100] #157 - corrected content with microdata --- tests/data/extractors/test_lefigaro.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/data/extractors/test_lefigaro.json b/tests/data/extractors/test_lefigaro.json index 311f4455..19f655ec 100644 --- a/tests/data/extractors/test_lefigaro.json +++ b/tests/data/extractors/test_lefigaro.json @@ -6,7 +6,7 @@ "domain": "www.lefigaro.fr", "final_url": "http://www.lefigaro.fr/conjoncture/2013/04/05/20002-20130405ARTFIG00473-montebourg-envisage-des-privatisations-partielles.php", "meta_keywords": "Actualit\u00e9 \u00e9conomique, entreprises, \u00e9conomie, bourse, emploi, imp\u00f4ts, cac 40, creation d'entreprise, chef d'entreprise, grands patrons, consommation, multinationales, privatisation, d\u00e9localisations, concurrence, monopole, crise, bourse, licenciements, union europ\u00e9enne, etats-unis, chine, pmi, pme, tpe, salaires, relance, pib, pnb, aides sociales, japon, r\u00e9cession, \u00e9conomie verte, fmi, reprise, croissance, news, actu", - "cleaned_text": "Selon le ministre du Redressement productif interview\u00e9 par le Wall Street Journal, le gouvernement", + "cleaned_text": "«Dans le cadre de l'effort de restructuration budgétaire", "tags": [ "EDF", "Privatisation", @@ -19,4 +19,4 @@ "meta_favicon": "http://www.lefigaro.fr/icones/favicon.ico", "meta_lang": null } -} \ No newline at end of file +} From f28a6e7ad4fd65b26df4556a0fee15dec384b0b3 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Mon, 29 Dec 2014 00:26:54 +0100 Subject: [PATCH 011/100] #157 - refactor --- goose/extractors.py | 50 ++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/goose/extractors.py b/goose/extractors.py index 33cdf0b1..f477c940 100644 --- a/goose/extractors.py +++ b/goose/extractors.py @@ -41,11 +41,6 @@ RE_LANG = r'^[A-Za-z]{2}$' -KNOWN_CONTENT_TAGS = [ - {'attribute': 'itemprop', 'value': 'articleBody'} -] - - class ContentExtractor(object): def 
__init__(self, config, article): @@ -214,6 +209,22 @@ def get_domain(self): return o.hostname return None + def get_articlebody(self): + article_body = self.parser.getElementsByTag( + self.article.doc, + attr='itemprop', + value='articleBody') + if len(article_body): + article_body = article_body[0] + self.parser.setAttribute(article_body, "extraction", "microDataExtration") + return article_body + return None + + def is_articlebody(self, node): + if self.parser.getAttribute(node, 'itemprop') == 'articleBody': + return True + return False + def extract_tags(self): node = self.article.doc @@ -237,10 +248,6 @@ def extract_tags(self): def calculate_best_node(self): - top_node_from_known_tags = self.get_top_node_from_known_tags() - if top_node_from_known_tags is not None: - return top_node_from_known_tags - doc = self.article.doc top_node = None nodes_to_check = self.nodes_to_check(doc) @@ -313,22 +320,6 @@ def calculate_best_node(self): return top_node - def is_known_tags_element(self, node): - for tag in KNOWN_CONTENT_TAGS: - if self.parser.getAttribute(node, tag['attribute']) == tag['value']: - return True - return False - - def get_top_node_from_known_tags(self): - for known_content_tag in KNOWN_CONTENT_TAGS: - content_tags = self.parser.getElementsByTag(self.article.doc, - attr=known_content_tag['attribute'], - value=known_content_tag['value']) - if len(content_tags): - top_node = content_tags[0] - self.parser.setAttribute(top_node, "extraction", "microDataExtration") - return content_tags[0] - def is_boostable(self, node): """\ alot of times the first paragraph might be the caption under an image @@ -369,7 +360,7 @@ def walk_siblings(self, node): def add_siblings(self, top_node): # in case the extraction used known attributes # we don't want to add sibilings - if self.is_known_tags_element(top_node): + if self.is_articlebody(top_node): return top_node baselinescore_siblings_para = self.get_siblings_score(top_node) results = self.walk_siblings(top_node) @@ -508,6 +499,13 @@ def nodes_to_check(self, doc): on like paragraphs and tables """ nodes_to_check = [] + + # microdata + # set the most score to articleBody node + article_body_node = self.get_articlebody() + if article_body_node is not None: + self.update_score(article_body_node, 99) + for tag in ['p', 'pre', 'td']: items = self.parser.getElementsByTag(doc, tag=tag) nodes_to_check += items From ced075f3458318a2bfd30f4392cc221eaa4a4862 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Mon, 29 Dec 2014 01:02:21 +0100 Subject: [PATCH 012/100] #160 - fail silently for unknown images --- goose/images/utils.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/goose/images/utils.py b/goose/images/utils.py index 2767416f..a70c6b5d 100644 --- a/goose/images/utils.py +++ b/goose/images/utils.py @@ -33,12 +33,15 @@ class ImageUtils(object): @classmethod def get_image_dimensions(self, identify_program, path): - image = Image.open(path) image_details = ImageDetails() - image_details.set_mime_type(image.format) - width, height = image.size - image_details.set_width(width) - image_details.set_height(height) + try: + image = Image.open(path) + image_details.set_mime_type(image.format) + width, height = image.size + image_details.set_width(width) + image_details.set_height(height) + except IOError: + image_details.set_mime_type('NA') return image_details @classmethod From 048bcdbe893399cf0b82e6ddd970f023d3faba35 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Mon, 29 Dec 2014 01:24:42 +0100 Subject: [PATCH 013/100] 
#161 - add parser list variable --- goose/configuration.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/goose/configuration.py b/goose/configuration.py index 42696f58..259c3e90 100644 --- a/goose/configuration.py +++ b/goose/configuration.py @@ -29,6 +29,11 @@ HTTP_DEFAULT_TIMEOUT = 30 +AVAILABLE_PARSERS = { + 'lxml' : Parser, + 'soupparser': ParserSoup, +} + class Configuration(object): @@ -84,6 +89,7 @@ def __init__(self): self.additional_data_extractor = None # Parser type + self.available_parsers = AVAILABLE_PARSERS.keys() self.parser_class = 'lxml' # set the local storage path @@ -94,7 +100,7 @@ def __init__(self): self.http_timeout = HTTP_DEFAULT_TIMEOUT def get_parser(self): - return Parser if self.parser_class == 'lxml' else ParserSoup + return AVAILABLE_PARSERS[self.parser_class] def get_publishdate_extractor(self): return self.extract_publishdate From eaaa60ae7a5c139a44f84395fed0e0cfb67556eb Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Mon, 29 Dec 2014 01:25:05 +0100 Subject: [PATCH 014/100] #161 - parser fallback --- goose/__init__.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/goose/__init__.py b/goose/__init__.py index 885dc6e5..49073bd1 100644 --- a/goose/__init__.py +++ b/goose/__init__.py @@ -59,8 +59,14 @@ def shutdown_network(self): pass def crawl(self, crawl_candiate): - crawler = Crawler(self.config) - article = crawler.crawl(crawl_candiate) + parsers = self.config.available_parsers + parsers.remove(self.config.parser_class) + try: + crawler = Crawler(self.config) + article = crawler.crawl(crawl_candiate) + except (UnicodeDecodeError, ValueError): + self.config.parser_class = parsers[0] + return self.crawl(crawl_candiate) return article def initialize(self): From f6647fc49a66c9c7c2ba2c09b2e2baf8c1acd831 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Mon, 29 Dec 2014 01:54:52 +0100 Subject: [PATCH 015/100] Merge pull request #5 from cronycle/feature/4-publish-date feature(extractors/publish_date): Extract publish date from meta tags. Conflicts: tests/extractors.py --- goose/configuration.py | 12 ------------ goose/crawler.py | 3 +-- goose/extractors.py | 14 ++++++++++++++ tests/data/extractors/test_publish_date.html | 7 +++++++ tests/data/extractors/test_publish_date.json | 6 ++++++ .../extractors/test_publish_date_article.html | 7 +++++++ .../extractors/test_publish_date_article.json | 6 ++++++ .../data/extractors/test_publish_date_rnews.html | 7 +++++++ .../data/extractors/test_publish_date_rnews.json | 6 ++++++ tests/extractors.py | 15 +++++++++++++++ 10 files changed, 69 insertions(+), 14 deletions(-) create mode 100644 tests/data/extractors/test_publish_date.html create mode 100644 tests/data/extractors/test_publish_date.json create mode 100644 tests/data/extractors/test_publish_date_article.html create mode 100644 tests/data/extractors/test_publish_date_article.json create mode 100644 tests/data/extractors/test_publish_date_rnews.html create mode 100644 tests/data/extractors/test_publish_date_rnews.json diff --git a/goose/configuration.py b/goose/configuration.py index 259c3e90..fe26b22a 100644 --- a/goose/configuration.py +++ b/goose/configuration.py @@ -102,18 +102,6 @@ def __init__(self): def get_parser(self): return AVAILABLE_PARSERS[self.parser_class] - def get_publishdate_extractor(self): - return self.extract_publishdate - - def set_publishdate_extractor(self, extractor): - """\ - Pass in to extract article publish dates. 
- @param extractor a concrete instance of PublishDateExtractor - """ - if not extractor: - raise ValueError("extractor must not be null!") - self.extract_publishdate = extractor - def get_additionaldata_extractor(self): return self.additional_data_extractor diff --git a/goose/crawler.py b/goose/crawler.py index 211d410e..192429f1 100644 --- a/goose/crawler.py +++ b/goose/crawler.py @@ -95,8 +95,7 @@ def crawl(self, crawl_candidate): self.article.raw_html = raw_html self.article.doc = doc self.article.raw_doc = deepcopy(doc) - # TODO - # self.article.publish_date = config.publishDateExtractor.extract(doc) + self.article.publish_date = self.extractor.get_publish_date() # self.article.additional_data = config.get_additionaldata_extractor.extract(doc) self.article.title = self.extractor.get_title() self.article.meta_lang = self.extractor.get_meta_lang() diff --git a/goose/extractors.py b/goose/extractors.py index f477c940..8b0146ce 100644 --- a/goose/extractors.py +++ b/goose/extractors.py @@ -39,6 +39,11 @@ A_REL_TAG_SELECTOR = "a[rel=tag]" A_HREF_TAG_SELECTOR = "a[href*='/tag/'], a[href*='/tags/'], a[href*='/topic/'], a[href*='?keyword=']" RE_LANG = r'^[A-Za-z]{2}$' +KNOWN_PUBLISH_DATE_META_TAGS = [ + {'attribute': 'property', 'value': 'rnews:datePublished'}, + {'attribute': 'property', 'value': 'article:published_time'}, + {'attribute': 'name', 'value': 'OriginalPublicationDate'}, +] class ContentExtractor(object): @@ -118,6 +123,15 @@ def split_title(self, title, splitter): title = title_pieces[large_text_index] return TITLE_REPLACEMENTS.replaceAll(title).strip() + def get_publish_date(self): + for known_meta_tag in KNOWN_PUBLISH_DATE_META_TAGS: + meta_tags = self.parser.getElementsByTag(self.article.doc, + tag='meta', + attr=known_meta_tag['attribute'], + value=known_meta_tag['value']) + if meta_tags: + return self.parser.getAttribute(meta_tags[0], attr='content') + def get_favicon(self): """\ Extract the favicon from a website diff --git a/tests/data/extractors/test_publish_date.html b/tests/data/extractors/test_publish_date.html new file mode 100644 index 00000000..6ce2b927 --- /dev/null +++ b/tests/data/extractors/test_publish_date.html @@ -0,0 +1,7 @@ + + + + + + + diff --git a/tests/data/extractors/test_publish_date.json b/tests/data/extractors/test_publish_date.json new file mode 100644 index 00000000..a37e1173 --- /dev/null +++ b/tests/data/extractors/test_publish_date.json @@ -0,0 +1,6 @@ +{ + "url": "http://example.com/example", + "expected": { + "publish_date": "2014-06-30T16:54:02+00:00" + } +} diff --git a/tests/data/extractors/test_publish_date_article.html b/tests/data/extractors/test_publish_date_article.html new file mode 100644 index 00000000..3d03667e --- /dev/null +++ b/tests/data/extractors/test_publish_date_article.html @@ -0,0 +1,7 @@ + + + + + + + diff --git a/tests/data/extractors/test_publish_date_article.json b/tests/data/extractors/test_publish_date_article.json new file mode 100644 index 00000000..06f14aa6 --- /dev/null +++ b/tests/data/extractors/test_publish_date_article.json @@ -0,0 +1,6 @@ +{ + "url": "http://example.com/example", + "expected": { + "publish_date": "2012-01-11T15:55:01+00:00" + } +} diff --git a/tests/data/extractors/test_publish_date_rnews.html b/tests/data/extractors/test_publish_date_rnews.html new file mode 100644 index 00000000..ca71f718 --- /dev/null +++ b/tests/data/extractors/test_publish_date_rnews.html @@ -0,0 +1,7 @@ + + + + + + + diff --git a/tests/data/extractors/test_publish_date_rnews.json 
b/tests/data/extractors/test_publish_date_rnews.json new file mode 100644 index 00000000..623b13bb --- /dev/null +++ b/tests/data/extractors/test_publish_date_rnews.json @@ -0,0 +1,6 @@ +{ + "url": "http://example.com/example", + "expected": { + "publish_date": "2010-02-22T11:53:04+00:00" + } +} diff --git a/tests/extractors.py b/tests/extractors.py index a3e0dccb..83907ac3 100644 --- a/tests/extractors.py +++ b/tests/extractors.py @@ -361,6 +361,21 @@ def test_opengraphcontent(self): self.runArticleAssertions(article=article, fields=fields) +class TestPublishDate(TestExtractionBase): + + def test_publish_date(self): + article = self.getArticle() + self.runArticleAssertions(article=article, fields=['publish_date']) + + def test_publish_date_rnews(self): + article = self.getArticle() + self.runArticleAssertions(article=article, fields=['publish_date']) + + def test_publish_date_article(self): + article = self.getArticle() + self.runArticleAssertions(article=article, fields=['publish_date']) + + class TestExtractWithUrl(TestExtractionBase): def test_get_canonical_url(self): From 2498065a5d017a0cea101ae4771cab7d0e00d1fa Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Mon, 29 Dec 2014 02:11:01 +0100 Subject: [PATCH 016/100] #163 - add schema published date parsing test --- .../data/extractors/test_publish_date_schema.html | 15 +++++++++++++++ .../data/extractors/test_publish_date_schema.json | 6 ++++++ tests/extractors.py | 4 ++++ 3 files changed, 25 insertions(+) create mode 100644 tests/data/extractors/test_publish_date_schema.html create mode 100644 tests/data/extractors/test_publish_date_schema.json diff --git a/tests/data/extractors/test_publish_date_schema.html b/tests/data/extractors/test_publish_date_schema.html new file mode 100644 index 00000000..8a666dfa --- /dev/null +++ b/tests/data/extractors/test_publish_date_schema.html @@ -0,0 +1,15 @@ + + + + test video + + + +
+ +

+ TextNode 1 - The Scala supported IDE is one of the few pain points of developers who want to start using Scala in their Java project. On existing long term project developed by a team its hard to step in and introduce a new language that is not supported by the existing IDE. On way to go about it is to hid the fact that you use Scala from the Java world by using one way dependency injection. Still, if you wish to truly absorb Scala into your existing java environment then you'll soon introduced cross language dependencies. +

+
+ + diff --git a/tests/data/extractors/test_publish_date_schema.json b/tests/data/extractors/test_publish_date_schema.json new file mode 100644 index 00000000..8e150921 --- /dev/null +++ b/tests/data/extractors/test_publish_date_schema.json @@ -0,0 +1,6 @@ +{ + "url": "http://example.com/example", + "expected": { + "publish_date": "2014-10-09T12:06:16" + } +} diff --git a/tests/extractors.py b/tests/extractors.py index 83907ac3..cfcc4bfa 100644 --- a/tests/extractors.py +++ b/tests/extractors.py @@ -375,6 +375,10 @@ def test_publish_date_article(self): article = self.getArticle() self.runArticleAssertions(article=article, fields=['publish_date']) + def test_publish_date_schema(self): + article = self.getArticle() + self.runArticleAssertions(article=article, fields=['publish_date']) + class TestExtractWithUrl(TestExtractionBase): From 5910f39a29161ad79032d4dd06d3410840873091 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Mon, 29 Dec 2014 02:11:36 +0100 Subject: [PATCH 017/100] #163 - do not use only meta for publication date --- goose/extractors.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/goose/extractors.py b/goose/extractors.py index 8b0146ce..0163fbd0 100644 --- a/goose/extractors.py +++ b/goose/extractors.py @@ -39,10 +39,11 @@ A_REL_TAG_SELECTOR = "a[rel=tag]" A_HREF_TAG_SELECTOR = "a[href*='/tag/'], a[href*='/tags/'], a[href*='/topic/'], a[href*='?keyword=']" RE_LANG = r'^[A-Za-z]{2}$' -KNOWN_PUBLISH_DATE_META_TAGS = [ - {'attribute': 'property', 'value': 'rnews:datePublished'}, - {'attribute': 'property', 'value': 'article:published_time'}, - {'attribute': 'name', 'value': 'OriginalPublicationDate'}, +KNOWN_PUBLISH_DATE_TAGS = [ + {'attribute': 'property', 'value': 'rnews:datePublished', 'content': 'content'}, + {'attribute': 'property', 'value': 'article:published_time', 'content': 'content'}, + {'attribute': 'name', 'value': 'OriginalPublicationDate', 'content': 'content'}, + {'attribute': 'itemprop', 'value': 'datePublished', 'content': 'datetime'}, ] @@ -124,13 +125,12 @@ def split_title(self, title, splitter): return TITLE_REPLACEMENTS.replaceAll(title).strip() def get_publish_date(self): - for known_meta_tag in KNOWN_PUBLISH_DATE_META_TAGS: + for known_meta_tag in KNOWN_PUBLISH_DATE_TAGS: meta_tags = self.parser.getElementsByTag(self.article.doc, - tag='meta', attr=known_meta_tag['attribute'], value=known_meta_tag['value']) if meta_tags: - return self.parser.getAttribute(meta_tags[0], attr='content') + return self.parser.getAttribute(meta_tags[0], known_meta_tag['content']) def get_favicon(self): """\ From f8fc13dcf1d1b8a170a47dffb6d87fb35f45b68e Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Mon, 29 Dec 2014 02:41:56 +0100 Subject: [PATCH 018/100] #165 - add opengraph property to article --- goose/article.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/goose/article.py b/goose/article.py index d195f166..fbfc1478 100644 --- a/goose/article.py +++ b/goose/article.py @@ -64,6 +64,9 @@ def __init__(self): # been in the artcle, these are not meta keywords self.tags = set() + # holds a dict of all opengrah data found + self.opengraph = {} + # holds a list of any movies # we found on the page like youtube, vimeo self.movies = [] From a27cfffdb7056a1b3f3fe1327c31db27de467bb2 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Mon, 29 Dec 2014 02:42:18 +0100 Subject: [PATCH 019/100] #165 - extract opengraph data --- goose/crawler.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/goose/crawler.py b/goose/crawler.py index 
192429f1..64868986 100644 --- a/goose/crawler.py +++ b/goose/crawler.py @@ -106,6 +106,9 @@ def crawl(self, crawl_candidate): self.article.domain = self.extractor.get_domain() self.article.tags = self.extractor.extract_tags() + # opengraph + self.article.opengraph = self.extractor.extract_opengraph() + # before we do any calcs on the body itself let's clean up the document self.article.doc = self.cleaner.clean() From 101e69c81cfdd1ef0549b72268565e49caa334de Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Mon, 29 Dec 2014 02:43:31 +0100 Subject: [PATCH 020/100] #165 - opengraph extractor --- goose/extractors.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/goose/extractors.py b/goose/extractors.py index 0163fbd0..7356b669 100644 --- a/goose/extractors.py +++ b/goose/extractors.py @@ -239,6 +239,17 @@ def is_articlebody(self, node): return True return False + def extract_opengraph(self): + opengraph = {} + node = self.article.doc + metas = self.parser.getElementsByTag(node, 'meta') + for meta in metas: + attr = self.parser.getAttribute(meta, 'property') + if attr is not None and attr.startswith("og:"): + value = self.parser.getAttribute(meta, 'content') + opengraph.update({attr.split(":")[1]: value}) + return opengraph + def extract_tags(self): node = self.article.doc From eb1274b84833946cea69e87b7bcc651be4487a97 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Mon, 29 Dec 2014 02:52:37 +0100 Subject: [PATCH 021/100] #165 - rename dict --- goose/extractors.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/goose/extractors.py b/goose/extractors.py index 7356b669..0b1ff636 100644 --- a/goose/extractors.py +++ b/goose/extractors.py @@ -240,15 +240,15 @@ def is_articlebody(self, node): return False def extract_opengraph(self): - opengraph = {} + opengraph_dict = {} node = self.article.doc metas = self.parser.getElementsByTag(node, 'meta') for meta in metas: attr = self.parser.getAttribute(meta, 'property') if attr is not None and attr.startswith("og:"): value = self.parser.getAttribute(meta, 'content') - opengraph.update({attr.split(":")[1]: value}) - return opengraph + opengraph_dict.update({attr.split(":")[1]: value}) + return opengraph_dict def extract_tags(self): node = self.article.doc From c2eb34efa8819f9ab974a491bcf378163762c674 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Mon, 29 Dec 2014 02:53:09 +0100 Subject: [PATCH 022/100] #165 - opengraph extraction test --- tests/data/extractors/test_opengraph.html | 16 ++++++++++++++++ tests/data/extractors/test_opengraph.json | 12 ++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 tests/data/extractors/test_opengraph.html create mode 100644 tests/data/extractors/test_opengraph.json diff --git a/tests/data/extractors/test_opengraph.html b/tests/data/extractors/test_opengraph.html new file mode 100644 index 00000000..bcc8cbb8 --- /dev/null +++ b/tests/data/extractors/test_opengraph.html @@ -0,0 +1,16 @@ + + + + + + + + + +
+

+ TextNode 1 - The Scala supported IDE is one of the few pain points of developers who want to start using Scala in their Java project. On existing long term project developed by a team its hard to step in and introduce a new language that is not supported by the existing IDE. On way to go about it is to hid the fact that you use Scala from the Java world by using one way dependency injection. Still, if you wish to truly absorb Scala into your existing java environment then you'll soon introduced cross language dependencies. +

+
+ + diff --git a/tests/data/extractors/test_opengraph.json b/tests/data/extractors/test_opengraph.json new file mode 100644 index 00000000..ba05d768 --- /dev/null +++ b/tests/data/extractors/test_opengraph.json @@ -0,0 +1,12 @@ +{ + "url": "http://exemple.com/test_opengraphcontent", + "expected": { + "opengraph": { + "url": "http://www.somenews.com/2012/09/19/nyregion/some-news-article.html?pagewanted=all", + "image": "http://graphics8.somenews.com/images/2012/09/19/region/some-news-image.jpg", + "type": "article", + "description": "Some News Happened in New York", + "title": "Some News Article Story" + } + } +} From 6bbe2db44fad4a05acc5859709b48b0ccd1e9ef5 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Mon, 29 Dec 2014 02:54:33 +0100 Subject: [PATCH 023/100] #165 - rename article body extraction test --- .../{test_opengraphcontent.html => test_articlebody.html} | 0 .../{test_opengraphcontent.json => test_articlebody.json} | 0 tests/extractors.py | 7 ++++++- 3 files changed, 6 insertions(+), 1 deletion(-) rename tests/data/extractors/{test_opengraphcontent.html => test_articlebody.html} (100%) rename tests/data/extractors/{test_opengraphcontent.json => test_articlebody.json} (100%) diff --git a/tests/data/extractors/test_opengraphcontent.html b/tests/data/extractors/test_articlebody.html similarity index 100% rename from tests/data/extractors/test_opengraphcontent.html rename to tests/data/extractors/test_articlebody.html diff --git a/tests/data/extractors/test_opengraphcontent.json b/tests/data/extractors/test_articlebody.json similarity index 100% rename from tests/data/extractors/test_opengraphcontent.json rename to tests/data/extractors/test_articlebody.json diff --git a/tests/extractors.py b/tests/extractors.py index cfcc4bfa..eba69def 100644 --- a/tests/extractors.py +++ b/tests/extractors.py @@ -355,11 +355,16 @@ def test_okaymarketing(self): fields = ['cleaned_text'] self.runArticleAssertions(article=article, fields=fields) - def test_opengraphcontent(self): + def test_articlebody(self): article = self.getArticle() fields = ['cleaned_text'] self.runArticleAssertions(article=article, fields=fields) + def test_opengraph(self): + article = self.getArticle() + fields = ['opengraph'] + self.runArticleAssertions(article=article, fields=fields) + class TestPublishDate(TestExtractionBase): From 124371ed0b25e5ff2cf95c5a6062abfd01c132d7 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Mon, 29 Dec 2014 03:25:40 +0100 Subject: [PATCH 024/100] #139 - article links property --- goose/article.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/goose/article.py b/goose/article.py index fbfc1478..b8645f3d 100644 --- a/goose/article.py +++ b/goose/article.py @@ -71,6 +71,9 @@ def __init__(self): # we found on the page like youtube, vimeo self.movies = [] + # holds links found in the main article + self.links = [] + # stores the final URL that we're going to try # and fetch content against, this would be expanded if any self.final_url = u"" From 4adf4bceab9378f891cf083191336544738476aa Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Mon, 29 Dec 2014 03:26:06 +0100 Subject: [PATCH 025/100] #139 - extract article links --- goose/crawler.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/goose/crawler.py b/goose/crawler.py index 64868986..e23a72e9 100644 --- a/goose/crawler.py +++ b/goose/crawler.py @@ -119,6 +119,9 @@ def crawl(self, crawl_candidate): # let's process it if self.article.top_node is not None: + # article links + self.article.links = 
self.extractor.extract_links() + # video handeling self.video_extractor.get_videos() @@ -129,6 +132,9 @@ def crawl(self, crawl_candidate): # post cleanup self.article.top_node = self.extractor.post_cleanup() + # article links + self.article.links = self.extractor.extract_links() + # clean_text self.article.cleaned_text = self.formatter.get_formatted_text() From 0a3303e2183adcb68817552a9e09317ffa37848a Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Mon, 29 Dec 2014 03:26:33 +0100 Subject: [PATCH 026/100] #139 - article links extract method --- goose/extractors.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/goose/extractors.py b/goose/extractors.py index 0b1ff636..c2630003 100644 --- a/goose/extractors.py +++ b/goose/extractors.py @@ -250,6 +250,15 @@ def extract_opengraph(self): opengraph_dict.update({attr.split(":")[1]: value}) return opengraph_dict + def extract_links(self): + links = [] + items = self.parser.getElementsByTag(self.article.top_node, 'a') + for i in items: + attr = self.parser.getAttribute(i, 'href') + if attr: + links.append(attr) + return links + def extract_tags(self): node = self.article.doc From cda2ef624d875c332ea9caaed13353946d936bd0 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Mon, 29 Dec 2014 03:48:55 +0100 Subject: [PATCH 027/100] #142 - extract authors --- goose/article.py | 3 +++ goose/crawler.py | 1 + goose/extractors.py | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+) diff --git a/goose/article.py b/goose/article.py index b8645f3d..c37f7d5e 100644 --- a/goose/article.py +++ b/goose/article.py @@ -74,6 +74,9 @@ def __init__(self): # holds links found in the main article self.links = [] + # hold author names + self.authors = [] + # stores the final URL that we're going to try # and fetch content against, this would be expanded if any self.final_url = u"" diff --git a/goose/crawler.py b/goose/crawler.py index e23a72e9..cf124f45 100644 --- a/goose/crawler.py +++ b/goose/crawler.py @@ -105,6 +105,7 @@ def crawl(self, crawl_candidate): self.article.canonical_link = self.extractor.get_canonical_link() self.article.domain = self.extractor.get_domain() self.article.tags = self.extractor.extract_tags() + self.article.authors = self.extractor.extract_authors() # opengraph self.article.opengraph = self.extractor.extract_opengraph() diff --git a/goose/extractors.py b/goose/extractors.py index c2630003..fd3fd62e 100644 --- a/goose/extractors.py +++ b/goose/extractors.py @@ -259,6 +259,25 @@ def extract_links(self): links.append(attr) return links + def extract_authors(self): + authors = [] + author_nodes = self.parser.getElementsByTag( + self.article.doc, + attr='itemprop', + value='author') + + for author in author_nodes: + name_nodes = self.parser.getElementsByTag( + author, + attr='itemprop', + value='name') + + if len(name_nodes) > 0: + name = self.parser.getText(name_nodes[0]) + authors.append(name) + + return list(set(authors)) + def extract_tags(self): node = self.article.doc From 675c077b6173d890e1a20918db3a919bd45f5105 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Mon, 29 Dec 2014 05:01:32 +0100 Subject: [PATCH 028/100] #169 - extract tweets --- goose/article.py | 3 +++ goose/crawler.py | 3 +++ goose/extractors.py | 9 +++++++++ 3 files changed, 15 insertions(+) diff --git a/goose/article.py b/goose/article.py index c37f7d5e..093a9d96 100644 --- a/goose/article.py +++ b/goose/article.py @@ -67,6 +67,9 @@ def __init__(self): # holds a dict of all opengrah data found self.opengraph = {} + # holds twitter embeds + 
self.tweets = [] + # holds a list of any movies # we found on the page like youtube, vimeo self.movies = [] diff --git a/goose/crawler.py b/goose/crawler.py index cf124f45..6afdb5f1 100644 --- a/goose/crawler.py +++ b/goose/crawler.py @@ -123,6 +123,9 @@ def crawl(self, crawl_candidate): # article links self.article.links = self.extractor.extract_links() + # tweets + self.article.tweets = self.extractor.extract_tweets() + # video handeling self.video_extractor.get_videos() diff --git a/goose/extractors.py b/goose/extractors.py index fd3fd62e..780a5d94 100644 --- a/goose/extractors.py +++ b/goose/extractors.py @@ -259,6 +259,15 @@ def extract_links(self): links.append(attr) return links + def extract_tweets(self): + tweets = [] + items = self.parser.getElementsByTag(self.article.top_node, tag='blockquote', attr="class", value="twitter-tweet") + for i in items: + for attr in ['gravityScore', 'gravityNodes']: + self.parser.delAttribute(i, attr) + tweets.append(self.parser.nodeToString(i)) + return tweets + def extract_authors(self): authors = [] author_nodes = self.parser.getElementsByTag( From af493289f30a35d5eb9ff0a2b6151c226fc1e921 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Mon, 29 Dec 2014 05:02:28 +0100 Subject: [PATCH 029/100] #169 - extract tweets --- goose/extractors.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/goose/extractors.py b/goose/extractors.py index 780a5d94..57ad0b92 100644 --- a/goose/extractors.py +++ b/goose/extractors.py @@ -261,11 +261,17 @@ def extract_links(self): def extract_tweets(self): tweets = [] - items = self.parser.getElementsByTag(self.article.top_node, tag='blockquote', attr="class", value="twitter-tweet") + items = self.parser.getElementsByTag( + self.article.top_node, + tag='blockquote', + attr="class", + value="twitter-tweet") + for i in items: for attr in ['gravityScore', 'gravityNodes']: self.parser.delAttribute(i, attr) tweets.append(self.parser.nodeToString(i)) + return tweets def extract_authors(self): From 90b3cac4d6a1b07bd7249864d2e29d249375b0d1 Mon Sep 17 00:00:00 2001 From: Sergey Kirillov Date: Mon, 29 Dec 2014 14:00:27 +0200 Subject: [PATCH 030/100] Replaced bare except with except Exception --- goose/images/utils.py | 2 +- goose/network.py | 2 +- goose/text.py | 2 +- goose/version.py | 2 +- setup.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/goose/images/utils.py b/goose/images/utils.py index a70c6b5d..daf5eddb 100644 --- a/goose/images/utils.py +++ b/goose/images/utils.py @@ -119,5 +119,5 @@ def fetch(self, http_client, src): f = urllib2.urlopen(req) data = f.read() return data - except: + except Exception: return None diff --git a/goose/network.py b/goose/network.py index 0a338a44..666a7d61 100644 --- a/goose/network.py +++ b/goose/network.py @@ -51,7 +51,7 @@ def get_html(self, url): self.result = urllib2.urlopen( self.request, timeout=self.config.http_timeout) - except: + except Exception: self.result = None # read the result content diff --git a/goose/text.py b/goose/text.py index 4008d62b..badbfadc 100644 --- a/goose/text.py +++ b/goose/text.py @@ -46,7 +46,7 @@ def encodeValue(value): value = smart_unicode(value) except (UnicodeEncodeError, DjangoUnicodeDecodeError): value = smart_str(value) - except: + except Exception: value = string_org return value diff --git a/goose/version.py b/goose/version.py index 43693f9c..875065c7 100644 --- a/goose/version.py +++ b/goose/version.py @@ -21,5 +21,5 @@ limitations under the License. 
""" -version_info = (1, 0, 22) +version_info = (1, 0, 23) __version__ = ".".join(map(str, version_info)) diff --git a/setup.py b/setup.py index 2e2b74c0..ebad2547 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ try: with open(os.path.join(os.path.dirname(__file__), 'README.rst')) as f: long_description = f.read() -except: +except Exception: long_description = description setup(name='goose-extractor', From 90b041dbcd45a790f6cca4b0055094345df5bb13 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Tue, 30 Dec 2014 00:32:47 +0100 Subject: [PATCH 031/100] #171 - do not increment version yet --- goose/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/goose/version.py b/goose/version.py index 875065c7..43693f9c 100644 --- a/goose/version.py +++ b/goose/version.py @@ -21,5 +21,5 @@ limitations under the License. """ -version_info = (1, 0, 23) +version_info = (1, 0, 22) __version__ = ".".join(map(str, version_info)) From 848acf8fba6f3cfbd4934569bec4f46fce71c00e Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Tue, 30 Dec 2014 00:53:02 +0100 Subject: [PATCH 032/100] #172 - tweet extraction tests --- tests/data/extractors/test_tweet.html | 21 +++++++++++++++++++++ tests/data/extractors/test_tweet.json | 6 ++++++ tests/extractors.py | 9 +++++++++ 3 files changed, 36 insertions(+) create mode 100644 tests/data/extractors/test_tweet.html create mode 100644 tests/data/extractors/test_tweet.json diff --git a/tests/data/extractors/test_tweet.html b/tests/data/extractors/test_tweet.html new file mode 100644 index 00000000..0a390dd8 --- /dev/null +++ b/tests/data/extractors/test_tweet.html @@ -0,0 +1,21 @@ + + +
+

+ TextNode 1 - The Scala supported IDE is one of the few pain points of developers who want to start using Scala in their Java project. On existing long term project developed by a team its hard to step in and introduce a new language that is not supported by the existing IDE. On way to go about it is to hid the fact that you use Scala from the Java world by using one way dependency injection. +

+ + Still, if you wish to truly absorb Scala into your existing java environment then you'll soon introduced cross language dependencies. +

+
+
+

+ TextNode 1 - The Scala supported IDE is one of the few pain points of developers who want to start using Scala in their Java project. On existing long term project developed by a team its hard to step in and introduce a new language that is not supported by the existing IDE. On way to go about it is to hid the fact that you use Scala from the Java world by using one way dependency injection. Still, if you wish to truly absorb Scala into your existing java environment then you'll soon introduced cross language dependencies. +

+ + + +

+
+ + diff --git a/tests/data/extractors/test_tweet.json b/tests/data/extractors/test_tweet.json new file mode 100644 index 00000000..80986ad6 --- /dev/null +++ b/tests/data/extractors/test_tweet.json @@ -0,0 +1,6 @@ +{ + "url": "http://exemple.com/tweet/", + "expected": { + "tweets": 2 + } +} diff --git a/tests/extractors.py b/tests/extractors.py index eba69def..9a850003 100644 --- a/tests/extractors.py +++ b/tests/extractors.py @@ -439,6 +439,15 @@ def extract(self, instance): return article +class TestArticleTweet(TestExtractionBase): + + def test_tweet(self): + article = self.getArticle() + number_tweets = len(article.tweets) + expected_number_tweets = self.data['expected']['tweets'] + self.assertEqual(number_tweets, expected_number_tweets) + + class TestArticleTags(TestExtractionBase): def test_tags_kexp(self): From 321fb86e0938e8a95d289fd2e111dd4083a748bb Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Tue, 30 Dec 2014 01:05:04 +0100 Subject: [PATCH 033/100] #173 - authors extraction test case --- tests/data/extractors/test_author_schema.html | 12 ++++++++++++ tests/data/extractors/test_author_schema.json | 11 +++++++++++ tests/extractors.py | 8 ++++++++ 3 files changed, 31 insertions(+) create mode 100644 tests/data/extractors/test_author_schema.html create mode 100644 tests/data/extractors/test_author_schema.json diff --git a/tests/data/extractors/test_author_schema.html b/tests/data/extractors/test_author_schema.html new file mode 100644 index 00000000..da7cfab4 --- /dev/null +++ b/tests/data/extractors/test_author_schema.html @@ -0,0 +1,12 @@ + + + +
+

+ TextNode 1 - The Scala supported IDE is one of the few pain points of developers who want to start using Scala in their Java project. On existing long term project developed by a team its hard to step in and introduce a new language that is not supported by the existing IDE. On way to go about it is to hid the fact that you use Scala from the Java world by using one way dependency injection. Still, if you wish to truly absorb Scala into your existing java environment then you'll soon introduced cross language dependencies. +

+
+ + diff --git a/tests/data/extractors/test_author_schema.json b/tests/data/extractors/test_author_schema.json new file mode 100644 index 00000000..32185d65 --- /dev/null +++ b/tests/data/extractors/test_author_schema.json @@ -0,0 +1,11 @@ +{ + "url": "http://exemple.com/tweet/", + "expected": { + "authors": [ + "KEVIN SACK", + "ADAM NOSSITER", + "PAM BELLUCK", + "SHERI FINK" + ] + } +} diff --git a/tests/extractors.py b/tests/extractors.py index 9a850003..2c9ccf86 100644 --- a/tests/extractors.py +++ b/tests/extractors.py @@ -448,6 +448,14 @@ def test_tweet(self): self.assertEqual(number_tweets, expected_number_tweets) +class TestArticleAuthor(TestExtractionBase): + + def test_author_schema(self): + article = self.getArticle() + fields = ['authors'] + self.runArticleAssertions(article=article, fields=fields) + + class TestArticleTags(TestExtractionBase): def test_tags_kexp(self): From 989ab243efac255b6347e0af4ec2f53d9ceba74f Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Tue, 30 Dec 2014 01:11:52 +0100 Subject: [PATCH 034/100] #175 - links extraction tests --- tests/data/extractors/test_links.html | 16 ++++++++++++++++ tests/data/extractors/test_links.json | 6 ++++++ tests/extractors.py | 9 +++++++++ 3 files changed, 31 insertions(+) create mode 100644 tests/data/extractors/test_links.html create mode 100644 tests/data/extractors/test_links.json diff --git a/tests/data/extractors/test_links.html b/tests/data/extractors/test_links.html new file mode 100644 index 00000000..c097d4ee --- /dev/null +++ b/tests/data/extractors/test_links.html @@ -0,0 +1,16 @@ + + +
+

+ TextNode 1 - The Scala supported IDE is one of the few pain points of developers who want to start using Scala in their Java project. On existing long term project developed by a team its hard to step in and introduce a new language that is not supported by the existing IDE. On way to go about it is to hid the fact that you use Scala from the Java world by using one way dependency injection. + links + Still, if you wish to truly absorb Scala into your existing java environment then you'll soon introduced cross language dependencies. +

+
+
+

+ TextNode 1 - The Scala supported IDE is one of the few pain points of developers who want to start using Scala in their Java project. On existing long term project developed by a team its hard to step in and introduce a new language that is not supported by the existing IDE. On way to go about it is to hid the fact that you use Scala from the Java world by using one way dependency injection. Still, if you wish to truly absorb Scala into your existing java environment then you'll soon introduced cross language dependencies. +

+
+ + diff --git a/tests/data/extractors/test_links.json b/tests/data/extractors/test_links.json new file mode 100644 index 00000000..74f1c682 --- /dev/null +++ b/tests/data/extractors/test_links.json @@ -0,0 +1,6 @@ +{ + "url": "http://exemple.com/links/", + "expected": { + "links": 2 + } +} diff --git a/tests/extractors.py b/tests/extractors.py index 2c9ccf86..7d43b705 100644 --- a/tests/extractors.py +++ b/tests/extractors.py @@ -448,6 +448,15 @@ def test_tweet(self): self.assertEqual(number_tweets, expected_number_tweets) +class TestArticleLinks(TestExtractionBase): + + def test_links(self): + article = self.getArticle() + number_links = len(article.links) + expected_number_links = self.data['expected']['links'] + self.assertEqual(number_links, expected_number_links) + + class TestArticleAuthor(TestExtractionBase): def test_author_schema(self): From 96caa3c21afdd8db95ad6d3375a0eb3849a183f0 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Tue, 30 Dec 2014 01:47:11 +0100 Subject: [PATCH 035/100] #177 - tags are a list --- goose/article.py | 2 +- goose/extractors.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/goose/article.py b/goose/article.py index 093a9d96..6b144d4f 100644 --- a/goose/article.py +++ b/goose/article.py @@ -62,7 +62,7 @@ def __init__(self): # holds a set of tags that may have # been in the artcle, these are not meta keywords - self.tags = set() + self.tags = [] # holds a dict of all opengrah data found self.opengraph = {} diff --git a/goose/extractors.py b/goose/extractors.py index 57ad0b92..6d8a075d 100644 --- a/goose/extractors.py +++ b/goose/extractors.py @@ -312,7 +312,7 @@ def extract_tags(self): if tag: tags.append(tag) - return set(tags) + return list(set(tags)) def calculate_best_node(self): From 6338f6841f9d3d2f10b586012ffd9817b03817ed Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Tue, 30 Dec 2014 01:47:41 +0100 Subject: [PATCH 036/100] #177 - title is empty string by default --- goose/article.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/goose/article.py b/goose/article.py index 6b144d4f..48cb6578 100644 --- a/goose/article.py +++ b/goose/article.py @@ -26,7 +26,7 @@ class Article(object): def __init__(self): # title of the article - self.title = None + self.title = u"" # stores the lovely, pure text from the article, # stripped of html, formatting, etc... From 206f6e23dfed930a7a921bb710f96515c8ded956 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Tue, 30 Dec 2014 01:48:11 +0100 Subject: [PATCH 037/100] #177 - info method return article data as dict --- goose/article.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/goose/article.py b/goose/article.py index 48cb6578..c00ca4d6 100644 --- a/goose/article.py +++ b/goose/article.py @@ -106,3 +106,26 @@ def __init__(self): # A property bucket for consumers of goose to store custom data extractions. 
self.additional_data = {} + + @property + def infos(self): + data = { + "meta": { + "description": self.meta_description, + "lang": self.meta_lang, + "keywords": self.meta_keywords, + "favicon": self.meta_favicon, + "canonical": self.canonical_link, + }, + "domain": self.domain, + "title": self.title, + "cleaned_text": self.cleaned_text, + "opengraph": self.opengraph, + "tags": self.tags, + "tweets": self.tweets, + "movies": self.movies, + "links": self.links, + "authors": self.authors, + "publish_date": self.publish_date + } + return data From 37e24b291f352d40f54c720dd2921f75c53f0e81 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Tue, 30 Dec 2014 01:49:55 +0100 Subject: [PATCH 038/100] #177 - add top image to returned dict --- goose/article.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/goose/article.py b/goose/article.py index c00ca4d6..d4885616 100644 --- a/goose/article.py +++ b/goose/article.py @@ -117,6 +117,7 @@ def infos(self): "favicon": self.meta_favicon, "canonical": self.canonical_link, }, + "image": None, "domain": self.domain, "title": self.title, "cleaned_text": self.cleaned_text, @@ -128,4 +129,9 @@ def infos(self): "authors": self.authors, "publish_date": self.publish_date } + + # image + if self.top_image is not None: + data['image'] = self.top_image.src + return data From e452c23fe8b5ebc8eae76aa51aa1ed461f9e3b9a Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Tue, 30 Dec 2014 02:41:16 +0100 Subject: [PATCH 039/100] #129 - add issue test case --- tests/data/extractors/test_issue129.html | 1460 ++++++++++++++++++++++ tests/data/extractors/test_issue129.json | 6 + tests/extractors.py | 5 + 3 files changed, 1471 insertions(+) create mode 100644 tests/data/extractors/test_issue129.html create mode 100644 tests/data/extractors/test_issue129.json diff --git a/tests/data/extractors/test_issue129.html b/tests/data/extractors/test_issue129.html new file mode 100644 index 00000000..9f523cbc --- /dev/null +++ b/tests/data/extractors/test_issue129.html @@ -0,0 +1,1460 @@ + + + + + + + + + + + + + + + + + +Lost in JIT: PyPy and the road towards SciPy + + + + + + + +
Thursday, October 27, 2011

PyPy and the road towards SciPy

Hello


PyPy's recent effort to bring NumPy support and the associated fundraiser
caused a lot of discussion in the SciPy community regarding PyPy, NumPy,
SciPy and the future of numeric computing in Python.


There were discussions on the topic, as well as various blog posts
from the SciPy community, which addressed a few of the issues. It seems there was a lot
of talking past each other, and I would like to clarify a few points here,
although this should be taken as my personal opinion on the subject.


So, let's start from the beginning. There are no plans for PyPy to
reimplement everything that's out there in RPython. That has been pointed
out from the beginning as a fallacy of our approach -- we simply don't plan
to do that. We agree that Python is a great glue language and we would like
to keep it that way. PyPy can nicely interface with C using ctypes with
a slightly worse story for C++ (even though there were experiments).
What we know by now is that the CPython C API is not a very good glue for PyPy:
it's too tied to CPython, and it prevents a lot of interesting optimizations
from happening. There are a few contenders, with Cython being the favorite
for now; however, for Cython to be usable we need to have a story for C++
(I know Cython does have a story but it's unclear how that would work with
the PyPy backend).
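
To make the ctypes point concrete, here is a minimal sketch of the kind of C glue that runs unchanged on CPython and PyPy (assuming a Unix-like system where ctypes.util.find_library can locate libc):

    import ctypes
    import ctypes.util

    # load the C library and declare strlen's signature explicitly
    libc = ctypes.CDLL(ctypes.util.find_library("c"))
    libc.strlen.argtypes = [ctypes.c_char_p]
    libc.strlen.restype = ctypes.c_size_t

    print libc.strlen("hello world")  # -> 11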


Which brings me to second point that while a lot of code in packages like
SciPy or matplotlib should be reusable in PyPy, it's probably not in
the current form. Either a lot of it has to move to Cython or some other
way of interfacing with C will come across. This should make it clear that
we want to interface with SciPy and reuse as much as possible.


Another recurring topic that seems to pop up is why we just don't reuse Cython
for NumPy instead of reimplementing everything. The problem is that we need
a robust array type with all the interface before we can start using Cython
for anything. Since we're going to implement it anyway, why not go all the way
and implement the full NumPy module? And that is the topic of the current
funding proposal is exactly that -- to provide full NumPy module. That
would be a very good start for integrating the full stack of SciPy and
matplotlib and all other libraries out there.


But also the trick is that a robust array module can go a long way alone.
It allows you to prototype a lot of algorithms on it's own and generally has
it's uses, without having to worry "but if I read all the elements from the
array it's going to be dog slow".


The last accusation is that we're trying to split the community. The answer is
simply no. We have a relatively good roadmap how to get to support what's out
there in scientific community and ideally support all people out there. This
will however take some time and the group of people that can run their
stuff on top of PyPy will be growing over time. This is indeed precisely what
is happening in other areas of python world -- more and more stuff run on PyPy
and people find it more and more interesting to try and to adapt their
own stuff to run.


To summarize, I don't really think there is that much of a gap between us
and SciPy people. We'll start small (by providing full NumPy implementation)
and then gradually move forward reusing as much as possible from the entire
stack.


Cheers,
fijal

+ + + + + + diff --git a/tests/data/extractors/test_issue129.json b/tests/data/extractors/test_issue129.json new file mode 100644 index 00000000..ddf6cbc8 --- /dev/null +++ b/tests/data/extractors/test_issue129.json @@ -0,0 +1,6 @@ +{ + "url": "http://lostinjit.blogspot.fr/2011/10/pypy-and-road-towards-scipy.html", + "expected": { + "cleaned_text": "Recent PyPys effort to bring NumPy and the associated fundraiser" + } +} diff --git a/tests/extractors.py b/tests/extractors.py index 7d43b705..458a0705 100644 --- a/tests/extractors.py +++ b/tests/extractors.py @@ -365,6 +365,11 @@ def test_opengraph(self): fields = ['opengraph'] self.runArticleAssertions(article=article, fields=fields) + def test_issue129(self): + article = self.getArticle() + fields = ['cleaned_text'] + self.runArticleAssertions(article=article, fields=fields) + class TestPublishDate(TestExtractionBase): From a36b5a8ae1291fdf6e7e7e3e469ec3768faa7cfa Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Tue, 30 Dec 2014 02:41:44 +0100 Subject: [PATCH 040/100] #129 - force articleBody to be the document root if found --- goose/crawler.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/goose/crawler.py b/goose/crawler.py index 6afdb5f1..e25c7fee 100644 --- a/goose/crawler.py +++ b/goose/crawler.py @@ -110,6 +110,13 @@ def crawl(self, crawl_candidate): # opengraph self.article.opengraph = self.extractor.extract_opengraph() + # check for an articleBody + # if we find one force the article.doc to be the articleBody node + # this will prevent the cleaner to remove unwanted text content + article_body = self.extractor.get_articlebody() + if article_body is not None: + self.article.doc = article_body + # before we do any calcs on the body itself let's clean up the document self.article.doc = self.cleaner.clean() From b04f1e9f82c917f6aca648a65e6c484f7bb5dfb9 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Tue, 30 Dec 2014 03:09:47 +0100 Subject: [PATCH 041/100] #137 - opengraph title test case --- tests/data/extractors/test_title_opengraph.html | 13 +++++++++++++ tests/data/extractors/test_title_opengraph.json | 6 ++++++ tests/extractors.py | 5 +++++ 3 files changed, 24 insertions(+) create mode 100644 tests/data/extractors/test_title_opengraph.html create mode 100644 tests/data/extractors/test_title_opengraph.json diff --git a/tests/data/extractors/test_title_opengraph.html b/tests/data/extractors/test_title_opengraph.html new file mode 100644 index 00000000..dbafee7a --- /dev/null +++ b/tests/data/extractors/test_title_opengraph.html @@ -0,0 +1,13 @@ + + + + Wrong article title - website + + +
+

+ TextNode 1 - The Scala supported IDE is one of the few pain points of developers who want to start using Scala in their Java project. On existing long term project developed by a team its hard to step in and introduce a new language that is not supported by the existing IDE. On way to go about it is to hid the fact that you use Scala from the Java world by using one way dependency injection. Still, if you wish to truly absorb Scala into your existing java environment then you'll soon introduced cross language dependencies. +

+
+ + diff --git a/tests/data/extractors/test_title_opengraph.json b/tests/data/extractors/test_title_opengraph.json new file mode 100644 index 00000000..b4b6cdea --- /dev/null +++ b/tests/data/extractors/test_title_opengraph.json @@ -0,0 +1,6 @@ +{ + "url": "http://exemple.com/test_opengraphcontent", + "expected": { + "title": "Good article title" + } +} diff --git a/tests/extractors.py b/tests/extractors.py index 458a0705..ff4825ed 100644 --- a/tests/extractors.py +++ b/tests/extractors.py @@ -365,6 +365,11 @@ def test_opengraph(self): fields = ['opengraph'] self.runArticleAssertions(article=article, fields=fields) + def test_title_opengraph(self): + article = self.getArticle() + fields = ['title'] + self.runArticleAssertions(article=article, fields=fields) + def test_issue129(self): article = self.getArticle() fields = ['cleaned_text'] From 655aca6424c549238bc4ab80ee2ea10c212f5f03 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Tue, 30 Dec 2014 04:36:44 +0100 Subject: [PATCH 042/100] #137 - test separator --- tests/data/extractors/test_title_opengraph.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/data/extractors/test_title_opengraph.html b/tests/data/extractors/test_title_opengraph.html index dbafee7a..6e6c0c64 100644 --- a/tests/data/extractors/test_title_opengraph.html +++ b/tests/data/extractors/test_title_opengraph.html @@ -1,6 +1,7 @@ - + + Wrong article title - website From 3ff269e8ef32f8795e0de7d7954cb9902b5be7bf Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Tue, 30 Dec 2014 04:37:16 +0100 Subject: [PATCH 043/100] #137 - use og:title in test case --- tests/data/extractors/test_time.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/data/extractors/test_time.json b/tests/data/extractors/test_time.json index 31341c9c..05cb400c 100644 --- a/tests/data/extractors/test_time.json +++ b/tests/data/extractors/test_time.json @@ -6,8 +6,8 @@ "final_url": "http://www.time.com/time/health/article/0,8599,2011497,00.html", "meta_keywords": "bp, oil, spill, gulf, mexico, invisible, dispersed, deepwater horizon, Charles Hopkinson", "cleaned_text": "This month, the federal government released", - "title": "Invisible Oil from BP Spill May Threaten Gulf Aquatic Life", + "title": "Oil from Spill Could Still Pose Major Threat", "meta_favicon": "http://img.timeinc.net/time/favicon.ico", "meta_lang": null } -} \ No newline at end of file +} From d31112b34374973f49846ee74f578453062c37f2 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Tue, 30 Dec 2014 04:37:43 +0100 Subject: [PATCH 044/100] #137 - corrected title --- tests/data/extractors/test_allnewlyrics1.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/data/extractors/test_allnewlyrics1.json b/tests/data/extractors/test_allnewlyrics1.json index 4f8e8cc1..53cd1cf8 100644 --- a/tests/data/extractors/test_allnewlyrics1.json +++ b/tests/data/extractors/test_allnewlyrics1.json @@ -10,8 +10,8 @@ "PJ Morton", "Stevie Wonder" ], - "title": "PJ Morton (Ft. Stevie Wonder)", + "title": "\u201cOnly One\u201d Lyrics : PJ Morton (Ft. 
Stevie Wonder)", "meta_favicon": "", "meta_lang": "en" } -} \ No newline at end of file +} From 0e370dc900bd11b890657dd082f1255ef6d96cbe Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Tue, 30 Dec 2014 04:38:05 +0100 Subject: [PATCH 045/100] #137 - corrected title --- tests/data/extractors/test_cnn1.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/data/extractors/test_cnn1.json b/tests/data/extractors/test_cnn1.json index b847add0..ced9eb91 100644 --- a/tests/data/extractors/test_cnn1.json +++ b/tests/data/extractors/test_cnn1.json @@ -6,8 +6,8 @@ "final_url": "http://www.cnn.com/2010/POLITICS/08/13/democrats.social.security/index.html", "meta_keywords": "", "cleaned_text": "Washington (CNN) -- Democrats pledged ", - "title": "Democrats to use Social Security against GOP this fall", + "title": "Democrats to use Social Security against GOP this fall - CNN.com", "meta_favicon": "http://i.cdn.turner.com/cnn/.element/img/3.0/global/misc/apple-touch-icon.png", "meta_lang": "en" } -} \ No newline at end of file +} From 66b63fcf0e1f44629c9164b92d74e72e894a56d6 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Tue, 30 Dec 2014 04:39:16 +0100 Subject: [PATCH 046/100] #137 - fetch opengraph before title --- goose/crawler.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/goose/crawler.py b/goose/crawler.py index e25c7fee..43aaf4ea 100644 --- a/goose/crawler.py +++ b/goose/crawler.py @@ -95,9 +95,9 @@ def crawl(self, crawl_candidate): self.article.raw_html = raw_html self.article.doc = doc self.article.raw_doc = deepcopy(doc) + self.article.opengraph = self.extractor.extract_opengraph() self.article.publish_date = self.extractor.get_publish_date() # self.article.additional_data = config.get_additionaldata_extractor.extract(doc) - self.article.title = self.extractor.get_title() self.article.meta_lang = self.extractor.get_meta_lang() self.article.meta_favicon = self.extractor.get_favicon() self.article.meta_description = self.extractor.get_meta_description() @@ -106,9 +106,7 @@ def crawl(self, crawl_candidate): self.article.domain = self.extractor.get_domain() self.article.tags = self.extractor.extract_tags() self.article.authors = self.extractor.extract_authors() - - # opengraph - self.article.opengraph = self.extractor.extract_opengraph() + self.article.title = self.extractor.get_title() # check for an articleBody # if we find one force the article.doc to be the articleBody node From bd96c943393f271fb04b82ecc6d0b7fff1b59c19 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Tue, 30 Dec 2014 04:39:57 +0100 Subject: [PATCH 047/100] #137 - refactor title extraction based on opengraph, meta headling and title element --- goose/extractors.py | 86 +++++++++++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 30 deletions(-) diff --git a/goose/extractors.py b/goose/extractors.py index 6d8a075d..07f5321f 100644 --- a/goose/extractors.py +++ b/goose/extractors.py @@ -30,6 +30,7 @@ MOTLEY_REPLACEMENT = StringReplacement("�", "") ESCAPED_FRAGMENT_REPLACEMENT = StringReplacement(u"#!", u"?_escaped_fragment_=") TITLE_REPLACEMENTS = ReplaceSequence().create(u"»").append(u"»") +TITLE_SPLITTERS = [u"|", u"-", u"»", u":"] PIPE_SPLITTER = StringSplitter("\\|") DASH_SPLITTER = StringSplitter(" - ") ARROWS_SPLITTER = StringSplitter("»") @@ -65,44 +66,69 @@ def __init__(self, config, article): # stopwords class self.stopwords_class = config.stopwords_class + def clean_title(self, title): + """Clean title with the use of og:site_name + in 
this case try to get rid of the site name
+        and use TITLE_SPLITTERS to reformat title
+        """
+        # check if we have the site name in opengraph data
+        if "site_name" in self.article.opengraph.keys():
+            site_name = self.article.opengraph['site_name']
+            # remove the site name from title
+            title = title.replace(site_name, '').strip()
+
+        # try to remove the domain from url
+        if self.article.domain:
+            pattern = re.compile(self.article.domain, re.IGNORECASE)
+            title = pattern.sub("", title).strip()
+
+        # split the title in words
+        # TechCrunch | my wonderful article
+        # my wonderful article | TechCrunch
+        title_words = title.split()
+
+        # check if the first word is a splitter
+        # if so remove it
+        if title_words[0] in TITLE_SPLITTERS:
+            title_words.pop(0)
+
+        # check if the last word is a splitter
+        # if so remove it
+        if title_words[-1] in TITLE_SPLITTERS:
+            title_words.pop(-1)
+
+        # rebuild the title
+        title = u" ".join(title_words).strip()
+
+        return title
+
     def get_title(self):
         """\
         Fetch the article title and analyze it
         """
-        title = ''
-        doc = self.article.doc
-        title_element = self.parser.getElementsByTag(doc, tag='title')
-        # no title found
-        if title_element is None or len(title_element) == 0:
-            return title
+        # rely on opengraph in case we have the data
+        if "title" in self.article.opengraph.keys():
+            title = self.article.opengraph['title']
+            return self.clean_title(title)
 
-        # title elem found
-        title_text = self.parser.getText(title_element[0])
-        used_delimeter = False
-
-        # split title with |
-        if '|' in title_text:
-            title_text = self.split_title(title_text, PIPE_SPLITTER)
-            used_delimeter = True
-
-        # split title with -
-        if not used_delimeter and '-' in title_text:
-            title_text = self.split_title(title_text, DASH_SPLITTER)
-            used_delimeter = True
-
-        # split title with »
-        if not used_delimeter and u'»' in title_text:
-            title_text = self.split_title(title_text, ARROWS_SPLITTER)
-            used_delimeter = True
-
-        # split title with :
-        if not used_delimeter and ':' in title_text:
-            title_text = self.split_title(title_text, COLON_SPLITTER)
-            used_delimeter = True
+        # try to fetch the meta headline
+        meta_headline = self.parser.getElementsByTag(
+            self.article.doc,
+            tag="meta",
+            attr="name",
+            value="headline")
+        if meta_headline is not None and len(meta_headline) > 0:
+            title = self.parser.getAttribute(meta_headline[0], 'content')
+            return self.clean_title(title)
+
+        # otherwise use the title meta
+        title_element = self.parser.getElementsByTag(self.article.doc, tag='title')
+        if title_element is not None and len(title_element) > 0:
+            title = self.parser.getText(title_element[0])
+            return self.clean_title(title)
 
-        title = MOTLEY_REPLACEMENT.replaceAll(title_text)
         return title
 
     def split_title(self, title, splitter):

From 148ce9bd6c84fe761f05d33f55a54da24b9a72c7 Mon Sep 17 00:00:00 2001
From: Xavier Grangier
Date: Tue, 30 Dec 2014 04:40:36 +0100
Subject: [PATCH 048/100] #137 - more explicit error message

---
 tests/extractors.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/extractors.py b/tests/extractors.py
index ff4825ed..dccae5b2 100644
--- a/tests/extractors.py
+++ b/tests/extractors.py
@@ -125,7 +125,7 @@ def runArticleAssertions(self, article, fields):
                 continue
 
             # default assertion
-            msg = u"Error %s" % field
+            msg = u"Error %s \nexpected: %s\nresult: %s" % (field, expected_value, result_value)
             self.assertEqual(expected_value, result_value, msg=msg)
 
     def extract(self, instance):
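A note for readers of the two patches above: get_title() now funnels every
candidate (og:title, meta headline, <title>) through clean_title(). The
sketch below is a standalone reduction of that cleaning step, not the patched
class itself; the sample title and site name are invented.

    # reduction of clean_title() for illustration only
    TITLE_SPLITTERS = [u"|", u"-", u"\u00bb", u":"]

    def clean_title(title, site_name=None):
        # drop the og:site_name portion from the raw title
        if site_name:
            title = title.replace(site_name, '').strip()
        # drop a leading or trailing splitter token left behind
        title_words = title.split()
        if title_words and title_words[0] in TITLE_SPLITTERS:
            title_words.pop(0)
        if title_words and title_words[-1] in TITLE_SPLITTERS:
            title_words.pop(-1)
        return u" ".join(title_words).strip()

    print(clean_title(u"Good article title - website", site_name=u"website"))
    # -> Good article title

The guard on an empty title_words list is added here for safety: the patched
method indexes title_words[0] directly, so a title consisting only of the
site name would raise an IndexError.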
From e404f1bd0bf4a0571e53378aa8ab87c287a914d6 Mon Sep 17 00:00:00 2001
From: Xavier Grangier
Date: Tue, 30 Dec 2014 07:02:06 +0100
Subject: [PATCH 049/100] #115 - remove businessinsider test cases due to invalid html

---
 .../extractors/test_businessinsider1.html | 2211 ----------------
 .../extractors/test_businessinsider1.json |   12 -
 .../extractors/test_businessinsider2.html | 2278 -----------------
 .../extractors/test_businessinsider2.json |   12 -
 tests/extractors.py                       |   10 -
 5 files changed, 4523 deletions(-)
 delete mode 100644 tests/data/extractors/test_businessinsider1.html
 delete mode 100644 tests/data/extractors/test_businessinsider1.json
 delete mode 100644 tests/data/extractors/test_businessinsider2.html
 delete mode 100644 tests/data/extractors/test_businessinsider2.json

diff --git a/tests/data/extractors/test_businessinsider1.html b/tests/data/extractors/test_businessinsider1.html
deleted file mode 100644
index 18603a35..00000000
--- a/tests/data/extractors/test_businessinsider1.html
+++ /dev/null
@@ -1,2211 +0,0 @@
-MEANWHILE: Developments In Greece...
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tests/data/extractors/test_businessinsider1.json b/tests/data/extractors/test_businessinsider1.json deleted file mode 100644 index a12c5838..00000000 --- a/tests/data/extractors/test_businessinsider1.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "url": "http://articles.businessinsider.com/2011-09-21/markets/30183619_1_parliament-vote-greece-civil-servants", - "expected": { - "meta_description": "More moves to survive.", - "domain": "articles.businessinsider.com", - "final_url": "http://articles.businessinsider.com/2011-09-21/markets/30183619_1_parliament-vote-greece-civil-servants", - "meta_keywords": "Economy, Greece, Austerity, Economic Crisis, Eurozone, Euro, Europe, Linette Lopez", - "cleaned_text": "As everyone in the world was transfixed on the Fed", - "meta_favicon": "http://static7.businessinsider.com/assets/images/faviconBI.ico", - "meta_lang": "en" - } -} \ No newline at end of file diff --git a/tests/data/extractors/test_businessinsider2.html b/tests/data/extractors/test_businessinsider2.html deleted file mode 100644 index 56573300..00000000 --- a/tests/data/extractors/test_businessinsider2.html +++ /dev/null @@ -1,2278 +0,0 @@ - - - - - GOLDMAN: 4 Key Points On The FOMC Announcement - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/tests/data/extractors/test_businessinsider2.json b/tests/data/extractors/test_businessinsider2.json deleted file mode 100644 index 0329e87e..00000000 --- a/tests/data/extractors/test_businessinsider2.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "url": "http://www.businessinsider.com/goldman-on-the-fed-announcement-2011-9", - "expected": { - "meta_description": "Here it is.", - "domain": "www.businessinsider.com", - "final_url": "http://www.businessinsider.com/goldman-on-the-fed-announcement-2011-9", - "meta_keywords": "Federal Reserve, Joe Weisenthal", - "cleaned_text": "From Goldman on the FOMC operation twist announcement", - "meta_favicon": "http://static7.businessinsider.com/assets/images/faviconBI.ico", - "meta_lang": "en" - } -} \ No newline at end of file diff --git a/tests/extractors.py b/tests/extractors.py index dccae5b2..9969c059 100644 --- a/tests/extractors.py +++ b/tests/extractors.py @@ -285,16 +285,6 @@ def test_politico(self): fields = ['cleaned_text'] self.runArticleAssertions(article=article, fields=fields) - def test_businessinsider1(self): - article = self.getArticle() - fields = ['cleaned_text'] - self.runArticleAssertions(article=article, fields=fields) - - def test_businessinsider2(self): - article = self.getArticle() - fields = ['cleaned_text'] - self.runArticleAssertions(article=article, fields=fields) - def test_businessinsider3(self): article = self.getArticle() fields = ['cleaned_text'] From b5ddaf132bb20913389f5f96e8da2c238c28a3a3 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Tue, 30 Dec 2014 07:02:42 +0100 Subject: [PATCH 050/100] #115 - add issue 115 test files --- tests/data/extractors/test_issue115.html | 1740 ++++++++++++++++++++++ tests/data/extractors/test_issue115.json | 6 + tests/extractors.py | 6 + 3 files changed, 1752 insertions(+) create mode 100644 tests/data/extractors/test_issue115.html create mode 100644 tests/data/extractors/test_issue115.json diff --git a/tests/data/extractors/test_issue115.html b/tests/data/extractors/test_issue115.html new file mode 100644 index 00000000..0b968cfc --- /dev/null +++ b/tests/data/extractors/test_issue115.html @@ -0,0 +1,1740 @@ + + + + + + + + + + + + + + Jessica Livingston: Why Startups Need to Focus on Sales, Not Marketing - The Accelerators - WSJ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
1:26 pm ET
Jun 3, 2014

Sales/Marketing

Jessica Livingston: Why Startups Need to Focus on Sales, Not Marketing
    JESSICA LIVINGSTON: The most important thing an early-stage startup should know about marketing is rather counterintuitive: that you probably shouldn’t be doing anything you’d use the term “marketing” to describe. Sales and marketing are two ends of a continuum. At the sales end your outreach is narrow and deep. At the marketing end it is broad and shallow. And for an early stage startup, narrow and deep is what you want — not just in the way you appeal to users, but in the type of product you build. Which means the kind of marketing you should be doing should be indistinguishable from sales: you should be talking to a small number of users who are seriously interested in what you’re making, not a broad audience who are on the whole indifferent.

    +

    Successful startups almost always start narrow and deep. Apple started with a computer Steve Wozniak made to impress his friends at the Homebrew Computer Club. There weren’t a lot of them, but they were really interested. Facebook started out just for Harvard University students. Again, not a lot of potential users, but they really wanted it. Successful startups start narrow and deep partly because they don’t have the power to reach a big audience, so they have to choose a very interested one. But also because the product is still being defined. The conversation with initial users is also market research.

    + + + + + + +
    +
    +
    +
    +
    +
    +

    See what other startup mentors have to say about marketing tactics.

    +
    +

    At Y Combinator, we advise most startups to begin by seeking out some core group of early adopters and then engaging with individual users to convince them to sign up.

    +

    For example, the early adopters of Airbnb were hosts and guests in New York City (Y Combinator funded Airbnb in Winter of 2009). To grow, Airbnb needed to get more hosts and also help existing hosts convert better. So Brian Chesky and Joe Gebbia flew to New York every week to meet with hosts — teaching them how to price their listings, take better photos, and so on. They also asked hosts for introductions to potential new hosts, who they then met in person.

    +

    Stripe (YC S09) was particularly aggressive about signing up users manually at first. The YC alumni network are a good source of early adopters for a service like Stripe. Co-founders Patrick and John Collison worked their way methodically through it, and when someone agreed to try Stripe, the brothers would install it for them on the spot rather than email a link. We now call their technique “Collison installation.”

    +

    Many guest speakers at Y Combinator offer stories about how manual the initial process of getting users was. Pinterest is a mass consumer product, but Ben Silbermann said even he began by recruiting users manually. Ben would literally walk into cafes in Palo Alto and ask random people to try out Pinterest while he gathered feedback over their shoulders.

    +

    The danger of the term “marketing” is that it implies the opposite end of the sales/marketing spectrum from the one startups should be focusing on. And just as focusing on the right end has a double benefit — you acquire users and define the product — focusing on the wrong end is doubly dangerous, because you not only fail to grow, but you can remain in denial about your product’s lameness.

    +

    All too often, I’ve seen founders build some initially mediocre product, announce it to the world, find that users never show up, and not know what to do next. As well as not getting any users, the startup never gets the feedback it needs to improve the product.

    +

    So why wouldn’t all founders start by engaging with users individually? Because it’s hard and demoralizing. Sales gives you a kind of harsh feedback that “marketing” doesn’t. You try to convince someone to use what you’ve built, and they won’t. These conversations are painful, but necessary. I suspect from my experience that founders who want to remain in denial about the inadequacy of their product and/or the difficulty of starting a startup subconsciously prefer the broad and shallow “marketing” approach precisely because they can’t face the work and unpleasant truths they’ll find if they talk to users.

    +

    How should you measure if your manual efforts are effective? Focus on growth rate rather than absolute numbers. Then you won’t be dismayed if the absolute numbers are small at first. If you have 20 users, you only need two more this week to grow 10%. And while two users is a small number for most products, 10% a week is a great growth rate. If you keep growing at 10% a week, the absolute numbers will eventually become impressive.

    +

    Our advice at Y Combinator is always to make a really good product and go out and get users manually. The two work hand-in-hand: you need to talk individually to early adopters to make a really good product. So focusing on the narrow and deep end of the sales/marketing continuum is not just the most effective way to get users. Your startup will die if you don’t.

    +


    +

    +


    +

    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/data/extractors/test_issue115.json b/tests/data/extractors/test_issue115.json new file mode 100644 index 00000000..8f87a9bc --- /dev/null +++ b/tests/data/extractors/test_issue115.json @@ -0,0 +1,6 @@ +{ + "url": "http://blogs.wsj.com/accelerators/2014/06/03/jessica-livingston-why-startups-need-to-focus-on-sales-not-marketing/", + "expected": { + "cleaned_text": "JESSICA LIVINGSTON: The most important thing an early-stage startup should know about marketing is rather counterintuitive: that you probably shouldn’t be doing anything you’d use the term" + } +} diff --git a/tests/extractors.py b/tests/extractors.py index 9969c059..07b9362e 100644 --- a/tests/extractors.py +++ b/tests/extractors.py @@ -365,6 +365,12 @@ def test_issue129(self): fields = ['cleaned_text'] self.runArticleAssertions(article=article, fields=fields) + def test_issue115(self): + # https://github.com/grangier/python-goose/issues/115 + article = self.getArticle() + fields = ['cleaned_text'] + self.runArticleAssertions(article=article, fields=fields) + class TestPublishDate(TestExtractionBase): From c7ec678ba411a598c0d3d0970fdbe717cae2d351 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Tue, 30 Dec 2014 07:09:02 +0100 Subject: [PATCH 051/100] #115 - use known content tags to be article main body --- goose/crawler.py | 9 +++------ goose/extractors.py | 27 +++++++++++++++++++-------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/goose/crawler.py b/goose/crawler.py index 43aaf4ea..6b205916 100644 --- a/goose/crawler.py +++ b/goose/crawler.py @@ -108,10 +108,10 @@ def crawl(self, crawl_candidate): self.article.authors = self.extractor.extract_authors() self.article.title = self.extractor.get_title() - # check for an articleBody - # if we find one force the article.doc to be the articleBody node + # check for known node as content body + # if we find one force the article.doc to be the found node # this will prevent the cleaner to remove unwanted text content - article_body = self.extractor.get_articlebody() + article_body = self.extractor.get_known_article_tags() if article_body is not None: self.article.doc = article_body @@ -141,9 +141,6 @@ def crawl(self, crawl_candidate): # post cleanup self.article.top_node = self.extractor.post_cleanup() - # article links - self.article.links = self.extractor.extract_links() - # clean_text self.article.cleaned_text = self.formatter.get_formatted_text() diff --git a/goose/extractors.py b/goose/extractors.py index 07f5321f..3376df7d 100644 --- a/goose/extractors.py +++ b/goose/extractors.py @@ -40,6 +40,7 @@ A_REL_TAG_SELECTOR = "a[rel=tag]" A_HREF_TAG_SELECTOR = "a[href*='/tag/'], a[href*='/tags/'], a[href*='/topic/'], a[href*='?keyword=']" RE_LANG = r'^[A-Za-z]{2}$' + KNOWN_PUBLISH_DATE_TAGS = [ {'attribute': 'property', 'value': 'rnews:datePublished', 'content': 'content'}, {'attribute': 'property', 'value': 'article:published_time', 'content': 'content'}, @@ -47,6 +48,11 @@ {'attribute': 'itemprop', 'value': 'datePublished', 'content': 'datetime'}, ] +KNOWN_ARTICLE_CONTENT_TAGS = [ + {'attr': 'itemprop', 'value': 'articleBody'}, + {'attr': 'class', 'value': 'post-content'}, +] + class ContentExtractor(object): @@ -249,6 +255,16 @@ def get_domain(self): return o.hostname return None + def get_known_article_tags(self): + for item in KNOWN_ARTICLE_CONTENT_TAGS: + nodes = self.parser.getElementsByTag( + self.article.doc, + attr=item['attr'], + value=item['value']) + 
if len(nodes):
+                return nodes[0]
+        return None
+
     def get_articlebody(self):
         article_body = self.parser.getElementsByTag(
             self.article.doc,
@@ -261,8 +277,9 @@ def get_articlebody(self):
         return None
 
     def is_articlebody(self, node):
-        if self.parser.getAttribute(node, 'itemprop') == 'articleBody':
-            return True
+        for item in KNOWN_ARTICLE_CONTENT_TAGS:
+            if self.parser.getAttribute(node, item['attr']) == item['value']:
+                return True
         return False
 
     def extract_opengraph(self):
@@ -594,12 +611,6 @@ def nodes_to_check(self, doc):
         """
         nodes_to_check = []
 
-        # microdata
-        # set the most score to articleBody node
-        article_body_node = self.get_articlebody()
-        if article_body_node is not None:
-            self.update_score(article_body_node, 99)
-
         for tag in ['p', 'pre', 'td']:
            items = self.parser.getElementsByTag(doc, tag=tag)
            nodes_to_check += items
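To see what the new lookup does, here is a minimal reduction of
get_known_article_tags() using plain lxml instead of the goose parser
wrapper; the sample markup and function name are invented for illustration.

    from lxml import etree

    KNOWN_ARTICLE_CONTENT_TAGS = [
        {'attr': 'itemprop', 'value': 'articleBody'},
        {'attr': 'class', 'value': 'post-content'},
    ]

    def find_known_article_node(doc):
        # the first descriptor that matches wins, so list order sets priority
        for item in KNOWN_ARTICLE_CONTENT_TAGS:
            nodes = doc.xpath("//*[@%s='%s']" % (item['attr'], item['value']))
            if len(nodes):
                return nodes[0]
        return None

    doc = etree.HTML("<div itemprop='articleBody'>body text</div>")
    print(find_known_article_node(doc).text)  # -> body text

Because the scan stops at the first hit, itemprop="articleBody" takes
precedence over class="post-content" when a page carries both.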
From 0e6201d1acf08cd8cc392bbfcc545d95a51b45e8 Mon Sep 17 00:00:00 2001
From: Xavier Grangier
Date: Tue, 30 Dec 2014 07:30:47 +0100
Subject: [PATCH 052/100] #81 - use correct language for stopwords file

---
 goose/extractors.py | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/goose/extractors.py b/goose/extractors.py
index 3376df7d..e4efcbae 100644
--- a/goose/extractors.py
+++ b/goose/extractors.py
@@ -66,12 +66,21 @@ def __init__(self, config, article):
         # article
         self.article = article
 
-        # language
-        self.language = config.target_language
-
         # stopwords class
         self.stopwords_class = config.stopwords_class
 
+    def get_language(self):
+        """\
+        Returns the language used by the article or
+        the configuration language
+        """
+        # we don't want to force the target language
+        # so we use the article.meta_lang
+        if self.config.use_meta_language:
+            if self.article.meta_lang:
+                return self.article.meta_lang[:2]
+        return self.config.target_language
+
     def clean_title(self, title):
         """Clean title with the use of og:site_name
         in this case try to get rid of the site name
@@ -371,7 +380,7 @@ def calculate_best_node(self):
         for node in nodes_to_check:
             text_node = self.parser.getText(node)
-            word_stats = self.stopwords_class(language=self.language).get_stopword_count(text_node)
+            word_stats = self.stopwords_class(language=self.get_language()).get_stopword_count(text_node)
             high_link_density = self.is_highlink_density(node)
             if word_stats.get_stopword_count() > 2 and not high_link_density:
                 nodes_with_text.append(node)
@@ -397,7 +406,7 @@ def calculate_best_node(self):
             boost_score = float(5)
             text_node = self.parser.getText(node)
-            word_stats = self.stopwords_class(language=self.language).get_stopword_count(text_node)
+            word_stats = self.stopwords_class(language=self.get_language()).get_stopword_count(text_node)
             upscore = int(word_stats.get_stopword_count() + boost_score)
 
             # parent node
@@ -453,7 +462,7 @@ def is_boostable(self, node):
                 if steps_away >= max_stepsaway_from_node:
                     return False
                 paraText = self.parser.getText(current_node)
-                word_stats = self.stopwords_class(language=self.language).get_stopword_count(paraText)
+                word_stats = self.stopwords_class(language=self.get_language()).get_stopword_count(paraText)
                 if word_stats.get_stopword_count() > minimum_stopword_count:
                     return True
                 steps_away += 1
@@ -500,7 +509,7 @@ def get_siblings_content(self, current_sibling, baselinescore_siblings_para):
             for first_paragraph in potential_paragraphs:
                 text = self.parser.getText(first_paragraph)
                 if len(text) > 0:
-                    word_stats = self.stopwords_class(language=self.language).get_stopword_count(text)
+                    word_stats = self.stopwords_class(language=self.get_language()).get_stopword_count(text)
                     paragraph_score = word_stats.get_stopword_count()
                     sibling_baseline_score = float(.30)
                     high_link_density = self.is_highlink_density(first_paragraph)
@@ -527,7 +536,7 @@ def get_siblings_score(self, top_node):
         for node in nodes_to_check:
             text_node = self.parser.getText(node)
-            word_stats = self.stopwords_class(language=self.language).get_stopword_count(text_node)
+            word_stats = self.stopwords_class(language=self.get_language()).get_stopword_count(text_node)
             high_link_density = self.is_highlink_density(node)
             if word_stats.get_stopword_count() > 2 and not high_link_density:
                 paragraphs_number += 1
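The fallback order of the new get_language() helper is easier to read in
isolation. A sketch with stand-in config and article objects, not the
patched extractor:

    from collections import namedtuple

    Config = namedtuple('Config', 'use_meta_language target_language')
    Article = namedtuple('Article', 'meta_lang')

    def get_language(config, article):
        # prefer the language the page declares about itself
        if config.use_meta_language and article.meta_lang:
            return article.meta_lang[:2]
        # otherwise fall back to the configured target language
        return config.target_language

    print(get_language(Config(True, 'en'), Article('fr-FR')))  # -> fr
    print(get_language(Config(True, 'en'), Article(None)))     # -> en

So a page declaring "fr-FR" now selects the French stopword list, while
pages without a usable meta language keep the configured default.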
From 22ded4b3126e878f7608bd4a5b72c9a4c8daf1fe Mon Sep 17 00:00:00 2001
From: Xavier Grangier
Date: Tue, 30 Dec 2014 08:04:47 +0100
Subject: [PATCH 055/100] #183 - use article tag for a top node

---
 goose/extractors.py                           | 26 ++++++++-----------
 tests/data/extractors/test_articlebody.json   |  6 -----
 .../test_articlebody_attribute.html           | 15 +++++++++++
 .../test_articlebody_attribute.json           |  6 +++++
 ...dy.html => test_articlebody_itemprop.html} |  0
 .../extractors/test_articlebody_itemprop.json |  6 +++++
 .../data/extractors/test_articlebody_tag.html | 15 +++++++++++
 .../data/extractors/test_articlebody_tag.json |  6 +++++
 tests/extractors.py                           | 23 ++++++++++++----
 9 files changed, 77 insertions(+), 26 deletions(-)
 delete mode 100644 tests/data/extractors/test_articlebody.json
 create mode 100644 tests/data/extractors/test_articlebody_attribute.html
 create mode 100644 tests/data/extractors/test_articlebody_attribute.json
 rename tests/data/extractors/{test_articlebody.html => test_articlebody_itemprop.html} (100%)
 create mode 100644 tests/data/extractors/test_articlebody_itemprop.json
 create mode 100644 tests/data/extractors/test_articlebody_tag.html
 create mode 100644 tests/data/extractors/test_articlebody_tag.json

diff --git a/goose/extractors.py b/goose/extractors.py
index e4efcbae..9440aea6 100644
--- a/goose/extractors.py
+++ b/goose/extractors.py
@@ -51,6 +51,7 @@
 KNOWN_ARTICLE_CONTENT_TAGS = [
     {'attr': 'itemprop', 'value': 'articleBody'},
     {'attr': 'class', 'value': 'post-content'},
+    {'tag': 'article'},
 ]
 
 
@@ -268,27 +269,22 @@ def get_known_article_tags(self):
         for item in KNOWN_ARTICLE_CONTENT_TAGS:
             nodes = self.parser.getElementsByTag(
                 self.article.doc,
-                attr=item['attr'],
-                value=item['value'])
+                **item)
             if len(nodes):
                 return nodes[0]
         return None
 
-    def get_articlebody(self):
-        article_body = self.parser.getElementsByTag(
-            self.article.doc,
-            attr='itemprop',
-            value='articleBody')
-        if len(article_body):
-            article_body = article_body[0]
-            self.parser.setAttribute(article_body, "extraction", "microDataExtration")
-            return article_body
-        return None
-
     def is_articlebody(self, node):
         for item in KNOWN_ARTICLE_CONTENT_TAGS:
-            if self.parser.getAttribute(node, item['attr']) == item['value']:
-                return True
+            # attribute
+            if "attr" in item and "value" in item:
+                if self.parser.getAttribute(node, item['attr']) == item['value']:
+                    return True
+            # tag
+            if "tag" in item:
+                if node.tag == item['tag']:
+                    return True
+
         return False
 
     def extract_opengraph(self):
diff --git a/tests/data/extractors/test_articlebody.json b/tests/data/extractors/test_articlebody.json
deleted file mode 100644
index a775091d..00000000
--- a/tests/data/extractors/test_articlebody.json
+++ /dev/null
@@ -1,6 +0,0 @@
-{
-    "url": "http://exemple.com/test_opengraphcontent",
-    "expected": {
-        "cleaned_text": "Search-and-rescue teams were mobilized from across Southeast Asia on Sunday after a commercial airliner with 162 people on board lost contact with ground controllers off the coast of Borneo, a search effort that evoked a distressingly familiar mix of grief and mystery nine months after a Malaysia Airlines jetliner disappeared over the Indian Ocean."
-    }
-}
diff --git a/tests/data/extractors/test_articlebody_attribute.html b/tests/data/extractors/test_articlebody_attribute.html
new file mode 100644
index 00000000..bbf00f65
--- /dev/null
+++ b/tests/data/extractors/test_articlebody_attribute.html
@@ -0,0 +1,15 @@
+
+
    +

    + Not an Actual Content
    + TextNode 1 - The Scala supported IDE is one of the few pain points of developers who want to start using Scala in their Java project. On existing long term project developed by a team its hard to step in and introduce a new language that is not supported by the existing IDE. On way to go about it is to hid the fact that you use Scala from the Java world by using one way dependency injection. Still, if you wish to truly absorb Scala into your existing java environment then you'll soon introduced cross language dependencies.

    +
    +
    +

    + Search-and-rescue teams were mobilized from across Southeast Asia on Sunday after a commercial airliner with 162 people on board lost contact with ground controllers off the coast of Borneo, a search effort that evoked a distressingly familiar mix of grief and mystery nine months after a Malaysia Airlines jetliner disappeared over the Indian Ocean. +

    +
    +
+
diff --git a/tests/data/extractors/test_articlebody_attribute.json b/tests/data/extractors/test_articlebody_attribute.json
new file mode 100644
index 00000000..7fbebcaf
--- /dev/null
+++ b/tests/data/extractors/test_articlebody_attribute.json
@@ -0,0 +1,6 @@
+{
+    "url": "http://exemple.com/test_opengraphcontent",
+    "expected": {
+        "cleaned_text": "Search-and-rescue teams were mobilized "
+    }
+}
diff --git a/tests/data/extractors/test_articlebody.html b/tests/data/extractors/test_articlebody_itemprop.html
similarity index 100%
rename from tests/data/extractors/test_articlebody.html
rename to tests/data/extractors/test_articlebody_itemprop.html
diff --git a/tests/data/extractors/test_articlebody_itemprop.json b/tests/data/extractors/test_articlebody_itemprop.json
new file mode 100644
index 00000000..7fbebcaf
--- /dev/null
+++ b/tests/data/extractors/test_articlebody_itemprop.json
@@ -0,0 +1,6 @@
+{
+    "url": "http://exemple.com/test_opengraphcontent",
+    "expected": {
+        "cleaned_text": "Search-and-rescue teams were mobilized "
+    }
+}
diff --git a/tests/data/extractors/test_articlebody_tag.html b/tests/data/extractors/test_articlebody_tag.html
new file mode 100644
index 00000000..6e7ca4be
--- /dev/null
+++ b/tests/data/extractors/test_articlebody_tag.html
@@ -0,0 +1,15 @@
+
+
    +

    + Not an Actual Content
    + TextNode 1 - The Scala supported IDE is one of the few pain points of developers who want to start using Scala in their Java project. On existing long term project developed by a team its hard to step in and introduce a new language that is not supported by the existing IDE. On way to go about it is to hid the fact that you use Scala from the Java world by using one way dependency injection. Still, if you wish to truly absorb Scala into your existing java environment then you'll soon introduced cross language dependencies.

    +
    +
    +

    + Search-and-rescue teams were mobilized from across Southeast Asia on Sunday after a commercial airliner with 162 people on board lost contact with ground controllers off the coast of Borneo, a search effort that evoked a distressingly familiar mix of grief and mystery nine months after a Malaysia Airlines jetliner disappeared over the Indian Ocean. +

    +
    +
+
diff --git a/tests/data/extractors/test_articlebody_tag.json b/tests/data/extractors/test_articlebody_tag.json
new file mode 100644
index 00000000..7fbebcaf
--- /dev/null
+++ b/tests/data/extractors/test_articlebody_tag.json
@@ -0,0 +1,6 @@
+{
+    "url": "http://exemple.com/test_opengraphcontent",
+    "expected": {
+        "cleaned_text": "Search-and-rescue teams were mobilized "
+    }
+}
diff --git a/tests/extractors.py b/tests/extractors.py
index 07b9362e..b9496b8c 100644
--- a/tests/extractors.py
+++ b/tests/extractors.py
@@ -345,11 +345,6 @@ def test_okaymarketing(self):
         fields = ['cleaned_text']
         self.runArticleAssertions(article=article, fields=fields)
 
-    def test_articlebody(self):
-        article = self.getArticle()
-        fields = ['cleaned_text']
-        self.runArticleAssertions(article=article, fields=fields)
-
     def test_opengraph(self):
         article = self.getArticle()
         fields = ['opengraph']
@@ -372,6 +367,24 @@ def test_issue115(self):
         self.runArticleAssertions(article=article, fields=fields)
 
 
+class TestArticleTopNode(TestExtractionBase):
+
+    def test_articlebody_itemprop(self):
+        article = self.getArticle()
+        fields = ['cleaned_text']
+        self.runArticleAssertions(article=article, fields=fields)
+
+    def test_articlebody_attribute(self):
+        article = self.getArticle()
+        fields = ['cleaned_text']
+        self.runArticleAssertions(article=article, fields=fields)
+
+    def test_articlebody_tag(self):
+        article = self.getArticle()
+        fields = ['cleaned_text']
+        self.runArticleAssertions(article=article, fields=fields)
+
+
 class TestPublishDate(TestExtractionBase):
 
     def test_publish_date(self):
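The new {'tag': 'article'} entry means a rule can now match on tag name alone, not only on an attribute/value pair. The dispatch is easy to follow in isolation; a sketch using plain dicts in place of lxml nodes (matches() and its arguments are illustrative, not goose API):

    KNOWN_ARTICLE_CONTENT_TAGS = [
        {'attr': 'itemprop', 'value': 'articleBody'},
        {'attr': 'class', 'value': 'post-content'},
        {'tag': 'article'},
    ]

    def matches(node_tag, node_attrs):
        # a node is an article body if any known rule matches it
        for item in KNOWN_ARTICLE_CONTENT_TAGS:
            if 'attr' in item and node_attrs.get(item['attr']) == item['value']:
                return True
            if 'tag' in item and node_tag == item['tag']:
                return True
        return False

    assert matches('article', {})
    assert matches('div', {'class': 'post-content'})
    assert not matches('div', {})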
""" -version_info = (1, 0, 22) +version_info = (1, 0, 23) __version__ = ".".join(map(str, version_info)) From dd33aab172138266c90940ee9bdcef1712996a22 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Tue, 30 Dec 2014 08:42:06 +0100 Subject: [PATCH 058/100] ignore egg files --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index bea68953..4bfadf57 100644 --- a/.gitignore +++ b/.gitignore @@ -8,5 +8,6 @@ env/ *~ .idea ._* +*.egg venv/ goose_extractor.egg-info/ From 3ebc97cffa40298bcad87e9113b42cb0b1704940 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Tue, 30 Dec 2014 09:04:51 +0100 Subject: [PATCH 059/100] #187 - empty list --- goose/extractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/goose/extractors.py b/goose/extractors.py index 9440aea6..10e32a21 100644 --- a/goose/extractors.py +++ b/goose/extractors.py @@ -36,7 +36,7 @@ ARROWS_SPLITTER = StringSplitter("»") COLON_SPLITTER = StringSplitter(":") SPACE_SPLITTER = StringSplitter(' ') -NO_STRINGS = set() +NO_STRINGS = [] A_REL_TAG_SELECTOR = "a[rel=tag]" A_HREF_TAG_SELECTOR = "a[href*='/tag/'], a[href*='/tags/'], a[href*='/topic/'], a[href*='?keyword=']" RE_LANG = r'^[A-Za-z]{2}$' From 8eccabf1338fd653cad6f2934db86ceed4492910 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 02:07:05 +0100 Subject: [PATCH 060/100] #188 - mv article extractor to extractors directory --- goose/{extractors.py => extractors/content.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename goose/{extractors.py => extractors/content.py} (100%) diff --git a/goose/extractors.py b/goose/extractors/content.py similarity index 100% rename from goose/extractors.py rename to goose/extractors/content.py From 731f104dbcd80b38176e7a073be90a59f10240a3 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 02:10:47 +0100 Subject: [PATCH 061/100] #188 - create a base extractor class --- goose/extractors/__init__.py | 38 ++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 goose/extractors/__init__.py diff --git a/goose/extractors/__init__.py b/goose/extractors/__init__.py new file mode 100644 index 00000000..5554efbc --- /dev/null +++ b/goose/extractors/__init__.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- +"""\ +This is a python port of "Goose" orignialy licensed to Gravity.com +under one or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. + +Python port was written by Xavier Grangier for Recrutae + +Gravity.com licenses this file +to you under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + + +class BaseExtractor(object): + + def __init__(self, config, article): + # config + self.config = config + + # parser + self.parser = self.config.get_parser() + + # article + self.article = article + + # stopwords class + self.stopwords_class = config.stopwords_class From 6ef3f68e29a78e5f4718d6191ef58bf962e600a3 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 02:12:02 +0100 Subject: [PATCH 062/100] #188 - contentextractor inherits form baseextractor --- goose/extractors/content.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/goose/extractors/content.py b/goose/extractors/content.py index 10e32a21..0053f3f1 100644 --- a/goose/extractors/content.py +++ b/goose/extractors/content.py @@ -23,6 +23,7 @@ import re from copy import deepcopy from urlparse import urlparse, urljoin +from goose.extractors import BaseExtractor from goose.utils import StringSplitter from goose.utils import StringReplacement from goose.utils import ReplaceSequence @@ -55,20 +56,7 @@ ] -class ContentExtractor(object): - - def __init__(self, config, article): - # config - self.config = config - - # parser - self.parser = self.config.get_parser() - - # article - self.article = article - - # stopwords class - self.stopwords_class = config.stopwords_class +class ContentExtractor(BaseExtractor): def get_language(self): """\ From bcf4654867b4915fe926613406208e78db8fb443 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 02:14:18 +0100 Subject: [PATCH 063/100] #188 - create specific extractors classes --- goose/extractors/meta.py | 28 ++++++++++++++++++++++++++++ goose/extractors/opengraph.py | 28 ++++++++++++++++++++++++++++ goose/extractors/publishdate.py | 28 ++++++++++++++++++++++++++++ goose/extractors/title.py | 28 ++++++++++++++++++++++++++++ 4 files changed, 112 insertions(+) create mode 100644 goose/extractors/meta.py create mode 100644 goose/extractors/opengraph.py create mode 100644 goose/extractors/publishdate.py create mode 100644 goose/extractors/title.py diff --git a/goose/extractors/meta.py b/goose/extractors/meta.py new file mode 100644 index 00000000..7a92df21 --- /dev/null +++ b/goose/extractors/meta.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +"""\ +This is a python port of "Goose" orignialy licensed to Gravity.com +under one or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. + +Python port was written by Xavier Grangier for Recrutae + +Gravity.com licenses this file +to you under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from goose.extractors import BaseExtractor + + +class ContentMetaExtractor(BaseExtractor): + pass diff --git a/goose/extractors/opengraph.py b/goose/extractors/opengraph.py new file mode 100644 index 00000000..ee916b82 --- /dev/null +++ b/goose/extractors/opengraph.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +"""\ +This is a python port of "Goose" orignialy licensed to Gravity.com +under one or more contributor license agreements. 
From 6ef3f68e29a78e5f4718d6191ef58bf962e600a3 Mon Sep 17 00:00:00 2001
From: Xavier Grangier
Date: Wed, 31 Dec 2014 02:12:02 +0100
Subject: [PATCH 062/100] #188 - contentextractor inherits from baseextractor

---
 goose/extractors/content.py | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/goose/extractors/content.py b/goose/extractors/content.py
index 10e32a21..0053f3f1 100644
--- a/goose/extractors/content.py
+++ b/goose/extractors/content.py
@@ -23,6 +23,7 @@
 import re
 from copy import deepcopy
 from urlparse import urlparse, urljoin
+from goose.extractors import BaseExtractor
 from goose.utils import StringSplitter
 from goose.utils import StringReplacement
 from goose.utils import ReplaceSequence
@@ -55,20 +56,7 @@
 ]
 
 
-class ContentExtractor(object):
-
-    def __init__(self, config, article):
-        # config
-        self.config = config
-
-        # parser
-        self.parser = self.config.get_parser()
-
-        # article
-        self.article = article
-
-        # stopwords class
-        self.stopwords_class = config.stopwords_class
+class ContentExtractor(BaseExtractor):
 
     def get_language(self):
         """\

From bcf4654867b4915fe926613406208e78db8fb443 Mon Sep 17 00:00:00 2001
From: Xavier Grangier
Date: Wed, 31 Dec 2014 02:14:18 +0100
Subject: [PATCH 063/100] #188 - create specific extractor classes

---
 goose/extractors/meta.py        | 28 ++++++++++++++++++++++++++++
 goose/extractors/opengraph.py   | 28 ++++++++++++++++++++++++++++
 goose/extractors/publishdate.py | 28 ++++++++++++++++++++++++++++
 goose/extractors/title.py       | 28 ++++++++++++++++++++++++++++
 4 files changed, 112 insertions(+)
 create mode 100644 goose/extractors/meta.py
 create mode 100644 goose/extractors/opengraph.py
 create mode 100644 goose/extractors/publishdate.py
 create mode 100644 goose/extractors/title.py

diff --git a/goose/extractors/meta.py b/goose/extractors/meta.py
new file mode 100644
index 00000000..7a92df21
--- /dev/null
+++ b/goose/extractors/meta.py
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+"""\
+This is a python port of "Goose" originally licensed to Gravity.com
+under one or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.
+
+Python port was written by Xavier Grangier for Recrutae
+
+Gravity.com licenses this file
+to you under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from goose.extractors import BaseExtractor
+
+
+class ContentMetaExtractor(BaseExtractor):
+    pass
diff --git a/goose/extractors/opengraph.py b/goose/extractors/opengraph.py
new file mode 100644
index 00000000..ee916b82
--- /dev/null
+++ b/goose/extractors/opengraph.py
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+"""\
+This is a python port of "Goose" originally licensed to Gravity.com
+under one or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.
+
+Python port was written by Xavier Grangier for Recrutae
+
+Gravity.com licenses this file
+to you under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from goose.extractors import BaseExtractor
+
+
+class ContentOpenGraphExtractor(BaseExtractor):
+    pass
diff --git a/goose/extractors/publishdate.py b/goose/extractors/publishdate.py
new file mode 100644
index 00000000..7ea1635a
--- /dev/null
+++ b/goose/extractors/publishdate.py
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+"""\
+This is a python port of "Goose" originally licensed to Gravity.com
+under one or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.
+
+Python port was written by Xavier Grangier for Recrutae
+
+Gravity.com licenses this file
+to you under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from goose.extractors import BaseExtractor
+
+
+class ContentPublishDateExtractor(BaseExtractor):
+    pass
diff --git a/goose/extractors/title.py b/goose/extractors/title.py
new file mode 100644
index 00000000..1afdb37e
--- /dev/null
+++ b/goose/extractors/title.py
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+"""\
+This is a python port of "Goose" originally licensed to Gravity.com
+under one or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.
+
+Python port was written by Xavier Grangier for Recrutae
+
+Gravity.com licenses this file
+to you under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from goose.extractors import BaseExtractor
+
+
+class ContentTitleExtractor(BaseExtractor):
+    pass
+""" + +from goose.extractors import BaseExtractor + + +class ContentTitleExtractor(BaseExtractor): + pass From cbbfba3c3544439675fac8b2d0de49b23530cf01 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 02:17:59 +0100 Subject: [PATCH 064/100] #188 - add tags and author extractors --- goose/extractors/author.py | 28 ++++++++++++++++++++++++++++ goose/extractors/tags.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 goose/extractors/author.py create mode 100644 goose/extractors/tags.py diff --git a/goose/extractors/author.py b/goose/extractors/author.py new file mode 100644 index 00000000..bc18925a --- /dev/null +++ b/goose/extractors/author.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +"""\ +This is a python port of "Goose" orignialy licensed to Gravity.com +under one or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. + +Python port was written by Xavier Grangier for Recrutae + +Gravity.com licenses this file +to you under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from goose.extractors import BaseExtractor + + +class ContentAuthorExtractor(BaseExtractor): + pass diff --git a/goose/extractors/tags.py b/goose/extractors/tags.py new file mode 100644 index 00000000..28f835ef --- /dev/null +++ b/goose/extractors/tags.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +"""\ +This is a python port of "Goose" orignialy licensed to Gravity.com +under one or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. + +Python port was written by Xavier Grangier for Recrutae + +Gravity.com licenses this file +to you under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +from goose.extractors import BaseExtractor + + +class ContentTagsExtractor(BaseExtractor): + pass From a957931ce9df9daf65007a5314f4e42521e733eb Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 02:20:28 +0100 Subject: [PATCH 065/100] #188 - correct import --- goose/crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/goose/crawler.py b/goose/crawler.py index 0c0eaa8f..cc25c5e9 100644 --- a/goose/crawler.py +++ b/goose/crawler.py @@ -25,7 +25,7 @@ from copy import deepcopy from goose.article import Article from goose.utils import URLHelper, RawHelper -from goose.extractors import StandardContentExtractor +from goose.extractors.content import StandardContentExtractor from goose.cleaners import StandardDocumentCleaner from goose.outputformatters import StandardOutputFormatter from goose.images.extractors import UpgradedImageIExtractor From 8d6d49e94e8de50b692c10631e9d649948c944e8 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 02:31:03 +0100 Subject: [PATCH 066/100] #188 - move video to extractor directory --- goose/crawler.py | 3 ++- .../{videos/extractors.py => extractors/videos.py} | 14 ++++---------- goose/{videos => }/videos.py | 0 goose/videos/__init__.py | 0 4 files changed, 6 insertions(+), 11 deletions(-) rename goose/{videos/extractors.py => extractors/videos.py} (95%) rename goose/{videos => }/videos.py (100%) delete mode 100644 goose/videos/__init__.py diff --git a/goose/crawler.py b/goose/crawler.py index cc25c5e9..e77f9218 100644 --- a/goose/crawler.py +++ b/goose/crawler.py @@ -26,10 +26,11 @@ from goose.article import Article from goose.utils import URLHelper, RawHelper from goose.extractors.content import StandardContentExtractor +from goose.extractors.videos import VideoExtractor from goose.cleaners import StandardDocumentCleaner from goose.outputformatters import StandardOutputFormatter from goose.images.extractors import UpgradedImageIExtractor -from goose.videos.extractors import VideoExtractor + from goose.network import HtmlFetcher diff --git a/goose/videos/extractors.py b/goose/extractors/videos.py similarity index 95% rename from goose/videos/extractors.py rename to goose/extractors/videos.py index 71c52895..569b5f15 100644 --- a/goose/videos/extractors.py +++ b/goose/extractors/videos.py @@ -21,25 +21,19 @@ limitations under the License. 
""" -from goose.videos.videos import Video +from goose.extractors import BaseExtractor +from goose.videos import Video VIDEOS_TAGS = ['iframe', 'embed', 'object', 'video'] VIDEO_PROVIDERS = ['youtube', 'vimeo', 'dailymotion', 'kewego'] -class VideoExtractor(object): +class VideoExtractor(BaseExtractor): """\ Extracts a list of video from Article top node """ def __init__(self, config, article): - # article - self.article = article - - # config - self.config = config - - # parser - self.parser = self.config.get_parser() + super(VideoExtractor, self).__init__(config, article) # candidates self.candidates = [] diff --git a/goose/videos/videos.py b/goose/videos.py similarity index 100% rename from goose/videos/videos.py rename to goose/videos.py diff --git a/goose/videos/__init__.py b/goose/videos/__init__.py deleted file mode 100644 index e69de29b..00000000 From ab81954745cf57e172c7b31d1bf4eeb314a34959 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 02:41:22 +0100 Subject: [PATCH 067/100] #188 - move images extractor to extractors dir and correct videos --- goose/crawler.py | 2 +- goose/{images/extractors.py => extractors/images.py} | 4 ++-- goose/extractors/videos.py | 2 +- goose/{images => }/image.py | 0 goose/images/__init__.py | 0 goose/{images/utils.py => utils/images.py} | 4 ++-- goose/{videos.py => video.py} | 0 tests/images.py | 7 ++++--- 8 files changed, 10 insertions(+), 9 deletions(-) rename goose/{images/extractors.py => extractors/images.py} (99%) rename goose/{images => }/image.py (100%) delete mode 100644 goose/images/__init__.py rename goose/{images/utils.py => utils/images.py} (97%) rename goose/{videos.py => video.py} (100%) diff --git a/goose/crawler.py b/goose/crawler.py index e77f9218..4f360822 100644 --- a/goose/crawler.py +++ b/goose/crawler.py @@ -27,9 +27,9 @@ from goose.utils import URLHelper, RawHelper from goose.extractors.content import StandardContentExtractor from goose.extractors.videos import VideoExtractor +from goose.extractors.images import UpgradedImageIExtractor from goose.cleaners import StandardDocumentCleaner from goose.outputformatters import StandardOutputFormatter -from goose.images.extractors import UpgradedImageIExtractor from goose.network import HtmlFetcher diff --git a/goose/images/extractors.py b/goose/extractors/images.py similarity index 99% rename from goose/images/extractors.py rename to goose/extractors/images.py index 4372ae8c..1cf9af09 100644 --- a/goose/images/extractors.py +++ b/goose/extractors/images.py @@ -24,8 +24,8 @@ import os from urlparse import urlparse, urljoin from goose.utils import FileHelper -from goose.images.image import Image -from goose.images.utils import ImageUtils +from goose.image import Image +from goose.utils.images import ImageUtils KNOWN_IMG_DOM_NAMES = [ "yn-story-related-media", diff --git a/goose/extractors/videos.py b/goose/extractors/videos.py index 569b5f15..88fdf20d 100644 --- a/goose/extractors/videos.py +++ b/goose/extractors/videos.py @@ -22,7 +22,7 @@ """ from goose.extractors import BaseExtractor -from goose.videos import Video +from goose.video import Video VIDEOS_TAGS = ['iframe', 'embed', 'object', 'video'] VIDEO_PROVIDERS = ['youtube', 'vimeo', 'dailymotion', 'kewego'] diff --git a/goose/images/image.py b/goose/image.py similarity index 100% rename from goose/images/image.py rename to goose/image.py diff --git a/goose/images/__init__.py b/goose/images/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/goose/images/utils.py b/goose/utils/images.py 
From 9597fe152b45b685862b746ff5acbb71f89c05e9 Mon Sep 17 00:00:00 2001
From: Xavier Grangier
Date: Wed, 31 Dec 2014 02:45:59 +0100
Subject: [PATCH 068/100] #188 - rename UpgradedImageIExtractor to
 ImageExtractor

---
 goose/crawler.py           | 4 ++--
 goose/extractors/images.py | 4 ----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/goose/crawler.py b/goose/crawler.py
index 4f360822..4d2518ac 100644
--- a/goose/crawler.py
+++ b/goose/crawler.py
@@ -27,7 +27,7 @@
 from goose.utils import URLHelper, RawHelper
 from goose.extractors.content import StandardContentExtractor
 from goose.extractors.videos import VideoExtractor
-from goose.extractors.images import UpgradedImageIExtractor
+from goose.extractors.images import ImageExtractor
 from goose.cleaners import StandardDocumentCleaner
 from goose.outputformatters import StandardOutputFormatter
 
@@ -176,7 +176,7 @@ def get_html(self, crawl_candidate, parsing_candidate):
         return html
 
     def get_image_extractor(self):
-        return UpgradedImageIExtractor(self.config, self.article)
+        return ImageExtractor(self.config, self.article)
 
     def get_video_extractor(self):
         return VideoExtractor(self.config, self.article)
diff --git a/goose/extractors/images.py b/goose/extractors/images.py
index 1cf9af09..b3396b32 100644
--- a/goose/extractors/images.py
+++ b/goose/extractors/images.py
@@ -44,10 +44,6 @@ def __init__(self, node, parent_depth, sibling_depth):
         self.sibling_depth = sibling_depth
 
 
 class ImageExtractor(object):
-    pass
-
-
-class UpgradedImageIExtractor(ImageExtractor):
 
     def __init__(self, config, article):
         self.custom_site_mapping = {}

From a5e96e74ca387fdd8996a602c32e05bd33fb4339 Mon Sep 17 00:00:00 2001
From: Xavier Grangier
Date: Wed, 31 Dec 2014 02:50:40 +0100
Subject: [PATCH 069/100] #188 - ImageExtractor extends from BaseExtractor

---
 goose/extractors/images.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/goose/extractors/images.py b/goose/extractors/images.py
index b3396b32..3af44f5f 100644
--- a/goose/extractors/images.py
+++ b/goose/extractors/images.py
@@ -22,9 +22,12 @@
 """
 import re
 import os
+
 from urlparse import urlparse, urljoin
-from goose.utils import FileHelper
+
+from goose.extractors import BaseExtractor
 from goose.image import Image
+from goose.utils import FileHelper
 from goose.utils.images import ImageUtils
 
 KNOWN_IMG_DOM_NAMES = [
     "yn-story-related-media",
@@ -43,20 +46,14 @@ def __init__(self, node, parent_depth, sibling_depth):
         self.sibling_depth = sibling_depth
 
 
-class ImageExtractor(object):
+class ImageExtractor(BaseExtractor):
 
     def __init__(self, config, article):
-        self.custom_site_mapping = {}
-        self.load_customesite_mapping()
-
-        # article
-        self.article = article
+        super(ImageExtractor, self).__init__(config, article)
 
-        # config
-        self.config = config
+        self.custom_site_mapping = {}
 
-        # parser
-        self.parser = self.config.get_parser()
+        self.load_customesite_mapping()
 
         # What's the minimum bytes for an image we'd accept is
         self.images_min_bytes = 4000
From 0492fb8509f85c8e3c28c1e07b1ffcbe15c8952c Mon Sep 17 00:00:00 2001
From: Xavier Grangier
Date: Wed, 31 Dec 2014 03:02:45 +0100
Subject: [PATCH 070/100] #188 - move title extractor from content to title
 extractor class

---
 goose/crawler.py            |   9 +++-
 goose/extractors/content.py | 100 ++----------------------------------
 goose/extractors/title.py   |  75 ++++++++++++++++++++++++++-
 goose/utils/__init__.py     |  13 -----
 4 files changed, 84 insertions(+), 113 deletions(-)

diff --git a/goose/crawler.py b/goose/crawler.py
index 4d2518ac..e059e902 100644
--- a/goose/crawler.py
+++ b/goose/crawler.py
@@ -27,6 +27,7 @@
 from goose.utils import URLHelper, RawHelper
 from goose.extractors.content import StandardContentExtractor
 from goose.extractors.videos import VideoExtractor
+from goose.extractors.title import TitleExtractor
 from goose.extractors.images import ImageExtractor
 from goose.cleaners import StandardDocumentCleaner
 from goose.outputformatters import StandardOutputFormatter
@@ -67,6 +68,9 @@ def __init__(self, config):
         # video extractor
         self.video_extractor = self.get_video_extractor()
 
+        # title extractor
+        self.title_extractor = self.get_title_extractor()
+
         # image extrator
         self.image_extractor = self.get_image_extractor()
 
@@ -107,7 +111,7 @@ def crawl(self, crawl_candidate):
         self.article.domain = self.extractor.get_domain()
         self.article.tags = self.extractor.extract_tags()
         self.article.authors = self.extractor.extract_authors()
-        self.article.title = self.extractor.get_title()
+        self.article.title = self.title_extractor.extract()
 
         # check for known node as content body
         # if we find one force the article.doc to be the found node
@@ -175,6 +179,9 @@ def get_html(self, crawl_candidate, parsing_candidate):
         })
         return html
 
+    def get_title_extractor(self):
+        return TitleExtractor(self.config, self.article)
+
     def get_image_extractor(self):
         return ImageExtractor(self.config, self.article)
 
diff --git a/goose/extractors/content.py b/goose/extractors/content.py
index 0053f3f1..c580e844 100644
--- a/goose/extractors/content.py
+++ b/goose/extractors/content.py
@@ -21,22 +21,12 @@
 limitations under the License.
 """
 import re
+
 from copy import deepcopy
 from urlparse import urlparse, urljoin
+
 from goose.extractors import BaseExtractor
-from goose.utils import StringSplitter
-from goose.utils import StringReplacement
-from goose.utils import ReplaceSequence
-
-MOTLEY_REPLACEMENT = StringReplacement("�", "")
-ESCAPED_FRAGMENT_REPLACEMENT = StringReplacement(u"#!", u"?_escaped_fragment_=")
-TITLE_REPLACEMENTS = ReplaceSequence().create(u"»").append(u"»")
-TITLE_SPLITTERS = [u"|", u"-", u"»", u":"]
-PIPE_SPLITTER = StringSplitter("\\|")
-DASH_SPLITTER = StringSplitter(" - ")
-ARROWS_SPLITTER = StringSplitter("»")
-COLON_SPLITTER = StringSplitter(":")
-SPACE_SPLITTER = StringSplitter(' ')
+
 NO_STRINGS = []
 A_REL_TAG_SELECTOR = "a[rel=tag]"
 A_HREF_TAG_SELECTOR = "a[href*='/tag/'], a[href*='/tags/'], a[href*='/topic/'], a[href*='?keyword=']"
@@ -70,90 +60,6 @@ def get_language(self):
                 return self.article.meta_lang[:2]
         return self.config.target_language
 
-    def clean_title(self, title):
-        """Clean title with the use of og:site_name
-        in this case try to get ride of site name
-        and use TITLE_SPLITTERS to reformat title
-        """
-        # check if we have the site name in opengraph data
-        if "site_name" in self.article.opengraph.keys():
-            site_name = self.article.opengraph['site_name']
-            # remove the site name from title
-            title = title.replace(site_name, '').strip()
-
-        # try to remove the domain from url
-        if self.article.domain:
-            pattern = re.compile(self.article.domain, re.IGNORECASE)
-            title = pattern.sub("", title).strip()
-
-        # split the title in words
-        # TechCrunch | my wonderfull article
-        # my wonderfull article | TechCrunch
-        title_words = title.split()
-
-        # check if first letter is in TITLE_SPLITTERS
-        # if so remove it
-        if title_words[0] in TITLE_SPLITTERS:
-            title_words.pop(0)
-
-        # check if last letter is in TITLE_SPLITTERS
-        # if so remove it
-        if title_words[-1] in TITLE_SPLITTERS:
-            title_words.pop(-1)
-
-        # rebuild the title
-        title = u" ".join(title_words).strip()
-
-        return title
-
-    def get_title(self):
-        """\
-        Fetch the article title and analyze it
-        """
-        title = ''
-
-        # rely on opengraph in case we have the data
-        if "title" in self.article.opengraph.keys():
-            title = self.article.opengraph['title']
-            return self.clean_title(title)
-
-        # try to fetch the meta headline
-        meta_headline = self.parser.getElementsByTag(
-            self.article.doc,
-            tag="meta",
-            attr="name",
-            value="headline")
-        if meta_headline is not None and len(meta_headline) > 0:
-            title = self.parser.getAttribute(meta_headline[0], 'content')
-            return self.clean_title(title)
-
-        # otherwise use the title meta
-        title_element = self.parser.getElementsByTag(self.article.doc, tag='title')
-        if title_element is not None and len(title_element) > 0:
-            title = self.parser.getText(title_element[0])
-            return self.clean_title(title)
-
-        return title
-
-    def split_title(self, title, splitter):
-        """\
-        Split the title to best part possible
-        """
-        large_text_length = 0
-        large_text_index = 0
-        title_pieces = splitter.split(title)
-
-        # find the largest title piece
-        for i in range(len(title_pieces)):
-            current = title_pieces[i]
-            if len(current) > large_text_length:
-                large_text_length = len(current)
-                large_text_index = i
-
-        # replace content
-        title = title_pieces[large_text_index]
-        return TITLE_REPLACEMENTS.replaceAll(title).strip()
-
     def get_publish_date(self):
         for known_meta_tag in KNOWN_PUBLISH_DATE_TAGS:
             meta_tags = self.parser.getElementsByTag(self.article.doc,
diff --git a/goose/extractors/title.py b/goose/extractors/title.py
index 1afdb37e..8104c52b 100644
--- a/goose/extractors/title.py
+++ b/goose/extractors/title.py
@@ -20,9 +20,80 @@
 See the License for the specific language governing permissions and
 limitations under the License.
 """
+import re
 
 from goose.extractors import BaseExtractor
 
 
-class ContentTitleExtractor(BaseExtractor):
-    pass
+TITLE_SPLITTERS = [u"|", u"-", u"»", u":"]
+
+
+class TitleExtractor(BaseExtractor):
+
+    def clean_title(self, title):
+        """Clean title with the use of og:site_name
+        in this case try to get rid of site name
+        and use TITLE_SPLITTERS to reformat title
+        """
+        # check if we have the site name in opengraph data
+        if "site_name" in self.article.opengraph.keys():
+            site_name = self.article.opengraph['site_name']
+            # remove the site name from title
+            title = title.replace(site_name, '').strip()
+
+        # try to remove the domain from url
+        if self.article.domain:
+            pattern = re.compile(self.article.domain, re.IGNORECASE)
+            title = pattern.sub("", title).strip()
+
+        # split the title in words
+        # TechCrunch | my wonderful article
+        # my wonderful article | TechCrunch
+        title_words = title.split()
+
+        # check if first letter is in TITLE_SPLITTERS
+        # if so remove it
+        if title_words[0] in TITLE_SPLITTERS:
+            title_words.pop(0)
+
+        # check if last letter is in TITLE_SPLITTERS
+        # if so remove it
+        if title_words[-1] in TITLE_SPLITTERS:
+            title_words.pop(-1)
+
+        # rebuild the title
+        title = u" ".join(title_words).strip()
+
+        return title
+
+    def get_title(self):
+        """\
+        Fetch the article title and analyze it
+        """
+        title = ''
+
+        # rely on opengraph in case we have the data
+        if "title" in self.article.opengraph.keys():
+            title = self.article.opengraph['title']
+            return self.clean_title(title)
+
+        # try to fetch the meta headline
+        meta_headline = self.parser.getElementsByTag(
+            self.article.doc,
+            tag="meta",
+            attr="name",
+            value="headline")
+        if meta_headline is not None and len(meta_headline) > 0:
+            title = self.parser.getAttribute(meta_headline[0], 'content')
+            return self.clean_title(title)
+
+        # otherwise use the title meta
+        title_element = self.parser.getElementsByTag(self.article.doc, tag='title')
+        if title_element is not None and len(title_element) > 0:
+            title = self.parser.getText(title_element[0])
+            return self.clean_title(title)
+
+        return title
+
+    def extract(self):
+        return self.get_title()
diff --git a/goose/utils/__init__.py b/goose/utils/__init__.py
index a8be19b5..5a1de7d4 100644
--- a/goose/utils/__init__.py
+++ b/goose/utils/__init__.py
@@ -105,19 +105,6 @@ def get_parsing_candidate(self, url_to_crawl):
         return ParsingCandidate(final_url, link_hash)
 
 
-class StringSplitter(object):
-    """\
-
-    """
-    def __init__(self, pattern):
-        self.pattern = re.compile(pattern)
-
-    def split(self, string):
-        if not string:
-            return []
-        return self.pattern.split(string)
-
-
 class StringReplacement(object):
 
     def __init__(self, pattern, replaceWith):
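The splitter-trimming in clean_title is the subtle step: once the site name is removed, a dangling separator can be left at either end of the title. A standalone sketch of that step (the empty-list guards are an addition of this sketch, not goose behavior):

    TITLE_SPLITTERS = [u"|", u"-", u"»", u":"]

    def clean_title(title, site_name):
        title = title.replace(site_name, '').strip()
        words = title.split()
        # drop a separator left dangling at either end
        if words and words[0] in TITLE_SPLITTERS:
            words.pop(0)
        if words and words[-1] in TITLE_SPLITTERS:
            words.pop(-1)
        return u" ".join(words).strip()

    assert clean_title(u"My wonderful article | TechCrunch",
                       u"TechCrunch") == u"My wonderful article"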
""" import re + from copy import deepcopy from urlparse import urlparse, urljoin + from goose.extractors import BaseExtractor -from goose.utils import StringSplitter -from goose.utils import StringReplacement -from goose.utils import ReplaceSequence - -MOTLEY_REPLACEMENT = StringReplacement("�", "") -ESCAPED_FRAGMENT_REPLACEMENT = StringReplacement(u"#!", u"?_escaped_fragment_=") -TITLE_REPLACEMENTS = ReplaceSequence().create(u"»").append(u"»") -TITLE_SPLITTERS = [u"|", u"-", u"»", u":"] -PIPE_SPLITTER = StringSplitter("\\|") -DASH_SPLITTER = StringSplitter(" - ") -ARROWS_SPLITTER = StringSplitter("»") -COLON_SPLITTER = StringSplitter(":") -SPACE_SPLITTER = StringSplitter(' ') + NO_STRINGS = [] A_REL_TAG_SELECTOR = "a[rel=tag]" A_HREF_TAG_SELECTOR = "a[href*='/tag/'], a[href*='/tags/'], a[href*='/topic/'], a[href*='?keyword=']" @@ -70,90 +60,6 @@ def get_language(self): return self.article.meta_lang[:2] return self.config.target_language - def clean_title(self, title): - """Clean title with the use of og:site_name - in this case try to get ride of site name - and use TITLE_SPLITTERS to reformat title - """ - # check if we have the site name in opengraph data - if "site_name" in self.article.opengraph.keys(): - site_name = self.article.opengraph['site_name'] - # remove the site name from title - title = title.replace(site_name, '').strip() - - # try to remove the domain from url - if self.article.domain: - pattern = re.compile(self.article.domain, re.IGNORECASE) - title = pattern.sub("", title).strip() - - # split the title in words - # TechCrunch | my wonderfull article - # my wonderfull article | TechCrunch - title_words = title.split() - - # check if first letter is in TITLE_SPLITTERS - # if so remove it - if title_words[0] in TITLE_SPLITTERS: - title_words.pop(0) - - # check if last letter is in TITLE_SPLITTERS - # if so remove it - if title_words[-1] in TITLE_SPLITTERS: - title_words.pop(-1) - - # rebuild the title - title = u" ".join(title_words).strip() - - return title - - def get_title(self): - """\ - Fetch the article title and analyze it - """ - title = '' - - # rely on opengraph in case we have the data - if "title" in self.article.opengraph.keys(): - title = self.article.opengraph['title'] - return self.clean_title(title) - - # try to fetch the meta headline - meta_headline = self.parser.getElementsByTag( - self.article.doc, - tag="meta", - attr="name", - value="headline") - if meta_headline is not None and len(meta_headline) > 0: - title = self.parser.getAttribute(meta_headline[0], 'content') - return self.clean_title(title) - - # otherwise use the title meta - title_element = self.parser.getElementsByTag(self.article.doc, tag='title') - if title_element is not None and len(title_element) > 0: - title = self.parser.getText(title_element[0]) - return self.clean_title(title) - - return title - - def split_title(self, title, splitter): - """\ - Split the title to best part possible - """ - large_text_length = 0 - large_text_index = 0 - title_pieces = splitter.split(title) - - # find the largest title piece - for i in range(len(title_pieces)): - current = title_pieces[i] - if len(current) > large_text_length: - large_text_length = len(current) - large_text_index = i - - # replace content - title = title_pieces[large_text_index] - return TITLE_REPLACEMENTS.replaceAll(title).strip() - def get_publish_date(self): for known_meta_tag in KNOWN_PUBLISH_DATE_TAGS: meta_tags = self.parser.getElementsByTag(self.article.doc, diff --git a/goose/extractors/title.py b/goose/extractors/title.py 
From 2608e43591c0b3eeff7477e7dafe1643356607bb Mon Sep 17 00:00:00 2001
From: Xavier Grangier
Date: Wed, 31 Dec 2014 03:11:04 +0100
Subject: [PATCH 072/100] #188 - move tweet extraction to TweetsExtractor class

---
 goose/crawler.py            |  9 +++++++-
 goose/extractors/content.py | 15 -------------
 goose/extractors/tweets.py  | 42 +++++++++++++++++++++++++++++++++++++
 3 files changed, 50 insertions(+), 16 deletions(-)
 create mode 100644 goose/extractors/tweets.py

diff --git a/goose/crawler.py b/goose/crawler.py
index 8482318f..297b4ff8 100644
--- a/goose/crawler.py
+++ b/goose/crawler.py
@@ -30,6 +30,7 @@
 from goose.extractors.title import TitleExtractor
 from goose.extractors.images import ImageExtractor
 from goose.extractors.links import LinksExtractor
+from goose.extractors.tweets import TweetsExtractor
 from goose.cleaners import StandardDocumentCleaner
 from goose.outputformatters import StandardOutputFormatter
 
@@ -66,6 +67,9 @@ def __init__(self, config):
         # init the output formatter
         self.formatter = self.get_formatter()
 
+        # tweets extractor
+        self.tweets_extractor = self.get_tweets_extractor()
+
         # links extractor
         self.links_extractor = self.get_links_extractor()
 
@@ -138,7 +142,7 @@ def crawl(self, crawl_candidate):
             self.article.links = self.links_extractor.extract()
 
             # tweets
-            self.article.tweets = self.extractor.extract_tweets()
+            self.article.tweets = self.tweets_extractor.extract()
 
             # video handling
             self.video_extractor.get_videos()
@@ -183,6 +187,9 @@ def get_html(self, crawl_candidate, parsing_candidate):
         })
         return html
 
+    def get_tweets_extractor(self):
+        return TweetsExtractor(self.config, self.article)
+
     def get_links_extractor(self):
         return LinksExtractor(self.config, self.article)
 
diff --git a/goose/extractors/content.py b/goose/extractors/content.py
index 74fe230a..d0442b97 100644
--- a/goose/extractors/content.py
+++ b/goose/extractors/content.py
@@ -192,21 +192,6 @@ def extract_opengraph(self):
                 opengraph_dict.update({attr.split(":")[1]: value})
         return opengraph_dict
 
-    def extract_tweets(self):
-        tweets = []
-        items = self.parser.getElementsByTag(
-            self.article.top_node,
-            tag='blockquote',
-            attr="class",
-            value="twitter-tweet")
-
-        for i in items:
-            for attr in ['gravityScore', 'gravityNodes']:
-                self.parser.delAttribute(i, attr)
-            tweets.append(self.parser.nodeToString(i))
-
-        return tweets
-
     def extract_authors(self):
diff --git a/goose/extractors/tweets.py b/goose/extractors/tweets.py
new file mode 100644
index 00000000..3c17ad8d
--- /dev/null
+++ b/goose/extractors/tweets.py
@@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+"""\
+This is a python port of "Goose" originally licensed to Gravity.com
+under one or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.
+
+Python port was written by Xavier Grangier for Recrutae
+
+Gravity.com licenses this file
+to you under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from goose.extractors import BaseExtractor
+
+
+class TweetsExtractor(BaseExtractor):
+
+    def extract(self):
+        tweets = []
+        items = self.parser.getElementsByTag(
+            self.article.top_node,
+            tag='blockquote',
+            attr="class",
+            value="twitter-tweet")
+
+        for i in items:
+            for attr in ['gravityScore', 'gravityNodes']:
+                self.parser.delAttribute(i, attr)
+            tweets.append(self.parser.nodeToString(i))
+
+        return tweets
From 4de0f4b0ed54c857bde89af131606f7a30b34846 Mon Sep 17 00:00:00 2001
From: Xavier Grangier
Date: Wed, 31 Dec 2014 03:13:46 +0100
Subject: [PATCH 073/100] #188 - move authors extraction to AuthorsExtractor
 class

---
 goose/crawler.py            | 13 ++++++++++++-
 goose/extractors/author.py  | 22 ++++++++++++++++++++--
 goose/extractors/content.py | 19 -------------------
 3 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/goose/crawler.py b/goose/crawler.py
index 297b4ff8..53f56283 100644
--- a/goose/crawler.py
+++ b/goose/crawler.py
@@ -31,6 +31,7 @@
 from goose.extractors.images import ImageExtractor
 from goose.extractors.links import LinksExtractor
 from goose.extractors.tweets import TweetsExtractor
+from goose.extractors.authors import AuthorsExtractor
 from goose.cleaners import StandardDocumentCleaner
 from goose.outputformatters import StandardOutputFormatter
 
@@ -67,6 +68,9 @@ def __init__(self, config):
         # init the output formatter
         self.formatter = self.get_formatter()
 
+        # authors extractor
+        self.authors_extractor = self.get_authors_extractor()
+
         # tweets extractor
         self.tweets_extractor = self.get_tweets_extractor()
 
@@ -118,7 +122,11 @@ def crawl(self, crawl_candidate):
         self.article.canonical_link = self.extractor.get_canonical_link()
         self.article.domain = self.extractor.get_domain()
         self.article.tags = self.extractor.extract_tags()
-        self.article.authors = self.extractor.extract_authors()
+
+        # authors
+        self.article.authors = self.authors_extractor.extract()
+
+        # title
         self.article.title = self.title_extractor.extract()
 
@@ -187,6 +195,9 @@ def get_html(self, crawl_candidate, parsing_candidate):
         })
         return html
 
+    def get_authors_extractor(self):
+        return AuthorsExtractor(self.config, self.article)
+
     def get_tweets_extractor(self):
         return TweetsExtractor(self.config, self.article)
 
diff --git a/goose/extractors/author.py b/goose/extractors/author.py
index bc18925a..414f4eea 100644
--- a/goose/extractors/author.py
+++ b/goose/extractors/author.py
@@ -24,5 +24,23 @@
 from goose.extractors import BaseExtractor
 
 
-class ContentAuthorExtractor(BaseExtractor):
-    pass
+class AuthorsExtractor(BaseExtractor):
+
+    def extract(self):
+        authors = []
+        author_nodes = self.parser.getElementsByTag(
+            self.article.doc,
+            attr='itemprop',
+            value='author')
+
+        for author in author_nodes:
+            name_nodes = self.parser.getElementsByTag(
+                author,
+                attr='itemprop',
+                value='name')
+
+            if len(name_nodes) > 0:
+                name = self.parser.getText(name_nodes[0])
+                authors.append(name)
+
+        return list(set(authors))
diff --git a/goose/extractors/content.py b/goose/extractors/content.py
index d0442b97..4b4e894a 100644
--- a/goose/extractors/content.py
+++ b/goose/extractors/content.py
@@ -192,25 +192,6 @@ def extract_opengraph(self):
                 opengraph_dict.update({attr.split(":")[1]: value})
         return opengraph_dict
 
-    def extract_authors(self):
-        authors = []
-        author_nodes = self.parser.getElementsByTag(
-            self.article.doc,
-            attr='itemprop',
-            value='author')
-
-        for author in author_nodes:
-            name_nodes = self.parser.getElementsByTag(
-                author,
-                attr='itemprop',
-                value='name')
-
-            if len(name_nodes) > 0:
-                name = self.parser.getText(name_nodes[0])
-                authors.append(name)
-
-        return list(set(authors))
-
     def extract_tags(self):
         node = self.article.doc
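The itemprop lookup is two nested microdata queries: find author scopes, then name nodes inside each. A rough equivalent with plain lxml and XPath (the HTML snippet is made up):

    import lxml.html

    html = '''
    <div itemprop="author">
      <span itemprop="name">Jane Doe</span>
    </div>
    '''
    doc = lxml.html.fromstring(html)
    names = [n.text_content().strip()
             for n in doc.xpath('//*[@itemprop="author"]//*[@itemprop="name"]')]
    assert names == ['Jane Doe']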
+""" + +from goose.extractors import BaseExtractor + + +class LinksExtractor(BaseExtractor): + + def extract(self): + links = [] + items = self.parser.getElementsByTag(self.article.top_node, 'a') + for i in items: + attr = self.parser.getAttribute(i, 'href') + if attr: + links.append(attr) + return links From 2608e43591c0b3eeff7477e7dafe1643356607bb Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 03:11:04 +0100 Subject: [PATCH 072/100] #188 - move tweet extraction to TweetExtractor class --- goose/crawler.py | 9 +++++++- goose/extractors/content.py | 15 ------------- goose/extractors/tweets.py | 42 +++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 16 deletions(-) create mode 100644 goose/extractors/tweets.py diff --git a/goose/crawler.py b/goose/crawler.py index 8482318f..297b4ff8 100644 --- a/goose/crawler.py +++ b/goose/crawler.py @@ -30,6 +30,7 @@ from goose.extractors.title import TitleExtractor from goose.extractors.images import ImageExtractor from goose.extractors.links import LinksExtractor +from goose.extractors.tweets import TweetsExtractor from goose.cleaners import StandardDocumentCleaner from goose.outputformatters import StandardOutputFormatter @@ -66,6 +67,9 @@ def __init__(self, config): # init the output formatter self.formatter = self.get_formatter() + # tweets extractor + self.tweets_extractor = self.get_tweets_extractor() + # links extractor self.links_extractor = self.get_links_extractor() @@ -138,7 +142,7 @@ def crawl(self, crawl_candidate): self.article.links = self.links_extractor.extract() # tweets - self.article.tweets = self.extractor.extract_tweets() + self.article.tweets = self.tweets_extractor.extract() # video handling self.video_extractor.get_videos() @@ -183,6 +187,9 @@ def get_html(self, crawl_candidate, parsing_candidate): }) return html + def get_tweets_extractor(self): + return TweetsExtractor(self.config, self.article) + def get_links_extractor(self): return LinksExtractor(self.config, self.article) diff --git a/goose/extractors/content.py b/goose/extractors/content.py index 74fe230a..d0442b97 100644 --- a/goose/extractors/content.py +++ b/goose/extractors/content.py @@ -192,21 +192,6 @@ def extract_opengraph(self): opengraph_dict.update({attr.split(":")[1]: value}) return opengraph_dict - def extract_tweets(self): - tweets = [] - items = self.parser.getElementsByTag( - self.article.top_node, - tag='blockquote', - attr="class", - value="twitter-tweet") - - for i in items: - for attr in ['gravityScore', 'gravityNodes']: - self.parser.delAttribute(i, attr) - tweets.append(self.parser.nodeToString(i)) - - return tweets - def extract_authors(self): authors = [] author_nodes = self.parser.getElementsByTag( diff --git a/goose/extractors/tweets.py b/goose/extractors/tweets.py new file mode 100644 index 00000000..3c17ad8d --- /dev/null +++ b/goose/extractors/tweets.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +"""\ +This is a python port of "Goose" orignialy licensed to Gravity.com +under one or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. + +Python port was written by Xavier Grangier for Recrutae + +Gravity.com licenses this file +to you under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance +with the License. 
From 1cb9ed44aeeab1619c14d0859dbfd5ab2f80c3b6 Mon Sep 17 00:00:00 2001
From: Xavier Grangier
Date: Wed, 31 Dec 2014 03:23:10 +0100
Subject: [PATCH 076/100] #188 - move opengraph extraction to
 OpenGraphExtractor class

---
 goose/crawler.py              | 12 +++++++++++-
 goose/extractors/content.py   | 11 -----------
 goose/extractors/opengraph.py | 14 ++++++++++++--
 3 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/goose/crawler.py b/goose/crawler.py
index 048dc83c..416e93a8 100644
--- a/goose/crawler.py
+++ b/goose/crawler.py
@@ -33,6 +33,7 @@
 from goose.extractors.tweets import TweetsExtractor
 from goose.extractors.authors import AuthorsExtractor
 from goose.extractors.tags import TagsExtractor
+from goose.extractors.opengraph import OpenGraphExtractor
 from goose.cleaners import StandardDocumentCleaner
 from goose.outputformatters import StandardOutputFormatter
 
@@ -69,6 +70,9 @@ def __init__(self, config):
         # init the output formatter
         self.formatter = self.get_formatter()
 
+        # opengraph_ extractor
+        self.opengraph_extractor = self.get_opengraph_extractor()
+
         # tags extractor
         self.tags_extractor = self.get_tags_extractor()
 
@@ -116,7 +120,10 @@ def crawl(self, crawl_candidate):
         self.article.raw_html = raw_html
         self.article.doc = doc
         self.article.raw_doc = deepcopy(doc)
-        self.article.opengraph = self.extractor.extract_opengraph()
+
+        # open graph
+        self.article.opengraph = self.opengraph_extractor.extract()
+
         self.article.publish_date = self.extractor.get_publish_date()
 
@@ -201,6 +208,9 @@ def get_html(self, crawl_candidate, parsing_candidate):
         })
         return html
 
+    def get_opengraph_extractor(self):
+        return OpenGraphExtractor(self.config, self.article)
+
     def get_tags_extractor(self):
         return TagsExtractor(self.config, self.article)
 
diff --git a/goose/extractors/content.py b/goose/extractors/content.py
index db88a406..832cbc21 100644
--- a/goose/extractors/content.py
+++ b/goose/extractors/content.py
@@ -178,17 +178,6 @@ def is_articlebody(self, node):
 
         return False
 
-    def extract_opengraph(self):
-        opengraph_dict = {}
-        node = self.article.doc
-        metas = self.parser.getElementsByTag(node, 'meta')
-        for meta in metas:
-            attr = self.parser.getAttribute(meta, 'property')
-            if attr is not None and attr.startswith("og:"):
-                value = self.parser.getAttribute(meta, 'content')
-                opengraph_dict.update({attr.split(":")[1]: value})
-        return opengraph_dict
-
     def calculate_best_node(self):
         doc = self.article.doc
 
diff --git a/goose/extractors/opengraph.py b/goose/extractors/opengraph.py
index ee916b82..a52ac349 100644
--- a/goose/extractors/opengraph.py
+++ b/goose/extractors/opengraph.py
@@ -24,5 +24,15 @@
 from goose.extractors import BaseExtractor
 
 
-class ContentOpenGraphExtractor(BaseExtractor):
-    pass
+class OpenGraphExtractor(BaseExtractor):
+
+    def extract(self):
+        opengraph_dict = {}
+        node = self.article.doc
+        metas = self.parser.getElementsByTag(node, 'meta')
+        for meta in metas:
+            attr = self.parser.getAttribute(meta, 'property')
+            if attr is not None and attr.startswith("og:"):
+                value = self.parser.getAttribute(meta, 'content')
+                opengraph_dict.update({attr.split(":")[1]: value})
+        return opengraph_dict
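The extract() loop keys the result dict on whatever follows the og: prefix. The same walk over a raw lxml tree, with a made-up head section:

    import lxml.html

    doc = lxml.html.fromstring(
        '<html><head>'
        '<meta property="og:title" content="My title"/>'
        '<meta property="og:site_name" content="Example"/>'
        '</head><body></body></html>')
    og = {}
    for meta in doc.xpath('//meta'):
        prop = meta.get('property')
        if prop is not None and prop.startswith('og:'):
            og[prop.split(':')[1]] = meta.get('content')
    assert og == {'title': 'My title', 'site_name': 'Example'}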
From 8320262dbcb8bdad9c65e6dac08903eea140ddec Mon Sep 17 00:00:00 2001
From: Xavier Grangier
Date: Wed, 31 Dec 2014 03:32:12 +0100
Subject: [PATCH 077/100] #188 - move publishdate extraction to
 PublishDateExtractor class

---
 goose/crawler.py                | 13 +++++++++++--
 goose/extractors/content.py     | 15 ---------------
 goose/extractors/publishdate.py | 22 ++++++++++++++++++++--
 3 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/goose/crawler.py b/goose/crawler.py
index 416e93a8..ea5a5221 100644
--- a/goose/crawler.py
+++ b/goose/crawler.py
@@ -34,6 +34,7 @@
 from goose.extractors.authors import AuthorsExtractor
 from goose.extractors.tags import TagsExtractor
 from goose.extractors.opengraph import OpenGraphExtractor
+from goose.extractors.publishdate import PublishDateExtractor
 from goose.cleaners import StandardDocumentCleaner
 from goose.outputformatters import StandardOutputFormatter
 
@@ -70,7 +71,10 @@ def __init__(self, config):
         # init the output formatter
         self.formatter = self.get_formatter()
 
-        # opengraph_ extractor
+        # publishdate extractor
+        self.publishdate_extractor = self.get_publishdate_extractor()
+
+        # opengraph extractor
         self.opengraph_extractor = self.get_opengraph_extractor()
 
@@ -124,7 +128,9 @@ def crawl(self, crawl_candidate):
         # open graph
         self.article.opengraph = self.opengraph_extractor.extract()
 
-        self.article.publish_date = self.extractor.get_publish_date()
+        # publishdate
+        self.article.publish_date = self.publishdate_extractor.extract()
+
         # self.article.additional_data = config.get_additionaldata_extractor.extract(doc)
         self.article.meta_lang = self.extractor.get_meta_lang()
 
@@ -208,6 +214,9 @@ def get_html(self, crawl_candidate, parsing_candidate):
         })
         return html
 
+    def get_publishdate_extractor(self):
+        return PublishDateExtractor(self.config, self.article)
+
     def get_opengraph_extractor(self):
         return OpenGraphExtractor(self.config, self.article)
 
diff --git a/goose/extractors/content.py b/goose/extractors/content.py
index 832cbc21..3ca40a82 100644
--- a/goose/extractors/content.py
+++ b/goose/extractors/content.py
@@ -29,13 +29,6 @@
 RE_LANG = r'^[A-Za-z]{2}$'
 
-KNOWN_PUBLISH_DATE_TAGS = [
-    {'attribute': 'property', 'value': 'rnews:datePublished', 'content': 'content'},
-    {'attribute': 'property', 'value': 'article:published_time', 'content': 'content'},
-    {'attribute': 'name', 'value': 'OriginalPublicationDate', 'content': 'content'},
-    {'attribute': 'itemprop', 'value': 'datePublished', 'content': 'datetime'},
-]
-
 KNOWN_ARTICLE_CONTENT_TAGS = [
     {'attr': 'itemprop', 'value': 'articleBody'},
     {'attr': 'class', 'value': 'post-content'},
@@ -50,14 +43,6 @@ def get_language(self):
                 return self.article.meta_lang[:2]
         return self.config.target_language
 
-    def get_publish_date(self):
-        for known_meta_tag in KNOWN_PUBLISH_DATE_TAGS:
-            meta_tags = self.parser.getElementsByTag(self.article.doc,
-                attr=known_meta_tag['attribute'],
-                value=known_meta_tag['value'])
-            if meta_tags:
-                return self.parser.getAttribute(meta_tags[0], known_meta_tag['content'])
-
     def get_favicon(self):
         """\
         Extract the favicon from a website
diff --git a/goose/extractors/publishdate.py b/goose/extractors/publishdate.py
index 7ea1635a..1768b1a0 100644
--- a/goose/extractors/publishdate.py
+++ b/goose/extractors/publishdate.py
@@ -23,6 +23,24 @@
 
 from goose.extractors import BaseExtractor
 
+KNOWN_PUBLISH_DATE_TAGS = [
+    {'attribute': 'property', 'value': 'rnews:datePublished', 'content': 'content'},
+    {'attribute': 'property', 'value': 'article:published_time', 'content': 'content'},
+    {'attribute': 'name', 'value': 'OriginalPublicationDate', 'content': 'content'},
+    {'attribute': 'itemprop', 'value': 'datePublished', 'content': 'datetime'},
+]
 
-class ContentPublishDateExtractor(BaseExtractor):
-    pass
+
+class PublishDateExtractor(BaseExtractor):
+    def extract(self):
+        for known_meta_tag in KNOWN_PUBLISH_DATE_TAGS:
+            meta_tags = self.parser.getElementsByTag(
+                self.article.doc,
+                attr=known_meta_tag['attribute'],
+                value=known_meta_tag['value'])
+            if meta_tags:
+                return self.parser.getAttribute(
+                    meta_tags[0],
+                    known_meta_tag['content']
+                )
+        return None
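Each rule names the attribute to match, the value it must carry, and the attribute holding the date, so the first matching rule wins. The same lookup against a raw lxml tree, using two of the rules above (the page snippet is made up):

    import lxml.html

    doc = lxml.html.fromstring(
        '<html><head><meta property="article:published_time" '
        'content="2014-12-30T08:00:00Z"/></head><body></body></html>')

    rules = [
        {'attribute': 'property', 'value': 'article:published_time', 'content': 'content'},
        {'attribute': 'itemprop', 'value': 'datePublished', 'content': 'datetime'},
    ]
    date = None
    for rule in rules:
        hits = doc.xpath('//*[@%s="%s"]' % (rule['attribute'], rule['value']))
        if hits:
            date = hits[0].get(rule['content'])
            break
    assert date == '2014-12-30T08:00:00Z'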
From 08fd6b975147fc86018f07346c09de19e682ca55 Mon Sep 17 00:00:00 2001
From: Xavier Grangier
Date: Wed, 31 Dec 2014 05:07:38 +0100
Subject: [PATCH 078/100] #188 - move meta extraction to MetasExtractor class

---
 goose/crawler.py            |  22 +++++---
 goose/extractors/content.py |  89 ------------------------
 goose/extractors/meta.py    | 104 +++++++++++++++++++++++++++++++++++-
 3 files changed, 118 insertions(+), 97 deletions(-)

diff --git a/goose/crawler.py b/goose/crawler.py
index ea5a5221..fd577405 100644
--- a/goose/crawler.py
+++ b/goose/crawler.py
@@ -35,6 +35,7 @@
 from goose.extractors.tags import TagsExtractor
 from goose.extractors.opengraph import OpenGraphExtractor
 from goose.extractors.publishdate import PublishDateExtractor
+from goose.extractors.metas import MetasExtractor
 from goose.cleaners import StandardDocumentCleaner
 from goose.outputformatters import StandardOutputFormatter
 
@@ -71,6 +72,9 @@ def __init__(self, config):
         # init the output formatter
         self.formatter = self.get_formatter()
 
+        # metas extractor
+        self.metas_extractor = self.get_metas_extractor()
+
         # publishdate extractor
         self.publishdate_extractor = self.get_publishdate_extractor()
 
@@ -131,12 +135,15 @@ def crawl(self, crawl_candidate):
         # publishdate
         self.article.publish_date = self.publishdate_extractor.extract()
 
-        # self.article.additional_data = config.get_additionaldata_extractor.extract(doc)
-        self.article.meta_lang = self.extractor.get_meta_lang()
-        self.article.meta_favicon = self.extractor.get_favicon()
-        self.article.meta_description = self.extractor.get_meta_description()
-        self.article.meta_keywords = self.extractor.get_meta_keywords()
-        self.article.canonical_link = self.extractor.get_canonical_link()
+        # meta
+        metas = self.metas_extractor.extract()
+        self.article.meta_lang = metas['lang']
+        self.article.meta_favicon = metas['favicon']
+        self.article.meta_description = metas['description']
+        self.article.meta_keywords = metas['keywords']
+        self.article.canonical_link = metas['canonical']
+
+        # domain
         self.article.domain = self.extractor.get_domain()
 
         # tags
@@ -214,6 +221,9 @@ def get_html(self, crawl_candidate, parsing_candidate):
         })
         return html
 
+    def get_metas_extractor(self):
+        return MetasExtractor(self.config, self.article)
+
     def get_publishdate_extractor(self):
         return PublishDateExtractor(self.config, self.article)
 
diff --git a/goose/extractors/content.py b/goose/extractors/content.py
index 3ca40a82..557840f4 100644
--- a/goose/extractors/content.py
+++ b/goose/extractors/content.py
@@ -20,14 +20,10 @@
 See the License for the specific language governing permissions and
 limitations under the License.
 """
-import re
-
 from copy import deepcopy
-from urlparse import urlparse, urljoin
+
 from goose.extractors import BaseExtractor
 
-RE_LANG = r'^[A-Za-z]{2}$'
 
 KNOWN_ARTICLE_CONTENT_TAGS = [
     {'attr': 'itemprop', 'value': 'articleBody'},
     {'attr': 'class', 'value': 'post-content'},
@@ -50,91 +46,6 @@ def get_language(self):
                 return self.article.meta_lang[:2]
         return self.config.target_language
 
-    def get_favicon(self):
-        """\
-        Extract the favicon from a website
-        http://en.wikipedia.org/wiki/Favicon
-
-
-        """
-        kwargs = {'tag': 'link', 'attr': 'rel', 'value': 'icon'}
-        meta = self.parser.getElementsByTag(self.article.doc, **kwargs)
-        if meta:
-            favicon = self.parser.getAttribute(meta[0], 'href')
-            return favicon
-        return ''
-
-    def get_meta_lang(self):
-        """\
-        Extract content language from meta
-        """
-        # we have a lang attribute in html
-        attr = self.parser.getAttribute(self.article.doc, attr='lang')
-        if attr is None:
-            # look up for a Content-Language in meta
-            items = [
-                {'tag': 'meta', 'attr': 'http-equiv', 'value': 'content-language'},
-                {'tag': 'meta', 'attr': 'name', 'value': 'lang'}
-            ]
-            for item in items:
-                meta = self.parser.getElementsByTag(self.article.doc, **item)
-                if meta:
-                    attr = self.parser.getAttribute(meta[0], attr='content')
-                    break
-
-        if attr:
-            value = attr[:2]
-            if re.search(RE_LANG, value):
-                return value.lower()
-
-        return None
-
-    def get_meta_content(self, doc, metaName):
-        """\
-        Extract a given meta content form document
-        """
-        meta = self.parser.css_select(doc, metaName)
-        content = None
-
-        if meta is not None and len(meta) > 0:
-            content = self.parser.getAttribute(meta[0], 'content')
-
-        if content:
-            return content.strip()
-
-        return ''
-
-    def get_meta_description(self):
-        """\
-        if the article has meta description set in the source, use that
-        """
-        return self.get_meta_content(self.article.doc, "meta[name=description]")
-
-    def get_meta_keywords(self):
-        """\
-        if the article has meta keywords set in the source, use that
-        """
-        return self.get_meta_content(self.article.doc, "meta[name=keywords]")
-
-    def get_canonical_link(self):
-        """\
-        if the article has meta canonical link set in the url
-        """
-        if self.article.final_url:
-            kwargs = {'tag': 'link', 'attr': 'rel', 'value': 'canonical'}
-            meta = self.parser.getElementsByTag(self.article.doc, **kwargs)
-            if meta is not None and len(meta) > 0:
-                href = self.parser.getAttribute(meta[0], 'href')
-                if href:
-                    href = href.strip()
-                    o = urlparse(href)
-                    if not o.hostname:
-                        z = urlparse(self.article.final_url)
-                        domain = '%s://%s' % (z.scheme, z.hostname)
-                        href = urljoin(domain, href)
-                    return href
-        return self.article.final_url
-
     def get_domain(self):
         if self.article.final_url:
             o = urlparse(self.article.final_url)
diff --git a/goose/extractors/meta.py b/goose/extractors/meta.py
index
7a92df21..efde6714 100644 --- a/goose/extractors/meta.py +++ b/goose/extractors/meta.py @@ -21,8 +21,108 @@ limitations under the License. """ +import re +from urlparse import urljoin +from urlparse import urlparse + from goose.extractors import BaseExtractor -class ContentMetaExtractor(BaseExtractor): - pass +RE_LANG = r'^[A-Za-z]{2}$' + + +class MetasExtractor(BaseExtractor): + + def get_favicon(self): + """\ + Extract the favicon from a website + http://en.wikipedia.org/wiki/Favicon + + + """ + kwargs = {'tag': 'link', 'attr': 'rel', 'value': 'icon'} + meta = self.parser.getElementsByTag(self.article.doc, **kwargs) + if meta: + favicon = self.parser.getAttribute(meta[0], 'href') + return favicon + return '' + + def get_canonical_link(self): + """\ + if the article has meta canonical link set in the url + """ + if self.article.final_url: + kwargs = {'tag': 'link', 'attr': 'rel', 'value': 'canonical'} + meta = self.parser.getElementsByTag(self.article.doc, **kwargs) + if meta is not None and len(meta) > 0: + href = self.parser.getAttribute(meta[0], 'href') + if href: + href = href.strip() + o = urlparse(href) + if not o.hostname: + z = urlparse(self.article.final_url) + domain = '%s://%s' % (z.scheme, z.hostname) + href = urljoin(domain, href) + return href + return self.article.final_url + + def get_meta_lang(self): + """\ + Extract content language from meta + """ + # we have a lang attribute in html + attr = self.parser.getAttribute(self.article.doc, attr='lang') + if attr is None: + # look up for a Content-Language in meta + items = [ + {'tag': 'meta', 'attr': 'http-equiv', 'value': 'content-language'}, + {'tag': 'meta', 'attr': 'name', 'value': 'lang'} + ] + for item in items: + meta = self.parser.getElementsByTag(self.article.doc, **item) + if meta: + attr = self.parser.getAttribute(meta[0], attr='content') + break + + if attr: + value = attr[:2] + if re.search(RE_LANG, value): + return value.lower() + + return None + + def get_meta_content(self, metaName): + """\ + Extract a given meta content from document + """ + meta = self.parser.css_select(self.article.doc, metaName) + content = None + + if meta is not None and len(meta) > 0: + content = self.parser.getAttribute(meta[0], 'content') + + if content: + return content.strip() + + return '' + + def get_meta_description(self): + """\ + if the article has meta description set in the source, use that + """ + return self.get_meta_content("meta[name=description]") + + def get_meta_keywords(self): + """\ + if the article has meta keywords set in the source, use that + """ + return self.get_meta_content("meta[name=keywords]") + + def extract(self): + return { + "description": self.get_meta_description(), + "keywords": self.get_meta_keywords(), + "lang": self.get_meta_lang(), + "favicon": self.get_favicon(), + "canonical": self.get_canonical_link() + } From 45843417a7565268557a64afe9601214b3797263 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 05:08:08 +0100 Subject: [PATCH 079/100] #188 - rename meta extractor file --- goose/extractors/{meta.py => metas.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename goose/extractors/{meta.py => metas.py} (100%) diff --git a/goose/extractors/meta.py b/goose/extractors/metas.py similarity index 100% rename from goose/extractors/meta.py rename to goose/extractors/metas.py From 530ab522e422e71e6b456fc22b77c084539f3fe0 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 05:12:41 +0100 Subject: [PATCH 080/100] #188 - move domain extraction to meta 
extractor --- goose/crawler.py | 4 +--- goose/extractors/content.py | 6 ------ goose/extractors/metas.py | 9 ++++++++- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/goose/crawler.py b/goose/crawler.py index fd577405..34daf048 100644 --- a/goose/crawler.py +++ b/goose/crawler.py @@ -142,9 +142,7 @@ def crawl(self, crawl_candidate): self.article.meta_description = metas['description'] self.article.meta_keywords = metas['keywords'] self.article.canonical_link = metas['canonical'] - - # domain - self.article.domain = self.extractor.get_domain() + self.article.domain = metas['domain'] # tags self.article.tags = self.tags_extractor.extract() diff --git a/goose/extractors/content.py b/goose/extractors/content.py index 557840f4..e0703d55 100644 --- a/goose/extractors/content.py +++ b/goose/extractors/content.py @@ -46,12 +46,6 @@ def get_language(self): return self.article.meta_lang[:2] return self.config.target_language - def get_domain(self): - if self.article.final_url: - o = urlparse(self.article.final_url) - return o.hostname - return None - def get_known_article_tags(self): for item in KNOWN_ARTICLE_CONTENT_TAGS: nodes = self.parser.getElementsByTag( diff --git a/goose/extractors/metas.py b/goose/extractors/metas.py index efde6714..95acadd5 100644 --- a/goose/extractors/metas.py +++ b/goose/extractors/metas.py @@ -33,6 +33,12 @@ class MetasExtractor(BaseExtractor): + def get_domain(self): + if self.article.final_url: + o = urlparse(self.article.final_url) + return o.hostname + return None + def get_favicon(self): """\ Extract the favicon from a website @@ -124,5 +130,6 @@ def extract(self): "keywords": self.get_meta_keywords(), "lang": self.get_meta_lang(), "favicon": self.get_favicon(), - "canonical": self.get_canonical_link() + "canonical": self.get_canonical_link(), + "domain": self.get_domain() } From 49f50b00a67d6a2ae8b1896b1592b123b0b0aa01 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 06:17:39 +0100 Subject: [PATCH 081/100] #188 - move test files --- .../{ => content}/test_allnewlyrics1.html | 0 .../{ => content}/test_allnewlyrics1.json | 0 .../{ => content}/test_aolNews.html | 0 .../{ => content}/test_aolNews.json | 0 .../test_articlebody_attribute.html | 0 .../test_articlebody_attribute.json | 0 .../test_articlebody_itemprop.html | 0 .../test_articlebody_itemprop.json | 0 .../{ => content}/test_articlebody_tag.html | 0 .../{ => content}/test_articlebody_tag.json | 0 .../{ => content}/test_author_schema.html | 0 .../{ => content}/test_author_schema.json | 0 .../{ => content}/test_bbc_chinese.html | 0 .../{ => content}/test_bbc_chinese.json | 0 .../{ => content}/test_businessWeek1.html | 0 .../{ => content}/test_businessWeek1.json | 0 .../{ => content}/test_businessWeek2.html | 0 .../{ => content}/test_businessWeek2.json | 0 .../{ => content}/test_businessWeek3.html | 0 .../{ => content}/test_businessWeek3.json | 0 .../{ => content}/test_businessinsider3.html | 0 .../{ => content}/test_businessinsider3.json | 0 .../{ => content}/test_cbslocal.html | 0 .../{ => content}/test_cbslocal.json | 0 .../extractors/{ => content}/test_cnbc1.html | 0 .../extractors/{ => content}/test_cnbc1.json | 0 .../extractors/{ => content}/test_cnet.html | 0 .../extractors/{ => content}/test_cnet.json | 0 .../extractors/{ => content}/test_cnn1.html | 0 .../extractors/{ => content}/test_cnn1.json | 0 .../{ => content}/test_cnn_arabic.html | 0 .../{ => content}/test_cnn_arabic.json | 0 .../{ => content}/test_donga_korean.html | 0 .../{ => content}/test_donga_korean.json 
| 0 .../{ => content}/test_elmondo1.html | 0 .../{ => content}/test_elmondo1.json | 0 .../extractors/{ => content}/test_elpais.html | 0 .../extractors/{ => content}/test_elpais.json | 0 .../{ => content}/test_engadget.html | 0 .../{ => content}/test_engadget.json | 0 .../extractors/{ => content}/test_espn.html | 0 .../extractors/{ => content}/test_espn.json | 0 .../{ => content}/test_foxNews.html | 0 .../{ => content}/test_foxNews.json | 0 .../{ => content}/test_get_canonical_url.html | 0 .../{ => content}/test_get_canonical_url.json | 0 .../{ => content}/test_gizmodo1.html | 0 .../{ => content}/test_gizmodo1.json | 0 .../{ => content}/test_guardian1.html | 0 .../{ => content}/test_guardian1.json | 0 .../{ => content}/test_huffingtonPost2.html | 0 .../{ => content}/test_huffingtonPost2.json | 0 .../{ => content}/test_issue115.html | 0 .../{ => content}/test_issue115.json | 0 .../{ => content}/test_issue129.html | 0 .../{ => content}/test_issue129.json | 0 .../{ => content}/test_issue24.html | 0 .../{ => content}/test_issue24.json | 0 .../{ => content}/test_issue25.html | 0 .../{ => content}/test_issue25.json | 0 .../{ => content}/test_issue28.html | 0 .../{ => content}/test_issue28.json | 0 .../{ => content}/test_issue32.html | 0 .../{ => content}/test_issue32.json | 0 .../extractors/{ => content}/test_issue4.html | 0 .../extractors/{ => content}/test_issue4.json | 0 .../{ => content}/test_lefigaro.html | 0 .../{ => content}/test_lefigaro.json | 0 .../{ => content}/test_liberation.html | 0 .../{ => content}/test_liberation.json | 0 .../extractors/{ => content}/test_links.html | 0 .../extractors/{ => content}/test_links.json | 0 .../{ => content}/test_marketplace.html | 0 .../{ => content}/test_marketplace.json | 0 .../{ => content}/test_mashable_issue_74.html | 0 .../{ => content}/test_mashable_issue_74.json | 0 .../extractors/{ => content}/test_msn1.html | 0 .../extractors/{ => content}/test_msn1.json | 0 .../{ => content}/test_okaymarketing.html | 0 .../{ => content}/test_okaymarketing.json | 0 .../{ => content}/test_opengraph.html | 0 .../{ => content}/test_opengraph.json | 0 .../{ => content}/test_politico.html | 0 .../{ => content}/test_politico.json | 0 .../{ => content}/test_publish_date.html | 0 .../{ => content}/test_publish_date.json | 0 .../test_publish_date_article.html | 0 .../test_publish_date_article.json | 0 .../test_publish_date_rnews.html | 0 .../test_publish_date_rnews.json | 0 .../test_publish_date_schema.html | 0 .../test_publish_date_schema.json | 0 .../{ => content}/test_tags_abcau.html | 0 .../{ => content}/test_tags_abcau.json | 0 .../{ => content}/test_tags_cnet.html | 0 .../{ => content}/test_tags_cnet.json | 0 .../{ => content}/test_tags_deadline.html | 0 .../{ => content}/test_tags_deadline.json | 0 .../{ => content}/test_tags_kexp.html | 0 .../{ => content}/test_tags_kexp.json | 0 .../{ => content}/test_tags_wnyc.html | 0 .../{ => content}/test_tags_wnyc.json | 0 .../{ => content}/test_techcrunch1.html | 0 .../{ => content}/test_techcrunch1.json | 0 .../test_testHuffingtonPost.html | 0 .../test_testHuffingtonPost.json | 0 .../extractors/{ => content}/test_time.html | 0 .../extractors/{ => content}/test_time.json | 0 .../extractors/{ => content}/test_time2.html | 0 .../extractors/{ => content}/test_time2.json | 0 .../{ => content}/test_title_opengraph.html | 0 .../{ => content}/test_title_opengraph.json | 0 .../extractors/{ => content}/test_tweet.html | 0 .../extractors/{ => content}/test_tweet.json | 0 .../{ => content}/test_usatoday_issue_74.html | 0 .../{ => 
content}/test_usatoday_issue_74.json | 0 .../extractors/{ => content}/test_yahoo.html | 0 .../extractors/{ => content}/test_yahoo.json | 0 .../{ => extractors}/videos/test_embed.html | 0 .../{ => extractors}/videos/test_embed.json | 0 .../{ => extractors}/videos/test_iframe.html | 0 .../{ => extractors}/videos/test_iframe.json | 0 .../{ => extractors}/videos/test_object.html | 0 .../{ => extractors}/videos/test_object.json | 0 tests/extractors/__init__.py | 0 tests/extractors/base.py | 252 ++++++++++++++++++ .../{extractors.py => extractors/content.py} | 0 tests/{ => extractors}/images.py | 0 tests/{ => extractors}/videos.py | 0 129 files changed, 252 insertions(+) rename tests/data/extractors/{ => content}/test_allnewlyrics1.html (100%) rename tests/data/extractors/{ => content}/test_allnewlyrics1.json (100%) rename tests/data/extractors/{ => content}/test_aolNews.html (100%) rename tests/data/extractors/{ => content}/test_aolNews.json (100%) rename tests/data/extractors/{ => content}/test_articlebody_attribute.html (100%) rename tests/data/extractors/{ => content}/test_articlebody_attribute.json (100%) rename tests/data/extractors/{ => content}/test_articlebody_itemprop.html (100%) rename tests/data/extractors/{ => content}/test_articlebody_itemprop.json (100%) rename tests/data/extractors/{ => content}/test_articlebody_tag.html (100%) rename tests/data/extractors/{ => content}/test_articlebody_tag.json (100%) rename tests/data/extractors/{ => content}/test_author_schema.html (100%) rename tests/data/extractors/{ => content}/test_author_schema.json (100%) rename tests/data/extractors/{ => content}/test_bbc_chinese.html (100%) rename tests/data/extractors/{ => content}/test_bbc_chinese.json (100%) rename tests/data/extractors/{ => content}/test_businessWeek1.html (100%) rename tests/data/extractors/{ => content}/test_businessWeek1.json (100%) rename tests/data/extractors/{ => content}/test_businessWeek2.html (100%) rename tests/data/extractors/{ => content}/test_businessWeek2.json (100%) rename tests/data/extractors/{ => content}/test_businessWeek3.html (100%) rename tests/data/extractors/{ => content}/test_businessWeek3.json (100%) rename tests/data/extractors/{ => content}/test_businessinsider3.html (100%) rename tests/data/extractors/{ => content}/test_businessinsider3.json (100%) rename tests/data/extractors/{ => content}/test_cbslocal.html (100%) rename tests/data/extractors/{ => content}/test_cbslocal.json (100%) rename tests/data/extractors/{ => content}/test_cnbc1.html (100%) rename tests/data/extractors/{ => content}/test_cnbc1.json (100%) rename tests/data/extractors/{ => content}/test_cnet.html (100%) rename tests/data/extractors/{ => content}/test_cnet.json (100%) rename tests/data/extractors/{ => content}/test_cnn1.html (100%) rename tests/data/extractors/{ => content}/test_cnn1.json (100%) rename tests/data/extractors/{ => content}/test_cnn_arabic.html (100%) rename tests/data/extractors/{ => content}/test_cnn_arabic.json (100%) rename tests/data/extractors/{ => content}/test_donga_korean.html (100%) rename tests/data/extractors/{ => content}/test_donga_korean.json (100%) rename tests/data/extractors/{ => content}/test_elmondo1.html (100%) rename tests/data/extractors/{ => content}/test_elmondo1.json (100%) rename tests/data/extractors/{ => content}/test_elpais.html (100%) rename tests/data/extractors/{ => content}/test_elpais.json (100%) rename tests/data/extractors/{ => content}/test_engadget.html (100%) rename tests/data/extractors/{ => content}/test_engadget.json (100%) 
rename tests/data/extractors/{ => content}/test_espn.html (100%) rename tests/data/extractors/{ => content}/test_espn.json (100%) rename tests/data/extractors/{ => content}/test_foxNews.html (100%) rename tests/data/extractors/{ => content}/test_foxNews.json (100%) rename tests/data/extractors/{ => content}/test_get_canonical_url.html (100%) rename tests/data/extractors/{ => content}/test_get_canonical_url.json (100%) rename tests/data/extractors/{ => content}/test_gizmodo1.html (100%) rename tests/data/extractors/{ => content}/test_gizmodo1.json (100%) rename tests/data/extractors/{ => content}/test_guardian1.html (100%) rename tests/data/extractors/{ => content}/test_guardian1.json (100%) rename tests/data/extractors/{ => content}/test_huffingtonPost2.html (100%) rename tests/data/extractors/{ => content}/test_huffingtonPost2.json (100%) rename tests/data/extractors/{ => content}/test_issue115.html (100%) rename tests/data/extractors/{ => content}/test_issue115.json (100%) rename tests/data/extractors/{ => content}/test_issue129.html (100%) rename tests/data/extractors/{ => content}/test_issue129.json (100%) rename tests/data/extractors/{ => content}/test_issue24.html (100%) rename tests/data/extractors/{ => content}/test_issue24.json (100%) rename tests/data/extractors/{ => content}/test_issue25.html (100%) rename tests/data/extractors/{ => content}/test_issue25.json (100%) rename tests/data/extractors/{ => content}/test_issue28.html (100%) rename tests/data/extractors/{ => content}/test_issue28.json (100%) rename tests/data/extractors/{ => content}/test_issue32.html (100%) rename tests/data/extractors/{ => content}/test_issue32.json (100%) rename tests/data/extractors/{ => content}/test_issue4.html (100%) rename tests/data/extractors/{ => content}/test_issue4.json (100%) rename tests/data/extractors/{ => content}/test_lefigaro.html (100%) rename tests/data/extractors/{ => content}/test_lefigaro.json (100%) rename tests/data/extractors/{ => content}/test_liberation.html (100%) rename tests/data/extractors/{ => content}/test_liberation.json (100%) rename tests/data/extractors/{ => content}/test_links.html (100%) rename tests/data/extractors/{ => content}/test_links.json (100%) rename tests/data/extractors/{ => content}/test_marketplace.html (100%) rename tests/data/extractors/{ => content}/test_marketplace.json (100%) rename tests/data/extractors/{ => content}/test_mashable_issue_74.html (100%) rename tests/data/extractors/{ => content}/test_mashable_issue_74.json (100%) rename tests/data/extractors/{ => content}/test_msn1.html (100%) rename tests/data/extractors/{ => content}/test_msn1.json (100%) rename tests/data/extractors/{ => content}/test_okaymarketing.html (100%) rename tests/data/extractors/{ => content}/test_okaymarketing.json (100%) rename tests/data/extractors/{ => content}/test_opengraph.html (100%) rename tests/data/extractors/{ => content}/test_opengraph.json (100%) rename tests/data/extractors/{ => content}/test_politico.html (100%) rename tests/data/extractors/{ => content}/test_politico.json (100%) rename tests/data/extractors/{ => content}/test_publish_date.html (100%) rename tests/data/extractors/{ => content}/test_publish_date.json (100%) rename tests/data/extractors/{ => content}/test_publish_date_article.html (100%) rename tests/data/extractors/{ => content}/test_publish_date_article.json (100%) rename tests/data/extractors/{ => content}/test_publish_date_rnews.html (100%) rename tests/data/extractors/{ => content}/test_publish_date_rnews.json (100%) rename 
tests/data/extractors/{ => content}/test_publish_date_schema.html (100%) rename tests/data/extractors/{ => content}/test_publish_date_schema.json (100%) rename tests/data/extractors/{ => content}/test_tags_abcau.html (100%) rename tests/data/extractors/{ => content}/test_tags_abcau.json (100%) rename tests/data/extractors/{ => content}/test_tags_cnet.html (100%) rename tests/data/extractors/{ => content}/test_tags_cnet.json (100%) rename tests/data/extractors/{ => content}/test_tags_deadline.html (100%) rename tests/data/extractors/{ => content}/test_tags_deadline.json (100%) rename tests/data/extractors/{ => content}/test_tags_kexp.html (100%) rename tests/data/extractors/{ => content}/test_tags_kexp.json (100%) rename tests/data/extractors/{ => content}/test_tags_wnyc.html (100%) rename tests/data/extractors/{ => content}/test_tags_wnyc.json (100%) rename tests/data/extractors/{ => content}/test_techcrunch1.html (100%) rename tests/data/extractors/{ => content}/test_techcrunch1.json (100%) rename tests/data/extractors/{ => content}/test_testHuffingtonPost.html (100%) rename tests/data/extractors/{ => content}/test_testHuffingtonPost.json (100%) rename tests/data/extractors/{ => content}/test_time.html (100%) rename tests/data/extractors/{ => content}/test_time.json (100%) rename tests/data/extractors/{ => content}/test_time2.html (100%) rename tests/data/extractors/{ => content}/test_time2.json (100%) rename tests/data/extractors/{ => content}/test_title_opengraph.html (100%) rename tests/data/extractors/{ => content}/test_title_opengraph.json (100%) rename tests/data/extractors/{ => content}/test_tweet.html (100%) rename tests/data/extractors/{ => content}/test_tweet.json (100%) rename tests/data/extractors/{ => content}/test_usatoday_issue_74.html (100%) rename tests/data/extractors/{ => content}/test_usatoday_issue_74.json (100%) rename tests/data/extractors/{ => content}/test_yahoo.html (100%) rename tests/data/extractors/{ => content}/test_yahoo.json (100%) rename tests/data/{ => extractors}/videos/test_embed.html (100%) rename tests/data/{ => extractors}/videos/test_embed.json (100%) rename tests/data/{ => extractors}/videos/test_iframe.html (100%) rename tests/data/{ => extractors}/videos/test_iframe.json (100%) rename tests/data/{ => extractors}/videos/test_object.html (100%) rename tests/data/{ => extractors}/videos/test_object.json (100%) create mode 100644 tests/extractors/__init__.py create mode 100644 tests/extractors/base.py rename tests/{extractors.py => extractors/content.py} (100%) rename tests/{ => extractors}/images.py (100%) rename tests/{ => extractors}/videos.py (100%) diff --git a/tests/data/extractors/test_allnewlyrics1.html b/tests/data/extractors/content/test_allnewlyrics1.html similarity index 100% rename from tests/data/extractors/test_allnewlyrics1.html rename to tests/data/extractors/content/test_allnewlyrics1.html diff --git a/tests/data/extractors/test_allnewlyrics1.json b/tests/data/extractors/content/test_allnewlyrics1.json similarity index 100% rename from tests/data/extractors/test_allnewlyrics1.json rename to tests/data/extractors/content/test_allnewlyrics1.json diff --git a/tests/data/extractors/test_aolNews.html b/tests/data/extractors/content/test_aolNews.html similarity index 100% rename from tests/data/extractors/test_aolNews.html rename to tests/data/extractors/content/test_aolNews.html diff --git a/tests/data/extractors/test_aolNews.json b/tests/data/extractors/content/test_aolNews.json similarity index 100% rename from 
tests/data/extractors/test_aolNews.json rename to tests/data/extractors/content/test_aolNews.json diff --git a/tests/data/extractors/test_articlebody_attribute.html b/tests/data/extractors/content/test_articlebody_attribute.html similarity index 100% rename from tests/data/extractors/test_articlebody_attribute.html rename to tests/data/extractors/content/test_articlebody_attribute.html diff --git a/tests/data/extractors/test_articlebody_attribute.json b/tests/data/extractors/content/test_articlebody_attribute.json similarity index 100% rename from tests/data/extractors/test_articlebody_attribute.json rename to tests/data/extractors/content/test_articlebody_attribute.json diff --git a/tests/data/extractors/test_articlebody_itemprop.html b/tests/data/extractors/content/test_articlebody_itemprop.html similarity index 100% rename from tests/data/extractors/test_articlebody_itemprop.html rename to tests/data/extractors/content/test_articlebody_itemprop.html diff --git a/tests/data/extractors/test_articlebody_itemprop.json b/tests/data/extractors/content/test_articlebody_itemprop.json similarity index 100% rename from tests/data/extractors/test_articlebody_itemprop.json rename to tests/data/extractors/content/test_articlebody_itemprop.json diff --git a/tests/data/extractors/test_articlebody_tag.html b/tests/data/extractors/content/test_articlebody_tag.html similarity index 100% rename from tests/data/extractors/test_articlebody_tag.html rename to tests/data/extractors/content/test_articlebody_tag.html diff --git a/tests/data/extractors/test_articlebody_tag.json b/tests/data/extractors/content/test_articlebody_tag.json similarity index 100% rename from tests/data/extractors/test_articlebody_tag.json rename to tests/data/extractors/content/test_articlebody_tag.json diff --git a/tests/data/extractors/test_author_schema.html b/tests/data/extractors/content/test_author_schema.html similarity index 100% rename from tests/data/extractors/test_author_schema.html rename to tests/data/extractors/content/test_author_schema.html diff --git a/tests/data/extractors/test_author_schema.json b/tests/data/extractors/content/test_author_schema.json similarity index 100% rename from tests/data/extractors/test_author_schema.json rename to tests/data/extractors/content/test_author_schema.json diff --git a/tests/data/extractors/test_bbc_chinese.html b/tests/data/extractors/content/test_bbc_chinese.html similarity index 100% rename from tests/data/extractors/test_bbc_chinese.html rename to tests/data/extractors/content/test_bbc_chinese.html diff --git a/tests/data/extractors/test_bbc_chinese.json b/tests/data/extractors/content/test_bbc_chinese.json similarity index 100% rename from tests/data/extractors/test_bbc_chinese.json rename to tests/data/extractors/content/test_bbc_chinese.json diff --git a/tests/data/extractors/test_businessWeek1.html b/tests/data/extractors/content/test_businessWeek1.html similarity index 100% rename from tests/data/extractors/test_businessWeek1.html rename to tests/data/extractors/content/test_businessWeek1.html diff --git a/tests/data/extractors/test_businessWeek1.json b/tests/data/extractors/content/test_businessWeek1.json similarity index 100% rename from tests/data/extractors/test_businessWeek1.json rename to tests/data/extractors/content/test_businessWeek1.json diff --git a/tests/data/extractors/test_businessWeek2.html b/tests/data/extractors/content/test_businessWeek2.html similarity index 100% rename from tests/data/extractors/test_businessWeek2.html rename to 
tests/data/extractors/content/test_businessWeek2.html diff --git a/tests/data/extractors/test_businessWeek2.json b/tests/data/extractors/content/test_businessWeek2.json similarity index 100% rename from tests/data/extractors/test_businessWeek2.json rename to tests/data/extractors/content/test_businessWeek2.json diff --git a/tests/data/extractors/test_businessWeek3.html b/tests/data/extractors/content/test_businessWeek3.html similarity index 100% rename from tests/data/extractors/test_businessWeek3.html rename to tests/data/extractors/content/test_businessWeek3.html diff --git a/tests/data/extractors/test_businessWeek3.json b/tests/data/extractors/content/test_businessWeek3.json similarity index 100% rename from tests/data/extractors/test_businessWeek3.json rename to tests/data/extractors/content/test_businessWeek3.json diff --git a/tests/data/extractors/test_businessinsider3.html b/tests/data/extractors/content/test_businessinsider3.html similarity index 100% rename from tests/data/extractors/test_businessinsider3.html rename to tests/data/extractors/content/test_businessinsider3.html diff --git a/tests/data/extractors/test_businessinsider3.json b/tests/data/extractors/content/test_businessinsider3.json similarity index 100% rename from tests/data/extractors/test_businessinsider3.json rename to tests/data/extractors/content/test_businessinsider3.json diff --git a/tests/data/extractors/test_cbslocal.html b/tests/data/extractors/content/test_cbslocal.html similarity index 100% rename from tests/data/extractors/test_cbslocal.html rename to tests/data/extractors/content/test_cbslocal.html diff --git a/tests/data/extractors/test_cbslocal.json b/tests/data/extractors/content/test_cbslocal.json similarity index 100% rename from tests/data/extractors/test_cbslocal.json rename to tests/data/extractors/content/test_cbslocal.json diff --git a/tests/data/extractors/test_cnbc1.html b/tests/data/extractors/content/test_cnbc1.html similarity index 100% rename from tests/data/extractors/test_cnbc1.html rename to tests/data/extractors/content/test_cnbc1.html diff --git a/tests/data/extractors/test_cnbc1.json b/tests/data/extractors/content/test_cnbc1.json similarity index 100% rename from tests/data/extractors/test_cnbc1.json rename to tests/data/extractors/content/test_cnbc1.json diff --git a/tests/data/extractors/test_cnet.html b/tests/data/extractors/content/test_cnet.html similarity index 100% rename from tests/data/extractors/test_cnet.html rename to tests/data/extractors/content/test_cnet.html diff --git a/tests/data/extractors/test_cnet.json b/tests/data/extractors/content/test_cnet.json similarity index 100% rename from tests/data/extractors/test_cnet.json rename to tests/data/extractors/content/test_cnet.json diff --git a/tests/data/extractors/test_cnn1.html b/tests/data/extractors/content/test_cnn1.html similarity index 100% rename from tests/data/extractors/test_cnn1.html rename to tests/data/extractors/content/test_cnn1.html diff --git a/tests/data/extractors/test_cnn1.json b/tests/data/extractors/content/test_cnn1.json similarity index 100% rename from tests/data/extractors/test_cnn1.json rename to tests/data/extractors/content/test_cnn1.json diff --git a/tests/data/extractors/test_cnn_arabic.html b/tests/data/extractors/content/test_cnn_arabic.html similarity index 100% rename from tests/data/extractors/test_cnn_arabic.html rename to tests/data/extractors/content/test_cnn_arabic.html diff --git a/tests/data/extractors/test_cnn_arabic.json b/tests/data/extractors/content/test_cnn_arabic.json 
similarity index 100% rename from tests/data/extractors/test_cnn_arabic.json rename to tests/data/extractors/content/test_cnn_arabic.json diff --git a/tests/data/extractors/test_donga_korean.html b/tests/data/extractors/content/test_donga_korean.html similarity index 100% rename from tests/data/extractors/test_donga_korean.html rename to tests/data/extractors/content/test_donga_korean.html diff --git a/tests/data/extractors/test_donga_korean.json b/tests/data/extractors/content/test_donga_korean.json similarity index 100% rename from tests/data/extractors/test_donga_korean.json rename to tests/data/extractors/content/test_donga_korean.json diff --git a/tests/data/extractors/test_elmondo1.html b/tests/data/extractors/content/test_elmondo1.html similarity index 100% rename from tests/data/extractors/test_elmondo1.html rename to tests/data/extractors/content/test_elmondo1.html diff --git a/tests/data/extractors/test_elmondo1.json b/tests/data/extractors/content/test_elmondo1.json similarity index 100% rename from tests/data/extractors/test_elmondo1.json rename to tests/data/extractors/content/test_elmondo1.json diff --git a/tests/data/extractors/test_elpais.html b/tests/data/extractors/content/test_elpais.html similarity index 100% rename from tests/data/extractors/test_elpais.html rename to tests/data/extractors/content/test_elpais.html diff --git a/tests/data/extractors/test_elpais.json b/tests/data/extractors/content/test_elpais.json similarity index 100% rename from tests/data/extractors/test_elpais.json rename to tests/data/extractors/content/test_elpais.json diff --git a/tests/data/extractors/test_engadget.html b/tests/data/extractors/content/test_engadget.html similarity index 100% rename from tests/data/extractors/test_engadget.html rename to tests/data/extractors/content/test_engadget.html diff --git a/tests/data/extractors/test_engadget.json b/tests/data/extractors/content/test_engadget.json similarity index 100% rename from tests/data/extractors/test_engadget.json rename to tests/data/extractors/content/test_engadget.json diff --git a/tests/data/extractors/test_espn.html b/tests/data/extractors/content/test_espn.html similarity index 100% rename from tests/data/extractors/test_espn.html rename to tests/data/extractors/content/test_espn.html diff --git a/tests/data/extractors/test_espn.json b/tests/data/extractors/content/test_espn.json similarity index 100% rename from tests/data/extractors/test_espn.json rename to tests/data/extractors/content/test_espn.json diff --git a/tests/data/extractors/test_foxNews.html b/tests/data/extractors/content/test_foxNews.html similarity index 100% rename from tests/data/extractors/test_foxNews.html rename to tests/data/extractors/content/test_foxNews.html diff --git a/tests/data/extractors/test_foxNews.json b/tests/data/extractors/content/test_foxNews.json similarity index 100% rename from tests/data/extractors/test_foxNews.json rename to tests/data/extractors/content/test_foxNews.json diff --git a/tests/data/extractors/test_get_canonical_url.html b/tests/data/extractors/content/test_get_canonical_url.html similarity index 100% rename from tests/data/extractors/test_get_canonical_url.html rename to tests/data/extractors/content/test_get_canonical_url.html diff --git a/tests/data/extractors/test_get_canonical_url.json b/tests/data/extractors/content/test_get_canonical_url.json similarity index 100% rename from tests/data/extractors/test_get_canonical_url.json rename to tests/data/extractors/content/test_get_canonical_url.json diff --git 
a/tests/data/extractors/test_gizmodo1.html b/tests/data/extractors/content/test_gizmodo1.html similarity index 100% rename from tests/data/extractors/test_gizmodo1.html rename to tests/data/extractors/content/test_gizmodo1.html diff --git a/tests/data/extractors/test_gizmodo1.json b/tests/data/extractors/content/test_gizmodo1.json similarity index 100% rename from tests/data/extractors/test_gizmodo1.json rename to tests/data/extractors/content/test_gizmodo1.json diff --git a/tests/data/extractors/test_guardian1.html b/tests/data/extractors/content/test_guardian1.html similarity index 100% rename from tests/data/extractors/test_guardian1.html rename to tests/data/extractors/content/test_guardian1.html diff --git a/tests/data/extractors/test_guardian1.json b/tests/data/extractors/content/test_guardian1.json similarity index 100% rename from tests/data/extractors/test_guardian1.json rename to tests/data/extractors/content/test_guardian1.json diff --git a/tests/data/extractors/test_huffingtonPost2.html b/tests/data/extractors/content/test_huffingtonPost2.html similarity index 100% rename from tests/data/extractors/test_huffingtonPost2.html rename to tests/data/extractors/content/test_huffingtonPost2.html diff --git a/tests/data/extractors/test_huffingtonPost2.json b/tests/data/extractors/content/test_huffingtonPost2.json similarity index 100% rename from tests/data/extractors/test_huffingtonPost2.json rename to tests/data/extractors/content/test_huffingtonPost2.json diff --git a/tests/data/extractors/test_issue115.html b/tests/data/extractors/content/test_issue115.html similarity index 100% rename from tests/data/extractors/test_issue115.html rename to tests/data/extractors/content/test_issue115.html diff --git a/tests/data/extractors/test_issue115.json b/tests/data/extractors/content/test_issue115.json similarity index 100% rename from tests/data/extractors/test_issue115.json rename to tests/data/extractors/content/test_issue115.json diff --git a/tests/data/extractors/test_issue129.html b/tests/data/extractors/content/test_issue129.html similarity index 100% rename from tests/data/extractors/test_issue129.html rename to tests/data/extractors/content/test_issue129.html diff --git a/tests/data/extractors/test_issue129.json b/tests/data/extractors/content/test_issue129.json similarity index 100% rename from tests/data/extractors/test_issue129.json rename to tests/data/extractors/content/test_issue129.json diff --git a/tests/data/extractors/test_issue24.html b/tests/data/extractors/content/test_issue24.html similarity index 100% rename from tests/data/extractors/test_issue24.html rename to tests/data/extractors/content/test_issue24.html diff --git a/tests/data/extractors/test_issue24.json b/tests/data/extractors/content/test_issue24.json similarity index 100% rename from tests/data/extractors/test_issue24.json rename to tests/data/extractors/content/test_issue24.json diff --git a/tests/data/extractors/test_issue25.html b/tests/data/extractors/content/test_issue25.html similarity index 100% rename from tests/data/extractors/test_issue25.html rename to tests/data/extractors/content/test_issue25.html diff --git a/tests/data/extractors/test_issue25.json b/tests/data/extractors/content/test_issue25.json similarity index 100% rename from tests/data/extractors/test_issue25.json rename to tests/data/extractors/content/test_issue25.json diff --git a/tests/data/extractors/test_issue28.html b/tests/data/extractors/content/test_issue28.html similarity index 100% rename from 
tests/data/extractors/test_issue28.html rename to tests/data/extractors/content/test_issue28.html diff --git a/tests/data/extractors/test_issue28.json b/tests/data/extractors/content/test_issue28.json similarity index 100% rename from tests/data/extractors/test_issue28.json rename to tests/data/extractors/content/test_issue28.json diff --git a/tests/data/extractors/test_issue32.html b/tests/data/extractors/content/test_issue32.html similarity index 100% rename from tests/data/extractors/test_issue32.html rename to tests/data/extractors/content/test_issue32.html diff --git a/tests/data/extractors/test_issue32.json b/tests/data/extractors/content/test_issue32.json similarity index 100% rename from tests/data/extractors/test_issue32.json rename to tests/data/extractors/content/test_issue32.json diff --git a/tests/data/extractors/test_issue4.html b/tests/data/extractors/content/test_issue4.html similarity index 100% rename from tests/data/extractors/test_issue4.html rename to tests/data/extractors/content/test_issue4.html diff --git a/tests/data/extractors/test_issue4.json b/tests/data/extractors/content/test_issue4.json similarity index 100% rename from tests/data/extractors/test_issue4.json rename to tests/data/extractors/content/test_issue4.json diff --git a/tests/data/extractors/test_lefigaro.html b/tests/data/extractors/content/test_lefigaro.html similarity index 100% rename from tests/data/extractors/test_lefigaro.html rename to tests/data/extractors/content/test_lefigaro.html diff --git a/tests/data/extractors/test_lefigaro.json b/tests/data/extractors/content/test_lefigaro.json similarity index 100% rename from tests/data/extractors/test_lefigaro.json rename to tests/data/extractors/content/test_lefigaro.json diff --git a/tests/data/extractors/test_liberation.html b/tests/data/extractors/content/test_liberation.html similarity index 100% rename from tests/data/extractors/test_liberation.html rename to tests/data/extractors/content/test_liberation.html diff --git a/tests/data/extractors/test_liberation.json b/tests/data/extractors/content/test_liberation.json similarity index 100% rename from tests/data/extractors/test_liberation.json rename to tests/data/extractors/content/test_liberation.json diff --git a/tests/data/extractors/test_links.html b/tests/data/extractors/content/test_links.html similarity index 100% rename from tests/data/extractors/test_links.html rename to tests/data/extractors/content/test_links.html diff --git a/tests/data/extractors/test_links.json b/tests/data/extractors/content/test_links.json similarity index 100% rename from tests/data/extractors/test_links.json rename to tests/data/extractors/content/test_links.json diff --git a/tests/data/extractors/test_marketplace.html b/tests/data/extractors/content/test_marketplace.html similarity index 100% rename from tests/data/extractors/test_marketplace.html rename to tests/data/extractors/content/test_marketplace.html diff --git a/tests/data/extractors/test_marketplace.json b/tests/data/extractors/content/test_marketplace.json similarity index 100% rename from tests/data/extractors/test_marketplace.json rename to tests/data/extractors/content/test_marketplace.json diff --git a/tests/data/extractors/test_mashable_issue_74.html b/tests/data/extractors/content/test_mashable_issue_74.html similarity index 100% rename from tests/data/extractors/test_mashable_issue_74.html rename to tests/data/extractors/content/test_mashable_issue_74.html diff --git a/tests/data/extractors/test_mashable_issue_74.json 
b/tests/data/extractors/content/test_mashable_issue_74.json similarity index 100% rename from tests/data/extractors/test_mashable_issue_74.json rename to tests/data/extractors/content/test_mashable_issue_74.json diff --git a/tests/data/extractors/test_msn1.html b/tests/data/extractors/content/test_msn1.html similarity index 100% rename from tests/data/extractors/test_msn1.html rename to tests/data/extractors/content/test_msn1.html diff --git a/tests/data/extractors/test_msn1.json b/tests/data/extractors/content/test_msn1.json similarity index 100% rename from tests/data/extractors/test_msn1.json rename to tests/data/extractors/content/test_msn1.json diff --git a/tests/data/extractors/test_okaymarketing.html b/tests/data/extractors/content/test_okaymarketing.html similarity index 100% rename from tests/data/extractors/test_okaymarketing.html rename to tests/data/extractors/content/test_okaymarketing.html diff --git a/tests/data/extractors/test_okaymarketing.json b/tests/data/extractors/content/test_okaymarketing.json similarity index 100% rename from tests/data/extractors/test_okaymarketing.json rename to tests/data/extractors/content/test_okaymarketing.json diff --git a/tests/data/extractors/test_opengraph.html b/tests/data/extractors/content/test_opengraph.html similarity index 100% rename from tests/data/extractors/test_opengraph.html rename to tests/data/extractors/content/test_opengraph.html diff --git a/tests/data/extractors/test_opengraph.json b/tests/data/extractors/content/test_opengraph.json similarity index 100% rename from tests/data/extractors/test_opengraph.json rename to tests/data/extractors/content/test_opengraph.json diff --git a/tests/data/extractors/test_politico.html b/tests/data/extractors/content/test_politico.html similarity index 100% rename from tests/data/extractors/test_politico.html rename to tests/data/extractors/content/test_politico.html diff --git a/tests/data/extractors/test_politico.json b/tests/data/extractors/content/test_politico.json similarity index 100% rename from tests/data/extractors/test_politico.json rename to tests/data/extractors/content/test_politico.json diff --git a/tests/data/extractors/test_publish_date.html b/tests/data/extractors/content/test_publish_date.html similarity index 100% rename from tests/data/extractors/test_publish_date.html rename to tests/data/extractors/content/test_publish_date.html diff --git a/tests/data/extractors/test_publish_date.json b/tests/data/extractors/content/test_publish_date.json similarity index 100% rename from tests/data/extractors/test_publish_date.json rename to tests/data/extractors/content/test_publish_date.json diff --git a/tests/data/extractors/test_publish_date_article.html b/tests/data/extractors/content/test_publish_date_article.html similarity index 100% rename from tests/data/extractors/test_publish_date_article.html rename to tests/data/extractors/content/test_publish_date_article.html diff --git a/tests/data/extractors/test_publish_date_article.json b/tests/data/extractors/content/test_publish_date_article.json similarity index 100% rename from tests/data/extractors/test_publish_date_article.json rename to tests/data/extractors/content/test_publish_date_article.json diff --git a/tests/data/extractors/test_publish_date_rnews.html b/tests/data/extractors/content/test_publish_date_rnews.html similarity index 100% rename from tests/data/extractors/test_publish_date_rnews.html rename to tests/data/extractors/content/test_publish_date_rnews.html diff --git 
a/tests/data/extractors/test_publish_date_rnews.json b/tests/data/extractors/content/test_publish_date_rnews.json similarity index 100% rename from tests/data/extractors/test_publish_date_rnews.json rename to tests/data/extractors/content/test_publish_date_rnews.json diff --git a/tests/data/extractors/test_publish_date_schema.html b/tests/data/extractors/content/test_publish_date_schema.html similarity index 100% rename from tests/data/extractors/test_publish_date_schema.html rename to tests/data/extractors/content/test_publish_date_schema.html diff --git a/tests/data/extractors/test_publish_date_schema.json b/tests/data/extractors/content/test_publish_date_schema.json similarity index 100% rename from tests/data/extractors/test_publish_date_schema.json rename to tests/data/extractors/content/test_publish_date_schema.json diff --git a/tests/data/extractors/test_tags_abcau.html b/tests/data/extractors/content/test_tags_abcau.html similarity index 100% rename from tests/data/extractors/test_tags_abcau.html rename to tests/data/extractors/content/test_tags_abcau.html diff --git a/tests/data/extractors/test_tags_abcau.json b/tests/data/extractors/content/test_tags_abcau.json similarity index 100% rename from tests/data/extractors/test_tags_abcau.json rename to tests/data/extractors/content/test_tags_abcau.json diff --git a/tests/data/extractors/test_tags_cnet.html b/tests/data/extractors/content/test_tags_cnet.html similarity index 100% rename from tests/data/extractors/test_tags_cnet.html rename to tests/data/extractors/content/test_tags_cnet.html diff --git a/tests/data/extractors/test_tags_cnet.json b/tests/data/extractors/content/test_tags_cnet.json similarity index 100% rename from tests/data/extractors/test_tags_cnet.json rename to tests/data/extractors/content/test_tags_cnet.json diff --git a/tests/data/extractors/test_tags_deadline.html b/tests/data/extractors/content/test_tags_deadline.html similarity index 100% rename from tests/data/extractors/test_tags_deadline.html rename to tests/data/extractors/content/test_tags_deadline.html diff --git a/tests/data/extractors/test_tags_deadline.json b/tests/data/extractors/content/test_tags_deadline.json similarity index 100% rename from tests/data/extractors/test_tags_deadline.json rename to tests/data/extractors/content/test_tags_deadline.json diff --git a/tests/data/extractors/test_tags_kexp.html b/tests/data/extractors/content/test_tags_kexp.html similarity index 100% rename from tests/data/extractors/test_tags_kexp.html rename to tests/data/extractors/content/test_tags_kexp.html diff --git a/tests/data/extractors/test_tags_kexp.json b/tests/data/extractors/content/test_tags_kexp.json similarity index 100% rename from tests/data/extractors/test_tags_kexp.json rename to tests/data/extractors/content/test_tags_kexp.json diff --git a/tests/data/extractors/test_tags_wnyc.html b/tests/data/extractors/content/test_tags_wnyc.html similarity index 100% rename from tests/data/extractors/test_tags_wnyc.html rename to tests/data/extractors/content/test_tags_wnyc.html diff --git a/tests/data/extractors/test_tags_wnyc.json b/tests/data/extractors/content/test_tags_wnyc.json similarity index 100% rename from tests/data/extractors/test_tags_wnyc.json rename to tests/data/extractors/content/test_tags_wnyc.json diff --git a/tests/data/extractors/test_techcrunch1.html b/tests/data/extractors/content/test_techcrunch1.html similarity index 100% rename from tests/data/extractors/test_techcrunch1.html rename to tests/data/extractors/content/test_techcrunch1.html 
diff --git a/tests/data/extractors/test_techcrunch1.json b/tests/data/extractors/content/test_techcrunch1.json similarity index 100% rename from tests/data/extractors/test_techcrunch1.json rename to tests/data/extractors/content/test_techcrunch1.json diff --git a/tests/data/extractors/test_testHuffingtonPost.html b/tests/data/extractors/content/test_testHuffingtonPost.html similarity index 100% rename from tests/data/extractors/test_testHuffingtonPost.html rename to tests/data/extractors/content/test_testHuffingtonPost.html diff --git a/tests/data/extractors/test_testHuffingtonPost.json b/tests/data/extractors/content/test_testHuffingtonPost.json similarity index 100% rename from tests/data/extractors/test_testHuffingtonPost.json rename to tests/data/extractors/content/test_testHuffingtonPost.json diff --git a/tests/data/extractors/test_time.html b/tests/data/extractors/content/test_time.html similarity index 100% rename from tests/data/extractors/test_time.html rename to tests/data/extractors/content/test_time.html diff --git a/tests/data/extractors/test_time.json b/tests/data/extractors/content/test_time.json similarity index 100% rename from tests/data/extractors/test_time.json rename to tests/data/extractors/content/test_time.json diff --git a/tests/data/extractors/test_time2.html b/tests/data/extractors/content/test_time2.html similarity index 100% rename from tests/data/extractors/test_time2.html rename to tests/data/extractors/content/test_time2.html diff --git a/tests/data/extractors/test_time2.json b/tests/data/extractors/content/test_time2.json similarity index 100% rename from tests/data/extractors/test_time2.json rename to tests/data/extractors/content/test_time2.json diff --git a/tests/data/extractors/test_title_opengraph.html b/tests/data/extractors/content/test_title_opengraph.html similarity index 100% rename from tests/data/extractors/test_title_opengraph.html rename to tests/data/extractors/content/test_title_opengraph.html diff --git a/tests/data/extractors/test_title_opengraph.json b/tests/data/extractors/content/test_title_opengraph.json similarity index 100% rename from tests/data/extractors/test_title_opengraph.json rename to tests/data/extractors/content/test_title_opengraph.json diff --git a/tests/data/extractors/test_tweet.html b/tests/data/extractors/content/test_tweet.html similarity index 100% rename from tests/data/extractors/test_tweet.html rename to tests/data/extractors/content/test_tweet.html diff --git a/tests/data/extractors/test_tweet.json b/tests/data/extractors/content/test_tweet.json similarity index 100% rename from tests/data/extractors/test_tweet.json rename to tests/data/extractors/content/test_tweet.json diff --git a/tests/data/extractors/test_usatoday_issue_74.html b/tests/data/extractors/content/test_usatoday_issue_74.html similarity index 100% rename from tests/data/extractors/test_usatoday_issue_74.html rename to tests/data/extractors/content/test_usatoday_issue_74.html diff --git a/tests/data/extractors/test_usatoday_issue_74.json b/tests/data/extractors/content/test_usatoday_issue_74.json similarity index 100% rename from tests/data/extractors/test_usatoday_issue_74.json rename to tests/data/extractors/content/test_usatoday_issue_74.json diff --git a/tests/data/extractors/test_yahoo.html b/tests/data/extractors/content/test_yahoo.html similarity index 100% rename from tests/data/extractors/test_yahoo.html rename to tests/data/extractors/content/test_yahoo.html diff --git a/tests/data/extractors/test_yahoo.json 
b/tests/data/extractors/content/test_yahoo.json similarity index 100% rename from tests/data/extractors/test_yahoo.json rename to tests/data/extractors/content/test_yahoo.json diff --git a/tests/data/videos/test_embed.html b/tests/data/extractors/videos/test_embed.html similarity index 100% rename from tests/data/videos/test_embed.html rename to tests/data/extractors/videos/test_embed.html diff --git a/tests/data/videos/test_embed.json b/tests/data/extractors/videos/test_embed.json similarity index 100% rename from tests/data/videos/test_embed.json rename to tests/data/extractors/videos/test_embed.json diff --git a/tests/data/videos/test_iframe.html b/tests/data/extractors/videos/test_iframe.html similarity index 100% rename from tests/data/videos/test_iframe.html rename to tests/data/extractors/videos/test_iframe.html diff --git a/tests/data/videos/test_iframe.json b/tests/data/extractors/videos/test_iframe.json similarity index 100% rename from tests/data/videos/test_iframe.json rename to tests/data/extractors/videos/test_iframe.json diff --git a/tests/data/videos/test_object.html b/tests/data/extractors/videos/test_object.html similarity index 100% rename from tests/data/videos/test_object.html rename to tests/data/extractors/videos/test_object.html diff --git a/tests/data/videos/test_object.json b/tests/data/extractors/videos/test_object.json similarity index 100% rename from tests/data/videos/test_object.json rename to tests/data/extractors/videos/test_object.json diff --git a/tests/extractors/__init__.py b/tests/extractors/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/extractors/base.py b/tests/extractors/base.py new file mode 100644 index 00000000..60990b77 --- /dev/null +++ b/tests/extractors/base.py @@ -0,0 +1,252 @@ +# -*- coding: utf-8 -*- +"""\ +This is a python port of "Goose" originally licensed to Gravity.com +under one or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. + +Python port was written by Xavier Grangier for Recrutae + +Gravity.com licenses this file +to you under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
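+
+The classes below make up a small offline test harness: MockHTTPHandler is
+installed as the default urllib2 opener so that Goose's page fetches are
+answered from local fixture files under tests/data instead of the network,
+and socket.getaddrinfo is stubbed out so the tests never perform real DNS
+lookups.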
+""" +import os +import json +import urllib2 +import unittest +import socket + +from StringIO import StringIO + +from goose import Goose +from goose.utils import FileHelper +from goose.configuration import Configuration + + +CURRENT_PATH = os.path.dirname(os.path.abspath(__file__)) + + +# Response +class MockResponse(): + """\ + Base mock response class + """ + code = 200 + msg = "OK" + + def __init__(self, cls): + self.cls = cls + + def content(self): + return "response" + + def response(self, req): + data = self.content(req) + url = req.get_full_url() + resp = urllib2.addinfourl(StringIO(data), data, url) + resp.code = self.code + resp.msg = self.msg + return resp + + +class MockHTTPHandler(urllib2.HTTPHandler, urllib2.HTTPSHandler): + """\ + Mocked HTTPHandler in order to query APIs locally + """ + cls = None + + def https_open(self, req): + return self.http_open(req) + + def http_open(self, req): + r = self.cls.callback(self.cls) + return r.response(req) + + @staticmethod + def patch(cls): + opener = urllib2.build_opener(MockHTTPHandler) + urllib2.install_opener(opener) + # dirty ! + for h in opener.handlers: + if isinstance(h, MockHTTPHandler): + h.cls = cls + return [h for h in opener.handlers if isinstance(h, MockHTTPHandler)][0] + + @staticmethod + def unpatch(): + # urllib2 + urllib2._opener = None + + +class BaseMockTests(unittest.TestCase): + """\ + Base Mock test case + """ + callback = MockResponse + + def setUp(self): + # patch DNS + self.original_getaddrinfo = socket.getaddrinfo + socket.getaddrinfo = self.new_getaddrinfo + MockHTTPHandler.patch(self) + + def tearDown(self): + MockHTTPHandler.unpatch() + # DNS + socket.getaddrinfo = self.original_getaddrinfo + + def new_getaddrinfo(self, *args): + return [(2, 1, 6, '', ('127.0.0.1', 0))] + + def _get_current_testname(self): + return self.id().split('.')[-1:][0] + + +class MockResponseExtractors(MockResponse): + def content(self, req): + current_test = self.cls._get_current_testname() + path = os.path.join( + os.path.dirname(CURRENT_PATH), + "data", + "extractors", + "content", + "%s.html" % current_test) + path = os.path.abspath(path) + content = FileHelper.loadResourceFile(path) + return content + + +class TestExtractionBase(BaseMockTests): + """\ + Extraction test case + """ + callback = MockResponseExtractors + + def getRawHtml(self): + test, suite, module, cls, func = self.id().split('.') + path = os.path.join( + os.path.dirname(CURRENT_PATH), + "data", + suite, + module, + "%s.html" % func) + path = os.path.abspath(path) + content = FileHelper.loadResourceFile(path) + return content + + def loadData(self): + """\ + + """ + test, suite, module, cls, func = self.id().split('.') + path = os.path.join( + os.path.dirname(CURRENT_PATH), + "data", + suite, + module, + "%s.json" % func) + path = os.path.abspath(path) + content = FileHelper.loadResourceFile(path) + self.data = json.loads(content) + + def assert_cleaned_text(self, field, expected_value, result_value): + """\ + + """ + # # TODO : handle verbose level in tests + # print "\n=======================::. ARTICLE REPORT %s .::======================\n" % self.id() + # print 'expected_value (%s) \n' % len(expected_value) + # print expected_value + # print "-------" + # print 'result_value (%s) \n' % len(result_value) + # print result_value + + # cleaned_text is Null + msg = u"Resulting article text was NULL!" + self.assertNotEqual(result_value, None, msg=msg) + + # cleaned_text length + msg = u"Article text was not as long as expected beginning!" 
+ self.assertTrue(len(expected_value) <= len(result_value), msg=msg) + + # clean_text value + result_value = result_value[0:len(expected_value)] + msg = u"The beginning of the article text was not as expected!" + self.assertEqual(expected_value, result_value, msg=msg) + + def assert_tags(self, field, expected_value, result_value): + """\ + + """ + # as we have a set in expected_value and a list in result_value + # make result_value a set + expected_value = set(expected_value) + + # check if both have the same number of items + msg = (u"expected tags set and result tags set" + u"don't have the same number of items") + self.assertEqual(len(result_value), len(expected_value), msg=msg) + + # check if each tag in result_value is in expected_value + for tag in result_value: + self.assertTrue(tag in expected_value) + + def runArticleAssertions(self, article, fields): + """\ + + """ + for field in fields: + expected_value = self.data['expected'][field] + result_value = getattr(article, field, None) + + # custom assertion for a given field + assertion = 'assert_%s' % field + if hasattr(self, assertion): + getattr(self, assertion)(field, expected_value, result_value) + continue + + # default assertion + msg = u"Error %s \nexpected: %s\nresult: %s" % (field, expected_value, result_value) + self.assertEqual(expected_value, result_value, msg=msg) + + def extract(self, instance): + article = instance.extract(url=self.data['url']) + return article + + def getConfig(self): + config = Configuration() + config.enable_image_fetching = False + return config + + def getArticle(self): + """\ + + """ + # load test case data + self.loadData() + + # basic configuration + # no image fetching + config = self.getConfig() + self.parser = config.get_parser() + + # target language + # needed for non english language most of the time + target_language = self.data.get('target_language') + if target_language: + config.target_language = target_language + config.use_meta_language = False + + # run goose + g = Goose(config=config) + return self.extract(g) diff --git a/tests/extractors.py b/tests/extractors/content.py similarity index 100% rename from tests/extractors.py rename to tests/extractors/content.py diff --git a/tests/images.py b/tests/extractors/images.py similarity index 100% rename from tests/images.py rename to tests/extractors/images.py diff --git a/tests/videos.py b/tests/extractors/videos.py similarity index 100% rename from tests/videos.py rename to tests/extractors/videos.py From 6009d44905334b855ba45a037944e81261d894b3 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 06:24:10 +0100 Subject: [PATCH 082/100] #188 - tests refactor --- tests/base.py | 82 ------------------------------------- tests/extractors/content.py | 32 ++++++++------- tests/extractors/images.py | 2 +- tests/extractors/videos.py | 22 ++++------ 4 files changed, 26 insertions(+), 112 deletions(-) diff --git a/tests/base.py b/tests/base.py index d0619ed1..7cc3532c 100644 --- a/tests/base.py +++ b/tests/base.py @@ -20,85 +20,3 @@ See the License for the specific language governing permissions and limitations under the License. 
""" - -import urllib2 -import unittest -import socket - -from StringIO import StringIO - - -# Response -class MockResponse(): - """\ - Base mock response class - """ - code = 200 - msg = "OK" - - def __init__(self, cls): - self.cls = cls - - def content(self): - return "response" - - def response(self, req): - data = self.content(req) - url = req.get_full_url() - resp = urllib2.addinfourl(StringIO(data), data, url) - resp.code = self.code - resp.msg = self.msg - return resp - - -class MockHTTPHandler(urllib2.HTTPHandler, urllib2.HTTPSHandler): - """\ - Mocked HTTPHandler in order to query APIs locally - """ - cls = None - - def https_open(self, req): - return self.http_open(req) - - def http_open(self, req): - r = self.cls.callback(self.cls) - return r.response(req) - - @staticmethod - def patch(cls): - opener = urllib2.build_opener(MockHTTPHandler) - urllib2.install_opener(opener) - # dirty ! - for h in opener.handlers: - if isinstance(h, MockHTTPHandler): - h.cls = cls - return [h for h in opener.handlers if isinstance(h, MockHTTPHandler)][0] - - @staticmethod - def unpatch(): - # urllib2 - urllib2._opener = None - - -class BaseMockTests(unittest.TestCase): - """\ - Base Mock test case - """ - callback = MockResponse - - def setUp(self): - # patch DNS - self.original_getaddrinfo = socket.getaddrinfo - socket.getaddrinfo = self.new_getaddrinfo - MockHTTPHandler.patch(self) - - def tearDown(self): - MockHTTPHandler.unpatch() - # DNS - socket.getaddrinfo = self.original_getaddrinfo - - def new_getaddrinfo(self, *args): - return [(2, 1, 6, '', ('127.0.0.1', 0))] - - def _get_current_testname(self): - return self.id().split('.')[-1:][0] diff --git a/tests/extractors/content.py b/tests/extractors/content.py index b9496b8c..950d2208 100644 --- a/tests/extractors/content.py +++ b/tests/extractors/content.py @@ -23,28 +23,20 @@ import os import json -from base import BaseMockTests, MockResponse +from base import BaseMockTests +from base import MockResponseExtractors from goose import Goose -from goose.utils import FileHelper from goose.configuration import Configuration from goose.text import StopWordsChinese from goose.text import StopWordsArabic from goose.text import StopWordsKorean +from goose.utils import FileHelper CURRENT_PATH = os.path.dirname(os.path.abspath(__file__)) -class MockResponseExtractors(MockResponse): - def content(self, req): - current_test = self.cls._get_current_testname() - path = os.path.join(CURRENT_PATH, "data", "extractors", "%s.html" % current_test) - path = os.path.abspath(path) - content = FileHelper.loadResourceFile(path) - return content - - class TestExtractionBase(BaseMockTests): """\ Extraction test case @@ -52,8 +44,13 @@ class TestExtractionBase(BaseMockTests): callback = MockResponseExtractors def getRawHtml(self): - suite, module, cls, func = self.id().split('.') - path = os.path.join(CURRENT_PATH, "data", module, "%s.html" % func) + test, suite, module, cls, func = self.id().split('.') + path = os.path.join( + os.path.dirname(CURRENT_PATH), + "data", + suite, + module, + "%s.html" % func) path = os.path.abspath(path) content = FileHelper.loadResourceFile(path) return content @@ -62,8 +59,13 @@ def loadData(self): """\ """ - suite, module, cls, func = self.id().split('.') - path = os.path.join(CURRENT_PATH, "data", module, "%s.json" % func) + test, suite, module, cls, func = self.id().split('.') + path = os.path.join( + os.path.dirname(CURRENT_PATH), + "data", + suite, + module, + "%s.json" % func) path = os.path.abspath(path) content = 
FileHelper.loadResourceFile(path) self.data = json.loads(content) diff --git a/tests/extractors/images.py b/tests/extractors/images.py index ace6d323..582bca9f 100644 --- a/tests/extractors/images.py +++ b/tests/extractors/images.py @@ -26,7 +26,7 @@ import unittest from base import MockResponse -from extractors import TestExtractionBase +from base import TestExtractionBase from goose.configuration import Configuration from goose.image import Image diff --git a/tests/extractors/videos.py b/tests/extractors/videos.py index 4f18d0f1..389a414c 100644 --- a/tests/extractors/videos.py +++ b/tests/extractors/videos.py @@ -21,10 +21,9 @@ limitations under the License. """ import os -import json -from .base import MockResponse -from .extractors import TestExtractionBase +from base import MockResponse +from base import TestExtractionBase from goose.utils import FileHelper @@ -34,7 +33,12 @@ class MockResponseVideos(MockResponse): def content(self, req): current_test = self.cls._get_current_testname() - path = os.path.join(CURRENT_PATH, "data", "videos", "%s.html" % current_test) + path = os.path.join( + os.path.dirname(CURRENT_PATH), + "data", + "extractors", + "videos", + "%s.html" % current_test) path = os.path.abspath(path) content = FileHelper.loadResourceFile(path) return content @@ -59,16 +63,6 @@ def assert_movies(self, field, expected_value, result_value): r = getattr(video, k) self.assertEqual(r, v) - def loadData(self): - """\ - - """ - suite, module, cls, func = self.id().split('.') - path = os.path.join(CURRENT_PATH, "data", module, "%s.json" % func) - path = os.path.abspath(path) - content = FileHelper.loadResourceFile(path) - self.data = json.loads(content) - def test_embed(self): article = self.getArticle() fields = ['movies'] From 26ba835b5aeab2e3f0aeda024192ee6725837005 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 06:32:34 +0100 Subject: [PATCH 083/100] #188 - move image test case --- .../50850547cc7310bc53e30e802c6318f1 | Bin .../test_basic_image/test_basic_image.html | 0 .../test_basic_image/test_basic_image.json | 0 .../test_known_image_css_class.html | 0 .../test_known_image_css_class.json | 0 .../test_known_image_css_id.html | 0 .../test_known_image_css_id.json | 0 .../test_known_image_css_parent_class.html | 0 .../test_known_image_css_parent_class.json | 0 .../test_known_image_css_parent_id.html | 0 .../test_known_image_css_parent_id.json | 0 .../test_known_image_empty_src.html | 0 .../test_known_image_empty_src.json | 0 .../test_known_image_name_parent.html | 0 .../test_known_image_name_parent.json | 0 .../test_opengraph_tag.html | 0 .../test_opengraph_tag.json | 0 tests/extractors/images.py | 29 +++++++++++++++--- 18 files changed, 24 insertions(+), 5 deletions(-) rename tests/data/{ => extractors}/images/test_basic_image/50850547cc7310bc53e30e802c6318f1 (100%) rename tests/data/{ => extractors}/images/test_basic_image/test_basic_image.html (100%) rename tests/data/{ => extractors}/images/test_basic_image/test_basic_image.json (100%) rename tests/data/{ => extractors}/images/test_known_image_css_class/test_known_image_css_class.html (100%) rename tests/data/{ => extractors}/images/test_known_image_css_class/test_known_image_css_class.json (100%) rename tests/data/{ => extractors}/images/test_known_image_css_id/test_known_image_css_id.html (100%) rename tests/data/{ => extractors}/images/test_known_image_css_id/test_known_image_css_id.json (100%) rename tests/data/{ => 
extractors}/images/test_known_image_css_parent_class/test_known_image_css_parent_class.html (100%) rename tests/data/{ => extractors}/images/test_known_image_css_parent_class/test_known_image_css_parent_class.json (100%) rename tests/data/{ => extractors}/images/test_known_image_css_parent_id/test_known_image_css_parent_id.html (100%) rename tests/data/{ => extractors}/images/test_known_image_css_parent_id/test_known_image_css_parent_id.json (100%) rename tests/data/{ => extractors}/images/test_known_image_empty_src/test_known_image_empty_src.html (100%) rename tests/data/{ => extractors}/images/test_known_image_empty_src/test_known_image_empty_src.json (100%) rename tests/data/{ => extractors}/images/test_known_image_name_parent/test_known_image_name_parent.html (100%) rename tests/data/{ => extractors}/images/test_known_image_name_parent/test_known_image_name_parent.json (100%) rename tests/data/{ => extractors}/images/test_opengraph_tag/test_opengraph_tag.html (100%) rename tests/data/{ => extractors}/images/test_opengraph_tag/test_opengraph_tag.json (100%) diff --git a/tests/data/images/test_basic_image/50850547cc7310bc53e30e802c6318f1 b/tests/data/extractors/images/test_basic_image/50850547cc7310bc53e30e802c6318f1 similarity index 100% rename from tests/data/images/test_basic_image/50850547cc7310bc53e30e802c6318f1 rename to tests/data/extractors/images/test_basic_image/50850547cc7310bc53e30e802c6318f1 diff --git a/tests/data/images/test_basic_image/test_basic_image.html b/tests/data/extractors/images/test_basic_image/test_basic_image.html similarity index 100% rename from tests/data/images/test_basic_image/test_basic_image.html rename to tests/data/extractors/images/test_basic_image/test_basic_image.html diff --git a/tests/data/images/test_basic_image/test_basic_image.json b/tests/data/extractors/images/test_basic_image/test_basic_image.json similarity index 100% rename from tests/data/images/test_basic_image/test_basic_image.json rename to tests/data/extractors/images/test_basic_image/test_basic_image.json diff --git a/tests/data/images/test_known_image_css_class/test_known_image_css_class.html b/tests/data/extractors/images/test_known_image_css_class/test_known_image_css_class.html similarity index 100% rename from tests/data/images/test_known_image_css_class/test_known_image_css_class.html rename to tests/data/extractors/images/test_known_image_css_class/test_known_image_css_class.html diff --git a/tests/data/images/test_known_image_css_class/test_known_image_css_class.json b/tests/data/extractors/images/test_known_image_css_class/test_known_image_css_class.json similarity index 100% rename from tests/data/images/test_known_image_css_class/test_known_image_css_class.json rename to tests/data/extractors/images/test_known_image_css_class/test_known_image_css_class.json diff --git a/tests/data/images/test_known_image_css_id/test_known_image_css_id.html b/tests/data/extractors/images/test_known_image_css_id/test_known_image_css_id.html similarity index 100% rename from tests/data/images/test_known_image_css_id/test_known_image_css_id.html rename to tests/data/extractors/images/test_known_image_css_id/test_known_image_css_id.html diff --git a/tests/data/images/test_known_image_css_id/test_known_image_css_id.json b/tests/data/extractors/images/test_known_image_css_id/test_known_image_css_id.json similarity index 100% rename from tests/data/images/test_known_image_css_id/test_known_image_css_id.json rename to 
tests/data/extractors/images/test_known_image_css_id/test_known_image_css_id.json diff --git a/tests/data/images/test_known_image_css_parent_class/test_known_image_css_parent_class.html b/tests/data/extractors/images/test_known_image_css_parent_class/test_known_image_css_parent_class.html similarity index 100% rename from tests/data/images/test_known_image_css_parent_class/test_known_image_css_parent_class.html rename to tests/data/extractors/images/test_known_image_css_parent_class/test_known_image_css_parent_class.html diff --git a/tests/data/images/test_known_image_css_parent_class/test_known_image_css_parent_class.json b/tests/data/extractors/images/test_known_image_css_parent_class/test_known_image_css_parent_class.json similarity index 100% rename from tests/data/images/test_known_image_css_parent_class/test_known_image_css_parent_class.json rename to tests/data/extractors/images/test_known_image_css_parent_class/test_known_image_css_parent_class.json diff --git a/tests/data/images/test_known_image_css_parent_id/test_known_image_css_parent_id.html b/tests/data/extractors/images/test_known_image_css_parent_id/test_known_image_css_parent_id.html similarity index 100% rename from tests/data/images/test_known_image_css_parent_id/test_known_image_css_parent_id.html rename to tests/data/extractors/images/test_known_image_css_parent_id/test_known_image_css_parent_id.html diff --git a/tests/data/images/test_known_image_css_parent_id/test_known_image_css_parent_id.json b/tests/data/extractors/images/test_known_image_css_parent_id/test_known_image_css_parent_id.json similarity index 100% rename from tests/data/images/test_known_image_css_parent_id/test_known_image_css_parent_id.json rename to tests/data/extractors/images/test_known_image_css_parent_id/test_known_image_css_parent_id.json diff --git a/tests/data/images/test_known_image_empty_src/test_known_image_empty_src.html b/tests/data/extractors/images/test_known_image_empty_src/test_known_image_empty_src.html similarity index 100% rename from tests/data/images/test_known_image_empty_src/test_known_image_empty_src.html rename to tests/data/extractors/images/test_known_image_empty_src/test_known_image_empty_src.html diff --git a/tests/data/images/test_known_image_empty_src/test_known_image_empty_src.json b/tests/data/extractors/images/test_known_image_empty_src/test_known_image_empty_src.json similarity index 100% rename from tests/data/images/test_known_image_empty_src/test_known_image_empty_src.json rename to tests/data/extractors/images/test_known_image_empty_src/test_known_image_empty_src.json diff --git a/tests/data/images/test_known_image_name_parent/test_known_image_name_parent.html b/tests/data/extractors/images/test_known_image_name_parent/test_known_image_name_parent.html similarity index 100% rename from tests/data/images/test_known_image_name_parent/test_known_image_name_parent.html rename to tests/data/extractors/images/test_known_image_name_parent/test_known_image_name_parent.html diff --git a/tests/data/images/test_known_image_name_parent/test_known_image_name_parent.json b/tests/data/extractors/images/test_known_image_name_parent/test_known_image_name_parent.json similarity index 100% rename from tests/data/images/test_known_image_name_parent/test_known_image_name_parent.json rename to tests/data/extractors/images/test_known_image_name_parent/test_known_image_name_parent.json diff --git a/tests/data/images/test_opengraph_tag/test_opengraph_tag.html b/tests/data/extractors/images/test_opengraph_tag/test_opengraph_tag.html 
similarity index 100% rename from tests/data/images/test_opengraph_tag/test_opengraph_tag.html rename to tests/data/extractors/images/test_opengraph_tag/test_opengraph_tag.html diff --git a/tests/data/images/test_opengraph_tag/test_opengraph_tag.json b/tests/data/extractors/images/test_opengraph_tag/test_opengraph_tag.json similarity index 100% rename from tests/data/images/test_opengraph_tag/test_opengraph_tag.json rename to tests/data/extractors/images/test_opengraph_tag/test_opengraph_tag.json diff --git a/tests/extractors/images.py b/tests/extractors/images.py index 582bca9f..e47a1dde 100644 --- a/tests/extractors/images.py +++ b/tests/extractors/images.py @@ -43,7 +43,13 @@ class MockResponseImage(MockResponse): def image_content(self, req): md5_hash = hashlib.md5(req.get_full_url()).hexdigest() current_test = self.cls._get_current_testname() - path = os.path.join(CURRENT_PATH, "data", "images", current_test, md5_hash) + path = os.path.join( + os.path.dirname(CURRENT_PATH), + "data", + "extractors", + "images", + current_test, + md5_hash) path = os.path.abspath(path) f = open(path, 'rb') content = f.read() @@ -52,7 +58,13 @@ def image_content(self, req): def html_content(self, req): current_test = self.cls._get_current_testname() - path = os.path.join(CURRENT_PATH, "data", "images", current_test, "%s.html" % current_test) + path = os.path.join( + os.path.dirname(CURRENT_PATH), + "data", + "extractors", + "images", + current_test, + "%s.html" % current_test) path = os.path.abspath(path) return FileHelper.loadResourceFile(path) @@ -72,8 +84,15 @@ def loadData(self): """\ """ - suite, module, cls, func = self.id().split('.') - path = os.path.join(CURRENT_PATH, "data", module, func, "%s.json" % func) + test, suite, module, cls, func = self.id().split('.') + path = os.path.join( + os.path.dirname(CURRENT_PATH), + "data", + suite, + module, + func, + "%s.json" % func) + path = os.path.abspath(path) content = FileHelper.loadResourceFile(path) self.data = json.loads(content) @@ -158,7 +177,7 @@ def test_opengraph_tag(self): class ImageUtilsTests(unittest.TestCase): def setUp(self): - self.path = 'tests/data/images/test_basic_image/50850547cc7310bc53e30e802c6318f1' + self.path = 'tests/data/extractors/images/test_basic_image/50850547cc7310bc53e30e802c6318f1' self.expected_results = { 'width': 476, 'height': 317, From ff4449cc27fa9dc508f4ce7a0c06b7e16bd6656d Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 06:36:14 +0100 Subject: [PATCH 084/100] #188 - remove useless file --- tests/base.py | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 tests/base.py diff --git a/tests/base.py b/tests/base.py deleted file mode 100644 index 7cc3532c..00000000 --- a/tests/base.py +++ /dev/null @@ -1,22 +0,0 @@ -# -*- coding: utf-8 -*- -"""\ -This is a python port of "Goose" orignialy licensed to Gravity.com -under one or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. - -Python port was written by Xavier Grangier for Recrutae - -Gravity.com licenses this file -to you under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance -with the License. 
You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" From c381993b05688ad63d57306b87e1a4a1acfddbb2 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 06:40:38 +0100 Subject: [PATCH 085/100] #188 - new extractors test files --- tests/extractors/authors.py | 0 tests/extractors/metas.py | 0 tests/extractors/opengraph.py | 0 tests/extractors/publishdate.py | 0 tests/extractors/tags.py | 0 tests/extractors/title.py | 0 tests/extractors/tweets.py | 0 7 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/extractors/authors.py create mode 100644 tests/extractors/metas.py create mode 100644 tests/extractors/opengraph.py create mode 100644 tests/extractors/publishdate.py create mode 100644 tests/extractors/tags.py create mode 100644 tests/extractors/title.py create mode 100644 tests/extractors/tweets.py diff --git a/tests/extractors/authors.py b/tests/extractors/authors.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/extractors/metas.py b/tests/extractors/metas.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/extractors/opengraph.py b/tests/extractors/opengraph.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/extractors/publishdate.py b/tests/extractors/publishdate.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/extractors/tags.py b/tests/extractors/tags.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/extractors/title.py b/tests/extractors/title.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/extractors/tweets.py b/tests/extractors/tweets.py new file mode 100644 index 00000000..e69de29b From 0e6a7713e25c7a54ec6196b85111713335c0e834 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 06:53:07 +0100 Subject: [PATCH 086/100] #188 - test refactor video image tags publishdate --- .../test_publish_date.html | 0 .../test_publish_date.json | 0 .../test_publish_date_article.html | 0 .../test_publish_date_article.json | 0 .../test_publish_date_rnews.html | 0 .../test_publish_date_rnews.json | 0 .../test_publish_date_schema.html | 0 .../test_publish_date_schema.json | 0 .../{content => tags}/test_tags_abcau.html | 0 .../{content => tags}/test_tags_abcau.json | 0 .../{content => tags}/test_tags_cnet.html | 0 .../{content => tags}/test_tags_cnet.json | 0 .../{content => tags}/test_tags_deadline.html | 0 .../{content => tags}/test_tags_deadline.json | 0 .../{content => tags}/test_tags_kexp.html | 0 .../{content => tags}/test_tags_kexp.json | 0 .../{content => tags}/test_tags_wnyc.html | 0 .../{content => tags}/test_tags_wnyc.json | 0 tests/extractors/base.py | 25 ++----- tests/extractors/content.py | 67 ----------------- tests/extractors/publishdate.py | 43 +++++++++++ tests/extractors/tags.py | 72 +++++++++++++++++++ tests/extractors/videos.py | 16 ----- 23 files changed, 119 insertions(+), 104 deletions(-) rename tests/data/extractors/{content => publishdate}/test_publish_date.html (100%) rename tests/data/extractors/{content => publishdate}/test_publish_date.json (100%) rename tests/data/extractors/{content => publishdate}/test_publish_date_article.html (100%) rename
tests/data/extractors/{content => publishdate}/test_publish_date_article.json (100%) rename tests/data/extractors/{content => publishdate}/test_publish_date_rnews.html (100%) rename tests/data/extractors/{content => publishdate}/test_publish_date_rnews.json (100%) rename tests/data/extractors/{content => publishdate}/test_publish_date_schema.html (100%) rename tests/data/extractors/{content => publishdate}/test_publish_date_schema.json (100%) rename tests/data/extractors/{content => tags}/test_tags_abcau.html (100%) rename tests/data/extractors/{content => tags}/test_tags_abcau.json (100%) rename tests/data/extractors/{content => tags}/test_tags_cnet.html (100%) rename tests/data/extractors/{content => tags}/test_tags_cnet.json (100%) rename tests/data/extractors/{content => tags}/test_tags_deadline.html (100%) rename tests/data/extractors/{content => tags}/test_tags_deadline.json (100%) rename tests/data/extractors/{content => tags}/test_tags_kexp.html (100%) rename tests/data/extractors/{content => tags}/test_tags_kexp.json (100%) rename tests/data/extractors/{content => tags}/test_tags_wnyc.html (100%) rename tests/data/extractors/{content => tags}/test_tags_wnyc.json (100%) diff --git a/tests/data/extractors/content/test_publish_date.html b/tests/data/extractors/publishdate/test_publish_date.html similarity index 100% rename from tests/data/extractors/content/test_publish_date.html rename to tests/data/extractors/publishdate/test_publish_date.html diff --git a/tests/data/extractors/content/test_publish_date.json b/tests/data/extractors/publishdate/test_publish_date.json similarity index 100% rename from tests/data/extractors/content/test_publish_date.json rename to tests/data/extractors/publishdate/test_publish_date.json diff --git a/tests/data/extractors/content/test_publish_date_article.html b/tests/data/extractors/publishdate/test_publish_date_article.html similarity index 100% rename from tests/data/extractors/content/test_publish_date_article.html rename to tests/data/extractors/publishdate/test_publish_date_article.html diff --git a/tests/data/extractors/content/test_publish_date_article.json b/tests/data/extractors/publishdate/test_publish_date_article.json similarity index 100% rename from tests/data/extractors/content/test_publish_date_article.json rename to tests/data/extractors/publishdate/test_publish_date_article.json diff --git a/tests/data/extractors/content/test_publish_date_rnews.html b/tests/data/extractors/publishdate/test_publish_date_rnews.html similarity index 100% rename from tests/data/extractors/content/test_publish_date_rnews.html rename to tests/data/extractors/publishdate/test_publish_date_rnews.html diff --git a/tests/data/extractors/content/test_publish_date_rnews.json b/tests/data/extractors/publishdate/test_publish_date_rnews.json similarity index 100% rename from tests/data/extractors/content/test_publish_date_rnews.json rename to tests/data/extractors/publishdate/test_publish_date_rnews.json diff --git a/tests/data/extractors/content/test_publish_date_schema.html b/tests/data/extractors/publishdate/test_publish_date_schema.html similarity index 100% rename from tests/data/extractors/content/test_publish_date_schema.html rename to tests/data/extractors/publishdate/test_publish_date_schema.html diff --git a/tests/data/extractors/content/test_publish_date_schema.json b/tests/data/extractors/publishdate/test_publish_date_schema.json similarity index 100% rename from tests/data/extractors/content/test_publish_date_schema.json rename to 
tests/data/extractors/publishdate/test_publish_date_schema.json diff --git a/tests/data/extractors/content/test_tags_abcau.html b/tests/data/extractors/tags/test_tags_abcau.html similarity index 100% rename from tests/data/extractors/content/test_tags_abcau.html rename to tests/data/extractors/tags/test_tags_abcau.html diff --git a/tests/data/extractors/content/test_tags_abcau.json b/tests/data/extractors/tags/test_tags_abcau.json similarity index 100% rename from tests/data/extractors/content/test_tags_abcau.json rename to tests/data/extractors/tags/test_tags_abcau.json diff --git a/tests/data/extractors/content/test_tags_cnet.html b/tests/data/extractors/tags/test_tags_cnet.html similarity index 100% rename from tests/data/extractors/content/test_tags_cnet.html rename to tests/data/extractors/tags/test_tags_cnet.html diff --git a/tests/data/extractors/content/test_tags_cnet.json b/tests/data/extractors/tags/test_tags_cnet.json similarity index 100% rename from tests/data/extractors/content/test_tags_cnet.json rename to tests/data/extractors/tags/test_tags_cnet.json diff --git a/tests/data/extractors/content/test_tags_deadline.html b/tests/data/extractors/tags/test_tags_deadline.html similarity index 100% rename from tests/data/extractors/content/test_tags_deadline.html rename to tests/data/extractors/tags/test_tags_deadline.html diff --git a/tests/data/extractors/content/test_tags_deadline.json b/tests/data/extractors/tags/test_tags_deadline.json similarity index 100% rename from tests/data/extractors/content/test_tags_deadline.json rename to tests/data/extractors/tags/test_tags_deadline.json diff --git a/tests/data/extractors/content/test_tags_kexp.html b/tests/data/extractors/tags/test_tags_kexp.html similarity index 100% rename from tests/data/extractors/content/test_tags_kexp.html rename to tests/data/extractors/tags/test_tags_kexp.html diff --git a/tests/data/extractors/content/test_tags_kexp.json b/tests/data/extractors/tags/test_tags_kexp.json similarity index 100% rename from tests/data/extractors/content/test_tags_kexp.json rename to tests/data/extractors/tags/test_tags_kexp.json diff --git a/tests/data/extractors/content/test_tags_wnyc.html b/tests/data/extractors/tags/test_tags_wnyc.html similarity index 100% rename from tests/data/extractors/content/test_tags_wnyc.html rename to tests/data/extractors/tags/test_tags_wnyc.html diff --git a/tests/data/extractors/content/test_tags_wnyc.json b/tests/data/extractors/tags/test_tags_wnyc.json similarity index 100% rename from tests/data/extractors/content/test_tags_wnyc.json rename to tests/data/extractors/tags/test_tags_wnyc.json diff --git a/tests/extractors/base.py b/tests/extractors/base.py index 60990b77..e19d20e0 100644 --- a/tests/extractors/base.py +++ b/tests/extractors/base.py @@ -114,13 +114,13 @@ def _get_current_testname(self): class MockResponseExtractors(MockResponse): def content(self, req): - current_test = self.cls._get_current_testname() + test, suite, module, cls, func = self.cls.id().split('.') path = os.path.join( os.path.dirname(CURRENT_PATH), "data", - "extractors", - "content", - "%s.html" % current_test) + suite, + module, + "%s.html" % func) path = os.path.abspath(path) content = FileHelper.loadResourceFile(path) return content @@ -184,23 +184,6 @@ def assert_cleaned_text(self, field, expected_value, result_value): msg = u"The beginning of the article text was not as expected!" 
self.assertEqual(expected_value, result_value, msg=msg) - def assert_tags(self, field, expected_value, result_value): - """\ - - """ - # as we have a set in expected_value and a list in result_value - # make result_value a set - expected_value = set(expected_value) - - # check if both have the same number of items - msg = (u"expected tags set and result tags set" - u"don't have the same number of items") - self.assertEqual(len(result_value), len(expected_value), msg=msg) - - # check if each tag in result_value is in expected_value - for tag in result_value: - self.assertTrue(tag in expected_value) - def runArticleAssertions(self, article, fields): """\ diff --git a/tests/extractors/content.py b/tests/extractors/content.py index 950d2208..3eeaf1fe 100644 --- a/tests/extractors/content.py +++ b/tests/extractors/content.py @@ -95,23 +95,6 @@ def assert_cleaned_text(self, field, expected_value, result_value): msg = u"The beginning of the article text was not as expected!" self.assertEqual(expected_value, result_value, msg=msg) - def assert_tags(self, field, expected_value, result_value): - """\ - - """ - # as we have a set in expected_value and a list in result_value - # make result_value a set - expected_value = set(expected_value) - - # check if both have the same number of items - msg = (u"expected tags set and result tags set" - u"don't have the same number of items") - self.assertEqual(len(result_value), len(expected_value), msg=msg) - - # check if each tag in result_value is in expected_value - for tag in result_value: - self.assertTrue(tag in expected_value) - def runArticleAssertions(self, article, fields): """\ @@ -387,25 +370,6 @@ def test_articlebody_tag(self): self.runArticleAssertions(article=article, fields=fields) -class TestPublishDate(TestExtractionBase): - - def test_publish_date(self): - article = self.getArticle() - self.runArticleAssertions(article=article, fields=['publish_date']) - - def test_publish_date_rnews(self): - article = self.getArticle() - self.runArticleAssertions(article=article, fields=['publish_date']) - - def test_publish_date_article(self): - article = self.getArticle() - self.runArticleAssertions(article=article, fields=['publish_date']) - - def test_publish_date_schema(self): - article = self.getArticle() - self.runArticleAssertions(article=article, fields=['publish_date']) - - class TestExtractWithUrl(TestExtractionBase): def test_get_canonical_url(self): @@ -484,34 +448,3 @@ def test_author_schema(self): article = self.getArticle() fields = ['authors'] self.runArticleAssertions(article=article, fields=fields) - - -class TestArticleTags(TestExtractionBase): - - def test_tags_kexp(self): - article = self.getArticle() - fields = ['tags'] - self.runArticleAssertions(article=article, fields=fields) - - def test_tags_deadline(self): - article = self.getArticle() - fields = ['tags'] - self.runArticleAssertions(article=article, fields=fields) - - def test_tags_wnyc(self): - article = self.getArticle() - fields = ['tags'] - self.runArticleAssertions(article=article, fields=fields) - - def test_tags_cnet(self): - article = self.getArticle() - fields = ['tags'] - self.runArticleAssertions(article=article, fields=fields) - - def test_tags_abcau(self): - """ - Test ABC Australia page with "topics" tags - """ - article = self.getArticle() - fields = ['tags'] - self.runArticleAssertions(article=article, fields=fields) diff --git a/tests/extractors/publishdate.py b/tests/extractors/publishdate.py index e69de29b..8d2a13b9 100644 --- a/tests/extractors/publishdate.py 
+++ b/tests/extractors/publishdate.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +"""\ +This is a python port of "Goose" orignialy licensed to Gravity.com +under one or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. + +Python port was written by Xavier Grangier for Recrutae + +Gravity.com licenses this file +to you under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from base import TestExtractionBase + + +class TestPublishDate(TestExtractionBase): + + def test_publish_date(self): + article = self.getArticle() + self.runArticleAssertions(article=article, fields=['publish_date']) + + def test_publish_date_rnews(self): + article = self.getArticle() + self.runArticleAssertions(article=article, fields=['publish_date']) + + def test_publish_date_article(self): + article = self.getArticle() + self.runArticleAssertions(article=article, fields=['publish_date']) + + def test_publish_date_schema(self): + article = self.getArticle() + self.runArticleAssertions(article=article, fields=['publish_date']) diff --git a/tests/extractors/tags.py b/tests/extractors/tags.py index e69de29b..22b17129 100644 --- a/tests/extractors/tags.py +++ b/tests/extractors/tags.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- +"""\ +This is a python port of "Goose" orignialy licensed to Gravity.com +under one or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. + +Python port was written by Xavier Grangier for Recrutae + +Gravity.com licenses this file +to you under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +from base import TestExtractionBase + + +class TestArticleTags(TestExtractionBase): + + def assert_tags(self, field, expected_value, result_value): + """\ + + """ + # as we have a set in expected_value and a list in result_value + # make result_value a set + expected_value = set(expected_value) + + # check if both have the same number of items + msg = (u"expected tags set and result tags set" + u"don't have the same number of items") + self.assertEqual(len(result_value), len(expected_value), msg=msg) + + # check if each tag in result_value is in expected_value + for tag in result_value: + self.assertTrue(tag in expected_value) + + def test_tags_kexp(self): + article = self.getArticle() + fields = ['tags'] + self.runArticleAssertions(article=article, fields=fields) + + def test_tags_deadline(self): + article = self.getArticle() + fields = ['tags'] + self.runArticleAssertions(article=article, fields=fields) + + def test_tags_wnyc(self): + article = self.getArticle() + fields = ['tags'] + self.runArticleAssertions(article=article, fields=fields) + + def test_tags_cnet(self): + article = self.getArticle() + fields = ['tags'] + self.runArticleAssertions(article=article, fields=fields) + + def test_tags_abcau(self): + """ + Test ABC Australia page with "topics" tags + """ + article = self.getArticle() + fields = ['tags'] + self.runArticleAssertions(article=article, fields=fields) diff --git a/tests/extractors/videos.py b/tests/extractors/videos.py index 389a414c..23d1670d 100644 --- a/tests/extractors/videos.py +++ b/tests/extractors/videos.py @@ -30,26 +30,10 @@ CURRENT_PATH = os.path.dirname(os.path.abspath(__file__)) -class MockResponseVideos(MockResponse): - def content(self, req): - current_test = self.cls._get_current_testname() - path = os.path.join( - os.path.dirname(CURRENT_PATH), - "data", - "extractors", - "videos", - "%s.html" % current_test) - path = os.path.abspath(path) - content = FileHelper.loadResourceFile(path) - return content - - class ImageExtractionTests(TestExtractionBase): """\ Base Mock test case """ - callback = MockResponseVideos - def assert_movies(self, field, expected_value, result_value): # check if result_value is a list self.assertTrue(isinstance(result_value, list)) From b762ea8dca09b9fd6c3c29ab0b932389d91696b1 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 06:56:28 +0100 Subject: [PATCH 087/100] #188 - move tweets tests case --- .../{content => tweets}/test_tweet.html | 0 .../{content => tweets}/test_tweet.json | 0 tests/extractors/content.py | 9 ----- tests/extractors/tweets.py | 33 +++++++++++++++++++ 4 files changed, 33 insertions(+), 9 deletions(-) rename tests/data/extractors/{content => tweets}/test_tweet.html (100%) rename tests/data/extractors/{content => tweets}/test_tweet.json (100%) diff --git a/tests/data/extractors/content/test_tweet.html b/tests/data/extractors/tweets/test_tweet.html similarity index 100% rename from tests/data/extractors/content/test_tweet.html rename to tests/data/extractors/tweets/test_tweet.html diff --git a/tests/data/extractors/content/test_tweet.json b/tests/data/extractors/tweets/test_tweet.json similarity index 100% rename from tests/data/extractors/content/test_tweet.json rename to tests/data/extractors/tweets/test_tweet.json diff --git a/tests/extractors/content.py b/tests/extractors/content.py index 3eeaf1fe..35b13f20 100644 --- a/tests/extractors/content.py +++ b/tests/extractors/content.py @@ -424,15 +424,6 @@ def extract(self, instance): return article -class 
TestArticleTweet(TestExtractionBase): - - def test_tweet(self): - article = self.getArticle() - number_tweets = len(article.tweets) - expected_number_tweets = self.data['expected']['tweets'] - self.assertEqual(number_tweets, expected_number_tweets) - - class TestArticleLinks(TestExtractionBase): def test_links(self): diff --git a/tests/extractors/tweets.py b/tests/extractors/tweets.py index e69de29b..50300f43 100644 --- a/tests/extractors/tweets.py +++ b/tests/extractors/tweets.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +"""\ +This is a python port of "Goose" orignialy licensed to Gravity.com +under one or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. + +Python port was written by Xavier Grangier for Recrutae + +Gravity.com licenses this file +to you under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from base import TestExtractionBase + + +class TestArticleTweet(TestExtractionBase): + + def test_tweet(self): + article = self.getArticle() + number_tweets = len(article.tweets) + expected_number_tweets = self.data['expected']['tweets'] + self.assertEqual(number_tweets, expected_number_tweets) From ea693a917829a2f5f83815fd4798bf8b21d48eb4 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 07:06:12 +0100 Subject: [PATCH 088/100] #188 - test refactor --- .../{content => links}/test_links.html | 0 .../{content => links}/test_links.json | 0 .../test_opengraph.html | 0 .../test_opengraph.json | 0 tests/extractors/content.py | 135 +----------------- tests/extractors/links.py | 33 +++++ tests/extractors/opengraph.py | 32 +++++ tests/extractors/videos.py | 7 - 8 files changed, 66 insertions(+), 141 deletions(-) rename tests/data/extractors/{content => links}/test_links.html (100%) rename tests/data/extractors/{content => links}/test_links.json (100%) rename tests/data/extractors/{content => opengraph}/test_opengraph.html (100%) rename tests/data/extractors/{content => opengraph}/test_opengraph.json (100%) create mode 100644 tests/extractors/links.py diff --git a/tests/data/extractors/content/test_links.html b/tests/data/extractors/links/test_links.html similarity index 100% rename from tests/data/extractors/content/test_links.html rename to tests/data/extractors/links/test_links.html diff --git a/tests/data/extractors/content/test_links.json b/tests/data/extractors/links/test_links.json similarity index 100% rename from tests/data/extractors/content/test_links.json rename to tests/data/extractors/links/test_links.json diff --git a/tests/data/extractors/content/test_opengraph.html b/tests/data/extractors/opengraph/test_opengraph.html similarity index 100% rename from tests/data/extractors/content/test_opengraph.html rename to tests/data/extractors/opengraph/test_opengraph.html diff --git a/tests/data/extractors/content/test_opengraph.json b/tests/data/extractors/opengraph/test_opengraph.json similarity index 100% rename from tests/data/extractors/content/test_opengraph.json rename to 
tests/data/extractors/opengraph/test_opengraph.json diff --git a/tests/extractors/content.py b/tests/extractors/content.py index 35b13f20..5b287f18 100644 --- a/tests/extractors/content.py +++ b/tests/extractors/content.py @@ -20,130 +20,11 @@ See the License for the specific language governing permissions and limitations under the License. """ -import os -import json +from base import TestExtractionBase -from base import BaseMockTests -from base import MockResponseExtractors - -from goose import Goose -from goose.configuration import Configuration from goose.text import StopWordsChinese from goose.text import StopWordsArabic from goose.text import StopWordsKorean -from goose.utils import FileHelper - - -CURRENT_PATH = os.path.dirname(os.path.abspath(__file__)) - - -class TestExtractionBase(BaseMockTests): - """\ - Extraction test case - """ - callback = MockResponseExtractors - - def getRawHtml(self): - test, suite, module, cls, func = self.id().split('.') - path = os.path.join( - os.path.dirname(CURRENT_PATH), - "data", - suite, - module, - "%s.html" % func) - path = os.path.abspath(path) - content = FileHelper.loadResourceFile(path) - return content - - def loadData(self): - """\ - - """ - test, suite, module, cls, func = self.id().split('.') - path = os.path.join( - os.path.dirname(CURRENT_PATH), - "data", - suite, - module, - "%s.json" % func) - path = os.path.abspath(path) - content = FileHelper.loadResourceFile(path) - self.data = json.loads(content) - - def assert_cleaned_text(self, field, expected_value, result_value): - """\ - - """ - # # TODO : handle verbose level in tests - # print "\n=======================::. ARTICLE REPORT %s .::======================\n" % self.id() - # print 'expected_value (%s) \n' % len(expected_value) - # print expected_value - # print "-------" - # print 'result_value (%s) \n' % len(result_value) - # print result_value - - # cleaned_text is Null - msg = u"Resulting article text was NULL!" - self.assertNotEqual(result_value, None, msg=msg) - - # cleaned_text length - msg = u"Article text was not as long as expected beginning!" - self.assertTrue(len(expected_value) <= len(result_value), msg=msg) - - # clean_text value - result_value = result_value[0:len(expected_value)] - msg = u"The beginning of the article text was not as expected!" 
- self.assertEqual(expected_value, result_value, msg=msg) - - def runArticleAssertions(self, article, fields): - """\ - - """ - for field in fields: - expected_value = self.data['expected'][field] - result_value = getattr(article, field, None) - - # custom assertion for a given field - assertion = 'assert_%s' % field - if hasattr(self, assertion): - getattr(self, assertion)(field, expected_value, result_value) - continue - - # default assertion - msg = u"Error %s \nexpected: %s\nresult: %s" % (field, expected_value, result_value) - self.assertEqual(expected_value, result_value, msg=msg) - - def extract(self, instance): - article = instance.extract(url=self.data['url']) - return article - - def getConfig(self): - config = Configuration() - config.enable_image_fetching = False - return config - - def getArticle(self): - """\ - - """ - # load test case data - self.loadData() - - # basic configuration - # no image fetching - config = self.getConfig() - self.parser = config.get_parser() - - # target language - # needed for non english language most of the time - target_language = self.data.get('target_language') - if target_language: - config.target_language = target_language - config.use_meta_language = False - - # run goose - g = Goose(config=config) - return self.extract(g) class TestExtractions(TestExtractionBase): @@ -330,11 +211,6 @@ def test_okaymarketing(self): fields = ['cleaned_text'] self.runArticleAssertions(article=article, fields=fields) - def test_opengraph(self): - article = self.getArticle() - fields = ['opengraph'] - self.runArticleAssertions(article=article, fields=fields) - def test_title_opengraph(self): article = self.getArticle() fields = ['title'] @@ -424,15 +300,6 @@ def extract(self, instance): return article -class TestArticleLinks(TestExtractionBase): - - def test_links(self): - article = self.getArticle() - number_links = len(article.links) - expected_number_links = self.data['expected']['links'] - self.assertEqual(number_links, expected_number_links) - - class TestArticleAuthor(TestExtractionBase): def test_author_schema(self): diff --git a/tests/extractors/links.py b/tests/extractors/links.py new file mode 100644 index 00000000..8539465e --- /dev/null +++ b/tests/extractors/links.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +"""\ +This is a python port of "Goose" orignialy licensed to Gravity.com +under one or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. + +Python port was written by Xavier Grangier for Recrutae + +Gravity.com licenses this file +to you under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +from base import TestExtractionBase + + +class TestArticleLinks(TestExtractionBase): + + def test_links(self): + article = self.getArticle() + number_links = len(article.links) + expected_number_links = self.data['expected']['links'] + self.assertEqual(number_links, expected_number_links) diff --git a/tests/extractors/opengraph.py b/tests/extractors/opengraph.py index e69de29b..415a784c 100644 --- a/tests/extractors/opengraph.py +++ b/tests/extractors/opengraph.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +"""\ +This is a python port of "Goose" orignialy licensed to Gravity.com +under one or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. + +Python port was written by Xavier Grangier for Recrutae + +Gravity.com licenses this file +to you under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from base import TestExtractionBase + + +class TestOpenGraph(TestExtractionBase): + + def test_opengraph(self): + article = self.getArticle() + fields = ['opengraph'] + self.runArticleAssertions(article=article, fields=fields) diff --git a/tests/extractors/videos.py b/tests/extractors/videos.py index 23d1670d..10be15ff 100644 --- a/tests/extractors/videos.py +++ b/tests/extractors/videos.py @@ -20,15 +20,8 @@ See the License for the specific language governing permissions and limitations under the License. 
""" -import os - -from base import MockResponse from base import TestExtractionBase -from goose.utils import FileHelper - -CURRENT_PATH = os.path.dirname(os.path.abspath(__file__)) - class ImageExtractionTests(TestExtractionBase): """\ From 41e951ce3be2ab5c29bd7a9d24f5e2ee391f02d9 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 07:18:05 +0100 Subject: [PATCH 089/100] #188 - move authors tests --- .../test_author_schema.html | 0 .../test_author_schema.json | 0 tests/extractors/authors.py | 32 +++++++++++++++++++ tests/extractors/content.py | 8 ----- 4 files changed, 32 insertions(+), 8 deletions(-) rename tests/data/extractors/{content => authors}/test_author_schema.html (100%) rename tests/data/extractors/{content => authors}/test_author_schema.json (100%) diff --git a/tests/data/extractors/content/test_author_schema.html b/tests/data/extractors/authors/test_author_schema.html similarity index 100% rename from tests/data/extractors/content/test_author_schema.html rename to tests/data/extractors/authors/test_author_schema.html diff --git a/tests/data/extractors/content/test_author_schema.json b/tests/data/extractors/authors/test_author_schema.json similarity index 100% rename from tests/data/extractors/content/test_author_schema.json rename to tests/data/extractors/authors/test_author_schema.json diff --git a/tests/extractors/authors.py b/tests/extractors/authors.py index e69de29b..709040c1 100644 --- a/tests/extractors/authors.py +++ b/tests/extractors/authors.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +"""\ +This is a python port of "Goose" orignialy licensed to Gravity.com +under one or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. + +Python port was written by Xavier Grangier for Recrutae + +Gravity.com licenses this file +to you under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +from base import TestExtractionBase + + +class TestArticleAuthor(TestExtractionBase): + + def test_author_schema(self): + article = self.getArticle() + fields = ['authors'] + self.runArticleAssertions(article=article, fields=fields) diff --git a/tests/extractors/content.py b/tests/extractors/content.py index 5b287f18..1e940ee9 100644 --- a/tests/extractors/content.py +++ b/tests/extractors/content.py @@ -298,11 +298,3 @@ class TestExtractionsRaw(TestExtractions): def extract(self, instance): article = instance.extract(raw_html=self.getRawHtml()) return article - - -class TestArticleAuthor(TestExtractionBase): - - def test_author_schema(self): - article = self.getArticle() - fields = ['authors'] - self.runArticleAssertions(article=article, fields=fields) From 9be09b8a8dd6a07bf59b5e7e0a3565267606a89a Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 07:22:19 +0100 Subject: [PATCH 090/100] #188 - move title tests --- .../test_title_opengraph.html | 0 .../test_title_opengraph.json | 0 tests/extractors/content.py | 5 --- tests/extractors/title.py | 32 +++++++++++++++++++ 4 files changed, 32 insertions(+), 5 deletions(-) rename tests/data/extractors/{content => title}/test_title_opengraph.html (100%) rename tests/data/extractors/{content => title}/test_title_opengraph.json (100%) diff --git a/tests/data/extractors/content/test_title_opengraph.html b/tests/data/extractors/title/test_title_opengraph.html similarity index 100% rename from tests/data/extractors/content/test_title_opengraph.html rename to tests/data/extractors/title/test_title_opengraph.html diff --git a/tests/data/extractors/content/test_title_opengraph.json b/tests/data/extractors/title/test_title_opengraph.json similarity index 100% rename from tests/data/extractors/content/test_title_opengraph.json rename to tests/data/extractors/title/test_title_opengraph.json diff --git a/tests/extractors/content.py b/tests/extractors/content.py index 1e940ee9..30dc2754 100644 --- a/tests/extractors/content.py +++ b/tests/extractors/content.py @@ -211,11 +211,6 @@ def test_okaymarketing(self): fields = ['cleaned_text'] self.runArticleAssertions(article=article, fields=fields) - def test_title_opengraph(self): - article = self.getArticle() - fields = ['title'] - self.runArticleAssertions(article=article, fields=fields) - def test_issue129(self): article = self.getArticle() fields = ['cleaned_text'] diff --git a/tests/extractors/title.py b/tests/extractors/title.py index e69de29b..36bee9a2 100644 --- a/tests/extractors/title.py +++ b/tests/extractors/title.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +"""\ +This is a python port of "Goose" orignialy licensed to Gravity.com +under one or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. + +Python port was written by Xavier Grangier for Recrutae + +Gravity.com licenses this file +to you under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +from base import TestExtractionBase + + +class TestTitle(TestExtractionBase): + + def test_title_opengraph(self): + article = self.getArticle() + fields = ['title'] + self.runArticleAssertions(article=article, fields=fields) From 6959185a8f72d9d94d5b01fc33ecb3439a3e3fb0 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 07:30:42 +0100 Subject: [PATCH 091/100] #188 - add empty meta test case --- tests/extractors/metas.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/extractors/metas.py b/tests/extractors/metas.py index e69de29b..fd45915a 100644 --- a/tests/extractors/metas.py +++ b/tests/extractors/metas.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +"""\ +This is a python port of "Goose" orignialy licensed to Gravity.com +under one or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. + +Python port was written by Xavier Grangier for Recrutae + +Gravity.com licenses this file +to you under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from base import TestExtractionBase + + +class TestMetas(TestExtractionBase): + + pass From ca1d8240246c64bfc8c4e5a838f5c1ba1aa33471 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Wed, 31 Dec 2014 07:37:42 +0100 Subject: [PATCH 092/100] bump version --- goose/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/goose/version.py b/goose/version.py index 875065c7..c8718138 100644 --- a/goose/version.py +++ b/goose/version.py @@ -21,5 +21,5 @@ limitations under the License. """ -version_info = (1, 0, 23) +version_info = (1, 0, 24) __version__ = ".".join(map(str, version_info)) From f9f1f1db5015a4819d8597061e41eef652c747a5 Mon Sep 17 00:00:00 2001 From: randvis Date: Fri, 2 Jan 2015 22:51:42 +0800 Subject: [PATCH 093/100] 191 - keep available parsers list unchanged during multiple extract() calls --- goose/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/goose/__init__.py b/goose/__init__.py index 49073bd1..409b5732 100644 --- a/goose/__init__.py +++ b/goose/__init__.py @@ -59,7 +59,7 @@ def shutdown_network(self): pass def crawl(self, crawl_candiate): - parsers = self.config.available_parsers + parsers = list(self.config.available_parsers) parsers.remove(self.config.parser_class) try: crawler = Crawler(self.config) From c583da286c710ee321055fc81b3b610ae0ceafec Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Sat, 3 Jan 2015 10:30:45 +0100 Subject: [PATCH 094/100] bump version --- goose/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/goose/version.py b/goose/version.py index c8718138..fedcbb6d 100644 --- a/goose/version.py +++ b/goose/version.py @@ -21,5 +21,5 @@ limitations under the License. 
""" -version_info = (1, 0, 24) +version_info = (1, 0, 25) __version__ = ".".join(map(str, version_info)) From 7981697c3704bb19b1b6a618300a5cd517ab16f9 Mon Sep 17 00:00:00 2001 From: Nathan Breit Date: Tue, 20 Jan 2015 21:47:57 +0800 Subject: [PATCH 095/100] Check for empty title --- goose/extractors/title.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/goose/extractors/title.py b/goose/extractors/title.py index 8104c52b..092471f2 100644 --- a/goose/extractors/title.py +++ b/goose/extractors/title.py @@ -50,6 +50,11 @@ def clean_title(self, title): # TechCrunch | my wonderfull article # my wonderfull article | TechCrunch title_words = title.split() + + # check for an empty title + # so that we don't get an IndexError below + if len(title_words) == 0: + return u"" # check if first letter is in TITLE_SPLITTERS # if so remove it From 3bf8f5ec4e0c2dcad12437a7acb5b510d58e35e6 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Sat, 24 Jan 2015 21:18:41 +0100 Subject: [PATCH 096/100] #199 - pep8 --- goose/extractors/title.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/goose/extractors/title.py b/goose/extractors/title.py index 092471f2..31d69840 100644 --- a/goose/extractors/title.py +++ b/goose/extractors/title.py @@ -50,7 +50,7 @@ def clean_title(self, title): # TechCrunch | my wonderfull article # my wonderfull article | TechCrunch title_words = title.split() - + # check for an empty title # so that we don't get an IndexError below if len(title_words) == 0: From aee045dc2bee3a252bb13b8a00e6317371c781f3 Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Sat, 24 Jan 2015 21:19:04 +0100 Subject: [PATCH 097/100] #199 - test for empty title --- tests/data/extractors/title/test_title_empty.html | 12 ++++++++++++ tests/data/extractors/title/test_title_empty.json | 6 ++++++ tests/extractors/title.py | 5 +++++ 3 files changed, 23 insertions(+) create mode 100644 tests/data/extractors/title/test_title_empty.html create mode 100644 tests/data/extractors/title/test_title_empty.json diff --git a/tests/data/extractors/title/test_title_empty.html b/tests/data/extractors/title/test_title_empty.html new file mode 100644 index 00000000..63a8cab9 --- /dev/null +++ b/tests/data/extractors/title/test_title_empty.html @@ -0,0 +1,12 @@ + + + + + +
    +

    + TextNode 1 - The Scala supported IDE is one of the few pain points of developers who want to start using Scala in their Java project. On existing long term project developed by a team its hard to step in and introduce a new language that is not supported by the existing IDE. On way to go about it is to hid the fact that you use Scala from the Java world by using one way dependency injection. Still, if you wish to truly absorb Scala into your existing java environment then you'll soon introduced cross language dependencies. +

    +
+</body>
+</html>

diff --git a/tests/data/extractors/title/test_title_empty.json b/tests/data/extractors/title/test_title_empty.json
new file mode 100644
index 00000000..c31bab9f
--- /dev/null
+++ b/tests/data/extractors/title/test_title_empty.json
@@ -0,0 +1,6 @@
+{
+    "url": "http://exemple.com/test_title_empty.html",
+    "expected": {
+        "title": ""
+    }
+}

diff --git a/tests/extractors/title.py b/tests/extractors/title.py
index 36bee9a2..09170205 100644
--- a/tests/extractors/title.py
+++ b/tests/extractors/title.py
@@ -30,3 +30,8 @@ def test_title_opengraph(self):
         article = self.getArticle()
         fields = ['title']
         self.runArticleAssertions(article=article, fields=fields)
+
+    def test_title_empty(self):
+        article = self.getArticle()
+        fields = ['title']
+        self.runArticleAssertions(article=article, fields=fields)

From cc9d892139cad23b98d43f267cf4ab620a63cb52 Mon Sep 17 00:00:00 2001
From: Steven Maude
Date: Thu, 19 Feb 2015 12:35:13 +0000
Subject: [PATCH 098/100] Tidy README.rst

Minor typo fixes.
---
 README.rst | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/README.rst b/README.rst
index 86f3cf7a..5dc8ab0b 100644
--- a/README.rst
+++ b/README.rst
@@ -5,9 +5,9 @@ Intro
 -----
 
 Goose was originally an article extractor written in Java that has most
-recently (aug2011) been converted to a `scala project `_.
+recently (Aug2011) been converted to a `scala project `_.
 
-This is a complete rewrite in python. The aim of the software is to
+This is a complete rewrite in Python. The aim of the software is to
 take any news article or article-type web page and not only extract what
 is the main body of the article but also all meta data and most probable
 image candidate.
@@ -16,11 +16,11 @@ Goose will try to extract the following information:
 
 - Main text of an article
 - Main image of article
-- Any Youtube/Vimeo movies embedded in article
+- Any YouTube/Vimeo movies embedded in article
 - Meta Description
 - Meta tags
 
-The python version was rewritten by:
+The Python version was rewritten by:
 
 - Xavier Grangier
 
@@ -28,10 +28,10 @@ Licensing
 ---------
 
 If you find Goose useful or have issues please drop me a line. I'd love
-to hear how you're using it or what features should be improved
+to hear how you're using it or what features should be improved.
 
-Goose is licensed by Gravity.com under the Apache 2.0 license, see the
-LICENSE file for more details
+Goose is licensed by Gravity.com under the Apache 2.0 license; see the
+LICENSE file for more details.
 
 Setup
 -----
@@ -70,13 +70,13 @@ pass goose a Configuration() object. The second one is to pass
 a configuration dict.
 
 For instance, if you want to change the userAgent used by Goose just
-pass :
+pass:
 
 ::
 
     >>> g = Goose({'browser_user_agent': 'Mozilla'})
 
-Switching parsers : Goose can now be use with lxml html parser or lxml
+Switching parsers : Goose can now be used with lxml html parser or lxml
 soup parser. By default the html parser is used. If you want to use the
 soup parser pass it in the configuration dict :
 
@@ -87,8 +87,8 @@ soup parser pass it in the configuration dict :
 Goose is now language aware
 ---------------------------
 
-For example scrapping a Spanish content page with correct meta language
-tags
+For example, scraping a Spanish content page with correct meta language
+tags:
 
 ::
 
@@ -114,7 +114,7 @@ configuration :
 
     u'Importante golpe a la banda terrorista ETA en Francia.
La Guardia Civil ha detenido en un hotel de Macon, a 70 kil\xf3metros de Lyon, a Izaskun Lesaka y ' Passing {'use\_meta\_language': False, 'target\_language':'es'} will -force as configuration will force the spanish language +forcibly select Spanish. Video extraction @@ -146,7 +146,7 @@ Goose in Chinese Some users want to use Goose for Chinese content. Chinese word segmentation is way more difficult to deal with than occidental languages. Chinese needs a dedicated StopWord analyser that need to be -passed to the config object +passed to the config object. :: @@ -202,7 +202,7 @@ Known issues ------------ - There are some issues with unicode URLs. -- Cookie handling : Some websites need cookie handling. At the moment the only work around is to use the raw_html extraction. For instance ; +- Cookie handling : Some websites need cookie handling. At the moment the only work around is to use the raw_html extraction. For instance: >>> import urllib2 >>> import goose From 5db0166d03fd12fb5e9abb4e111a01feaeead369 Mon Sep 17 00:00:00 2001 From: Amal Francis Date: Wed, 4 Mar 2015 12:41:07 +0530 Subject: [PATCH 099/100] Type fix: Issue #204 --- goose/extractors/title.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/goose/extractors/title.py b/goose/extractors/title.py index 31d69840..a59dca92 100644 --- a/goose/extractors/title.py +++ b/goose/extractors/title.py @@ -32,7 +32,7 @@ class TitleExtractor(BaseExtractor): def clean_title(self, title): """Clean title with the use of og:site_name - in this case try to get ride of site name + in this case try to get rid of site name and use TITLE_SPLITTERS to reformat title """ # check if we have the site name in opengraph data From 09023ec9f5ef26a628a2365616c0a7c864f0ecea Mon Sep 17 00:00:00 2001 From: Xavier Grangier Date: Sun, 29 Mar 2015 16:04:10 +0200 Subject: [PATCH 100/100] #217 - check if content value is not None --- goose/extractors/opengraph.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/goose/extractors/opengraph.py b/goose/extractors/opengraph.py index a52ac349..dc43b4bf 100644 --- a/goose/extractors/opengraph.py +++ b/goose/extractors/opengraph.py @@ -34,5 +34,6 @@ def extract(self): attr = self.parser.getAttribute(meta, 'property') if attr is not None and attr.startswith("og:"): value = self.parser.getAttribute(meta, 'content') - opengraph_dict.update({attr.split(":")[1]: value}) + if value: + opengraph_dict.update({attr.split(":")[1]: value}) return opengraph_dict
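
Two of the fixes in this series lend themselves to short standalone demonstrations.

The parser fix in PATCH 093 works because ``list()`` takes a per-call copy of the shared ``available_parsers`` list before ``remove()`` mutates it; without the copy, every ``extract()`` call would permanently drain one entry from the configuration's list. Below is a minimal sketch of the failure mode and the fix, in which ``Config`` and the ``crawl_*`` functions are stand-ins for illustration, not Goose's real classes:

::

    class Config(object):
        # shared default list, mimicking config.available_parsers in the diff
        available_parsers = ['lxml', 'soup']
        parser_class = 'lxml'

    def crawl_buggy(config):
        parsers = config.available_parsers        # aliases the shared list
        parsers.remove(config.parser_class)       # mutates it for all later calls
        return parsers

    def crawl_fixed(config):
        parsers = list(config.available_parsers)  # per-call copy (the #191 fix)
        parsers.remove(config.parser_class)
        return parsers

    config = Config()
    crawl_buggy(config)
    try:
        crawl_buggy(config)                       # 'lxml' was already removed
    except ValueError:
        print('shared parser list was drained')

    config.available_parsers = ['lxml', 'soup']
    crawl_fixed(config)
    crawl_fixed(config)                           # safe on repeated calls
    print(config.available_parsers)               # ['lxml', 'soup']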
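PATCH 100 guards against ``og:`` meta tags whose ``content`` attribute is absent (``None``) or empty, which previously let a useless ``None`` value into the OpenGraph dict. The patched loop can be exercised with plain lxml; the helper below is an illustrative sketch that bypasses Goose's own parser wrapper:

::

    from lxml import etree

    HTML = """<html><head>
    <meta property="og:title" content="An article title"/>
    <meta property="og:image" content=""/>
    <meta property="og:type"/>
    </head><body></body></html>"""

    def extract_opengraph(raw_html):
        doc = etree.HTML(raw_html)
        opengraph = {}
        for meta in doc.findall('.//meta'):
            attr = meta.get('property')
            if attr is not None and attr.startswith('og:'):
                value = meta.get('content')
                if value:  # the #217 fix: skip None and empty strings
                    opengraph[attr.split(':')[1]] = value
        return opengraph

    print(extract_opengraph(HTML))  # {'title': 'An article title'}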