diff --git a/README.rst b/README.rst index 5dc8ab0b..b40b5263 100644 --- a/README.rst +++ b/README.rst @@ -1,3 +1,7 @@ +在原作者的基础上,针对中文网站做一些改进,以期能够正确提取正文、日期等 + +===华丽的分隔线,以下为原项目说明=== + Python-Goose - Article Extractor |Build Status| =============================================== diff --git a/goose/extractors/content.py b/goose/extractors/content.py index e0703d55..02fdf586 100644 --- a/goose/extractors/content.py +++ b/goose/extractors/content.py @@ -28,6 +28,7 @@ KNOWN_ARTICLE_CONTENT_TAGS = [ {'attr': 'itemprop', 'value': 'articleBody'}, {'attr': 'class', 'value': 'post-content'}, + {'attr': 'class', 'value': 'article'}, {'tag': 'article'}, ]