From 71903d7f0c035e43d43d797b877042e55c1d767a Mon Sep 17 00:00:00 2001
From: Magnus Hagander
Date: Mon, 28 Nov 2011 21:14:04 +0100
Subject: [PATCH] Parse files in the ftp tree as utf8, so as not to get random
 encoding errors

Any characters that aren't utf-8 will get replaced with the unicode
replacement character, instead of throwing an exception.

Fixes #106
---
 tools/ftp/spider_ftp.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/ftp/spider_ftp.py b/tools/ftp/spider_ftp.py
index b6a42449..076d418c 100755
--- a/tools/ftp/spider_ftp.py
+++ b/tools/ftp/spider_ftp.py
@@ -10,12 +10,12 @@ import sys
 import os
 from datetime import datetime
 import cPickle as pickle
-#from pprint import pprint
+import codecs
 
 allnodes = {}
 
 def read_file(fn):
-	f = open(fn, "r")
+	f = codecs.open(fn, 'r', encoding='utf-8', errors='replace')
 	t = f.read()
 	f.close()
 	return t
-- 
2.39.5