From c49f3ec9548db549b68533016a113336bdf05ff4 Mon Sep 17 00:00:00 2001 From: Marko Kreen Date: Tue, 18 Sep 2012 16:39:07 +0300 Subject: [PATCH] londiste compare improvements * Calculate common column names, to allow different order or different subset. * Use different hash function on 8.3 <> something else comparison as hashtext() changed in 8.4. * Always calculate final hash as bigint. --- python/londiste/compare.py | 48 +++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/python/londiste/compare.py b/python/londiste/compare.py index f053e60d..b08a04e4 100644 --- a/python/londiste/compare.py +++ b/python/londiste/compare.py @@ -23,8 +23,19 @@ class Comparator(Syncer): self.log.info('Counting %s' % dst_tbl) - q = "select count(1) as cnt, sum(hashtext(t.*::text)) as chksum from only _TABLE_ t" + # get common cols + cols = self.calc_cols(src_curs, src_tbl, dst_curs, dst_tbl) + + # get sane query + v1 = src_db.server_version + v2 = dst_db.server_version + if (v1 < 80400 or v2 < 80400) and v1 != v2: + q = "select count(1) as cnt, sum(md5('x'||substr(md5(_COLS_::text),1,16))::bit(64)::bigint) as chksum from only _TABLE_" + else: + q = "select count(1) as cnt, sum(hashtext(_COLS_::text)::bigint) as chksum from only _TABLE_" + q = self.cf.get('compare_sql', q) + q = q.replace("_COLS_", cols) src_q = q.replace('_TABLE_', skytools.quote_fqident(src_tbl)) dst_q = q.replace('_TABLE_', skytools.quote_fqident(dst_tbl)) @@ -50,6 +61,41 @@ class Comparator(Syncer): return 1 return 0 + def calc_cols(self, src_curs, src_tbl, dst_curs, dst_tbl): + cols1 = self.load_cols(src_curs, src_tbl) + cols2 = self.load_cols(dst_curs, dst_tbl) + + qcols = [] + for c in self.calc_common(cols1, cols2): + qcols.append(skytools.quote_ident(c)) + return "(%s)" % ",".join(qcols) + + def load_cols(self, curs, tbl): + schema, table = skytools.fq_name_parts(tbl) + q = "select column_name from information_schema.columns"\ + " where table_schema = %s and 
table_name = %s" + curs.execute(q, [schema, table]) + cols = [] + for row in curs.fetchall(): + cols.append(row[0]) + return cols + + def calc_common(self, cols1, cols2): + common = [] + map2 = {} + for c in cols2: + map2[c] = 1 + for c in cols1: + if c in map2: + common.append(c) + if len(common) == 0: + raise Exception("no common columns found") + + if len(common) != len(cols1) or len(cols2) != len(cols1): + self.log.warning("Ignoring some columns") + + return common + if __name__ == '__main__': script = Comparator(sys.argv[1:]) script.start() -- 2.39.5