#!/usr/bin/env python # # This script will compare the master and slave databases, one table at a time, to determine # whether they are synchronized or not. Lines are printed with a leading "x" or "o", inspired # by the Japanese associate of "X" meaning "no" and "O" meaning "OK". Any line starting with # an "x" will follow with details about the table not being in sync. # import os import psycopg2 import time import sys FAILWAIT = 3 MAXFAILS = 5 ret_val = 0 DEBUGMODE = 1 OKTORUN = 1 host1 = os.environ.get("SLONY_MASTER") host2 = os.environ.get("SLONY_SLAVE") if not host1: print("You need to set the SLONY_MASTER environment variable to the master's hostname.") ret_val = 1 if not host2: print("You need to set the SLONY_SLAVE environment variable to the slave's hostname.") ret_val = 1 if ret_val != 0: sys.exit(ret_val) # For marian db # ============= dbname1 = 'blahblahblah' dbname2 = 'blahblahblah' user1 = 'blahblahblah' user2 = 'blahblahblah' password1 = 'blahblahblah' password2 = 'blahblahblah' # function: debug_print # ============================================================================ def debug_print(txt): """Lets us turn off print statements that are otherwise unimportant.""" if DEBUGMODE != 0: print txt # function: show_usage # ============================================================================ def show_usage(): """Shows the usage of this script""" print "There are two ways to call this script:" print "" print "1. %s" % sys.argv[0] print " This compares the full data of each table. (VERY SLOW)" print "" print "2. %s -q" % sys.argv[0] print " This compares the row-counts of each table. (QUICK)" print "" # ============================================================================ # function: db_diff_quick # # This merely compares the total rows in tables, and compares the results. # Yes, this really doesn't tell us things are in sync, but if you have a # database with a LOT of activity on tables that never eliminate data, then # this could very well serve your needs. It's more of a sanity check than # a real equality test. # ============================================================================ def db_diff_quick(db1, db2): """Compares contents of two databases and prints results""" curs1 = db1.cursor() curs2 = db2.cursor() # Get a list of tables from the master database curs1.execute("SELECT tablename FROM pg_tables WHERE schemaname='" + dbname1 + "' AND tableowner='" + user1 + "'") tables = curs1.fetchall() for t in tables: failCount = 0 while failCount < MAXFAILS: curs1.execute("SELECT COUNT(*) FROM " + t[0]) curs2.execute("SELECT COUNT(*) FROM " + t[0]) rows1 = curs1.fetchall() rows2 = curs2.fetchall() if rows1 == rows2: if failCount == 0: debug_print("o - Table " + t[0] + " row-count matches.") else: print "o - Table " + t[0] + " row-count matches." break else: failCount += 1 print "x - Table %s row-count does not match. (Attempt %d of %d)" % (t[0], failCount, MAXFAILS) time.sleep(FAILWAIT) if failCount == MAXFAILS: return 1 return 0 # ============================================================================ # function: db_diff # # This, because Python is so cool, actually compares the entire data between # tables. I don't recommend this for large databases. # ============================================================================ def db_diff(db1, db2): """Compares contents of two databases and prints results""" curs1 = db1.cursor() curs2 = db2.cursor() # Get a list of tables from the master database curs1.execute("SELECT tablename FROM pg_tables WHERE schemaname='" + dbname1 + "' AND tableowner='" + user1 + "'") tables = curs1.fetchall() for t in tables: failCount = 0 while failCount < MAXFAILS: curs1.execute("SELECT * FROM " + t[0]) curs2.execute("SELECT * FROM " + t[0]) rows1 = curs1.fetchall() rows2 = curs2.fetchall() if rows1 == rows2: if failCount == 0: debug_print("o - Table %s is in sync." % (t[0])) else: print "o - Table %s is in sync." % (t[0]) break else: failCount += 1 print "x - Table %s is not in sync. (Attempt %d of %d)" % (t[0], failCount, MAXFAILS) time.sleep(FAILWAIT) if failCount == MAXFAILS: return 1 return 0 # ============================================================================ db1 = psycopg2.connect("user=" + user1 + " password=" + password1 + " dbname=" + dbname1 + " host=" + host1) db2 = psycopg2.connect("user=" + user2 + " password=" + password2 + " dbname=" + dbname2 + " host=" + host2) debug_print("== COMPARING DATABASES: ==========================================") debug_print("** Database #1: " + dbname1 + " on host: " + host1) debug_print("** Database #2: " + dbname2 + " on host: " + host2) debug_print("==================================================================") try: arg1 = sys.argv[1] except IndexError: ret_val = db_diff(db1,db2) else: if arg1 == "-q": ret_val = db_diff_quick(db1,db2) else: show_usage() if ret_val == 0: print "Databases are in sync!" else: print "Databases do not appear to be in sync." sys.exit(ret_val)