#!/usr/bin/env python #Authors: Dora Erdos and Vatche Ishakian #Copyright (c) 2012 Boston University. #All rights reserved. #If using any of this code please cite: #Dora Erdos, Vatche Ishakian, Azer Bestavros, and Evimaria Terzi "The Filter Placement Problem and its Application to Minimizing Information Multiplicity", PVLDB 5(5): 418-429, 2012, Istanbul, Turkey, August 2012. #Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: #Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. #Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. #Neither the name "Boston University" nor the names of the authors may be used to endorse or promote products derived from this software without specific prior written permission. #THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import math import os import getopt, sys import time import commands import random import operator #Parameters: #All params are set from command line FILENAME = "data.txt" #file containing two tab-separated columns, containing directed edges in graph: out_id \t in_id OUTPUTFILE = "result.txt" #filter ids and objective function value output to this file ChooseAlgo = 0 #takes a value of 0 or 1. 0 for Greedy_All algorithm and 1 for Greedy_Max NumFilters = 3 #number of filters (num of iterations) placed by the algorithm #Data Structures: NodeList = {} #List of nodes and their children CopyNodeList = {} TopologicalList = [] #Topologicalal order of the nodes RevNodeList={} #List of nodes and their parents NodeInfluenceList = {} NodeInfluenceNodeList = {} AggImpact = {} Filter_List = {} ImpFilter = {} #function that runs the whole experiment def RunSimulator(): #Load graph LoadGraphFile() GenerateGraphCopy() #Compute topological order of nodes OrderGraphTopological() sys.stderr.write('Finished topological ordering\n') #Print first line of output LogInit() #Initial computation of impact per node CalculateFullImpact() sys.stderr.write('Finished impact computation \n') #Greedy filter placement heuristic, Greedy_All or Greedy_Max FindFilters() sys.stderr.write('Finished greedy heuristic to find filters \n') #Preprocessing algorithm, computing for every node which nodes it is influenced by or it is influencing def CalculateFullImpact(): global TopologicalList, NodeInfluenceList, NodeInfluenceNodeList, NodeList, RevNodeList, Filter_List, ImpFilter NodeInfluenceList = {} NodeInfluenceNodeList = {} TempImpFilter = {} for k in ImpFilter.keys(): if Filter_List.has_key(k): TempImpFilter[k] = 0 else: TempImpFilter[k] = ImpFilter[k] for i in range(0,len(TopologicalList)): lNodeID = TopologicalList[i] if len(NodeInfluenceList.keys()) == 0: #This is only needed for the first node NodeInfluenceList[lNodeID] = 1 NodeInfluenceNodeList[lNodeID] = {} else: #All other times we should go to this section of the if statement. #Go to find the list of parents and find their NodeInfluence. TotalInfluence = 0 NodeInfluenceNodeList[lNodeID] = {} for j in RevNodeList[lNodeID].keys(): TotalInfluence = TotalInfluence + NodeInfluenceList[j] #If the parents influence already exists in the NodeInfluenceNodeList because it could be coming from somewhere else. if NodeInfluenceNodeList[lNodeID].has_key(j): if not Filter_List.has_key(lNodeID): NodeInfluenceNodeList[lNodeID][j] = NodeInfluenceNodeList[lNodeID][j] + NodeList[j][lNodeID] else: TempImpFilter[j] = TempImpFilter[j] + NodeList[j][lNodeID] else: if Filter_List.has_key(lNodeID): NodeInfluenceNodeList[lNodeID][j] = 0 TempImpFilter[j] = TempImpFilter[j] + NodeList[j][lNodeID] else: NodeInfluenceNodeList[lNodeID][j] = NodeList[j][lNodeID] for k in NodeInfluenceNodeList[j].keys(): #Add the influence of the parent's parent if NodeInfluenceNodeList[lNodeID].has_key(k): if not Filter_List.has_key(lNodeID): NodeInfluenceNodeList[lNodeID][k] = NodeInfluenceNodeList[lNodeID][k] + NodeInfluenceNodeList[j][k] else: TempImpFilter[k] = TempImpFilter[k] + NodeInfluenceNodeList[j][k] else: if Filter_List.has_key(lNodeID): NodeInfluenceNodeList[lNodeID][k] = 0 TempImpFilter[k] = TempImpFilter[k] + NodeInfluenceNodeList[j][k] else: NodeInfluenceNodeList[lNodeID][k] = NodeInfluenceNodeList[j][k] #Update the influence Number. #If the node is a filter, then its influence is set to 1 otherwise, we have its total influence. if Filter_List.has_key(lNodeID): NodeInfluenceList[lNodeID] = 1 else: NodeInfluenceList[lNodeID] = TotalInfluence for k in TempImpFilter.keys(): ImpFilter[k] = TempImpFilter[k] #Given a list of NodInfluence and NodeList,Calculate the impact of each node. #Assign filters based on Greedy_All or Greedy_Max heuristic def FindFilters(): global NumFilters, NodeInfluenceNodeList, NodeInfluenceList, FilterImpact, ChooseAlgo, ImpFilter for i in range(0,NumFilters): StartTime = time.time() FilterImpact = {} for j in NodeInfluenceList.keys(): CurrentImpact = 0 ApproxImpact = 0 if not Filter_List.has_key(j): if not(NodeInfluenceList[j] == 1): for k in NodeInfluenceNodeList.keys(): if NodeInfluenceNodeList[k].has_key(j): ApproxImpact = ApproxImpact + NodeInfluenceNodeList[k][j] * 1 FilterImpact[j] = (NodeInfluenceList[j] - 1) * (ApproxImpact + ImpFilter[j]) if ChooseAlgo: #Just Look at the top NumFilters and return that list xFilterImpact = sorted(FilterImpact.iteritems(), key=operator.itemgetter(1) ,reverse=True) Counter = 0 for j in range(0, len(xFilterImpact)): StartTime = time.time() Filter_List[xFilterImpact[j][0]] = xFilterImpact[j][1] Counter = Counter + 1 Value = GetObjFunc() EndTime = time.time() LogResults(Value,Counter,StartTime,EndTime) if not (Counter < NumFilters): return else: #Select a filter, Log the results and then repeat continue until the number of filters are reached. if i > 0: xFilterImpact = sorted(FilterImpact.iteritems(), key=operator.itemgetter(1) ,reverse=True) Filter_List[xFilterImpact[0][0]] = xFilterImpact[0][1] NodeInfluenceList[xFilterImpact[0][0]] = 1 Value = GetObjFunc() EndTime = time.time() LogResults(Value,i,StartTime,EndTime) CalculateFullImpact() #Prints first line of output file def LogInit(): global OUTPUTFILE fout = open(OUTPUTFILE,'a') fout.write('iteration\tobj_func_val\tfilter_ids\tstart_time\tend_time\n') fout.close() #Prints result of each iteration on output file def LogResults(i_ObjValue, i_NumOfFilter, i_StartTime, i_EndTime): global OUTPUTFILE, Filter_List fout = open(OUTPUTFILE,'a') fout.write(str(i_NumOfFilter)+ "\t" + str(i_ObjValue) + "\t" + str(Filter_List.keys()) + "\t" + str(i_StartTime) + "\t" + str(i_EndTime) + "\n") fout.close() #Generated a topological Ordering of the nodes def OrderGraphTopological(): global CopyNodeList, TopologicalList, RevNodeList while 1: for i in CopyNodeList.keys(): if len(CopyNodeList[i].keys()) == 0: #This is a sink node. ParentList = RevNodeList[i] for j in ParentList.keys(): del CopyNodeList[j][i] #Put the node in a list TopologicalList.insert(0,i) #Delete the Node from the CopyListNode del CopyNodeList[i] break if len(CopyNodeList.keys()) ==0: break #Generate a copy of the graph def GenerateGraphCopy(): global NodeList, CopyNodeList for i in NodeList.keys(): oNode = NodeList[i] oCopyNode = {} for j in oNode.keys(): oCopyNode[j] = oNode[j] CopyNodeList[i] = oCopyNode #Loads a graph file which consists of NodeID\tNodeID #Assumption:The graph is directed #The function loads a graph, creates a Node To children list designated as NodeList #Simultaneously, it also creates a Node to Parent List designated by RevNodeList def LoadGraphFile(): global FILENAME, NodeList, RevNodeList, ImpFilter fgraph = open(FILENAME,'r') l = fgraph.readline() while l: a = l.split("\t") Source = int((a[0]).strip()) Destination = int((a[1]).strip()) weight = 1 if NodeList.has_key(Source): Node = NodeList[Source] else: Node = {} Node[Destination] = weight NodeList[Source] = Node #Make sure you append the destination if not(NodeList.has_key(Destination)): Node = {} NodeList[Destination] = Node ImpFilter[Source] = 0 ImpFilter[Destination] = 0 #Now do a reverse node listing. if RevNodeList.has_key(Destination): Node = RevNodeList[Destination] else: Node = {} Node[Source] = Source RevNodeList[Destination] = Node #Make sure you append the destination if not(RevNodeList.has_key(Source)): Node = {} RevNodeList[Source] = Node l = fgraph.readline() fgraph.close() #define a function that calculates an objective function. def GetObjFunc(): global Filter_List, TopologicalList, RevNodeList TotalMessages = 0 TotalFilterMessages = 0 #Given a list of Filters, Return the objective function of the graph. MessageList = {} #Calculate the Total Number of messages in case there is no Filter for j in range(0, len(TopologicalList)): for k in NodeList[TopologicalList[j]].keys(): if MessageList.has_key(k): MessageList[k] = MessageList[k] + MessageList[TopologicalList[j]] else: if len(RevNodeList[TopologicalList[j]].keys()) == 0: MessageList[k] = 1 else: MessageList[k] = MessageList[TopologicalList[j]] for i in MessageList.keys(): TotalMessages = TotalMessages + MessageList[i] MessageList = {} for j in range(0, len(TopologicalList)): for k in NodeList[TopologicalList[j]].keys(): if MessageList.has_key(k): if Filter_List.has_key(TopologicalList[j]): MessageList[k] = MessageList[k] + 1 else: MessageList[k] = MessageList[k] + MessageList[TopologicalList[j]] else: if len(RevNodeList[TopologicalList[j]].keys()) == 0: MessageList[k] = 1 elif Filter_List.has_key(TopologicalList[j]): MessageList[k] = 1 else: MessageList[k] = MessageList[TopologicalList[j]] for i in MessageList.keys(): TotalFilterMessages = TotalFilterMessages + MessageList[i] return TotalFilterMessages def usage(): sys.stderr.write('usage:\n') sys.stderr.write('syntax: python filters.py arg#1 arg#2 arg#3 arg#4\n') sys.stderr.write('arg#1: graph_file (string), format: id1id2\n') sys.stderr.write('arg#2: choose which algorithm to run (integer, 0 or 1), 0: Greedy_All 1: Greedy_Max\n') sys.stderr.write('arg#3: number of filters (integer) \n') sys.stderr.write('arg#4: output_file (string)\n') def main(): global FILENAME,NumFilters, ChooseAlgo, OUTPUTFILE try: opts,args = getopt.getopt(sys.argv[1:], "hg:d", ["help"]) FILENAME = args[0] ChooseAlgo = int(args[1]) NumFilters = int(args[2]) OUTPUTFILE = args[3] RunSimulator() except: usage() sys.exit(2) if __name__ == "__main__": main()