Вы находитесь на странице: 1 из 9

DELHI TECHNOLOGICAL UNIVERSITY

DEPARTMENT OF INFORMATION TECHNOLOGY


IV YEAR VIII SEMESTER

IT 404
BIG DATA ANALYSIS

ASSIGNMENT ON INFORMATION MAXIMISATION


(YEAR 2019-20)

SUBMITTED BY:
RUJHAN SINGLA – 2K16/IT/093
SAAMARTH GUPTA – 2K16/IT/094
PROBLEM I – INFORMATION MAXIMISATION USING LINEAR
THRESHOLD MODEL

CODE
import networkx as nx
import random
import copy

def linear_threshold(G, seeds, steps=0, threshold=0.1):
    """Run the linear threshold diffusion model on ``G`` starting from ``seeds``.

    The caller's graph is never modified: an undirected graph is converted
    with ``to_directed()`` and a directed graph is deep-copied before any
    attribute is written.

    Args:
        G: A networkx graph without parallel edges.
        seeds: Iterable of nodes that are active at the start.
        steps: Maximum number of diffusion rounds. A value <= 0 means
            "diffuse until a round activates nothing".
        threshold: Threshold assigned to every node that has no
            ``'threshold'`` attribute of its own.

    Returns:
        The list built by ``_diffuse_all`` / ``_diffuse_k_rounds``: the seed
        list first, followed by one list of newly activated nodes per round.

    Raises:
        Exception: If ``G`` is a multigraph, a seed is not in ``G``, or an
            existing node threshold / edge influence is larger than 1.
    """
    # is_multigraph() also catches subclasses, unlike an exact type() match.
    if G.is_multigraph():
        raise Exception(
            "linear_threshold() is not defined for graphs with multiedges.")

    # Make sure the seeds are in the graph.
    for s in seeds:
        if s not in G.nodes():
            raise Exception("seed", s, "is not in graph")

    # Work on a private directed copy.
    if not G.is_directed():
        DG = G.to_directed()
    else:
        DG = copy.deepcopy(G)

    # Init thresholds. nodes(data=True) yields the live attribute dicts, so
    # this avoids the `Graph.node` attribute removed in NetworkX 2.4.
    for _, node_attrs in DG.nodes(data=True):
        if 'threshold' not in node_attrs:
            # Set default threshold.
            node_attrs['threshold'] = threshold
        elif node_attrs['threshold'] > 1:
            raise Exception("node threshold:", node_attrs['threshold'],
                            "cannot be larger than 1")

    # Init influences: by default every in-edge of a node carries an equal
    # share, 1 / in-degree of the edge's target.
    in_deg = DG.in_degree()
    for _, target, edge_attrs in DG.edges(data=True):
        if 'influence' not in edge_attrs:
            edge_attrs['influence'] = 1.0 / in_deg[target]
        elif edge_attrs['influence'] > 1:
            raise Exception("edge influence:", edge_attrs['influence'],
                            "cannot be larger than 1")

    # Copy only the container: the node objects themselves must stay the ones
    # stored in the graph (a deep copy of an identity-hashed node would not
    # be found in DG), and the caller's sequence must not be extended.
    A = list(seeds)
    if steps <= 0:
        # Perform diffusion until no more nodes can be activated.
        return _diffuse_all(DG, A)
    # Perform diffusion for at most "steps" rounds only.
    return _diffuse_k_rounds(DG, A, steps)

def _diffuse_all(G, A):
    """Diffuse from the active list ``A`` until a round activates nothing.

    Returns a list of layers: a copy of the initial ``A`` first, then the
    list reported by ``_diffuse_one_round`` for every round that was run.
    The round that left ``A`` unchanged is appended too, before the loop
    stops.
    """
    layers = [list(A)]  # copy, so later growth of A does not alter layer 0
    while True:
        size_before = len(A)
        A, newly_active = _diffuse_one_round(G, A)
        layers.append(newly_active)
        if len(A) == size_before:
            return layers

def _diffuse_k_rounds(G, A, steps):
    """Diffuse from the active list ``A`` for at most ``steps`` rounds.

    Stops early when every node of ``G`` is active or when a round leaves
    ``A`` unchanged. Returns a list of layers: a copy of the initial ``A``
    first, then the list reported by ``_diffuse_one_round`` for each round
    that was run.
    """
    layers = [list(A)]  # copy, so later growth of A does not alter layer 0
    rounds_left = steps
    while rounds_left > 0 and len(A) < len(G):
        size_before = len(A)
        A, newly_active = _diffuse_one_round(G, A)
        layers.append(newly_active)
        if len(A) == size_before:
            break
        rounds_left -= 1
    return layers

def _diffuse_one_round(G, A):
    """Run one synchronous round of linear threshold activation.

    An inactive successor of an active node becomes active when the summed
    ``'influence'`` of its already active predecessors reaches its
    ``'threshold'`` node attribute. Only nodes in ``A`` at the start of the
    round count as active during the round.

    Args:
        G: Directed graph with ``'threshold'`` on nodes and ``'influence'``
            on edges.
        A: List of currently active nodes; it is extended in place.

    Returns:
        ``(A, activated)``: the same list object, now including the newly
        activated nodes, and a list of just those new nodes.
    """
    # Build the membership set once instead of scanning the list (and
    # rebuilding set(A)) for every neighbour.
    active = set(A)
    activated_nodes_of_this_round = set()
    for s in A:
        for nb in G.successors(s):
            # Skip nodes that are already active or already decided in this
            # round; the outcome for a node is the same whichever active
            # predecessor reaches it first.
            if nb in active or nb in activated_nodes_of_this_round:
                continue
            # Keep the graph's predecessor order so the float sum does not
            # depend on set iteration order.
            active_nb = [p for p in G.predecessors(nb) if p in active]
            # `G.nodes[...]` replaces `G.node[...]`, removed in NetworkX 2.4.
            if _influence_sum(G, active_nb, nb) >= G.nodes[nb]['threshold']:
                activated_nodes_of_this_round.add(nb)
    activated = list(activated_nodes_of_this_round)
    A.extend(activated)
    return A, activated

def _influence_sum(G, froms, to):


influence_sum = 0.0
for f in froms:
influence_sum += G[f][to]['influence']
return influence_sum

# Generate a random G(n, p) graph.
# NOTE: despite its name, `edges` is passed as the first argument of
# gnp_random_graph, i.e. it is the number of NODES; `edgeProbability` is the
# probability of creating each possible edge.
edges = 1000
edgeProbability = 0.05

G = nx.gnp_random_graph(edges, edgeProbability)
# nx.info() was removed in NetworkX 3.0; fall back to the graph's own
# string form when it is unavailable.
print(nx.info(G) if hasattr(nx, "info") else G)

# Select the initial seed set randomly. random.sample() needs a sequence
# (a node view is rejected on Python 3.11+), hence the list().
seed_set = random.sample(list(G.nodes()), 4)
print('Selected Seeds:', seed_set)
resultData = linear_threshold(G, seed_set)
print ('Result : ', resultData)
# The last layer is the round that activated nothing, so the second-to-last
# entry is the final layer that was actually reached. The result always has
# at least two entries (seeds + one round), so [-2] is safe.
print('Max Influence recorded for Node #', resultData[-2])

OUTPUT
PROBLEM II – INFORMATION MAXIMISATION USING INFORMATION
CASCADE MODEL

CODE

import matplotlib.pyplot as plt


import networkx as nx
import random
import copy

def info_cascade(G, seeds, steps=0, threshold=0.1):
    """Run a probabilistic cascade diffusion on ``G`` starting from ``seeds``.

    Every edge carries a ``'prob'`` attribute, the chance that an active
    source activates the edge's target. Edges without one get
    1 / in-degree of their target. The caller's graph is never modified:
    an undirected graph is converted with ``to_directed()`` and a directed
    graph is deep-copied first.

    Args:
        G: A networkx graph without parallel edges.
        seeds: Iterable of nodes that are active at the start.
        steps: Maximum number of diffusion rounds. A value <= 0 means
            "diffuse until a round activates nothing".
        threshold: Unused by this function; kept so existing callers that
            pass it keep working.

    Returns:
        The list built by ``_diffuse_all`` / ``_diffuse_k_rounds``.

    Raises:
        Exception: If ``G`` is a multigraph, a seed is not in ``G``, or an
            existing edge probability is larger than 1.
    """
    # Per-edge attributes are addressed as DG[u][v][...], which is not
    # meaningful with parallel edges; reject them up front, as
    # linear_threshold() does.
    if G.is_multigraph():
        raise Exception(
            "info_cascade() is not defined for graphs with multiedges.")

    # Make sure the seeds are in the graph.
    for s in seeds:
        if s not in G.nodes():
            raise Exception("seed", s, "is not in graph")

    # Work on a private directed copy.
    if not G.is_directed():
        DG = G.to_directed()
    else:
        DG = copy.deepcopy(G)

    # Init prob. edges(data=True) yields the live attribute dicts.
    in_deg = DG.in_degree()
    for _, target, edge_attrs in DG.edges(data=True):
        if 'prob' not in edge_attrs:
            edge_attrs['prob'] = 1.0 / in_deg[target]
        elif edge_attrs['prob'] > 1:
            raise Exception("probablity:", edge_attrs['prob'],
                            "cannot be larger than 1")

    # Copy only the container: the node objects themselves must stay the ones
    # stored in the graph, and the caller's sequence must not be extended.
    A = list(seeds)
    if steps <= 0:
        # Perform diffusion until no more nodes can be activated.
        return _diffuse_all(DG, A)
    # Perform diffusion for at most "steps" rounds only.
    return _diffuse_k_rounds(DG, A, steps)

def _diffuse_all(G, A):
    """Diffuse from the active list ``A`` until a round activates nothing.

    Each edge is attempted at most once over the whole run; the edges
    already attempted are carried from round to round.

    Returns:
        A list whose first element is a copy of the initial ``A`` (a list)
        and whose remaining elements are the individual nodes activated
        afterwards, flattened across rounds because ``extend`` is used.
    """
    tried_edges = set()
    layer_i_nodes = [list(A)]  # copy to prevent side effects on layer 0
    while True:
        len_old = len(A)
        A, activated_nodes_of_this_round, cur_tried_edges = \
            _diffuse_one_round(G, A, tried_edges)
        layer_i_nodes.extend(activated_nodes_of_this_round)
        tried_edges |= cur_tried_edges
        if len(A) == len_old:
            break
    return layer_i_nodes

def _diffuse_k_rounds(G, A, steps):
    """Diffuse from the active list ``A`` for at most ``steps`` rounds.

    Stops early when every node of ``G`` is active or when a round leaves
    ``A`` unchanged. Edges attempted in earlier rounds are remembered and
    handed to each later round.

    Returns a list whose first element is a copy of the initial ``A`` and
    whose remaining elements are the individual nodes activated afterwards
    (flattened, since ``extend`` is used).
    """
    attempted = set()
    result = [list(A)]
    rounds_left = steps
    while rounds_left > 0 and len(A) < len(G):
        size_before = len(A)
        outcome = _diffuse_one_round(G, A, attempted)
        A, newly_active, attempted_now = outcome
        result.extend(newly_active)
        attempted = attempted.union(attempted_now)
        if len(A) == size_before:
            break
        rounds_left -= 1
    return result

def _diffuse_one_round(G, A, tried_edges):
    """Run one round of cascade activation attempts.

    Every active node tries each outgoing edge that leads to a node not in
    ``A`` and that has not been attempted before, either in an earlier round
    (``tried_edges``) or earlier in this round.

    Args:
        G: Directed graph whose edges carry a ``'prob'`` attribute.
        A: List of currently active nodes; it is extended in place.
        tried_edges: Set of ``(source, target)`` pairs attempted in previous
            rounds. It is only read here.

    Returns:
        ``(A, activated, cur_tried_edges)``: the same list object including
        the new nodes, the list of newly activated nodes, and the set of
        edges attempted in this round.
    """
    # Build the membership set once instead of scanning the list per edge.
    active = set(A)
    activated_nodes_of_this_round = set()
    cur_tried_edges = set()
    for s in A:
        for nb in G.successors(s):
            edge = (s, nb)
            if nb in active or edge in tried_edges or edge in cur_tried_edges:
                continue
            if _prop_success(G, s, nb):
                activated_nodes_of_this_round.add(nb)
            # Record the attempt whether or not it succeeded.
            cur_tried_edges.add(edge)
    activated_nodes_of_this_round = list(activated_nodes_of_this_round)
    A.extend(activated_nodes_of_this_round)
    return A, activated_nodes_of_this_round, cur_tried_edges

def _prop_success(G, src, dest):


return random.random() <= G[src][dest]['prob']

G = nx.gnp_random_graph(50, 0.045)
# nx.info() was removed in NetworkX 3.0; fall back to the graph's own
# string form when it is unavailable.
print(nx.info(G) if hasattr(nx, "info") else G)
nx.draw(G, node_size=140, with_labels=True)
plt.show()

iterations = 10000  # Number of Iterations
# random.sample() needs a sequence (a node view is rejected on Python
# 3.11+); build the list once, outside the loop.
candidate_nodes = list(G.nodes())
# Must be initialised once, before the loop: resetting it per iteration made
# every sample look like a new maximum, so the last sample was reported.
max_influence = 0
for i in range(iterations):
    seed_set = random.sample(candidate_nodes, 7)
    S = set(seed_set)
    influence = info_cascade(G, list(S))
    if len(influence) > max_influence:
        max_influence = len(influence)
        # Keep the whole result: this program's _diffuse_all flattens the
        # activated nodes with extend(), so there is no trailing empty layer
        # and slicing off the last element would drop an activated node.
        y = influence
        seeds = list(S)
print("Max influence for: ")
print(seeds)
print("Influence: ")
print(y)

OUTPUT

Вам также может понравиться