{ "metadata": { "name": "", "signature": "sha256:12b80d66c1d1d3c8be54d86798369b6159695da3349fe4df2d07dbbea028a972" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Graph Analysis " ] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Imports" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import numpy as np\n", "import pandas as pd\n", "import scipy as sp\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from sklearn.preprocessing import MinMaxScaler\n", "import networkx as nx\n", "from sklearn.cluster import KMeans\n", "\n", "%matplotlib inline" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "K-core decomposition of a graph" ] }, { "cell_type": "code", "collapsed": false, "input": [ "G=nx.karate_club_graph()\n", "G = nx.Graph(G)\n", "\n", "print len(G.nodes())" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "degree_sequence=sorted(nx.degree(G).values(),reverse=True) # degree sequence\n", "dmax=max(degree_sequence)\n", "print dmax" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "https://networkx.github.io/documentation/latest/reference/algorithms.html" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Computing the k-core decomposition of a graph" ] }, { "cell_type": "code", "collapsed": false, "input": [ "core_dec = nx.core_number(G)\n", "print core_dec" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Plotting the graph; nodes with the same color belong in the same core" ] }, { "cell_type": "code", "collapsed": false, "input": [ "colors = ['#d7191c', '#fdae61', '#ffffbf', '#abdda4', '#2b83ba']\n", "node_colors = [ colors[core_dec[v]] for v in G.nodes()]\n", "\n", "nx.draw(G, node_color=node_colors, with_labels=True)\n" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Minimun Cuts" ] }, { "cell_type": "code", "collapsed": false, "input": [ "cut_edges = nx.minimum_edge_cut(G)\n", "print cut_edges" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "Gcopy = G.copy()\n", "Gcopy.remove_edges_from(cut_edges)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "cc = nx.connected_components(Gcopy)\n", "node_set = {}\n", "i = 1\n", "for s in cc:\n", " for node in s:\n", " node_set[node] = i\n", " i+=1\n", "print node_set" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "colors = ['#d7191c', '#2b83ba']\n", "node_colors = [ colors[node_set[v]-1] for v in G.nodes()]\n", "nx.draw(G, node_color=node_colors, with_labels='True')" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "cut_edges = nx.minimum_edge_cut(G, s=0, t=33)\n", "print cut_edges" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "Gcopy = G.copy()\n", "Gcopy.remove_edges_from(cut_edges)\n" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "cc = nx.connected_components(Gcopy)\n", "node_set = {}\n", "for i, s in enumerate(cc):\n", " for node in s:\n", " node_set[node] = i\n", "colors = ['#d7191c', '#2b83ba']\n", "node_colors = [ colors[node_set[v]-1] for v in G.nodes()]\n", "nx.draw(G, node_color=node_colors, with_labels='True')" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Graph spectral clustering" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "https://networkx.github.io/documentation/latest/reference/linalg.html" ] }, { "cell_type": "heading", "level": 4, "metadata": {}, "source": [ "Exploring the Fiedler vector of the Karate-club graph" ] }, { "cell_type": "code", "collapsed": false, "input": [ "G=nx.karate_club_graph()\n", "G = nx.Graph(G)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "f = nx.fiedler_vector(G)\n", "print f" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "s = np.zeros(len(f))\n", "s[f>0]=1\n", "s = s.astype(int)\n", "#s = s.tolist()\n", "print s, type(s)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "colors = ['#d7191c', '#2b83ba']\n", "node_colors = [ colors[s[v]] for v in G.nodes()]\n", "node_colors = ['#d7191c' if f[i] < 0 else '#2b83ba' for i, v in enumerate(G.nodes())]\n", "nx.draw(G, node_color=node_colors, with_labels='True')" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "heading", "level": 4, "metadata": {}, "source": [ "Exploring the Fiedler vector of a union of noisy cliques" ] }, { "cell_type": "code", "collapsed": false, "input": [ "from numpy.random import RandomState\n", "\n", "def generate_noisy_subcliques(nodes_per_clique, inside_p, across_p, min_node_label=0, seed=None):\n", " \"\"\"Generates a graph which consists of small cliques connected with each other.\n", " The noise within a clique and across cliques can be set by the inside_p and \n", " across_p parameters respectively.\n", " \n", " \n", " Parameters\n", " ----------\n", " nodes_per_clique : list\n", " The size of this list corresponds to the number of cliques that will be\n", " generated. The value of each element will be the size of the corresponding \n", " clique.\n", " \n", " inside_p : float\n", " The probability of an edge inside a clique. The higher this number, the more \n", " each clique will resemble a fully connected graph.\n", " \n", " across_p : float\n", " The probability of an edge across cliques.\n", " \n", " min_node_label : int, default is 0\n", " The minimum node label of the graph.\n", " \n", " seed : int, default is None\n", " The seed to the pseudorandom number generator.\n", " \n", " \n", " Returns\n", " -------\n", " G : networkX graph\n", " The generated graph.\n", " \"\"\"\n", "\n", " prng = RandomState(seed)\n", " clique_list = []\n", " number_of_cliques = len(nodes_per_clique)\n", "\n", " # Make the independent cliques\n", " starting_node = min_node_label\n", " for clique in range(number_of_cliques):\n", " G = nx.Graph()\n", " for u in range(starting_node, starting_node + nodes_per_clique[clique]):\n", " for v in range(u + 1, starting_node + nodes_per_clique[clique]):\n", " if prng.rand() < inside_p:\n", " G.add_edge(u, v)\n", " clique_list.append(G)\n", " starting_node += nodes_per_clique[clique]\n", "\n", " # Combine them in one graph\n", " G = nx.Graph()\n", " for clique in range(number_of_cliques):\n", " G.add_edges_from(clique_list[clique].edges())\n", "\n", " # Connect edges across the cliques\n", " for i in range(number_of_cliques):\n", " clique_from = clique_list[i]\n", " for j in range(i + 1, number_of_cliques):\n", " clique_to = clique_list[j]\n", " for u in clique_from.nodes():\n", " for v in clique_to.nodes():\n", " if prng.rand() < across_p:\n", " G.add_edge(u, v)\n", " return G\n" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "nodes_per_clique = [10, 10, 10]\n", "across_p = 0.05\n", "inside_p = 0.9\n", "cliques = generate_noisy_subcliques(nodes_per_clique, inside_p, across_p)\n", "nx.draw(cliques, with_labels=True)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "f = nx.fiedler_vector(cliques)\n", "print f" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "s = np.zeros(len(f))\n", "s[f>0]=1\n", "s = s.astype(int)\n", "s = s.tolist()\n", "print s, type(s)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "colors = ['#d7191c', '#2b83ba']\n", "node_colors = [ colors[s[v]] for v in cliques.nodes()]\n", "nx.draw(cliques, node_color=node_colors, with_labels='True')" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "heading", "level": 4, "metadata": {}, "source": [ "Beyond the Fiedler vector" ] }, { "cell_type": "code", "collapsed": false, "input": [ "L = nx.laplacian_matrix(cliques).astype(float)\n", "w,v = sp.sparse.linalg.eigsh(L, k = 3, which='SM')" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "print w\n", "print v" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "print w.shape, v.shape\n", "X = v*w\n" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "kmeans = KMeans(init='k-means++', n_clusters=3, n_init=10)\n", "kmeans.fit_predict(X)\n", "centroids = kmeans.cluster_centers_\n", "labels = kmeans.labels_\n", "error = kmeans.inertia_" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "print labels" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "colors = ['#d7191c', '#ffffbf', '#2b83ba']\n", "node_colors = [ colors[labels[v]] for v in cliques.nodes()]\n", "nx.draw(cliques, node_color=node_colors, with_labels='True')" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "# Code for setting the style of the notebook\n", "from IPython.core.display import HTML\n", "def css_styling():\n", " styles = open(\"../theme/custom.css\", \"r\").read()\n", " return HTML(styles)\n", "css_styling()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "\r\n", "\r\n", "\r\n", "\r\n", "" ], "metadata": {}, "output_type": "pyout", "prompt_number": 2, "text": [ "" ] } ], "prompt_number": 2 } ], "metadata": {} } ] }