Академический Документы
Профессиональный Документы
Культура Документы
2
10
PDF from the pdf() function in the scipy.stats.chi2 class
import pylab
import scipy.stats

# Draw a 10 x 10000 table of standard-normal values and add up the squares
# down each column, giving 10000 simulated sums of 10 squared normals.
samples = pylab.standard_normal((10, 10000))
sums = (samples * samples).sum(axis=0)

# Raw-count histogram; only the bin edges are needed afterwards.
counts, edges, bars = pylab.hist(sums, bins=100)
bin_width = pylab.diff(edges)[0]

# Evaluate the chi-square PDF (10 degrees of freedom) on an even grid and
# scale it by (number of sums) * (bin width) so that it is on the same
# vertical scale as the un-normalised histogram counts.
grid = pylab.linspace(0, max(sums), 100)
expected_counts = scipy.stats.chi2.pdf(grid, 10) * 10000 * bin_width

# "y" is the Matplotlib format string for a yellow line.
pylab.plot(grid, expected_counts, "y", linewidth=5)
pylab.show()
Finn
Arup Nielsen 2 September 15, 2014
Python programming solutions for numeric Python
. . . Statistical distributions . . .
Finn
Arup Nielsen 3 September 15, 2014
Python programming solutions for numeric Python
. . . Statistical distributions
It may be worthwhile to take a glance at the examples on the matplotlib
homepage; e.g., the histogram examples page shows an example that is
quite similar to the exercise. Note the use of the normed input argument
(renamed density in newer Matplotlib releases).
from pylab import *
from scipy.stats import chi2

# Simulate 10000 sums of 10 squared standard-normal draws.  `sum` here is
# the NumPy function pulled in by the pylab star import; the builtin `sum`
# does not accept an `axis` argument.
X = standard_normal((10, 10000))
sum_of_squares = sum(X * X, axis=0)

# density=True normalises the histogram to unit area so that it can be
# compared directly with the PDF.  It is the replacement for the older
# normed=1 spelling, which was removed in Matplotlib 3.1.
(n, bins, patches) = hist(sum_of_squares, bins=100, density=True)

# Chi-square PDF with 10 degrees of freedom on an even grid over the data.
x = linspace(0, max(sum_of_squares), 100)
y = chi2.pdf(x, 10)

# The third positional argument is the format string "y" (yellow line).
plot(x, y, "y", linewidth=5)
show()
Finn
Arup Nielsen 4 September 15, 2014
Python programming solutions for numeric Python
Coauthors . . .
Read coauthors.csv, a tab-separated file with a co-author matrix. Find
the author with the most coauthoring.
import csv
from numpy import zeros, asarray, sum, diag, where
# n counts the lines consumed so far; M collects the labels and the matrix.
n = 0
M = dict()

# The with-block closes the file even if a row fails to parse.
with open("coauthors.csv") as csv_file:
    for line in csv.reader(csv_file, delimiter="\t"):
        if not n:
            # First line: everything after the first cell is used as both
            # the column labels and the row labels, and fixes the size of
            # the (square) matrix.
            M["columns"] = line[1:]
            M["rows"] = line[1:]
            M["matrix"] = zeros((len(line[1:]), len(line[1:])))
        else:
            # Data line n fills matrix row n - 1; the first cell is skipped.
            # A real list is built (instead of map) because on Python 3
            # map returns a lazy iterator that NumPy cannot assign to a row.
            M["matrix"][n - 1, :] = [int(value) for value in line[1:]]
        n += 1
Finn
Arup Nielsen 5 September 15, 2014
Python programming solutions for numeric Python
m = M["matrix"]

# Blank out the diagonal, then count the nonzero entries left in each row.
off_diagonal = m - diag(diag(m))
nonzero_per_row = sum(where(off_diagonal, 1, 0), axis=1)

# Row indices ordered from fewest to most nonzero off-diagonal entries.
i = nonzero_per_row.argsort()

# Walking backwards from the end and stopping before position -10 picks the
# last nine indices, i.e. the nine rows with the highest counts, best first.
best_first = i[:-10:-1]
list(asarray(M["rows"])[best_first])
Finn
Arup Nielsen 6 September 15, 2014
Python programming solutions for numeric Python
Another implementation
import csv
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
class AnnotatedMatrix(object):
    """A square matrix whose rows and columns carry text labels.

    Attributes set by the constructor:
      columns -- list of labels taken from the first line of the file
      matrix  -- NumPy array created with np.zeros, filled one row per
                 data line of the file
      g       -- networkx Graph with one edge for every nonzero entry of
                 ``matrix`` (nonzero diagonal entries become self-loops)
    """

    def __init__(self, filename, delimiter="\t", encoding="utf-8"):
        """Read the matrix from a delimited text file and build the graph.

        filename  -- path of the file to read
        delimiter -- field separator handed to csv.reader
        encoding  -- text encoding used to decode the file

        Requires Python 3: the encoding is passed to the builtin open().
        """
        self.columns, self.matrix = self._read_matrix(
            filename, delimiter, encoding)

        self.g = nx.Graph()
        for i, j in zip(*self.matrix.nonzero()):
            self.g.add_edge(self.columns[i], self.columns[j])

    @staticmethod
    def _read_matrix(filename, delimiter, encoding):
        """Return (labels, matrix) parsed from the delimited file."""
        # newline="" is what the csv module asks for on text files; the
        # with-block closes the file even if a row fails to parse.
        with open(filename, encoding=encoding, newline="") as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=delimiter)
            # First line holds the labels; its first cell is skipped.
            # Kept as a real list because the labels are indexed later.
            columns = next(csv_reader)[1:]
            matrix = np.zeros((len(columns), len(columns)))
            for n, line in enumerate(csv_reader):
                # First cell of each data line is skipped as well.
                matrix[n, :] = [int(value) for value in line[1:]]
        return columns, matrix

    def most_coauthoring(self):
        """Return the label of the row with most nonzero off-diagonal cells.

        np.argmax returns the first maximum, so ties go to the earliest row.
        """
        offdiag = self.matrix - np.diag(np.diag(self.matrix))
        nonzeros = np.where(offdiag != 0, 1, 0)
        i = np.argmax(np.sum(nonzeros, axis=1))
        return self.columns[i]

    def largest_component(self):
        """Return a copy of the subgraph for the largest connected component.

        Raises IndexError when the graph has no nodes.
        """
        # connected_component_subgraphs() is gone from current NetworkX, so
        # the largest node set is picked explicitly instead of relying on
        # the first returned component being the biggest.
        components = list(nx.connected_components(self.g))
        if not components:
            raise IndexError("graph has no connected components")
        return self.g.subgraph(max(components, key=len)).copy()
# Load the co-author matrix; the file is decoded as ISO 8859-1.
coauthors = AnnotatedMatrix("coauthors.csv", encoding="ISO8859-1")

# The return value is not stored; it is only displayed when this line is
# typed into an interactive session.
coauthors.most_coauthoring()

# Draw the largest connected component of the co-author graph.
biggest_component = coauthors.largest_component()
nx.draw(biggest_component)
plt.show()
Finn
Arup Nielsen 8 September 15, 2014
Python programming solutions for numeric Python
. . . Coauthor
See also Pandas and networkx.from_numpy_matrix
Finn
Arup Nielsen 9 September 15, 2014
Python programming solutions for numeric Python
Optimization
import scipy.optimize
def f(xy):
    """Return the Rosenbrock function value at the point ``xy``.

    ``xy`` is any length-2 sequence (x, y); the value is
    (1 - x)**2 + 100 * (y - x**2)**2.
    """
    # Unpack inside the body: tuple parameters such as ``lambda (x, y):``
    # are a syntax error on Python 3.
    x, y = xy
    return (1 - x) ** 2 + 100 * (y - x ** 2) ** 2


# Minimise f starting from the origin.
scipy.optimize.fmin(f, x0=(0, 0))
import sympy
# symbols() takes the symbol names as a single string.
x, y = sympy.symbols("x y")

# Differentiate the expression (given as a string) with respect to x.
sympy.diff("(1-x)**2 + 100 * (y-x**2)**2", x)
Finn
Arup Nielsen 10 September 15, 2014