All Samples(4746) | Call(4565) | Derive(0) | Import(181)
Sum of array elements over a given axis.
Parameters
----------
a : array_like
Elements to sum.
axis : integer, optional
Axis over which the sum is taken. By default `axis` is None,
and all elements are summed.
dtype : dtype, optional
The type of the returned array and of the accumulator in which
the elements are summed. By default, the dtype of `a` is used.
An exception is when `a` has an integer type with less precision
than the default platform integer. In that case, the default
platform integer is used instead.
out : ndarray, optional
Array into which the output is placed. By default, a new array is
created. If `out` is given, it must be of the appropriate shape
(the shape of `a` with `axis` removed, i.e.,
``numpy.delete(a.shape, axis)``). Its type is preserved. See
`doc.ufuncs` (Section "Output arguments") for more details.
Returns
-------
sum_along_axis : ndarray
An array with the same shape as `a`, with the specified
axis removed. If `a` is a 0-d array, or if `axis` is None, a scalar
is returned. If an output array is specified, a reference to
`out` is returned.
See Also
--------
ndarray.sum : Equivalent method.
cumsum : Cumulative sum of array elements.
trapz : Integration of array values using the composite trapezoidal rule.
mean, average
Notes
-----
Arithmetic is modular when using integer types, and no error is
raised on overflow.
Examples
--------
>>> np.sum([0.5, 1.5])
2.0
>>> np.sum([0.5, 0.7, 0.2, 1.5], dtype=np.int32)
1
>>> np.sum([[0, 1], [0, 5]])
6
>>> np.sum([[0, 1], [0, 5]], axis=0)
array([0, 6])
>>> np.sum([[0, 1], [0, 5]], axis=1)
array([1, 5])
If the accumulator is too small, overflow occurs:
>>> np.ones(128, dtype=np.int8).sum(dtype=np.int8)
-128
def sum(a, axis=None, dtype=None, out=None):
"""
Sum of array elements over a given axis.
Parameters
----------
a : array_like
Elements to sum.
axis : integer, optional
Axis over which the sum is taken. By default `axis` is None,
and all elements are summed.
dtype : dtype, optional
The type of the returned array and of the accumulator in which
the elements are summed. By default, the dtype of `a` is used.
An exception is when `a` has an integer type with less precision
than the default platform integer. In that case, the default
platform integer is used instead.
out : ndarray, optional
Array into which the output is placed. By default, a new array is
created. If `out` is given, it must be of the appropriate shape
(the shape of `a` with `axis` removed, i.e.,
``numpy.delete(a.shape, axis)``). Its type is preserved. See
`doc.ufuncs` (Section "Output arguments") for more details.
Returns
-------
sum_along_axis : ndarray
An array with the same shape as `a`, with the specified
axis removed. If `a` is a 0-d array, or if `axis` is None, a scalar
is returned. If an output array is specified, a reference to
`out` is returned.
See Also
--------
ndarray.sum : Equivalent method.
cumsum : Cumulative sum of array elements.
trapz : Integration of array values using the composite trapezoidal rule.
mean, average
Notes
-----
Arithmetic is modular when using integer types, and no error is
raised on overflow.
Examples
--------
>>> np.sum([0.5, 1.5])
2.0
>>> np.sum([0.5, 0.7, 0.2, 1.5], dtype=np.int32)
1
>>> np.sum([[0, 1], [0, 5]])
6
>>> np.sum([[0, 1], [0, 5]], axis=0)
array([0, 6])
>>> np.sum([[0, 1], [0, 5]], axis=1)
array([1, 5])
If the accumulator is too small, overflow occurs:
>>> np.ones(128, dtype=np.int8).sum(dtype=np.int8)
-128
"""
if isinstance(a, _gentype):
res = _sum_(a)
if out is not None:
out[...] = res
return out
return res
try:
sum = a.sum
except AttributeError:
return _wrapit(a, 'sum', axis, dtype, out)
return sum(axis, dtype, out)
pysum = sum # save it before it gets overwritten # Scipy imports. from numpy import array, asarray, dot, ma, zeros, sum import scipy.special as special import scipy.linalg as linalg import numpy as np
x, axis = _chk_asarray(x,axis)
x = x.copy()
Norig = x.shape[axis]
factor = 1.0-np.sum(np.isnan(x),axis)*1.0/Norig
x[np.isnan(x)] = 0
return np.mean(x,axis)/factor
x = x.copy()
Norig = x.shape[axis]
Nnan = np.sum(np.isnan(x),axis)*1.0
n = Norig - Nnan
x[np.isnan(x)] = 0.
m1 = np.sum(x,axis)/n
else:
d = (x - m1)**2.0
m2 = np.sum(d,axis)-(m1*m1)*Nnan
if bias:
m2c = m2 / n
else:
size = a.shape[0]
else:
size = a.shape[axis]
return size / np.sum(1.0/a, axis=axis, dtype=dtype)
else:
raise ValueError("Harmonic mean only defined if all elements greater than zero")
oldcounts = np.zeros(testshape)
for score in scores:
template = (a == score)
counts = np.expand_dims(np.sum(template, axis),axis)
mostfrequent = np.where(counts > oldcounts, score, oldmostfreq)
oldcounts = np.maximum(counts, oldcounts)
oldmostfreq = mostfrequent
return pct
elif kind == 'strict':
return sum(a < score) / float(n) * 100
elif kind == 'weak':
return sum(a <= score) / float(n) * 100
elif kind == 'mean':
return (sum(a < score) + sum(a <= score)) * 50 / float(n)
f_obs = asarray(f_obs)
k = len(f_obs)
if f_exp is None:
f_exp = array([np.sum(f_obs,axis=0)/float(k)] * len(f_obs),float)
f_exp = f_exp.astype(float)
chisq = np.add.reduce((f_obs-f_exp)**2 / f_exp)
return chisq, chisqprob(chisq, k-1-ddof)
ranked = rankdata(np.concatenate((x,y)))
rankx = ranked[0:n1] # get the x-ranks
#ranky = ranked[n1:] # the rest are y-ranks
u1 = n1*n2 + (n1*(n1+1))/2.0 - np.sum(rankx,axis=0) # calc U for x
u2 = n1*n2 - u1 # remainder is U for y
bigu = max(u1,u2)
smallu = min(u1,u2)
ranked = rankdata(alldata)
x = ranked[:n1]
y = ranked[n1:]
s = np.sum(x,axis=0)
expected = n1*(n1+n2+1) / 2.0
z = (s - expected) / np.sqrt(n1*n2*(n1+n2+1)/12.0)
prob = 2 * distributions.norm.sf(abs(z))
del ranked[0:n[i]]
rsums = []
for i in range(len(args)):
rsums.append(np.sum(args[i],axis=0)**2)
rsums[i] = rsums[i] / float(n[i])
ssbn = np.sum(rsums,axis=0)
totaln = np.sum(n,axis=0)
if len(p) == 2: # ttest_ind
c = array([1,-1])
df = n-2
fact = np.sum(1.0/np.sum(x,0),axis=0) # i.e., 1/n1 + 1/n2 + 1/n3 ...
t = dot(c,b) / np.sqrt(s_sq*fact)
probs = betai(0.5*df,0.5,float(df)/(df+t*t))
return t, probs
The sum along the given axis for (a*a).
"""
a, axis = _chk_asarray(a, axis)
return np.sum(a*a, axis)
def square_of_sums(a, axis=0):
"""Adds the values in the passed array, squares that sum, and returns the
result.
Returns: the square of the sum over axis.
"""
a, axis = _chk_asarray(a, axis)
s = np.sum(a,axis)
src/s/c/scipy-0.8.0/scipy/stats/stats.py scipy(Download)
pysum = sum # save it before it gets overwritten # Scipy imports. from numpy import array, asarray, dot, ma, zeros, sum import scipy.special as special import scipy.linalg as linalg import numpy as np
x, axis = _chk_asarray(x,axis)
x = x.copy()
Norig = x.shape[axis]
factor = 1.0-np.sum(np.isnan(x),axis)*1.0/Norig
x[np.isnan(x)] = 0
return np.mean(x,axis)/factor
x = x.copy()
Norig = x.shape[axis]
Nnan = np.sum(np.isnan(x),axis)*1.0
n = Norig - Nnan
x[np.isnan(x)] = 0.
m1 = np.sum(x,axis)/n
else:
d = (x - m1)**2.0
m2 = np.sum(d,axis)-(m1*m1)*Nnan
if bias:
m2c = m2 / n
else:
size = a.shape[0]
else:
size = a.shape[axis]
return size / np.sum(1.0/a, axis=axis, dtype=dtype)
else:
raise ValueError("Harmonic mean only defined if all elements greater than zero")
oldcounts = np.zeros(testshape)
for score in scores:
template = (a == score)
counts = np.expand_dims(np.sum(template, axis),axis)
mostfrequent = np.where(counts > oldcounts, score, oldmostfreq)
oldcounts = np.maximum(counts, oldcounts)
oldmostfreq = mostfrequent
return pct
elif kind == 'strict':
return sum(a < score) / float(n) * 100
elif kind == 'weak':
return sum(a <= score) / float(n) * 100
elif kind == 'mean':
return (sum(a < score) + sum(a <= score)) * 50 / float(n)
f_obs = asarray(f_obs)
k = len(f_obs)
if f_exp is None:
f_exp = array([np.sum(f_obs,axis=0)/float(k)] * len(f_obs),float)
f_exp = f_exp.astype(float)
chisq = np.add.reduce((f_obs-f_exp)**2 / f_exp)
return chisq, chisqprob(chisq, k-1-ddof)
ranked = rankdata(np.concatenate((x,y)))
rankx = ranked[0:n1] # get the x-ranks
#ranky = ranked[n1:] # the rest are y-ranks
u1 = n1*n2 + (n1*(n1+1))/2.0 - np.sum(rankx,axis=0) # calc U for x
u2 = n1*n2 - u1 # remainder is U for y
bigu = max(u1,u2)
smallu = min(u1,u2)
ranked = rankdata(alldata)
x = ranked[:n1]
y = ranked[n1:]
s = np.sum(x,axis=0)
expected = n1*(n1+n2+1) / 2.0
z = (s - expected) / np.sqrt(n1*n2*(n1+n2+1)/12.0)
prob = 2 * distributions.norm.sf(abs(z))
del ranked[0:n[i]]
rsums = []
for i in range(len(args)):
rsums.append(np.sum(args[i],axis=0)**2)
rsums[i] = rsums[i] / float(n[i])
ssbn = np.sum(rsums,axis=0)
totaln = np.sum(n,axis=0)
if len(p) == 2: # ttest_ind
c = array([1,-1])
df = n-2
fact = np.sum(1.0/np.sum(x,0),axis=0) # i.e., 1/n1 + 1/n2 + 1/n3 ...
t = dot(c,b) / np.sqrt(s_sq*fact)
probs = betai(0.5*df,0.5,float(df)/(df+t*t))
return t, probs
The sum along the given axis for (a*a).
"""
a, axis = _chk_asarray(a, axis)
return np.sum(a*a, axis)
def square_of_sums(a, axis=0):
"""Adds the values in the passed array, squares that sum, and returns the
result.
Returns: the square of the sum over axis.
"""
a, axis = _chk_asarray(a, axis)
s = np.sum(a,axis)
src/s/c/scipy-HEAD/scipy/stats/morestats.py scipy(Download)
import stats
from stats import find_repeats
import distributions
from numpy import isscalar, r_, log, sum, around, unique, asarray
from numpy import zeros, arange, sort, amin, amax, any, where, \
atleast_1d, sqrt, ceil, floor, array, poly1d, compress, not_equal, \
pi, exp, ravel, angle
data = ravel(data)
N = len(data)
for k in range(1,n+1):
S[k] = sum(data**k,axis=0)
if n==1:
return S[1]*1.0/N
elif n==2:
def boxcox_llf(lmb, data):
"""The boxcox log-likelihood function.
"""
N = len(data)
y = boxcox(data,lmb)
my = np.mean(y, axis=0)
f = (lmb-1)*sum(log(data),axis=0)
f -= N/2.0*log(sum((y-my)**2.0/N,axis=0))
def rootfunc(ab,xj,N):
a,b = ab
tmp = (xj-a)/b
tmp2 = exp(tmp)
val = [sum(1.0/(1+tmp2),axis=0)-0.5*N,
sum(tmp*(1.0-tmp2)/(1+tmp2),axis=0)+N]
return array(val)
critical = around(_Avals_gumbel / (1.0 + 0.2/sqrt(N)),3)
i = arange(1,N+1)
S = sum((2*i-1.0)/N*(log(z)+log(1-z[::-1])),axis=0)
A2 = -N-S
return A2, critical, sig
xy = r_[x,y] # combine
rank = stats.rankdata(xy)
symrank = amin(array((rank,N-rank+1)),0)
AB = sum(symrank[:n],axis=0)
uxy = unique(xy)
repeats = (len(uxy) != len(xy))
exact = ((m<55) and (n<55) and not repeats)
if repeats and ((m < 55) or (n < 55)):
warnings.warn("Ties preclude use of exact statistic.")
if exact:
astart, a1, ifault = statlib.gscale(n,m)
ind = AB-astart
total = sum(a1,axis=0)
if ind < len(a1)/2.0:
cind = int(ceil(ind))
if (ind == cind):
pval = 2.0*sum(a1[:cind+1],axis=0)/total
else:
pval = 2.0*sum(a1[:cind],axis=0)/total
else:
find = int(floor(ind))
if (ind == floor(ind)):
pval = 2.0*sum(a1[find:],axis=0)/total
else:
pval = 2.0*sum(a1[find+1:],axis=0)/total
varAB = m*n*(N+2)*(N-2.0)/48/(N-1.0)
if repeats: # adjust variance estimates
# compute sum(tj * rj**2,axis=0)
fac = sum(symrank**2,axis=0)
if N % 2: # N odd
varAB = m*n*(16*N*fac-(N+1)**4)/(16.0 * N**2 * (N-1))
else: # N even
for j in range(k):
Ni[j] = len(args[j])
ssq[j] = np.var(args[j], ddof=1)
Ntot = sum(Ni,axis=0)
spsq = sum((Ni-1)*ssq,axis=0)/(1.0*(Ntot-k))
numer = (Ntot*1.0-k)*log(spsq) - sum((Ni-1.0)*log(ssq),axis=0)
denom = 1.0 + (1.0/(3*(k-1)))*((sum(1.0/(Ni-1.0),axis=0))-1.0/(Ntot-k))
for j in range(k):
Ni[j] = len(args[j])
Yci[j] = func(args[j])
Ntot = sum(Ni,axis=0)
# compute Zij's
Zij = [None]*k
Zbar += Zbari[i]*Ni[i]
Zbar /= Ntot
numer = (Ntot-k)*sum(Ni*(Zbari-Zbar)**2,axis=0)
# compute denom_variance
dvar = 0.0
for i in range(k):
dvar += sum((Zij[i]-Zbari[i])**2,axis=0)
rerr = 1+1e-7
if (x < p*n):
i = np.arange(np.ceil(p*n),n+1)
y = np.sum(distributions.binom.pmf(i,n,p) <= d*rerr,axis=0)
pval = distributions.binom.cdf(x,n,p) + distributions.binom.sf(n-y,n,p)
else:
i = np.arange(np.floor(p*n))
y = np.sum(distributions.binom.pmf(i,n,p) <= d*rerr,axis=0)
Ni = asarray([len(args[j]) for j in range(k)])
Yci = asarray([func(args[j]) for j in range(k)])
Ntot = sum(Ni,axis=0)
# compute Zij's
Zij = [abs(asarray(args[i])-Yci[i]) for i in range(k)]
allZij = []
Aibar = _apply_func(a,g,sum) / Ni
anbar = np.mean(a, axis=0)
varsq = np.var(a,axis=0, ddof=1)
Xsq = sum(Ni*(asarray(Aibar)-anbar)**2.0,axis=0)/varsq
pval = distributions.chi2.sf(Xsq,k-1) # 1 - cdf
return Xsq, pval
raise ValueError("Not enough observations.")
ranks = stats.rankdata(xy)
Ri = ranks[:n]
M = sum((Ri - (N+1.0)/2)**2,axis=0)
# Approx stat.
mnM = n*(N*N-1.0)/12
varM = m*n*(N+1.0)*(N+2)*(N-2)/180
Mi = array([np.mean(args[i], axis=0) for i in range(k)])
Vi = array([np.var(args[i]) for i in range(k)])
Wi = Ni / Vi
swi = sum(Wi,axis=0)
N = sum(Ni,axis=0)
my = sum(Mi*Ni,axis=0)*1.0/N
tmp = sum((1-Wi/swi)**2 / (Ni-1.0),axis=0)/(k*k-1.0)
if evar:
F = ((sum(Ni*(Mi-my)**2,axis=0) / (k-1.0)) / (sum((Ni-1.0)*Vi,axis=0) / (N-k)))
pval = distributions.f.sf(F,k-1,N-k) # 1-cdf
else:
m = sum(Wi*Mi,axis=0)*1.0/swi
F = sum(Wi*(Mi-m)**2,axis=0) / ((k-1.0)*(1+2*(k-2)*tmp))
if (count < 10):
warnings.warn("Warning: sample size too small for normal approximation.")
r = stats.rankdata(abs(d))
r_plus = sum((d > 0)*r,axis=0)
r_minus = sum((d < 0)*r,axis=0)
T = min(r_plus, r_minus)
mn = count*(count+1.0)*0.25
src/s/c/scipy-HEAD/scipy/stats/distributions.py scipy(Download)
from scipy.special import gammaln as gamln
import inspect
from numpy import alltrue, where, arange, putmask, \
ravel, take, ones, sum, shape, product, repeat, reshape, \
zeros, floor, logical_and, log, sqrt, exp, arctanh, tan, sin, arcsin, \
arctan, tanh, ndarray, cos, cosh, sinh, newaxis, array, log1p, expm1
def _nnlf(self, x, *args):
return -sum(self._logpdf(x, *args),axis=0)
def nnlf(self, theta, x):
# - sum (log pdf(x, theta),axis=0)
# where theta are the parameters (including loc and scale)
#
def _munp(self, n, c):
k = arange(0,n+1)
val = (-1.0/c)**n * sum(comb(n,k)*(-1)**k / (1.0-c*k),axis=0)
return where(c*n < 1, val, inf)
def _entropy(self, c):
if (c > 0):
return 1+c
def _munp(self, n, c):
k = arange(0,n+1)
vals = 1.0/c**n * sum(comb(n,k) * (-1)**k * special.gamma(c*k + 1),axis=0)
return where(c*n > -1, vals, inf)
genextreme = genextreme_gen(name='genextreme',
longname="A generalized extreme value",
shapes='c',extradoc="""
Routine will normalize pk and qk if they don't sum to 1
"""
pk = arr(pk)
pk = 1.0* pk / sum(pk,axis=0)
if qk is None:
vec = where(pk == 0, 0.0, pk*log(pk))
else:
qk = arr(qk)
if len(qk) != len(pk):
raise ValueError, "qk and pk must have same length."
qk = 1.0*qk / sum(qk,axis=0)
if any(take(pk,nonzero(qk==0.0),axis=0)!=0.0, 0):
return inf
vec = where (pk == 0, 0.0, -pk*log(pk / qk))
return -sum(vec,axis=0)
## Handlers for generic case where xk and pk are given
def _drv_moment(self, n, *args):
n = arr(n)
return sum(self.xk**n[newaxis,...] * self.pk, axis=0)
def _drv_moment_gen(self, t, *args):
t = arr(t)
return sum(exp(self.xk * t[newaxis,...]) * self.pk, axis=0)
def _cdfsingle(self, k, *args):
m = arange(int(self.a),k+1)
return sum(self._pmf(m,*args),axis=0)
def _cdf(self, x, *args):
k = floor(x)
return self._cdfvec(k,*args)
upp = min(max(suppnmin, upp), ub)
supp = np.arange(low, upp+1, self.inc) #check limits
#print 'low, upp', low, upp
tot = np.sum(fun(supp))
diff = 1e100
pos = upp + self.inc
count = 0
def _entropy(self, n, pr):
k = r_[0:n+1]
vals = self._pmf(k,n,pr)
lvals = where(vals==0,0.0,log(vals))
return -sum(vals*lvals,axis=0)
binom = binom_gen(name='binom',shapes="n, pr",extradoc="""
def _entropy(self, M, n, N):
k = r_[N-(M-n):min(n,N)+1]
vals = self.pmf(k,M,n,N)
lvals = where(vals==0.0,0.0,log(vals))
return -sum(vals*lvals,axis=0)
hypergeom = hypergeom_gen(name='hypergeom',longname="A hypergeometric",
shapes="M, n, N", extradoc="""
src/p/y/pycogent-HEAD/trunk/cogent/maths/distance_transform.py pycogent(Download)
"""
from __future__ import division
from numpy import (array, zeros, logical_and, logical_or, logical_xor, where,
mean, std, argsort, take, ravel, logical_not, shape, sqrt, abs,
sum, square, asmatrix, asarray, multiply, min, rank, any, all, isfinite,
nonzero, nan_to_num, geterr, seterr, isnan)
transformations for ordination of species data. Oecologia: 129: 271-280.
"""
m = asmatrix(m)
row_norms = sqrt(sum(square(m), axis=1))
result = m / row_norms
return result
transformations for ordination of species data. Oecologia: 129: 271-280.
"""
m = asmatrix(m)
row_sums = sum(m, axis=1)
result = m / row_sums
return result
transformations for ordination of species data. Oecologia: 129: 271-280.
"""
m = asmatrix(m)
row_sums = sum(m, axis=1)
result = sqrt(m / row_sums)
return result
r1 = datamtx[i,:]
for j in range(i):
r2 = datamtx[j,:]
abs_v = float(sum(abs(r1 - r2)))
v = sum(r1 + r2)
cur_d = 0.0
if v > 0:
if numrows == 0 or numcols == 0:
return zeros((0,0),'d')
dists = zeros((numrows,numrows),'d')
sqrt_grand_sum = sqrt(sum(datamtx))
rowsums, colsums = sum(datamtx, axis=1), sum(datamtx, axis=0)
if not colsums.all():
for i in range(len(colsums)):
else: dist = 1.0
else:
dist = sqrt_grand_sum *\
sqrt(sum( multiply((1./colsums) ,
square(r1/r1sum - r2/r2sum)) ))
dists[i,j] = dists[j,i] = dist
return dists
for j in range(i):
r2 = datamtx[j]
rowdiff = r2 - r1
dist = sum(abs(r1 - r2) / coldiffs)
dists[i,j] = dists[j,i] = dist
return dists
dists = zeros((numrows,numrows),'d')
for i in range(numrows):
r1 = datamtx[i]
r1sum = sum(r1)
for j in range(i):
r2 = datamtx[j]
r2sum = sum(r2)
for j in range(i):
r2 = datamtx[j]
jrowsum = rowsums[j]
rowminsum = float(sum(where(r1<r2, r1,r2)))
if (irowsum == 0.0 and jrowsum == 0.0):
cur_d = 0.0 # => two rows of zeros
elif (irowsum == 0.0 or jrowsum == 0.0):
for i in range(numrows):
r1 = datamtx[i] # cache here
for j in range(i):
dists[i,j] = dists[j,i] = sum(abs(r1 - datamtx[j]))
return dists
similarity = 0.0
else:
shared = logical_and(row1, row2)
u = sum(row1[shared]) / N1
v = sum(row2[shared]) / N2
# Verified by graphical inspection
if u == 0.0 and v == 0.0:
dist = 1.0
else:
# d's zero only if N's zero, and we already checked for that
similarity = 2*sum(row1*row2)
similarity = similarity / ( (d1 + d2) * N1 * N2 )
dist = 1 - similarity
dists[i][j] = dists[j][i] = dist
r2 = datamtx[j,:]
r2m = rowmeans[j]
r2dev = r2 - r2m
top = sum(r1dev*r2dev)
sum1 = sum(r1dev**2)
sum2 = sum(r2dev**2)
r1 = datamtx[i,:]
for j in range(i):
r2 = datamtx[j,:]
top = float(sum(abs(r1 - r2)))
bot = float(sum(where(r1>r2, r1,r2)))
if bot <= 0.0:
cur_d = 0.0
r2 = datamtx[j,:]
rank2 = _rankdata(r2)
rankdiff = rank1 - rank2
dsqsum = sum((rankdiff)**2)
dist = 6*dsqsum / float(numcols*(numcols**2-1))
dists[i][j] = dists[j][i] = dist
dists = zeros((numrows,numrows),'d')
for i in range(numrows):
r1 = datamtx[i]
r1sum = sum(r1)
for j in range(i):
r2 = datamtx[j]
r2sum = sum(r2)
row = []
for j in range(len(otumtx)):
new = otumtx[i] - otumtx[j]
row.append(sum(new > 0))
result.append(row)
return array(result)
src/p/y/pycogent-HEAD/cogent/maths/distance_transform.py pycogent(Download)
"""
from __future__ import division
from numpy import (array, zeros, logical_and, logical_or, logical_xor, where,
mean, std, argsort, take, ravel, logical_not, shape, sqrt, abs,
sum, square, asmatrix, asarray, multiply, min, rank, any, all, isfinite,
nonzero, nan_to_num, geterr, seterr, isnan)
transformations for ordination of species data. Oecologia: 129: 271-280.
"""
m = asmatrix(m)
row_norms = sqrt(sum(square(m), axis=1))
result = m / row_norms
return result
transformations for ordination of species data. Oecologia: 129: 271-280.
"""
m = asmatrix(m)
row_sums = sum(m, axis=1)
result = m / row_sums
return result
transformations for ordination of species data. Oecologia: 129: 271-280.
"""
m = asmatrix(m)
row_sums = sum(m, axis=1)
result = sqrt(m / row_sums)
return result
r1 = datamtx[i,:]
for j in range(i):
r2 = datamtx[j,:]
abs_v = float(sum(abs(r1 - r2)))
v = sum(r1 + r2)
cur_d = 0.0
if v > 0:
if numrows == 0 or numcols == 0:
return zeros((0,0),'d')
dists = zeros((numrows,numrows),'d')
sqrt_grand_sum = sqrt(sum(datamtx))
rowsums, colsums = sum(datamtx, axis=1), sum(datamtx, axis=0)
if not colsums.all():
for i in range(len(colsums)):
else: dist = 1.0
else:
dist = sqrt_grand_sum *\
sqrt(sum( multiply((1./colsums) ,
square(r1/r1sum - r2/r2sum)) ))
dists[i,j] = dists[j,i] = dist
return dists
for j in range(i):
r2 = datamtx[j]
rowdiff = r2 - r1
dist = sum(abs(r1 - r2) / coldiffs)
dists[i,j] = dists[j,i] = dist
return dists
dists = zeros((numrows,numrows),'d')
for i in range(numrows):
r1 = datamtx[i]
r1sum = sum(r1)
for j in range(i):
r2 = datamtx[j]
r2sum = sum(r2)
for j in range(i):
r2 = datamtx[j]
jrowsum = rowsums[j]
rowminsum = float(sum(where(r1<r2, r1,r2)))
if (irowsum == 0.0 and jrowsum == 0.0):
cur_d = 0.0 # => two rows of zeros
elif (irowsum == 0.0 or jrowsum == 0.0):
for i in range(numrows):
r1 = datamtx[i] # cache here
for j in range(i):
dists[i,j] = dists[j,i] = sum(abs(r1 - datamtx[j]))
return dists
dist = 1.0
else:
# d's zero only if N's zero, and we already checked for that
similarity = 2*sum(row1*row2)
similarity = similarity / ( (d1 + d2) * N1 * N2 )
dist = 1 - similarity
dists[i][j] = dists[j][i] = dist
r2 = datamtx[j,:]
r2m = rowmeans[j]
r2dev = r2 - r2m
top = sum(r1dev*r2dev)
sum1 = sum(r1dev**2)
sum2 = sum(r2dev**2)
r1 = datamtx[i,:]
for j in range(i):
r2 = datamtx[j,:]
top = float(sum(abs(r1 - r2)))
bot = float(sum(where(r1>r2, r1,r2)))
if bot <= 0.0:
cur_d = 0.0
r2 = datamtx[j,:]
rank2 = _rankdata(r2)
rankdiff = rank1 - rank2
dsqsum = sum((rankdiff)**2)
dist = 6*dsqsum / float(numcols*(numcols**2-1))
dists[i][j] = dists[j][i] = dist
dists = zeros((numrows,numrows),'d')
for i in range(numrows):
r1 = datamtx[i]
r1sum = sum(r1)
for j in range(i):
r2 = datamtx[j]
r2sum = sum(r2)
src/s/c/scipy-0.8.0/scipy/stats/morestats.py scipy(Download)
import statlib
import stats
import distributions
from numpy import isscalar, r_, log, sum, around, unique, asarray
from numpy import zeros, arange, sort, amin, amax, any, where, \
atleast_1d, sqrt, ceil, floor, array, poly1d, compress, not_equal, \
pi, exp, ravel, angle
data = ravel(data)
N = len(data)
for k in range(1,n+1):
S[k] = sum(data**k,axis=0)
if n==1:
return S[1]*1.0/N
elif n==2:
def boxcox_llf(lmb, data):
"""The boxcox log-likelihood function.
"""
N = len(data)
y = boxcox(data,lmb)
my = np.mean(y, axis=0)
f = (lmb-1)*sum(log(data),axis=0)
f -= N/2.0*log(sum((y-my)**2.0/N,axis=0))
def rootfunc(ab,xj,N):
a,b = ab
tmp = (xj-a)/b
tmp2 = exp(tmp)
val = [sum(1.0/(1+tmp2),axis=0)-0.5*N,
sum(tmp*(1.0-tmp2)/(1+tmp2),axis=0)+N]
return array(val)
raise ValueError("dist has to be one of 'norm','expon','logistic'",
"'gumbel','extreme1'")
i = arange(1,N+1)
S = sum((2*i-1.0)/N*(log(z)+log(1-z[::-1])),axis=0)
A2 = -N-S
return A2, critical, sig
xy = r_[x,y] # combine
rank = stats.rankdata(xy)
symrank = amin(array((rank,N-rank+1)),0)
AB = sum(symrank[:n],axis=0)
uxy = unique(xy)
repeats = (len(uxy) != len(xy))
exact = ((m<55) and (n<55) and not repeats)
if repeats and ((m < 55) or (n < 55)):
warnings.warn("Ties preclude use of exact statistic.")
if exact:
astart, a1, ifault = statlib.gscale(n,m)
ind = AB-astart
total = sum(a1,axis=0)
if ind < len(a1)/2.0:
cind = int(ceil(ind))
if (ind == cind):
pval = 2.0*sum(a1[:cind+1],axis=0)/total
else:
pval = 2.0*sum(a1[:cind],axis=0)/total
else:
find = int(floor(ind))
if (ind == floor(ind)):
pval = 2.0*sum(a1[find:],axis=0)/total
else:
pval = 2.0*sum(a1[find+1:],axis=0)/total
varAB = m*n*(N+2)*(N-2.0)/48/(N-1.0)
if repeats: # adjust variance estimates
# compute sum(tj * rj**2,axis=0)
fac = sum(symrank**2,axis=0)
if N % 2: # N odd
varAB = m*n*(16*N*fac-(N+1)**4)/(16.0 * N**2 * (N-1))
else: # N even
for j in range(k):
Ni[j] = len(args[j])
ssq[j] = np.var(args[j], ddof=1)
Ntot = sum(Ni,axis=0)
spsq = sum((Ni-1)*ssq,axis=0)/(1.0*(Ntot-k))
numer = (Ntot*1.0-k)*log(spsq) - sum((Ni-1.0)*log(ssq),axis=0)
denom = 1.0 + (1.0/(3*(k-1)))*((sum(1.0/(Ni-1.0),axis=0))-1.0/(Ntot-k))
for j in range(k):
Ni[j] = len(args[j])
Yci[j] = func(args[j])
Ntot = sum(Ni,axis=0)
# compute Zij's
Zij = [None]*k
Zbar += Zbari[i]*Ni[i]
Zbar /= Ntot
numer = (Ntot-k)*sum(Ni*(Zbari-Zbar)**2,axis=0)
# compute denom_variance
dvar = 0.0
for i in range(k):
dvar += sum((Zij[i]-Zbari[i])**2,axis=0)
rerr = 1+1e-7
if (x < p*n):
i = np.arange(np.ceil(p*n),n+1)
y = np.sum(distributions.binom.pmf(i,n,p) <= d*rerr,axis=0)
pval = distributions.binom.cdf(x,n,p) + distributions.binom.sf(n-y,n,p)
else:
i = np.arange(np.floor(p*n))
y = np.sum(distributions.binom.pmf(i,n,p) <= d*rerr,axis=0)
Ni = asarray([len(args[j]) for j in range(k)])
Yci = asarray([func(args[j]) for j in range(k)])
Ntot = sum(Ni,axis=0)
# compute Zij's
Zij = [abs(asarray(args[i])-Yci[i]) for i in range(k)]
allZij = []
anbar = np.mean(a, axis=0)
varsq = np.var(a,axis=0, ddof=1)
Xsq = sum(Ni*(asarray(Aibar)-anbar)**2.0,axis=0)/varsq
pval = distributions.chi2.sf(Xsq,k-1) # 1 - cdf
return Xsq, pval
raise ValueError, "Not enough observations."
ranks = stats.rankdata(xy)
Ri = ranks[:n]
M = sum((Ri - (N+1.0)/2)**2,axis=0)
# Approx stat.
mnM = n*(N*N-1.0)/12
varM = m*n*(N+1.0)*(N+2)*(N-2)/180
Mi = array([np.mean(args[i], axis=0) for i in range(k)])
Vi = array([np.var(args[i]) for i in range(k)])
Wi = Ni / Vi
swi = sum(Wi,axis=0)
N = sum(Ni,axis=0)
my = sum(Mi*Ni,axis=0)*1.0/N
tmp = sum((1-Wi/swi)**2 / (Ni-1.0),axis=0)/(k*k-1.0)
if evar:
F = ((sum(Ni*(Mi-my)**2,axis=0) / (k-1.0)) / (sum((Ni-1.0)*Vi,axis=0) / (N-k)))
pval = distributions.f.sf(F,k-1,N-k) # 1-cdf
else:
m = sum(Wi*Mi,axis=0)*1.0/swi
F = sum(Wi*(Mi-m)**2,axis=0) / ((k-1.0)*(1+2*(k-2)*tmp))
if (count < 10):
warnings.warn("Warning: sample size too small for normal approximation.")
r = stats.rankdata(abs(d))
r_plus = sum((d > 0)*r,axis=0)
r_minus = sum((d < 0)*r,axis=0)
T = min(r_plus, r_minus)
mn = count*(count+1.0)*0.25
src/c/o/cogent-1.4.1/cogent/maths/distance_transform.py cogent(Download)
"""
from __future__ import division
from numpy import (array, zeros, logical_and, logical_or, logical_xor, where,
mean, std, argsort, take, ravel, logical_not, shape, sqrt, abs,
sum, square, asmatrix, asarray, multiply, min, rank, any, all, isfinite,
nonzero, nan_to_num, geterr, seterr)
transformations for ordination of species data. Oecologia: 129: 271-280.
"""
m = asmatrix(m)
row_norms = sqrt(sum(square(m), axis=1))
result = m / row_norms
return result
transformations for ordination of species data. Oecologia: 129: 271-280.
"""
m = asmatrix(m)
row_sums = sum(m, axis=1)
result = m / row_sums
return result
transformations for ordination of species data. Oecologia: 129: 271-280.
"""
m = asmatrix(m)
row_sums = sum(m, axis=1)
result = sqrt(m / row_sums)
return result
r1 = datamtx[i,:]
for j in range(i):
r2 = datamtx[j,:]
abs_v = float(sum(abs(r1 - r2)))
v = sum(r1 + r2)
cur_d = 0.0
if v > 0:
if numrows == 0 or numcols == 0:
return zeros((0,0),'d')
dists = zeros((numrows,numrows),'d')
sqrt_grand_sum = sqrt(sum(datamtx))
rowsums, colsums = sum(datamtx, axis=1), sum(datamtx, axis=0)
if not colsums.all():
for i in range(len(colsums)):
else: dist = 1.0
else:
dist = sqrt_grand_sum *\
sqrt(sum( multiply((1./colsums) ,
square(r1/r1sum - r2/r2sum)) ))
dists[i,j] = dists[j,i] = dist
return dists
for j in range(i):
r2 = datamtx[j]
rowdiff = r2 - r1
dist = sum(abs(r1 - r2) / coldiffs)
dists[i,j] = dists[j,i] = dist
return dists
dists = zeros((numrows,numrows),'d')
for i in range(numrows):
r1 = datamtx[i]
r1sum = sum(r1)
for j in range(i):
r2 = datamtx[j]
r2sum = sum(r2)
for j in range(i):
r2 = datamtx[j]
jrowsum = rowsums[j]
rowminsum = float(sum(where(r1<r2, r1,r2)))
if (irowsum == 0.0 and jrowsum == 0.0):
cur_d = 0.0 # => two rows of zeros
elif (irowsum == 0.0 or jrowsum == 0.0):
for i in range(numrows):
r1 = datamtx[i] # cache here
for j in range(i):
dists[i,j] = dists[j,i] = sum(abs(r1 - datamtx[j]))
return dists
dist = 1.0
else:
# d's zero only if N's zero, and we already checked for that
similarity = 2*sum(row1*row2)
similarity = similarity / ( (d1 + d2) * N1 * N2 )
dist = 1 - similarity
dists[i][j] = dists[j][i] = dist
r2 = datamtx[j,:]
r2m = rowmeans[j]
r2dev = r2 - r2m
top = sum(r1dev*r2dev)
sum1 = sum(r1dev**2)
sum2 = sum(r2dev**2)
r1 = datamtx[i,:]
for j in range(i):
r2 = datamtx[j,:]
top = float(sum(abs(r1 - r2)))
bot = float(sum(where(r1>r2, r1,r2)))
if bot <= 0.0:
cur_d = 0.0
r2 = datamtx[j,:]
rank2 = _rankdata(r2)
rankdiff = rank1 - rank2
dsqsum = sum((rankdiff)**2)
dist = 6*dsqsum / float(numcols*(numcols**2-1))
dists[i][j] = dists[j][i] = dist
dists = zeros((numrows,numrows),'d')
for i in range(numrows):
r1 = datamtx[i]
r1sum = sum(r1)
for j in range(i):
r2 = datamtx[j]
r2sum = sum(r2)
src/s/c/scipy-0.8.0/scipy/stats/distributions.py scipy(Download)
import scipy.integrate
import inspect
from numpy import alltrue, where, arange, put, putmask, \
ravel, take, ones, sum, shape, product, repeat, reshape, \
zeros, floor, logical_and, log, sqrt, exp, arctanh, tan, sin, arcsin, \
arctan, tanh, ndarray, cos, cosh, sinh, newaxis, array, log1p, expm1
def _nnlf(self, x, *args):
return -sum(log(self._pdf(x, *args)),axis=0)
def nnlf(self, theta, x):
"""Negative log likelihood function.
This function should be minimized to produce maximum likelihood estimates (MLE).
def _munp(self, n, c):
k = arange(0,n+1)
val = (-1.0/c)**n * sum(comb(n,k)*(-1)**k / (1.0-c*k),axis=0)
return where(c*n < 1, val, inf)
def _entropy(self, c):
if (c > 0):
return 1+c
def _munp(self, n, c):
k = arange(0,n+1)
vals = 1.0/c**n * sum(comb(n,k) * (-1)**k * special.gamma(c*k + 1),axis=0)
return where(c*n > -1, vals, inf)
genextreme = genextreme_gen(name='genextreme',
longname="A generalized extreme value",
shapes='c',extradoc="""
Routine will normalize pk and qk if they don't sum to 1
"""
pk = arr(pk)
pk = 1.0* pk / sum(pk,axis=0)
if qk is None:
vec = where(pk == 0, 0.0, pk*log(pk))
else:
qk = arr(qk)
if len(qk) != len(pk):
raise ValueError, "qk and pk must have same length."
qk = 1.0*qk / sum(qk,axis=0)
if any(take(pk,nonzero(qk==0.0),axis=0)!=0.0, 0):
return inf
vec = where (pk == 0, 0.0, -pk*log(pk / qk))
return -sum(vec,axis=0)
## Handlers for generic case where xk and pk are given
def _drv_moment(self, n, *args):
n = arr(n)
return sum(self.xk**n[newaxis,...] * self.pk, axis=0)
def _drv_moment_gen(self, t, *args):
t = arr(t)
return sum(exp(self.xk * t[newaxis,...]) * self.pk, axis=0)
def _cdfsingle(self, k, *args):
m = arange(int(self.a),k+1)
return sum(self._pmf(m,*args),axis=0)
def _cdf(self, x, *args):
k = floor(x)
return self._cdfvec(k,*args)
def _entropy(self, n, pr):
k = r_[0:n+1]
vals = self._pmf(k,n,pr)
lvals = where(vals==0,0.0,log(vals))
return -sum(vals*lvals,axis=0)
binom = binom_gen(name='binom',shapes="n, pr",extradoc="""
def _entropy(self, M, n, N):
k = r_[N-(M-n):min(n,N)+1]
vals = self.pmf(k,M,n,N)
lvals = where(vals==0.0,0.0,log(vals))
return -sum(vals*lvals,axis=0)
hypergeom = hypergeom_gen(name='hypergeom',longname="A hypergeometric",
shapes="M, n, N", extradoc="""
src/w/a/wafo-0.11/src/wafo/stats/distributions.py wafo(Download)
from scipy import optimize
import inspect
from numpy import alltrue, where, arange, put, putmask, \
ravel, take, ones, sum, shape, product, repeat, reshape, \
zeros, floor, logical_and, log, sqrt, exp, arctanh, tan, sin, arcsin, \
arctan, tanh, ndarray, cos, cosh, sinh, newaxis, array, log1p, expm1
finiteD = numpy.isfinite(logD)
nonfiniteD = 1 - finiteD
if any(nonfiniteD):
T = -sum(logD[finiteD], axis=0) + 100.0 * log(realmax) * sum(nonfiniteD, axis=0);
else:
T = -sum(logD, axis=0) #%Moran's negative log product spacing statistic
return T
def _nnlf(self, x, *args):
return - sum(log(self._pdf(x, *args)), axis=0)
def nnlf(self, theta, x):
''' Return negative loglikelihood function, i.e., - sum (log pdf(x, theta),axis=0)
where theta are the parameters (including loc and scale)
'''
goodargs = argsreduce(1 - cond0, *((x,)))
goodargs = tuple(goodargs + list(args))
N = len(x)
Nbad = sum(cond0)
xmin = floatinfo.machar.xmin
return self._nnlf(*goodargs) + N * log(scale) + Nbad * 100.0 * log(xmin)
elif (any(cond0)):
n = len(x)
if abs(c) > eps:
cx = c * x;
sumlog1pcx = sum(log1p(cx));
#LL = n*log(scale) + (1-1/k)*sumlog1mkxn
r = x / (1.0 + cx)
sumix = sum(1.0 / (1.0 + cx) ** 2.0)
sumr = sum(r)
sumr2 = sum(r ** 2.0)
H11 = -2 * sumlog1pcx / c ** 3 + 2 * sumr / c ** 2 + (1.0 + 1.0 / c) * sumr2
H22 = c * (c + 1) * sumix / scale ** 2.0
H33 = (n - 2 * (c + 1) * sumr + c * (c + 1) * sumr2) / scale ** 2.0;
H12 = -sum((1 - x) / ((1 + cx) ** 2.0)) / scale
else: # c == 0
sumx = sum(x);
#LL = n*log(scale) + sumx;
sumx2 = sum(x ** 2.0);
H11 = -(2 / 3) * sum(x ** 3.0) + sumx2
H22 = 0.0
H12 = -(n - sum(x)) / scale
def _munp(self, n, c):
k = arange(0, n + 1)
val = (-1.0 / c) ** n * sum(comb(n, k) * (-1) ** k / (1.0 - c * k), axis=0)
return where(c * n < 1, val, inf)
def _entropy(self, c):
# return 1+c
if (c >= 0):
def _munp(self, n, c):
k = arange(0, n + 1)
vals = 1.0 / c ** n * sum(comb(n, k) * (-1) ** k * special.gamma(c * k + 1), axis=0)
return where(c * n > -1, vals, inf)
genextreme = genextreme_gen(name='genextreme',
longname="A generalized extreme value",
shapes='c', extradoc="""
Routine will normalize pk and qk if they don't sum to 1
"""
pk = arr(pk)
pk = 1.0 * pk / sum(pk, axis=0)
if qk is None:
vec = where(pk == 0, 0.0, pk * log(pk))
else:
qk = arr(qk)
if len(qk) != len(pk):
raise ValueError, "qk and pk must have same length."
qk = 1.0 * qk / sum(qk, axis=0)
if any(take(pk, nonzero(qk == 0.0), axis=0) != 0.0, 0):
return inf
vec = where (pk == 0, 0.0, -pk * log(pk / qk))
return - sum(vec, axis=0)
## Handlers for generic case where xk and pk are given
def _drv_moment(self, n, *args):
n = arr(n)
return sum(self.xk ** n[newaxis, ...] * self.pk, axis=0)
def _drv_moment_gen(self, t, *args):
t = arr(t)
return sum(exp(self.xk * t[newaxis, ...]) * self.pk, axis=0)
def _cdfsingle(self, k, *args):
m = arange(int(self.a), k + 1)
return sum(self._pmf(m, *args), axis=0)
def _cdf(self, x, *args):
k = floor(x)
return self._cdfvec(k, *args)
def _entropy(self, n, pr):
k = r_[0:n + 1]
vals = self._pmf(k, n, pr)
lvals = where(vals == 0, 0.0, log(vals))
return - sum(vals * lvals, axis=0)
binom = binom_gen(name='binom', shapes="n,pr", extradoc="""
def _entropy(self, M, n, N):
k = r_[N - (M - n):min(n, N) + 1]
vals = self.pmf(k, M, n, N)
lvals = where(vals == 0.0, 0.0, log(vals))
return - sum(vals * lvals, axis=0)
hypergeom = hypergeom_gen(name='hypergeom', longname="A hypergeometric",
shapes="M,n,N", extradoc="""
1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 Next