天天看点

numpy库的基本使用

Jupyter Notebook高级 - 魔法命令

%timeit

696 µs ± 104 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
           
1.08 µs ± 105 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
           
46.7 ms ± 3.45 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
           
%%timeit
L = []
for n in range(1000):
    L.append(n**2)
           
506 µs ± 47.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
           

%time

Wall time: 997 µs
           
%%time
L = []
for n in range(1000):
    L.append(n**2)
           
Wall time: 996 µs
           

03 Numpy.array基础

import numpy
           
numpy.__version__
           
'1.16.2'
           
import numpy as np
           

Python List的特点

L = [i for i in range(10)]
L
           
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
           
5
           
L
           
[0, 1, 2, 3, 4, 'Machine Learning', 6, 7, 8, 9]
           
import array
arr = array.array('i', [i for i in range(10)])
           
arr

           
array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
           
5
           

numpy.array

nparr = np.array([i for i in range(10)])
nparr
           
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
           
5
           
nparr[5] = 100
nparr
           
array([  0,   1,   2,   3,   4, 100,   6,   7,   8,   9])
           
nparr.dtype
           
dtype('int32')
           
nparr

           
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
           
nparr.dtype

           
dtype('int32')
           
nparr2.dtype

           
dtype('float64')
           

其他创建 numpy.array的方法

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
           
np.zeros(10).dtype
           
dtype('float64')
           
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
           
array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])
           
array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])
           
array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])
           
array([[666.6, 666.6, 666.6, 666.6, 666.6],
       [666.6, 666.6, 666.6, 666.6, 666.6],
       [666.6, 666.6, 666.6, 666.6, 666.6]])
           

arange

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
           
array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])
           
array([ 0. ,  0.2,  0.4,  0.6,  0.8,  1. ,  1.2,  1.4,  1.6,  1.8,  2. ,
        2.2,  2.4,  2.6,  2.8,  3. ,  3.2,  3.4,  3.6,  3.8,  4. ,  4.2,
        4.4,  4.6,  4.8,  5. ,  5.2,  5.4,  5.6,  5.8,  6. ,  6.2,  6.4,
        6.6,  6.8,  7. ,  7.2,  7.4,  7.6,  7.8,  8. ,  8.2,  8.4,  8.6,
        8.8,  9. ,  9.2,  9.4,  9.6,  9.8, 10. , 10.2, 10.4, 10.6, 10.8,
       11. , 11.2, 11.4, 11.6, 11.8, 12. , 12.2, 12.4, 12.6, 12.8, 13. ,
       13.2, 13.4, 13.6, 13.8, 14. , 14.2, 14.4, 14.6, 14.8, 15. , 15.2,
       15.4, 15.6, 15.8, 16. , 16.2, 16.4, 16.6, 16.8, 17. , 17.2, 17.4,
       17.6, 17.8, 18. , 18.2, 18.4, 18.6, 18.8, 19. , 19.2, 19.4, 19.6,
       19.8])
           
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
           
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
           

linspace

array([ 0.        ,  2.22222222,  4.44444444,  6.66666667,  8.88888889,
       11.11111111, 13.33333333, 15.55555556, 17.77777778, 20.        ])
           
array([ 0.,  2.,  4.,  6.,  8., 10., 12., 14., 16., 18., 20.])
           

random

array([9, 5, 9, 5, 1, 8, 4, 1, 2, 1])
           
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
           
array([3, 4, 0, 9, 4, 3, 6, 4, 6, 0])
           
array([[2, 5, 0, 8, 9],
       [8, 7, 8, 8, 6],
       [6, 6, 2, 8, 0]])
           
# 指定种子,每次产生的数都一样
np.random.seed(666)
np.random.randint(0, 10, size=(3, 5))

           
array([[2, 6, 9, 4, 3],
       [1, 0, 8, 7, 5],
       [2, 5, 5, 4, 8]])
           
0.7315955468480113
           
array([[0.8578588 , 0.76741234, 0.95323137, 0.29097383, 0.84778197],
       [0.3497619 , 0.92389692, 0.29489453, 0.52438061, 0.94253896],
       [0.07473949, 0.27646251, 0.4675855 , 0.31581532, 0.39016259]])
           
0.9047266176428719
           
# 均值为10,标准差为100的正态分布(第二个参数scale是标准差,不是方差)
np.random.normal(10, 100)
           
-72.62832650185376
           
array([[  92.1013692 ,   46.7125916 ,  175.39958581,   23.94647258],
       [-111.71535503,  -89.49473667, -146.44858645, -152.87900441],
       [ 133.17486561,  -81.36003361,  -17.08440668,  152.02491357]])
           
np.random.normal?
           
np.random?
           
Help on built-in function normal:

normal(...) method of mtrand.RandomState instance
    normal(loc=0.0, scale=1.0, size=None)
    
    Draw random samples from a normal (Gaussian) distribution.
    
    The probability density function of the normal distribution, first
    derived by De Moivre and 200 years later by both Gauss and Laplace
    independently [2]_, is often called the bell curve because of
    its characteristic shape (see the example below).
    
    The normal distributions occurs often in nature.  For example, it
    describes the commonly occurring distribution of samples influenced
    by a large number of tiny, random disturbances, each with its own
    unique distribution [2]_.
    
    Parameters
    ----------
    loc : float or array_like of floats
        Mean ("centre") of the distribution.
    scale : float or array_like of floats
        Standard deviation (spread or "width") of the distribution.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  If size is ``None`` (default),
        a single value is returned if ``loc`` and ``scale`` are both scalars.
        Otherwise, ``np.broadcast(loc, scale).size`` samples are drawn.
    
    Returns
    -------
    out : ndarray or scalar
        Drawn samples from the parameterized normal distribution.
    
    See Also
    --------
    scipy.stats.norm : probability density function, distribution or
        cumulative density function, etc.
    
    Notes
    -----
    The probability density for the Gaussian distribution is
    
    .. math:: p(x) = \frac{1}{\sqrt{ 2 \pi \sigma^2 }}
                     e^{ - \frac{ (x - \mu)^2 } {2 \sigma^2} },
    
    where :math:`\mu` is the mean and :math:`\sigma` the standard
    deviation. The square of the standard deviation, :math:`\sigma^2`,
    is called the variance.
    
    The function has its peak at the mean, and its "spread" increases with
    the standard deviation (the function reaches 0.607 times its maximum at
    :math:`x + \sigma` and :math:`x - \sigma` [2]_).  This implies that
    `numpy.random.normal` is more likely to return samples lying close to
    the mean, rather than those far away.
    
    References
    ----------
    .. [1] Wikipedia, "Normal distribution",
           https://en.wikipedia.org/wiki/Normal_distribution
    .. [2] P. R. Peebles Jr., "Central Limit Theorem" in "Probability,
           Random Variables and Random Signal Principles", 4th ed., 2001,
           pp. 51, 51, 125.
    
    Examples
    --------
    Draw samples from the distribution:
    
    >>> mu, sigma = 0, 0.1 # mean and standard deviation
    >>> s = np.random.normal(mu, sigma, 1000)
    
    Verify the mean and the variance:
    
    >>> abs(mu - np.mean(s)) < 0.01
    True
    
    >>> abs(sigma - np.std(s, ddof=1)) < 0.01
    True
    
    Display the histogram of the samples, along with
    the probability density function:
    
    >>> import matplotlib.pyplot as plt
    >>> count, bins, ignored = plt.hist(s, 30, density=True)
    >>> plt.plot(bins, 1/(sigma * np.sqrt(2 * np.pi)) *
    ...                np.exp( - (bins - mu)**2 / (2 * sigma**2) ),
    ...          linewidth=2, color='r')
    >>> plt.show()
           

05 Numpy.array的基本操作

import numpy as np
x = np.arange(10)
x
           
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
           
X = np.arange(15).reshape(3, 5)
X
           
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])
           
x.ndim
           
1
           
X.ndim
           
2
           
x.shape
           
(10,)
           
X.shape
           
(3, 5)
           
x.size
           
10
           
X.size
           
15
           

numpy.array的数据访问

x
           
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
           
9
           
X

           
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])
           
10
           
array([0, 1, 2, 3, 4])
           
array([0, 1, 2, 3, 4])
           
array([5, 6, 7, 8, 9])
           
array([0, 2, 4, 6, 8])
           
array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])
           
X
           
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])
           
array([[0, 1, 2],
       [5, 6, 7]])
           
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
           
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
           
array([[0, 2, 4],
       [5, 7, 9]])
           
array([[14, 13, 12, 11, 10],
       [ 9,  8,  7,  6,  5],
       [ 4,  3,  2,  1,  0]])
           
array([0, 1, 2, 3, 4])
           
array([0, 1, 2, 3, 4])
           
X[0, :].ndim
           
1
           
array([ 0,  5, 10])
           
X[:, 0].ndim
           
1
           
subX = X[:2, :3]
subX
           
array([[0, 1, 2],
       [5, 6, 7]])
           
subX[0, 0] = 100
subX
           
array([[100,   1,   2],
       [  5,   6,   7]])
           
X
           
array([[100,   1,   2,   3,   4],
       [  5,   6,   7,   8,   9],
       [ 10,  11,  12,  13,  14]])
           
subX = X[:2, :3].copy()
subX[0, 0] = 0
X
           
array([[100,   1,   2,   3,   4],
       [  5,   6,   7,   8,   9],
       [ 10,  11,  12,  13,  14]])
           

Reshape

x.shape
           
(10,)
           
x.ndim
           
1
           
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
           
x
           
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
           
A = x.reshape(2, 5)
A
           
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
           
x

           
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
           
B

           
array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])
           
x

           
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
           
B.ndim

           
2
           
B.shape

           
(1, 10)
           
x.shape

           
(10,)
           
array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])
           
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
           
array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])
           
x = np.array([1,2,3])
y = np.array([3,2,1])

           
x

           
array([1, 2, 3])
           
y

           
array([3, 2, 1])
           
array([1, 2, 3, 3, 2, 1])
           
z = np.array([666, 666, 666])
np.concatenate([x,y,z])

           
array([  1,   2,   3,   3,   2,   1, 666, 666, 666])
           
A = np.array([[1,2,3],
             [4,5,6]])

           
array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])
           
# 沿着列方向(axis=1,即水平方向)拼接
np.concatenate([A,A], axis=1)

           
array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])
           
---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

<ipython-input-348-abdc54b54f98> in <module>
----> 1 np.concatenate([A,z])



ValueError: all the input arrays must have same number of dimensions
           
array([[  1,   2,   3],
       [  4,   5,   6],
       [666, 666, 666]])
           
A

           
array([[1, 2, 3],
       [4, 5, 6]])
           
A2 = np.concatenate([A,z.reshape(1, -1)])
A2

           
array([[  1,   2,   3],
       [  4,   5,   6],
       [666, 666, 666]])
           
array([[  1,   2,   3],
       [  4,   5,   6],
       [666, 666, 666]])
           
B = np.full((2,2) ,100)
B

           
array([[100, 100],
       [100, 100]])
           
array([[  1,   2,   3, 100, 100],
       [  4,   5,   6, 100, 100]])
           

分割操作

x = np.arange(10)
x

           
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
           
x1, x2, x3 = np.split(x, [3,7])
x1, x2, x3

           
(array([0, 1, 2]), array([3, 4, 5, 6]), array([7, 8, 9]))
           
A = np.arange(16).reshape((4, 4))
A

           
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
           
A1, A2 = np.split(A, [2])
A1

           
array([[0, 1, 2, 3],
       [4, 5, 6, 7]])
           
A2

           
array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])
           
A1, A2 = np.split(A, [2], axis=1)
A1
           
array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13]])
           
A2
           
array([[ 2,  3],
       [ 6,  7],
       [10, 11],
       [14, 15]])
           
upper
           
array([[0, 1, 2, 3],
       [4, 5, 6, 7]])
           
lower
           
array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])
           
data = np.arange(16).reshape((4, 4))
data
           
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
           
X, y = np.hsplit(data, [-1])
X
           
array([[ 0,  1,  2],
       [ 4,  5,  6],
       [ 8,  9, 10],
       [12, 13, 14]])
           
y
           
array([[ 3],
       [ 7],
       [11],
       [15]])
           

numpy.array中的运算

给定一个向量,让向量中每一个数乘以2

a=(0,1,2)

a*2=(0,2,4)

n = 10
L = [i for i in range(n)]
           
2*L
           
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
           
A=[]
for e in L:
    A.append(2*e)
A
           
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
           
L = np.arange(n)
A = 2*L
A
           
array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])
           

Universal Functions

X = np.arange(1, 16).reshape((3,5))
X
           
array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])
           
array([[ 2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16]])
           
array([[ 2,  4,  6,  8, 10],
       [12, 14, 16, 18, 20],
       [22, 24, 26, 28, 30]])
           
1/X
           
array([[1.        , 0.5       , 0.33333333, 0.25      , 0.2       ],
       [0.16666667, 0.14285714, 0.125     , 0.11111111, 0.1       ],
       [0.09090909, 0.08333333, 0.07692308, 0.07142857, 0.06666667]])
           
array([[ 0.84147098,  0.90929743,  0.14112001, -0.7568025 , -0.95892427],
       [-0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849, -0.54402111],
       [-0.99999021, -0.53657292,  0.42016704,  0.99060736,  0.65028784]])
           
array([[       3,        9,       27,       81,      243],
       [     729,     2187,     6561,    19683,    59049],
       [  177147,   531441,  1594323,  4782969, 14348907]], dtype=int32)
           

矩阵运算

X+Y
           
array([[ 2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16]])
           
X*Y
           
array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])
           
X.T
           
array([[ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14],
       [ 5, 10, 15]])
           
X.T.shape
           
(5, 3)
           

向量和矩阵的运算

聚合操作

import numpy as np

L = np.random.random(100)
           
L
           
array([0.95619151, 0.73353358, 0.71210247, 0.2427146 , 0.57043673,
       0.57773404, 0.89274848, 0.12138544, 0.66083873, 0.16459271,
       0.09435837, 0.5876144 , 0.86014445, 0.82936987, 0.65091808,
       0.51832728, 0.00355049, 0.80309176, 0.35932938, 0.42014544,
       0.2026436 , 0.63279787, 0.18935861, 0.1308497 , 0.75765845,
       0.34158167, 0.52138487, 0.88302327, 0.44914216, 0.23902229,
       0.33014086, 0.26650938, 0.79768204, 0.05551712, 0.12980746,
       0.82262638, 0.36076783, 0.56970121, 0.83023273, 0.44767601,
       0.2132831 , 0.56115445, 0.71657783, 0.7493205 , 0.58624783,
       0.54759891, 0.0817732 , 0.40852941, 0.63205157, 0.12168885,
       0.27480879, 0.07770505, 0.15726591, 0.14978044, 0.38535367,
       0.70941476, 0.44518764, 0.01584702, 0.99491381, 0.90632665,
       0.05199571, 0.86100897, 0.51224649, 0.0111548 , 0.49310591,
       0.55102356, 0.27260476, 0.2311436 , 0.95858105, 0.66579831,
       0.84015904, 0.14691185, 0.14394403, 0.30843116, 0.37016398,
       0.31852964, 0.56240025, 0.4640979 , 0.80066784, 0.78735522,
       0.84323067, 0.68824287, 0.31854825, 0.93794112, 0.40711455,
       0.75336448, 0.5065076 , 0.8242313 , 0.48603164, 0.17872445,
       0.79322194, 0.13924006, 0.71347858, 0.38300909, 0.70410853,
       0.82867258, 0.58154578, 0.38693726, 0.39648041, 0.15039198])
           
48.824427939282955
           
48.82442793928298
           
0.0035504869521352234
           
0.9949138071844505