梦开始的地方----Machine Learning
5 + 7
12
for i in range(3):
print('Hello')
Hello
Hello
Hello
5 + 6
11
01 这是一个小小的测试
data = [i * 2 for i in range(20)]
len(data)
20
data
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38]
data[:10]
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
02 Jupyter Notebook 高级 - 魔法命令
%run
%run D:\百度网盘下载\工作空间\pythonProject\MachineLearning\hello.py
Hello Machine Learning !
hello('acwing zfw')
Hello acwing zfw !
%run myscript/hello.py
hello, Machine Learning !
hello('imooc')
hello, imooc !
import mymodule.FirstML
mymodule.FirstML.predict(1)
?
from mymodule import FirstML
FirstML.predict(1)
?
%timeit
%timeit L = [i ** 2 for i in range(1000)] # 测试一个语句的运行速度
369 µs ± 37.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
%timeit L = [i ** 2 for i in range(1000000)]
353 ms ± 40 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%%timeit # 测试一段代码块的运行速度
L = []
for i in range(1000):
L.append(i ** 2)
402 µs ± 26.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
%time
%time L = [i ** 2 for i in range(1000)] # 只测试一次所需的时间
Wall time: 998 µs
%%time
# 测试一段代码块的运行速度
L = []
for i in range(1000):
L.append(i ** 2)
Wall time: 1e+03 µs
import random
L = [random.random() for i in range(1000000)]
%timeit L.sort()
31.7 ms ± 3.22 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
L = [random.random() for i in range(1000000)]
%time L.sort()
Wall time: 329 ms
%time L.sort()
Wall time: 39.7 ms
其他魔法命令
%lsmagic
Available line magics:
%alias %alias_magic %autoawait %autocall %automagic %autosave %bookmark %cd %clear %cls %colors %conda %config %connect_info %copy %ddir %debug %dhist %dirs %doctest_mode %echo %ed %edit %env %gui %hist %history %killbgscripts %ldir %less %load %load_ext %loadpy %logoff %logon %logstart %logstate %logstop %ls %lsmagic %macro %magic %matplotlib %mkdir %more %notebook %page %pastebin %pdb %pdef %pdoc %pfile %pinfo %pinfo2 %pip %popd %pprint %precision %prun %psearch %psource %pushd %pwd %pycat %pylab %qtconsole %quickref %recall %rehashx %reload_ext %ren %rep %rerun %reset %reset_selective %rmdir %run %save %sc %set_env %store %sx %system %tb %time %timeit %unalias %unload_ext %who %who_ls %whos %xdel %xmode
Available cell magics:
%%! %%HTML %%SVG %%bash %%capture %%cmd %%debug %%file %%html %%javascript %%js %%latex %%markdown %%perl %%prun %%pypy %%python %%python2 %%python3 %%ruby %%script %%sh %%svg %%sx %%system %%time %%timeit %%writefile
Automagic is ON, % prefix IS NOT needed for line magics.
%run?
03 numpy.array 基础
import numpy
numpy.__version__
'1.19.2'
import numpy as np
np.__version__
'1.19.2'
Python List的特点
import array
arr = array.array('i', [i for i in range(10)])
arr
array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
arr[5]
5
arr[5] = 100
arr
array('i', [0, 1, 2, 3, 4, 100, 6, 7, 8, 9])
numpy.array
nparr = np.array([i for i in range(10)])
nparr
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
nparr[5]
5
nparr[5] = 100
nparr
array([ 0, 1, 2, 3, 4, 100, 6, 7, 8, 9])
nparr
array([ 0, 1, 2, 3, 4, 100, 6, 7, 8, 9])
nparr[5] = 'zfw' # 不可以这样哦
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-64-4656cbc62448> in <module>
----> 1 nparr[5] = 'zfw'
ValueError: invalid literal for int() with base 10: 'zfw'
nparr.dtype
dtype('int32')
nparr[5] = 5.1
nparr
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
nparr[5] = 3.8
nparr
array([0, 1, 2, 3, 4, 3, 6, 7, 8, 9])
nparr2 = np.array([1, 2, 3.0])
nparr2
array([1., 2., 3.])
nparr2.dtype
dtype('float64')
其他创建 numpy.array 的方法
np.zeros(10)
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
np.zeros(10).dtype
dtype('float64')
np.zeros(10, dtype = int)
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
np.zeros(10, dtype = int).dtype
dtype('int32')
np.zeros((3, 5))
array([[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.]])
np.zeros((3, 5), dtype = int)
array([[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]])
np.zeros(shape = (3, 5), dtype = int)
array([[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]])
np.ones(shape = (3, 5), dtype = int)
array([[1, 1, 1, 1, 1],
[1, 1, 1, 1, 1],
[1, 1, 1, 1, 1]])
np.full((3, 5), 666)
array([[666, 666, 666, 666, 666],
[666, 666, 666, 666, 666],
[666, 666, 666, 666, 666]])
np.full((3, 5), fill_value = 666)
array([[666, 666, 666, 666, 666],
[666, 666, 666, 666, 666],
[666, 666, 666, 666, 666]])
np.full(shape = 10, fill_value = 666)
array([666, 666, 666, 666, 666, 666, 666, 666, 666, 666])
arange
[i for i in range(1, 21, 2)]
[1, 3, 5, 7, 9, 11, 13, 15, 17, 19]
np.arange(1, 21, 2)
array([ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19])
np.arange(0, 1, 0.2)
array([0. , 0.2, 0.4, 0.6, 0.8])
np.arange(10)
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
linspace
np.linspace(0, 20, 10) # 第三个参数代表要生成的等间距样本点的个数(默认包含起点和终点),而不是区间的个数
array([ 0. , 2.22222222, 4.44444444, 6.66666667, 8.88888889,
11.11111111, 13.33333333, 15.55555556, 17.77777778, 20. ])
random
np.random.randint(0, 10)
1
np.random.randint(0, 10, 10)
array([4, 4, 8, 4, 9, 7, 6, 6, 4, 5])
np.random.randint(4, 8, size = 10)
array([7, 5, 5, 5, 7, 7, 4, 6, 6, 4])
np.random.randint(4, 8, size = (3, 5))
array([[6, 6, 4, 4, 6],
[5, 7, 4, 5, 5],
[4, 4, 7, 6, 4]])
np.random.random() # 随机出一个0~1之间的随机数
0.20766702586215313
np.random.random(10)
array([0.1302426 , 0.98278666, 0.60458215, 0.43981822, 0.60969485,
0.5779554 , 0.52108026, 0.97803974, 0.72699246, 0.99372079])
np.random.random((3, 5))
array([[0.63109061, 0.49412309, 0.17998444, 0.50134157, 0.8936802 ],
[0.27790656, 0.28513558, 0.73943784, 0.87266798, 0.35319681],
[0.07838782, 0.74422724, 0.66583002, 0.86252842, 0.05271564]])
np.random.normal() # 符合均值为0,方差为1的随机浮点数
1.0296249427172843
np.random.normal(10, 100) # 符合均值为10,标准差为100(即方差为10000)的随机浮点数
-194.45635517586766
np.random.normal(0, 1, (3, 5))
array([[ 0.28678452, 0.35496813, 0.41680273, 0.7366241 , -0.39959191],
[-0.7997863 , 1.22113831, 0.37437665, -1.65072346, 0.16311137],
[ 1.19352255, -2.34281573, -1.27401525, 0.54320386, -0.5390432 ]])
np.random.normal?
np.random?
help(np.random.normal)
Help on built-in function normal:
normal(...) method of numpy.random.mtrand.RandomState instance
normal(loc=0.0, scale=1.0, size=None)
Draw random samples from a normal (Gaussian) distribution.
The probability density function of the normal distribution, first
derived by De Moivre and 200 years later by both Gauss and Laplace
independently [2]_, is often called the bell curve because of
its characteristic shape (see the example below).
The normal distributions occurs often in nature. For example, it
describes the commonly occurring distribution of samples influenced
by a large number of tiny, random disturbances, each with its own
unique distribution [2]_.
.. note::
New code should use the ``normal`` method of a ``default_rng()``
instance instead; please see the :ref:`random-quick-start`.
Parameters
----------
loc : float or array_like of floats
Mean ("centre") of the distribution.
scale : float or array_like of floats
Standard deviation (spread or "width") of the distribution. Must be
non-negative.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
a single value is returned if ``loc`` and ``scale`` are both scalars.
Otherwise, ``np.broadcast(loc, scale).size`` samples are drawn.
Returns
-------
out : ndarray or scalar
Drawn samples from the parameterized normal distribution.
See Also
--------
scipy.stats.norm : probability density function, distribution or
cumulative density function, etc.
Generator.normal: which should be used for new code.
Notes
-----
The probability density for the Gaussian distribution is
.. math:: p(x) = \frac{1}{\sqrt{ 2 \pi \sigma^2 }}
e^{ - \frac{ (x - \mu)^2 } {2 \sigma^2} },
where :math:`\mu` is the mean and :math:`\sigma` the standard
deviation. The square of the standard deviation, :math:`\sigma^2`,
is called the variance.
The function has its peak at the mean, and its "spread" increases with
the standard deviation (the function reaches 0.607 times its maximum at
:math:`x + \sigma` and :math:`x - \sigma` [2]_). This implies that
normal is more likely to return samples lying close to the mean, rather
than those far away.
References
----------
.. [1] Wikipedia, "Normal distribution",
https://en.wikipedia.org/wiki/Normal_distribution
.. [2] P. R. Peebles Jr., "Central Limit Theorem" in "Probability,
Random Variables and Random Signal Principles", 4th ed., 2001,
pp. 51, 51, 125.
Examples
--------
Draw samples from the distribution:
>>> mu, sigma = 0, 0.1 # mean and standard deviation
>>> s = np.random.normal(mu, sigma, 1000)
Verify the mean and the variance:
>>> abs(mu - np.mean(s))
0.0 # may vary
>>> abs(sigma - np.std(s, ddof=1))
0.1 # may vary
Display the histogram of the samples, along with
the probability density function:
>>> import matplotlib.pyplot as plt
>>> count, bins, ignored = plt.hist(s, 30, density=True)
>>> plt.plot(bins, 1/(sigma * np.sqrt(2 * np.pi)) *
... np.exp( - (bins - mu)**2 / (2 * sigma**2) ),
... linewidth=2, color='r')
>>> plt.show()
Two-by-four array of samples from N(3, 6.25):
>>> np.random.normal(3, 2.5, size=(2, 4))
array([[-4.49401501, 4.00950034, -1.81814867, 7.29718677], # random
[ 0.39924804, 4.68456316, 4.99394529, 4.84057254]]) # random
05 Numpy.array 的基本操作
import numpy as np
help(np.arange)
Help on built-in function arange in module numpy:
arange(...)
arange([start,] stop[, step,], dtype=None)
Return evenly spaced values within a given interval.
Values are generated within the half-open interval ``[start, stop)``
(in other words, the interval including `start` but excluding `stop`).
For integer arguments the function is equivalent to the Python built-in
`range` function, but returns an ndarray rather than a list.
When using a non-integer step, such as 0.1, the results will often not
be consistent. It is better to use `numpy.linspace` for these cases.
Parameters
----------
start : number, optional
Start of interval. The interval includes this value. The default
start value is 0.
stop : number
End of interval. The interval does not include this value, except
in some cases where `step` is not an integer and floating point
round-off affects the length of `out`.
step : number, optional
Spacing between values. For any output `out`, this is the distance
between two adjacent values, ``out[i+1] - out[i]``. The default
step size is 1. If `step` is specified as a position argument,
`start` must also be given.
dtype : dtype
The type of the output array. If `dtype` is not given, infer the data
type from the other input arguments.
Returns
-------
arange : ndarray
Array of evenly spaced values.
For floating point arguments, the length of the result is
``ceil((stop - start)/step)``. Because of floating point overflow,
this rule may result in the last element of `out` being greater
than `stop`.
See Also
--------
numpy.linspace : Evenly spaced numbers with careful handling of endpoints.
numpy.ogrid: Arrays of evenly spaced numbers in N-dimensions.
numpy.mgrid: Grid-shaped arrays of evenly spaced numbers in N-dimensions.
Examples
--------
>>> np.arange(3)
array([0, 1, 2])
>>> np.arange(3.0)
array([ 0., 1., 2.])
>>> np.arange(3,7)
array([3, 4, 5, 6])
>>> np.arange(3,7,2)
array([3, 5])
x = np.arange(10)
x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
X = np.arange(15).reshape((3, 5))
X
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
基本属性
x.ndim # 查看x是一个几维的数组
1
X.ndim
2
x.shape
(10,)
X.shape
(3, 5)
x.size # x的元素个数
10
X.size
15
numpy.array 的数据访问
x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
X
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
x[0]
0
X[0]
array([0, 1, 2, 3, 4])
X[-1][-1]
14
X[2][2]
12
X[(2, 2)]
12
X[2, 2] # 推荐写法
12
x[::2] # 从头到尾步长为2
array([0, 2, 4, 6, 8])
x[::-1]
array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])
X
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
# 访问X的前两行的前三列
X[:2, :3]
array([[0, 1, 2],
[5, 6, 7]])
X[:2][:3]
array([[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9]])
# 访问X的前两行,每行从头到尾间隔为2的元素
X[:2, ::2]
array([[0, 2, 4],
[5, 7, 9]])
# 反转整个矩阵
X[::-1, ::-1]
array([[14, 13, 12, 11, 10],
[ 9, 8, 7, 6, 5],
[ 4, 3, 2, 1, 0]])
# 访问X的第一列元素
X[:, 0]
array([ 0, 5, 10])
subX = X[:2, :3] # subX为X的一个子矩阵
subX
array([[0, 1, 2],
[5, 6, 7]])
subX[0][0] = 100
subX
array([[100, 1, 2],
[ 5, 6, 7]])
X # 在numpy中切片得到的是原数组的视图(view),因此修改子矩阵中的元素会影响原矩阵
array([[100, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[ 10, 11, 12, 13, 14]])
subX = X[:2, :3].copy() # 此时修改subX不会影响X,此时的subX只是X的一个副本,与原矩阵脱离了关系
subX
array([[100, 1, 2],
[ 5, 6, 7]])
subX[0, 0] = 0
subX
array([[0, 1, 2],
[5, 6, 7]])
X
array([[100, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[ 10, 11, 12, 13, 14]])
Reshape 修改数组的维度
x.shape # x是一个一维向量,其中x有10个元素
(10,)
x.ndim # x的维度
1
# 获取x变成2行5列的数组
x.reshape(2, 5)
array([[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9]])
x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
x.reshape(10, -1) # 获取x变成10行之后的数组,不管每行有多少列
array([[0],
[1],
[2],
[3],
[4],
[5],
[6],
[7],
[8],
[9]])
x.reshape(-1, 10) # 获取x变成10列之后的数组,不管每列有多少行
array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])
x.reshape(2, -1) # 获取x变成2行之后的数组,不管每行有多少列
array([[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9]])
感觉机器学习要学的好多
确实不少,我也比较佛系了,能学多少算多少hh
在 cmd 环境下,输入命令:jupyter notebook 之后就可以启动 Jupyter Notebook 编辑器,启动之后会自动打开浏览器,并访问 http://localhost:8088,默认跳转到 http://localhost:8088/tree。