10. 归一化
10.1. 归一化
- numpy.linalg.norm(x, ord=None, axis=None, keepdims=False)
- sklearn.preprocessing.normalize(X, norm='l2', axis=1, copy=True, return_norm=False)
- torch.nn.functional.normalize(input, p=2, dim=1, eps=1e-12)
>>> import numpy as np
>>> import numpy.linalg as la
>>> arr = np.array([[2,1,2],[2,1,1]], dtype=np.float32)
>>> print arr
[[ 2. 1. 2.]
 [ 2. 1. 1.]]
>>> norm = la.norm(arr, axis=0, keepdims=True)
>>> print norm
[[ 2.82842708 1.41421354 2.23606801]]
>>> print arr / np.tile(norm,(2,1))
[[ 0.70710677 0.70710677 0.89442718]
 [ 0.70710677 0.70710677 0.44721359]]

>>> from sklearn import preprocessing
>>> print preprocessing.normalize(arr, axis=0, norm='l2')
[[ 0.70710677 0.70710677 0.89442718]
 [ 0.70710677 0.70710677 0.44721359]]

>>> import torch
>>> import torch.nn.functional as F
>>> print F.normalize(torch.from_numpy(arr), p=2, dim=0)
0.7071 0.7071 0.8944
0.7071 0.7071 0.4472
[torch.FloatTensor of size 2x3]
10.2. k-means 实现
import numpy as np

## Toy k-means: cluster n random d-dimensional unit vectors into K groups.
## Every sample is assigned to the center with the largest dot product, then
## each non-empty cluster's center is replaced by the mean of its members.
## Iteration stops after 20 rounds or once the squared error stabilizes.

## feature initialization
np.random.seed(1)  ## fixed seed so every run draws the same data
n = 10000  ## number of samples
d = 3  ## feature dimension
K = 50  ## number of clusters
data = np.random.randn(n, d)  ## n x d

## feature normalization: scale every row to unit L2 norm.
## keepdims=True keeps the norms as an n x 1 column, so broadcasting divides
## each row by its own norm without an explicit np.tile.
data = data / np.linalg.norm(data, axis=1, keepdims=True)

## center initialization: K distinct samples chosen at random.
## Fancy indexing returns a copy, so updating `center` never touches `data`.
center = data[np.random.permutation(n)[:K]]

itr = 0
## loop
while itr < 20:
    itr += 1

    ## quantization: index of the best-scoring center for every sample.
    ## argmax yields the same index as a full descending argsort followed by
    ## taking column 0, without sorting all K scores of every row.
    ## NOTE(review): updated centers are plain means and are not re-normalized,
    ## so after the first iteration this is a raw dot product rather than a
    ## cosine similarity -- confirm that this assignment rule is intended.
    similarity = np.dot(data, center.T)  ## n x K
    quan_id = np.argmax(similarity, axis=1)

    ## update center
    new_error = 0.0
    for c in range(K):
        data_c = data[quan_id == c]
        ## an empty cluster keeps its previous center and adds no error
        if data_c.shape[0] != 0:
            center[c] = np.mean(data_c, axis=0)
            new_error += np.sum((data_c - center[c]) ** 2)

    ## converged once the error changes by less than 0.1% between iterations
    ## (old_error only exists from the second iteration on, hence itr > 1)
    if itr > 1 and abs(1 - new_error / old_error) < 1e-3:
        break
    old_error = new_error
10.3. 参考资料
numpy.linalg.norm
sklearn.preprocessing.normalize
torch.nn.functional.normalize