-
Notifications
You must be signed in to change notification settings - Fork 0
/
stream-visualize.py
82 lines (55 loc) · 2.35 KB
/
stream-visualize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
from sklearn.datasets import make_blobs
from generators.NIC import NIC_Stream
import numpy as np
import matplotlib.pyplot as plt
# Synthetic dataset: Gaussian blobs, one blob per class.
n_classes = 8
# The rest of the script plots columns 0 and 1 and pads samples to three
# columns before rotating them, so it expects exactly two features.
n_features = 2
X, y = make_blobs(
    n_samples=50000,
    centers=n_classes,
    n_features=n_features,  # previously a hard-coded 2 that ignored the constant
    cluster_std=1.5,
    random_state=17688,
)

# Wrap the dataset in the project's chunked stream.
# NOTE(review): `min_classes` semantics live in generators.NIC -- confirm there.
stream = NIC_Stream(X, y, min_classes=2)

# Per-sample accumulators for the two halves of the stream: the sample
# coordinates and a boolean "belongs to a known class" flag for each sample.
accumulated_samples_before = []
gt_class_before = []
accumulated_samples_after = []
gt_class_after = []

# One panel per half of the stream.
fig, ax = plt.subplots(1, 2, figsize=(10, 5))
plt.suptitle('Before and after data shift')

# Raw chunks collected during the first chunks of the stream.
__X = []
__y = []
# Concept shift: a fixed rotation applied to every chunk in the second part
# of the stream. Both values are loop-invariant, so they are built once.
phi = 0.5
affine_matrix = np.array([
    [np.cos(phi), -np.sin(phi), 0],
    [np.sin(phi), np.cos(phi), 0],
    [0, 0, 1],
])

for chunk in range(stream.max_chunk):
    _X, _y = stream.get_chunk()
    print(np.unique(_y, return_counts=True))

    # Aggregate known samples
    # NOTE(review): the indentation of this section was reconstructed; it is
    # assumed that `known_y` is refreshed only while chunk < 10 -- confirm.
    if chunk < 10:
        # Nothing in the visible script reads __X / __y afterwards; they are
        # still collected in case other code inspects them.
        __X.append(_X)
        __y.append(_y)
        known_y = np.unique(_y)

    # A sample counts as "known" when its label is one of the first two
    # known classes. Slicing (instead of indexing [0] and [1]) keeps this
    # working even if fewer than two classes have been observed.
    mask_known = np.isin(_y, known_y[:2])

    # Concept shift in the second part of a stream:
    if chunk > 0.5 * stream.max_chunk:
        # Pad with a zero column so the samples match the 3x3 matrix, rotate,
        # then drop the padding column again.
        _X_e = np.column_stack((_X, np.zeros(_X.shape[0])))
        _X = (_X_e @ affine_matrix)[:, :2]

        accumulated_samples_after.extend(_X)
        gt_class_after.extend(mask_known)
    else:
        accumulated_samples_before.extend(_X)
        gt_class_before.extend(mask_known)
# Freeze the per-sample accumulators into arrays so they can be column-sliced.
_accumulated_samples_before = np.array(accumulated_samples_before)
_gt_class_before = np.array(gt_class_before)
_accumulated_samples_after = np.array(accumulated_samples_after)
_gt_class_after = np.array(gt_class_after)

# Each panel: (axis, sample coordinates, known-class flags, chunk range shown
# in the title).
half_stream = stream.max_chunk/2
panels = (
    (ax[0], _accumulated_samples_before, _gt_class_before, (0, half_stream)),
    (ax[1], _accumulated_samples_after, _gt_class_after, (half_stream, stream.max_chunk)),
)

for axis, points, is_known, chunk_span in panels:
    # Colour encodes the known-class flag of each sample.
    axis.scatter(
        points[:, 0],
        points[:, 1],
        alpha=0.15,
        s=5,
        c=is_known,
        cmap='coolwarm',
        marker='x',
    )
    axis.set_title('chunks %i : %i' % chunk_span)

    # Light dotted grid, no top/right frame lines.
    axis.grid(ls=':')
    for side in ('top', 'right'):
        axis.spines[side].set_visible(False)

plt.tight_layout()
plt.savefig('foo.png')