Skip to content

Commit caa19fb

Browse files
author
Srikumar Sastry
committed
Docstring changes
1 parent d7321f5 commit caa19fb

File tree

2 files changed

+108
-1
lines changed

2 files changed

+108
-1
lines changed
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
from tensorflow.keras.models import Sequential, Model
2+
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
3+
from tensorflow.keras.optimizers import Adam
4+
from tensorflow.keras.metrics import categorical_crossentropy
5+
from tensorflow.keras.utils import to_categorical
6+
import tensorflow as tf
7+
import numpy as np
8+
from keras.datasets import mnist
9+
from keras.wrappers.scikit_learn import KerasClassifier
10+
from modAL.models import ActiveLearner
11+
12+
# Load the MNIST train/test splits (image arrays plus integer class labels).
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Append a trailing channel axis so each image has the (height, width, 1)
# layout that the first Conv2D layer declares as its input shape.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# Scale pixel intensities by 1/255.  The test images must be scaled exactly
# like the training images: the evaluation code scores on `X_test`, so it is
# normalised in place rather than only through a separate, unused variable.
X_train = X_train / 255.0
X_test = X_test / 255.0
# Kept as an alias so any code referring to the old name still works.
X_test_norm = X_test

# One-hot encode the labels to match the 10-way softmax output and the
# categorical cross-entropy loss.
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
25+
26+
def LeNet():
    """Build and compile a LeNet-style CNN for 28x28 single-channel images.

    The network stacks two convolution + max-pooling stages (with dropout
    after the first), then a 120-unit dense layer with dropout and a 10-way
    softmax output.  It is compiled with Adam (learning rate 0.001),
    categorical cross-entropy as the loss, and accuracy as the metric.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    network = Sequential([
        Conv2D(filters=6, kernel_size=(5, 5), padding='same',
               activation='relu', input_shape=(28, 28, 1)),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Conv2D(filters=16, kernel_size=(5, 5), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(120, activation='relu'),
        Dropout(0.55),
        Dense(10, activation='softmax'),
    ])

    network.compile(
        loss=categorical_crossentropy,
        optimizer=Adam(learning_rate=0.001),
        metrics=['accuracy'],
    )
    return network
45+
46+
def max_disagreement(model, X, n=32, n_mcd=10):
    """Pick the pool samples on which Monte Carlo dropout passes disagree most.

    The network is evaluated ``n_mcd`` times with dropout active.  For every
    sample the score ``mean(entropy of each pass) - entropy(mean prediction)``
    is computed; this is the negated mutual information between the
    prediction and the dropout mask, so the *smallest* scores mark the
    samples with the largest disagreement between passes.

    Args:
        model: Learner exposing the fitted Keras network as
            ``model.estimator.model`` (e.g. a modAL ``ActiveLearner`` wrapping
            a fitted ``KerasClassifier``).
        X: Pool of images; reshaped internally to ``(-1, 28, 28, 1)``.
        n: Number of samples to select.
        n_mcd: Number of stochastic forward passes.

    Returns:
        1-D integer array with the indices (into ``X``) of the selected
        samples, in no particular order.  If the pool holds ``n`` samples or
        fewer, every index is returned.
    """
    network = model.estimator.model
    partial_model = Model(network.inputs, network.layers[-1].output)
    images = X.reshape(-1, 28, 28, 1)

    # training=True keeps the Dropout layers active at inference time, so
    # every pass is a different stochastic sample of the network.
    prob = np.stack([np.asarray(partial_model(images, training=True))
                     for _ in range(n_mcd)])
    pb = np.mean(prob, axis=0)

    # A softmax output can underflow to exactly 0; log(0) is -inf and
    # 0 * -inf is nan, which would corrupt the ranking below.  Clamping the
    # argument of the log makes those terms contribute 0 instead.
    tiny = np.finfo(prob.dtype).tiny
    entropy1 = (-pb * np.log(np.clip(pb, tiny, None))).sum(axis=1)
    entropy2 = (-prob * np.log(np.clip(prob, tiny, None))).sum(axis=2).mean(axis=0)
    un = entropy2 - entropy1

    # np.argpartition requires kth < len(un); with a pool this small there
    # is nothing to rank, so select everything that is left.
    if n >= len(un):
        return np.arange(len(un))
    return np.argpartition(un, n)[:n]
55+
56+
# Wrap the Keras model builder in a scikit-learn style estimator for modAL.
model = KerasClassifier(LeNet)

# Unlabelled pool: starts as a copy of the training set and shrinks every
# time samples are moved into the labelled set.
U_x = np.copy(X_train)
U_y = np.copy(y_train)

INITIAL_SET_SIZE = 32
# Sample WITHOUT replacement: the default (replace=True) can draw the same
# index twice, giving a seed set with duplicates and removing fewer than
# INITIAL_SET_SIZE rows from the pool below.
ind = np.random.choice(range(len(U_x)), size=INITIAL_SET_SIZE, replace=False)

X_initial = U_x[ind]
y_initial = U_y[ind]

U_x = np.delete(U_x, ind, axis=0)
U_y = np.delete(U_y, ind, axis=0)

active_learner = ActiveLearner(
    estimator=model,
    X_training=X_initial,
    y_training=y_initial,
    query_strategy=max_disagreement,
    verbose=0
)

N_QUERIES = 20

# Evaluate on the scaled test images (`X_test_norm`) so the inputs have the
# same value range as the images the network is trained on.
scores = [active_learner.score(X_test_norm, y_test, verbose=0)]

for index in range(N_QUERIES):

    # Ask the query strategy which pool samples to label next.
    query_idx, query_instance = active_learner.query(U_x)

    L_x = U_x[query_idx]
    L_y = U_y[query_idx]

    active_learner.teach(L_x, L_y, epochs=50, batch_size=128, verbose=0)

    # Remove the newly labelled samples from the pool.
    U_x = np.delete(U_x, query_idx, axis=0)
    U_y = np.delete(U_y, query_idx, axis=0)

    acc = active_learner.score(X_test_norm, y_test)

    print(F'Query {index+1}: Test Accuracy: {acc}')

    scores.append(acc)

examples/cost_effective_active_learning.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
This is a modified implementation of the algorithm Cost Effective Active Learning
33
(please refer to https://arxiv.org/abs/1701.03551). This version not only picks up the
44
top K uncertain samples but also picks up the top N highly confident samples that
5-
may represent information and diversity. It is better than the original implementation
5+
may represent information and diversity. It differs from the original implementation
66
as it does not involve tuning the confidence threshold parameter for every dataset.
77
"""
88

@@ -50,6 +50,15 @@ def max_entropy(active_learner, X, K=16, N=16):
5050
class_prob = active_learner.predict_proba(X)
5151
entropy = entr(class_prob).sum(axis=1)
5252
uncertain_idx = np.argpartition(entropy, -K)[-K:]
53+
54+
"""
55+
Original Implementation -- Pick most confident samples with
56+
entropy less than a threshold. Threshold is decayed in every
57+
iteration.
58+
59+
Difference from the original -- Pick the top N most confident samples.
60+
"""
61+
5362
confidence_idx = np.argpartition(entropy, N)[:N]
5463

5564
return np.concatenate((uncertain_idx, confidence_idx), axis=0)

0 commit comments

Comments
 (0)