# No inheritance from MultiOutputMixin because decision_function would fail
- # test_check_estimator.py 'check_classifier_multioutput' (line 2479 of estimator_checks.py)
- # - this is the only test for MultiOutputClassifiers, so is it OK to exclude this tag?
+ # test_check_estimator.py 'check_classifier_multioutput' (line 2479 of estimator_checks.py).
+ # This is the only test for multioutput classifiers, so is it OK to exclude this tag?

# did a search of all classifiers that inherit from MultiOutputMixin - none of them implement
- # decision function, so I don't think we need to inherit
+ # decision function


class PCovC(LinearClassifierMixin, _BasePCov):
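The "search of all classifiers that inherit from MultiOutputMixin" mentioned in the comment above can be re-checked with sklearn's public `all_estimators` helper; a minimal sketch (not part of the diff):

```python
# Re-check the claim that no sklearn classifier inheriting from MultiOutputMixin
# implements decision_function, using only public sklearn utilities.
from sklearn.base import MultiOutputMixin
from sklearn.utils import all_estimators

for name, cls in all_estimators(type_filter="classifier"):
    if issubclass(cls, MultiOutputMixin) and hasattr(cls, "decision_function"):
        print(name)  # prints nothing if the comment's claim holds
```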
@@ -120,6 +120,7 @@ class PCovC(LinearClassifierMixin, _BasePCov):
- ``sklearn.linear_model.LogisticRegressionCV()``
- ``sklearn.svm.LinearSVC()``
- ``sklearn.discriminant_analysis.LinearDiscriminantAnalysis()``
+ - ``sklearn.multioutput.MultiOutputClassifier()``
- ``sklearn.linear_model.RidgeClassifier()``
- ``sklearn.linear_model.RidgeClassifierCV()``
- ``sklearn.linear_model.Perceptron()``
@@ -131,8 +132,8 @@ class PCovC(LinearClassifierMixin, _BasePCov):
`sklearn.pipeline.Pipeline` with model caching.
In such cases, the classifier will be re-fitted on the same
training data as the composite estimator.
- If None and ``Y.ndim < 2``, ``sklearn.linear_model.LogisticRegression()`` is used.
- If None and ``Y.ndim == 2``, ``sklearn.multioutput.MultiOutputClassifier()`` is used.
+ If None and ``n_outputs == 1``, ``sklearn.linear_model.LogisticRegression()`` is used.
+ If None and ``n_outputs > 1``, ``sklearn.multioutput.MultiOutputClassifier()`` is used.
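A hypothetical usage sketch of that default (the import path and the `mixing`/`n_components` arguments are assumed to mirror PCovR and this PR, not taken from the diff):

```python
import numpy as np
from skmatter.decomposition import PCovC  # assumed import path

rng = np.random.default_rng(0)
X = rng.random((50, 6))
y_1d = rng.integers(0, 3, 50)        # n_outputs == 1
Y_2d = rng.integers(0, 3, (50, 2))   # n_outputs > 1

# default classifier: LogisticRegression()
PCovC(mixing=0.5, n_components=2).fit(X, y_1d)
# default classifier: MultiOutputClassifier(LogisticRegression())
PCovC(mixing=0.5, n_components=2).fit(X, Y_2d)
```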
iterated_power : int or 'auto', default='auto'
Number of iterations for the power method computed by
@@ -164,6 +165,9 @@ class PCovC(LinearClassifierMixin, _BasePCov):
n_components, or the lesser value of n_features and n_samples
if n_components is None.

+ n_outputs : int
+ The number of outputs when ``fit`` is performed.
+
classifier : estimator object
The linear classifier passed for fitting.
@@ -263,16 +267,14 @@ def fit(self, X, Y, W=None):
Y : numpy.ndarray, shape (n_samples,) or (n_samples, n_outputs)
Training data, where n_samples is the number of samples and
- n_outputs is the number of outputs. If ``self.classifier`` is an instance
- of ``sklearn.multioutput.MultiOutputClassifier()``, Y can be of shape
- (n_samples, n_outputs).
+ n_outputs is the number of outputs.

W : numpy.ndarray, shape (n_features, n_classes)
Classification weights, optional when classifier is ``precomputed``. If
not passed, it is assumed that the weights will be taken from a
linear classifier fit between :math:`\mathbf{X}` and :math:`\mathbf{Y}`.
- In the case of a multioutput classifier ``classifier``,
- ``W = np.hstack([est_.coef_.T for est_ in classifier.estimators_])``.
+ In the multioutput case,
+ ``W = np.hstack([est_.coef_.T for est_ in classifier.estimators_])``.
"""
        X, Y = validate_data(self, X, Y, multi_output=True, y_numeric=False)
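The W layout described above can be illustrated with plain sklearn objects (a self-contained sketch, independent of PCovC):

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.multioutput import MultiOutputClassifier

rng = np.random.default_rng(0)
X = rng.random((40, 5))
Y = np.column_stack([
    rng.integers(0, 2, 40),  # output 1: 2 classes -> coef_.T has shape (5, 1)
    rng.integers(0, 3, 40),  # output 2: 3 classes -> coef_.T has shape (5, 3)
])

clf = MultiOutputClassifier(LogisticRegression()).fit(X, Y)
W = np.hstack([est_.coef_.T for est_ in clf.estimators_])
print(W.shape)  # (5, 4): n_features rows, one column block per output
```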
@@ -303,49 +305,31 @@ def fit(self, X, Y, W=None):
                ", or `precomputed`"
            )

-         if self.n_outputs == 1 and isinstance(self.classifier, MultiOutputClassifier):
-             raise ValueError(
-                 "Classifier cannot be an instance of `MultiOutputClassifier` when Y is 1D"
-             )
+         multioutput = self.n_outputs != 1
+         precomputed = self.classifier == "precomputed"

-         if (
-             self.n_outputs != 1
-             and self.classifier not in ["precomputed", None]
-             and not (
-                 isinstance(self.classifier, MultiOutputClassifier)
-                 or self.classifier == "precomputed"
-             )
-         ):
-             raise ValueError(
-                 "Classifier must be an instance of `MultiOutputClassifier` when Y is 2D"
+         if self.classifier is None or precomputed:
+             # used as the default classifier for subsequent computations
+             classifier = (
+                 MultiOutputClassifier(LogisticRegression())
+                 if multioutput
+                 else LogisticRegression()
            )
+         else:
+             classifier = self.classifier

-         if self.n_outputs == 1:
-             if self.classifier != "precomputed":
-                 classifier = self.classifier or LogisticRegression()
-                 self.z_classifier_ = check_cl_fit(classifier, X, Y)
-                 W = self.z_classifier_.coef_.T
-
+         if precomputed and W is None:
+             _ = clone(classifier).fit(X, Y)
+             if multioutput:
+                 W = np.hstack([_.coef_.T for _ in _.estimators_])
            else:
-             # to be used later on as the classifier fit between T and Y
-             classifier = LogisticRegression()
-             if W is None:
-                 W = clone(classifier).fit(X, Y).coef_.T
-
+             W = _.coef_.T
        else:
-             if self.classifier != "precomputed":
-                 classifier = self.classifier or MultiOutputClassifier(
-                     estimator=LogisticRegression()
-                 )
-                 self.z_classifier_ = check_cl_fit(classifier, X, Y)
+             self.z_classifier_ = check_cl_fit(classifier, X, Y)
+             if multioutput:
                W = np.hstack([est_.coef_.T for est_ in self.z_classifier_.estimators_])
-
            else:
-             # to be used later on as the classifier fit between T and Y
-             classifier = MultiOutputClassifier(estimator=LogisticRegression())
-             if W is None:
-                 _ = clone(classifier).fit(X, Y)
-                 W = np.hstack([_.coef_.T for _ in _.estimators_])
+             W = self.z_classifier_.coef_.T

        Z = X @ W
@@ -358,21 +342,21 @@ def fit(self, X, Y, W=None):
        # classifier and steal weights to get pxz and ptz
        self.classifier_ = clone(classifier).fit(X @ self.pxt_, Y)

-         if self.n_outputs == 1:
-             self.ptz_ = self.classifier_.coef_.T
-             # print(self.ptz_.shape)
-             self.pxz_ = self.pxt_ @ self.ptz_
-         else:
+         if multioutput:
            self.ptz_ = np.hstack(
                [est_.coef_.T for est_ in self.classifier_.estimators_]
            )
            # print(f"pxt {self.pxt_.shape}")
            # print(f"ptz {self.ptz_.shape}")
            self.pxz_ = self.pxt_ @ self.ptz_
            # print(f"pxz {self.pxz_.shape}")
+         else:
+             self.ptz_ = self.classifier_.coef_.T
+             # print(self.ptz_.shape)
+             self.pxz_ = self.pxt_ @ self.ptz_

        # print(self.ptz_.shape)
-         if len(Y.shape) == 1 and type_of_target(Y) == "binary":
+         if not multioutput and type_of_target(Y) == "binary":
            self.pxz_ = self.pxz_.reshape(
                X.shape[1],
            )
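For reference, the `type_of_target` check used in the binary reshape above is sklearn's public utility, so the condition can be inspected directly:

```python
from sklearn.utils.multiclass import type_of_target

print(type_of_target([0, 1, 0, 1]))      # 'binary'  -> pxz_ gets flattened to 1D
print(type_of_target([0, 1, 2, 1]))      # 'multiclass'
print(type_of_target([[0, 2], [1, 1]]))  # 'multiclass-multioutput'
```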
@@ -472,9 +456,9 @@ def decision_function(self, X=None, T=None):
Z : numpy.ndarray, shape (n_samples,) or (n_samples, n_classes), or a list of \
n_outputs such arrays if n_outputs > 1
Confidence scores. For binary classification, has shape `(n_samples,)`,
- for multiclass classification, has shape `(n_samples, n_classes)`. If n_outputs > 1,
- the list can contain arrays with differing shapes depending on the
- number of classes in each output of Y.
+ for multiclass classification, has shape `(n_samples, n_classes)`.
+ If n_outputs > 1, the list can contain arrays with differing shapes
+ depending on the number of classes in each output of Y.
"""
        check_is_fitted(self, attributes=["pxz_", "ptz_"])
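The per-output score shapes described above follow the underlying linear classifiers; a quick illustration with plain LogisticRegression (PCovC would return one such array per output):

```python
import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
X = rng.random((30, 4))
y_two_classes = rng.integers(0, 2, 30)
y_three_classes = rng.integers(0, 3, 30)

print(LogisticRegression().fit(X, y_two_classes).decision_function(X).shape)    # (30,)
print(LogisticRegression().fit(X, y_three_classes).decision_function(X).shape)  # (30, 3)
```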
@@ -529,36 +513,3 @@ def transform(self, X=None):
and n_features is the number of features.
"""
        return super().transform(X)
-
-     # def score(self, X, Y, sample_weight=None):
-     #     """Return the accuracy on the given test data and labels. Contains support
-     #     for multiclass-multioutput data.
-
-     #     Parameters
-     #     ----------
-     #     X : array-like of shape (n_samples, n_features)
-     #         Test samples.
-
-     #     Y : array-like of shape (n_samples,) or (n_samples, n_outputs)
-     #         True labels for `X`.
-
-     #     sample_weight : array-like of shape (n_samples,), default=None
-     #         Sample weights. Can only be used if the PCovC instance
-     #         has been trained on single-target data.
-
-     #     Returns
-     #     -------
-     #     score : float
-     #         Accuracy scores. If the PCovC instance was trained on a 1D Y,
-     #         this will call the ``score()`` function defined by
-     #         ``sklearn.base.ClassifierMixin``. If trained on a 2D Y, this will
-     #         call the ``score()`` function defined by
-     #         ``sklearn.multioutput.MultiOutputClassifier``.
-     #     """
-     #     X, Y = validate_data(self, X, Y, reset=False)
-
-     #     if isinstance(self.classifier_, MultiOutputClassifier):
-     #         # LinearClassifierMixin.score fails with multioutput-multiclass Y
-     #         return self.classifier_.score(X @ self.pxt_, Y)
-     #     else:
-     #         return self.classifier_.score(X @ self.pxt_, Y, sample_weight=sample_weight)
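Background on the removed comment ("LinearClassifierMixin.score fails with multioutput-multiclass Y"): sklearn's `accuracy_score`, which `ClassifierMixin.score` relies on, rejects multiclass-multioutput targets, which is why the 2D case was routed through `MultiOutputClassifier.score` in this draft. A quick check:

```python
from sklearn.metrics import accuracy_score

try:
    accuracy_score([[0, 2], [1, 1]], [[0, 2], [1, 0]])
except ValueError as err:
    print(err)  # "multiclass-multioutput is not supported"
```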