
declearn.metrics.MulticlassAccuracyPrecisionRecall

Bases: Metric[ClassifConfmat]

Multiclass classification accuracy, precision and recall metrics.

This metric assumes that the evaluated classifier emits a score for each and every predictable label, and that the predicted label is that with the highest score. Alternatively, pre-selected labels (or one-hot encodings) may be passed as predictions.

Computed metrics are the following:

  • accuracy (float): Overall accuracy of the classifier, i.e. P(pred == true).
  • precision (1-d numpy.ndarray): Label-wise precision score, i.e. P(true=k | pred=k).
  • recall (1-d numpy.ndarray): Label-wise recall score, i.e. P(pred=k | true=k).
  • f-score (1-d numpy.ndarray): Label-wise f1-score, i.e. the harmonic mean of precision and recall.
  • confusion (2-d numpy.ndarray): Confusion matrix of predictions, where C[i, j] indicates the (optionally weighted) number of samples belonging to label i that were predicted to belong to label j.
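
As a minimal usage sketch (the example data below is arbitrary, and assumes declearn is installed), the metric can be instantiated over a fixed label set, updated over successive batches with either hard labels or per-label scores, and then queried for its results:

import numpy as np
from declearn.metrics import MulticlassAccuracyPrecisionRecall

# Instantiate the metric over three possible labels.
metric = MulticlassAccuracyPrecisionRecall(labels=[0, 1, 2])

# Update it with a batch of hard label predictions...
metric.update(
    y_true=np.array([0, 1, 2, 2, 1]),
    y_pred=np.array([0, 2, 2, 2, 1]),
)

# ...or with a batch of per-label scores (the argmax is used as prediction).
metric.update(
    y_true=np.array([0, 1]),
    y_pred=np.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]]),
)

# Collect the computed metrics as a dict.
results = metric.get_result()
print(results["accuracy"], results["precision"], results["recall"])
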
Source code in declearn/metrics/_classif.py
class MulticlassAccuracyPrecisionRecall(Metric[ClassifConfmat]):
    """Multiclass classification accuracy, precision and recall metrics.

    This metric assumes that the evaluated classifier emits a score for
    each and every predictable label, and that the predicted label is
    that with the highest score. Alternatively, pre-selected labels (or
    one-hot encodings) may be passed as predictions.

    Computed metrics are the following:

    * accuracy: float
        Overall accuracy of the classifier, i.e. P(pred==true).
    * precision: 1-d numpy.ndarray
        Label-wise precision score, i.e. P(true=k|pred=k).
    * recall: 1-d numpy.ndarray
        Label-wise recall score, i.e. P(pred=k|true=k).
    * f-score: 1-d numpy.ndarray
        Label-wise f1-score, i.e. harmonic mean of precision and recall.
    * confusion: 2-d numpy.ndarray
        Confusion matrix of predictions, where C[i, j] indicates the
        (opt. weighted) number of samples belonging to label i that
        were predicted to belong to label j.
    """

    name = "multi-classif"
    state_cls = ClassifConfmat

    def __init__(
        self,
        labels: Collection[Union[int, str]],
    ) -> None:
        """Instantiate the multiclass accuracy/precision/recall metrics.

        Parameters
        ----------
        labels: collection of {int, str}
            Ordered set of possible labels.
        """
        self.labels = np.array(list(labels))
        super().__init__()

    def get_config(self) -> Dict[str, Any]:
        return {"labels": self.labels.tolist()}

    def build_initial_states(
        self,
    ) -> ClassifConfmat:
        matrix = np.zeros((len(self.labels), len(self.labels)))
        return ClassifConfmat(matrix)

    def get_result(
        self,
    ) -> Dict[str, Union[float, np.ndarray]]:
        # Compute the metrics, silencing division-by-zero errors.
        confmat = self._states.confmat
        diag = np.diag(confmat)  # label-wise true positives
        pred = confmat.sum(axis=0)  # label-wise number of predictions
        true = confmat.sum(axis=1)  # label-wise number of labels (support)
        with np.errstate(invalid="ignore"):
            scores = {
                "accuracy": diag.sum() / confmat.sum(),
                "precision": diag / pred,
                "recall": diag / true,
                "f-score": 2 * diag / (pred + true),
            }
        # Convert NaNs resulting from zero-division to zero.
        scores = {k: np.nan_to_num(v, copy=False) for k, v in scores.items()}
        # Add a copy of the confusion matrix and return.
        scores["confusion"] = confmat.copy()
        return scores

    def update(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
        s_wght: Optional[np.ndarray] = None,
    ) -> None:
        """Update the metric's internal state based on a data batch.

        Parameters
        ----------
        y_true: numpy.ndarray
            True labels that were to be predicted, as a 1-d array.
        y_pred: numpy.ndarray
            Predictions, either as a 1-d array of labels, or 2-d array
            of scores with shape `(len(y_true), len(self.labels))`. In
            the latter case, the label with the highest score is used
            as prediction (one-vs-all style).
        s_wght: numpy.ndarray or None, default=None
            Optional sample weights to take into account in scores.
        """
        if y_pred.ndim == 2:
            y_pred = self.labels[y_pred.argmax(axis=1)]
        elif y_pred.ndim != 1:
            raise TypeError("Expected 1-d or 2-d y_pred array.")
        self._states.confmat += sklearn.metrics.confusion_matrix(
            y_true, y_pred, labels=self.labels, sample_weight=s_wght
        )
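
To make the score formulas concrete, here is a small hand-worked sketch (the confusion-matrix values are arbitrary) mirroring the computations performed by get_result above:

import numpy as np

confmat = np.array([[3., 1.],   # label 0: 3 correct, 1 predicted as label 1
                    [2., 4.]])  # label 1: 2 predicted as label 0, 4 correct
diag = np.diag(confmat)         # label-wise true positives: [3, 4]
pred = confmat.sum(axis=0)      # label-wise predictions:    [5, 5]
true = confmat.sum(axis=1)      # label-wise support:        [4, 6]

accuracy = diag.sum() / confmat.sum()  # 7 / 10 = 0.7
precision = diag / pred                # [0.60, 0.80]
recall = diag / true                   # [0.75, 0.667]
f_score = 2 * diag / (pred + true)     # [0.667, 0.727]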

__init__(labels)

Instantiate the multiclass accuracy/precision/recall metrics.

Parameters:

  • labels (Collection[Union[int, str]], required): Ordered set of possible labels.
Source code in declearn/metrics/_classif.py
def __init__(
    self,
    labels: Collection[Union[int, str]],
) -> None:
    """Instantiate the multiclass accuracy/precision/recall metrics.

    Parameters
    ----------
    labels: collection of {int, str}
        Ordered set of possible labels.
    """
    self.labels = np.array(list(labels))
    super().__init__()
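
As a small illustration (the label names here are arbitrary), labels may be strings as well as integers; the order in which they are passed fixes the axes of the confusion matrix reported under the "confusion" key:

from declearn.metrics import MulticlassAccuracyPrecisionRecall

# Row i / column j of the resulting confusion matrix counts samples
# whose true label is labels[i] and whose predicted label is labels[j].
metric = MulticlassAccuracyPrecisionRecall(labels=["cat", "dog", "bird"])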

update(y_true, y_pred, s_wght=None)

Update the metric's internal state based on a data batch.

Parameters:

  • y_true (np.ndarray, required): True labels that were to be predicted, as a 1-d array.
  • y_pred (np.ndarray, required): Predictions, either as a 1-d array of labels, or a 2-d array of scores with shape (len(y_true), len(self.labels)). In the latter case, the label with the highest score is used as the prediction (one-vs-all style).
  • s_wght (Optional[np.ndarray], default=None): Optional sample weights to take into account in scores.
Source code in declearn/metrics/_classif.py
def update(
    self,
    y_true: np.ndarray,
    y_pred: np.ndarray,
    s_wght: Optional[np.ndarray] = None,
) -> None:
    """Update the metric's internal state based on a data batch.

    Parameters
    ----------
    y_true: numpy.ndarray
        True labels that were to be predicted, as a 1-d array.
    y_pred: numpy.ndarray
        Predictions, either as a 1-d array of labels, or 2-d array
        of scores with shape `(len(y_true), len(self.labels))`. In
        the latter case, the label with the highest score is used
        as prediction (one-vs-all style).
    s_wght: numpy.ndarray or None, default=None
        Optional sample weights to take into account in scores.
    """
    if y_pred.ndim == 2:
        y_pred = self.labels[y_pred.argmax(axis=1)]
    elif y_pred.ndim != 1:
        raise TypeError("Expected 1-d or 2-d y_pred array.")
    self._states.confmat += sklearn.metrics.confusion_matrix(
        y_true, y_pred, labels=self.labels, sample_weight=s_wght
    )
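
For instance (the example values are arbitrary), sample weights are forwarded to scikit-learn's confusion_matrix, so that weighted rather than raw counts are accumulated:

import numpy as np
from declearn.metrics import MulticlassAccuracyPrecisionRecall

metric = MulticlassAccuracyPrecisionRecall(labels=[0, 1])
metric.update(
    y_true=np.array([0, 1, 1]),
    y_pred=np.array([0, 1, 0]),
    s_wght=np.array([1.0, 2.0, 0.5]),  # the mistaken third sample only weighs 0.5
)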