Source code for landlab.utils.count_repeats

#! /usr/bin/env python
"""Count repeated values in an array."""

import numpy as np



[docs]
def count_repeated_values(values):
    """Count how many times in an array values repeat and where they appear.

    Return a list of length *n*, where *n* is the largest number of times
    any single value occurs in *values*. Element *k* of the list holds every
    value that occurs at least *k + 1* times, together with the index of its
    *(k + 1)*-th occurrence. Thus the first element contains all unique
    values and the indices of their first occurrence, the second element
    contains the values that occur two or more times and the indices of
    their second occurrence, and so on.

    Parameters
    ----------
    values : array_like
        Input array to count repeated values. Input with more than one
        dimension is flattened, and the returned indices refer to the
        flattened array.

    Returns
    -------
    list of tuple
        List of tuples of (*repeated_values*, *indices*). Within each tuple,
        *repeated_values* is sorted and *indices* gives, for each value, its
        position in the (flattened) input. The list is empty if *values* is
        empty.

    Examples
    --------

    For an array that contains no repeated values, this function just returns
    a copy of *values*, and the indices to each element.

    >>> import numpy as np
    >>> from landlab.utils.count_repeats import count_repeated_values
    >>> counts = count_repeated_values(np.array([20, 30, 40], dtype=int))
    >>> len(counts)
    1
    >>> counts[0]
    (array([20, 30, 40]), array([0, 1, 2]))

    If *values* contains a repeated value, the first element contains all
    unique values along with their indices. For repeated values, return
    indices to their first occurrence. The second element contains values and
    indices to values occurring two or more times.

    >>> counts = count_repeated_values(np.array([20, 30, 40, 30, 30], dtype=int))
    >>> len(counts)
    3
    >>> counts[0]
    (array([20, 30, 40]), array([0, 1, 2]))
    >>> counts[1]
    (array([30]), array([3]))
    >>> counts[2]
    (array([30]), array([4]))

    Multi-dimensional input is treated as if it were flattened.

    >>> counts = count_repeated_values(np.array([[20, 30], [30, 40]], dtype=int))
    >>> counts[0]
    (array([20, 30, 40]), array([0, 1, 3]))
    >>> counts[1]
    (array([30]), array([2]))

    The input array remains unchanged.

    >>> x = np.array([20, 30, 30, 40], dtype=int)
    >>> counts = count_repeated_values(x)
    >>> x
    array([20, 30, 30, 40])
    """
    # np.unique(..., return_index=True) and np.delete (without *axis*) both
    # operate on the flattened array, so flatten once up front and size the
    # index bookkeeping to match. np.ravel never modifies its argument, and
    # np.delete below always returns a new array, so the caller's data is
    # left untouched.
    remaining = np.ravel(values)
    remaining_inds = np.arange(remaining.size, dtype=int)

    counts = []
    while remaining.size > 0:
        # first_inds indexes into *remaining*; map it back to positions in
        # the original input through remaining_inds.
        unique_values, first_inds = np.unique(remaining, return_index=True)
        counts.append((unique_values, remaining_inds[first_inds]))

        # Drop the occurrences just recorded so the next pass finds the
        # following occurrence of each value that is still left.
        remaining = np.delete(remaining, first_inds)
        remaining_inds = np.delete(remaining_inds, first_inds)

    return counts