# N-D version of itertools.combinations in numpy

Posted on

### Question :

N-D version of itertools.combinations in numpy

I would like to implement itertools.combinations for numpy. Based on this discussion, I have a function that works for 1D input:

``````def combs(a, r):
"""
Return successive r-length combinations of elements in the array a.
Should produce the same output as array(list(combinations(a, r))), but
faster.
"""
a = asarray(a)
dt = dtype([('', a.dtype)]*r)
b = fromiter(combinations(a, r), dt)
return b.view(a.dtype).reshape(-1, r)
``````

and the output makes sense:

``````In : list(combinations([1,2,3], 2))
Out: [(1, 2), (1, 3), (2, 3)]

In : array(list(combinations([1,2,3], 2)))
Out:
array([[1, 2],
[1, 3],
[2, 3]])

In : combs([1,2,3], 2)
Out:
array([[1, 2],
[1, 3],
[2, 3]])
``````

However, it would be best if I could expand it to N-D inputs, where additional dimensions simply allow you to speedily do multiple calls at once. So, conceptually, if `combs([1, 2, 3], 2)` produces `[1, 2], [1, 3], [2, 3]`, and `combs([4, 5, 6], 2)` produces `[4, 5], [4, 6], [5, 6]`, then `combs((1,2,3) and (4,5,6), 2)` should produce `[1, 2], [1, 3], [2, 3] and [4, 5], [4, 6], [5, 6]` where “and” just represents parallel rows or columns (whichever makes sense). (and likewise for additional dimensions)

I’m not sure:

1. How to make the dimensions work in a logical way that’s consistent with the way other functions work (like how some numpy functions have an `axis=` parameter, and a default of axis 0. So probably axis 0 should be the one I am combining along, and all other axes just represent parallel calculations?)
2. How to get the above code to work with ND (right now I get `ValueError: setting an array element with a sequence.`)
3. Is there a better way to do `dt = dtype([('', a.dtype)]*r)`?

You can use `itertools.combinations()` to create the index array, and then use NumPy’s fancy indexing:

``````import numpy as np
from itertools import combinations, chain
from scipy.special import comb

def comb_index(n, k):
count = comb(n, k, exact=True)
index = np.fromiter(chain.from_iterable(combinations(range(n), k)),
int, count=count*k)
return index.reshape(-1, k)

data = np.array([[1,2,3,4,5],[10,11,12,13,14]])

idx = comb_index(5, 3)
print(data[:, idx])
``````

output:

``````[[[ 1  2  3]
[ 1  2  4]
[ 1  2  5]
[ 1  3  4]
[ 1  3  5]
[ 1  4  5]
[ 2  3  4]
[ 2  3  5]
[ 2  4  5]
[ 3  4  5]]

[[10 11 12]
[10 11 13]
[10 11 14]
[10 12 13]
[10 12 14]
[10 13 14]
[11 12 13]
[11 12 14]
[11 13 14]
[12 13 14]]]
``````

When `r = k = 2`, you can also use `numpy.triu_indices(n, 1)` which indexes upper triangle of a matrix.

``````idx = comb_index(5, 2)
``````

from HYRY’s answer is equivalent to

``````idx = np.transpose(np.triu_indices(5, 1))
``````

but built-in, and a few times faster for N above ~20:

``````timeit comb_index(1000, 2)
32.3 ms ± 443 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)

timeit np.transpose(np.triu_indices(1000, 1))
10.2 ms ± 25.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
``````

### Case k = 2: `np.triu_indices`

I’ve tested case `k = 2` using lots of variations of abovementioned functions using `perfplot`. The winner is, no doubt, `np.triu_indices` and I see now that using `np.dtype([('', np.intp)] * 2)` data structure can be a huge boost even for exotic data types such as `igraph.EdgeList`.

``````from itertools import combinations, chain
from scipy.special import comb
import igraph as ig #graph library build on C
import networkx as nx #graph library, pure Python

def _combs(n):
return np.array(list(combinations(range(n),2)))

def _combs_fromiter(n): #@Jaime
indices = np.arange(n)
dt = np.dtype([('', np.intp)]*2)
indices = np.fromiter(combinations(indices, 2), dt)
indices = indices.view(np.intp).reshape(-1, 2)
return indices

def _combs_fromiterplus(n):
dt = np.dtype([('', np.intp)]*2)
indices = np.fromiter(combinations(range(n), 2), dt)
indices = indices.view(np.intp).reshape(-1, 2)
return indices

def _numpy(n): #@endolith
return np.transpose(np.triu_indices(n,1))

def _igraph(n):
return np.array(ig.Graph(n).complementer(False).get_edgelist())

def _igraph_fromiter(n):
dt = np.dtype([('', np.intp)]*2)
indices = np.fromiter(ig.Graph(n).complementer(False).get_edgelist(), dt)
indices = indices.view(np.intp).reshape(-1, 2)
return indices

def _nx(n):
G = nx.Graph()
return np.array(list(nx.complement(G).edges))

def _nx_fromiter(n):
G = nx.Graph()
dt = np.dtype([('', np.intp)]*2)
indices = np.fromiter(nx.complement(G).edges, dt)
indices = indices.view(np.intp).reshape(-1, 2)
return indices

def _comb_index(n): #@HYRY
count = comb(n, 2, exact=True)
index = np.fromiter(chain.from_iterable(combinations(range(n), 2)),
int, count=count*2)
return index.reshape(-1, 2)

fig = plt.figure(figsize=(15, 10))
plt.grid(True, which="both")
out = perfplot.bench(
setup = lambda x: x,
kernels = [_numpy, _combs, _combs_fromiter, _combs_fromiterplus,
_comb_index, _igraph, _igraph_fromiter, _nx, _nx_fromiter],
n_range = [2 ** k for k in range(12)],
xlabel = 'combinations(n, 2)',
title = 'testing combinations',
show_progress = False,
equality_check = False)
out.show()
`````` Wondering why `np.triu_indices` can’t be extended to more dimensions?

### Case 2 ? k ? 4: `triu_indices`(implemented here) = up to 2x speedup

`np.triu_indices` could actually be a winner for case `k = 3` and even `k = 4` if we implement a generalised method instead. A current version of this method is equivalent of:

``````def triu_indices(n, k):
x = np.less.outer(np.arange(n), np.arange(-k+1, n-k+1))
return np.nonzero(x)
``````

It constructs matrix representation of a relation x < y for two sequences 0,1,…,n-1 and finds locations of cells where they are not zero. For 3D case we need to add extra dimension and intersect relations x < y and y < z. For next dimensions procedure is the same but this gets a huge memory overload since n^k binary cells are needed and only C(n, k) of them attains True values. Memory usage and performance grows by O(n!) so this algorithm outperformans `itertools.combinations` only for small values of k. This is best to use actually for case `k=2` and `k=3`

``````def C(n, k): #huge memory overload...
if k==0:
return np.array([])
if k==1:
return np.arange(1,n+1)
elif k==2:
return np.less.outer(np.arange(n), np.arange(n))
else:
x = C(n, k-1)
X = np.repeat(x[None, :, :], len(x), axis=0)
Y = np.repeat(x[:, :, None], len(x), axis=2)
return X&Y

def C_indices(n, k):
return np.transpose(np.nonzero(C(n,k)))
``````

Let’s checkout with perfplot:

``````import matplotlib.pyplot as plt
import numpy as np
import perfplot
from itertools import chain, combinations
from scipy.special import comb

def C(n, k):  # huge memory overload...
if k == 0:
return np.array([])
if k == 1:
return np.arange(1, n + 1)
elif k == 2:
return np.less.outer(np.arange(n), np.arange(n))
else:
x = C(n, k - 1)
X = np.repeat(x[None, :, :], len(x), axis=0)
Y = np.repeat(x[:, :, None], len(x), axis=2)
return X & Y

def C_indices(data):
n, k = data
return np.transpose(np.nonzero(C(n, k)))

def comb_index(data):
n, k = data
count = comb(n, k, exact=True)
index = np.fromiter(chain.from_iterable(combinations(range(n), k)),
int, count=count * k)
return index.reshape(-1, k)

def build_args(k):
return {'setup': lambda x: (x, k),
'kernels': [comb_index, C_indices],
'n_range': [2 ** x for x in range(2, {2: 10, 3:10, 4:7, 5:6}[k])],
'xlabel': f'N',
'title': f'test of case C(N,{k})',
'show_progress': True,
'equality_check': lambda x, y: np.array_equal(x, y)}

outs = [perfplot.bench(**build_args(n)) for n in (2, 3, 4, 5)]
fig = plt.figure(figsize=(20, 20))
for i in range(len(outs)):
ax = fig.add_subplot(2, 2, i + 1)
ax.grid(True, which="both")
outs[i].plot()
plt.show()
`````` So the best performance boost is achieved for `k=2` (equivalent to `np.triu_indices) and for `k=3` it’s faster almost twice.

### Case k > 3: `numpy_combinations`(implemented here) = up to 2.5x speedup

Following this question (thanks @Divakar) I managed to find a way to calculate values of specific column based on previous column and Pascal’s triangle. It’s not optimized yet as much as it could but results are really promising. Here we go:

``````from scipy.linalg import pascal

def stretch(a, k):
l = a.sum()+len(a)*(-k)
out = np.full(l, -1, dtype=int)
out = a-1
idx = (a-k).cumsum()[:-1]
out[idx] = a[1:]-1-k
return out.cumsum()

def numpy_combinations(n, k):
#n, k = data #benchmark version
n, k = data
x = np.array([n])
P = pascal(n).astype(int)
C = []
for b in range(k-1,-1,-1):
x = stretch(x, b)
r = P[b][x - b]
C.append(np.repeat(x, r))
return n - 1 - np.array(C).T
``````

And the benchmark results are:

``````# script is the same as in previous example except this part
def build_args(k):
return {'setup': lambda x: (k, x),
'kernels': [comb_index, numpy_combinations],
'n_range': [x for x in range(1, k)],
'xlabel': f'N',
'title': f'test of case C({k}, k)',
'show_progress': True,
'equality_check': False}
outs = [perfplot.bench(**build_args(n)) for n in (12, 15, 17, 23, 25, 28)]
fig = plt.figure(figsize=(20, 20))
for i in range(len(outs)):
ax = fig.add_subplot(2, 3, i + 1)
ax.grid(True, which="both")
outs[i].plot()
plt.show()
`````` Despite it still can’t fight with `itertools.combinations` for `n < 15` but it is a new winner in other cases. Last but not least, `numpy` demonstrates its power when amount of combinations gets reaaallly big. It was able to survive while processing C(28, 14) combinations which is around 40’000’000 items of size 14

Not sure how it will work out performance-wise, but you can do the combinations on an index array, then extract the actual array slices with `np.take`:

``````def combs_nd(a, r, axis=0):
a = np.asarray(a)
if axis < 0:
axis += a.ndim
indices = np.arange(a.shape[axis])
dt = np.dtype([('', np.intp)]*r)
indices = np.fromiter(combinations(indices, r), dt)
indices = indices.view(np.intp).reshape(-1, r)
return np.take(a, indices, axis=axis)

>>> combs_nd([1,2,3], 2)
array([[1, 2],
[1, 3],
[2, 3]])
>>> combs_nd([[1,2,3],[4,5,6]], 2, axis=1)
array([[[1, 2],
[1, 3],
[2, 3]],

[[4, 5],
[4, 6],
[5, 6]]])
``````