Experimental upgrade for v2.2.0 #278

Draft
wants to merge 42 commits into base: 33-v2.1.0.1

Changes from all commits
42 commits
c95dbec
Merge pull request #767 from kmaehashi/fix-dependency-docs
niboshi Nov 21, 2017
c758157
Merge pull request #769 from kmaehashi/fix-typo-cache
niboshi Nov 21, 2017
4f9c59e
Merge pull request #512 from okuta/improve-cudnn
niboshi Nov 10, 2017
464b178
Merge pull request #770 from niboshi/bp-767-fix-dependency-docs
kmaehashi Nov 21, 2017
b302247
Merge pull request #771 from niboshi/bp-769-fix-typo-cache
kmaehashi Nov 21, 2017
58dca59
Merge pull request #774 from kmaehashi/test-argminmax-tie
niboshi Nov 24, 2017
a310ab9
Merge pull request #776 from niboshi/bp-774-test-argminmax-tie
kmaehashi Nov 24, 2017
873479b
Merge pull request #777 from kmaehashi/fix-dump-filename
niboshi Nov 24, 2017
61be331
Merge pull request #778 from niboshi/bp-777-fix-dump-filename
kmaehashi Nov 24, 2017
601b1dd
Merge pull request #787 from hvy/fix-doc-missing-arg
unnonouno Nov 27, 2017
95cd84f
Merge pull request #790 from unnonouno/v2-fix-doc-missing-arg
hvy Nov 27, 2017
29f55e4
Merge pull request #749 from okuta/fix-stack
unnonouno Nov 29, 2017
45a00d4
Merge pull request #798 from unnonouno/v2-fix-stack
hvy Nov 29, 2017
ac93d73
Merge pull request #797 from kmaehashi/fix-gitignore
okuta Nov 30, 2017
bf60572
Merge pull request #800 from okuta/bp-797-fix-gitignore
kmaehashi Nov 30, 2017
494c59b
Merge pull request #803 from kmaehashi/fix-stack-test
niboshi Nov 30, 2017
624f6a8
Merge pull request #808 from kmaehashi/embed-signature
niboshi Dec 1, 2017
8538889
Merge pull request #786 from kmaehashi/fix-elementwise-docs
niboshi Dec 1, 2017
6be88dc
Merge pull request #810 from niboshi/bp-786-fix-elementwise-docs
kmaehashi Dec 1, 2017
6c9e947
Merge pull request #806 from niboshi/bp-803-fix-stack-test
kmaehashi Dec 1, 2017
adf72f7
Merge pull request #788 from hvy/allow-derived-errors
niboshi Dec 1, 2017
ae333f5
Merge pull request #809 from niboshi/bp-808-embed-signature
kmaehashi Dec 1, 2017
5bc3cda
Merge pull request #720 from okuta/add-default-casting
niboshi Dec 1, 2017
148701b
Merge pull request #804 from kmaehashi/fix-helper-comment
niboshi Dec 1, 2017
d45bae2
Merge pull request #811 from niboshi/bp-788-allow-derived-errors
hvy Dec 2, 2017
8eefaee
Merge pull request #812 from niboshi/bp-720-add-default-casting
okuta Dec 3, 2017
77ba344
Merge pull request #737 from niboshi/fix-matmul-value-error
okuta Dec 3, 2017
1b75b10
Merge pull request #748 from niboshi/bp-512-improve-cudnn
okuta Dec 3, 2017
a780b9e
Merge pull request #814 from niboshi/bp-804-fix-helper-comment
kmaehashi Dec 4, 2017
5879e40
Merge pull request #816 from okuta/bp-737-fix-matmul-value-error
niboshi Dec 4, 2017
a83c500
Merge pull request #817 from okuta/fix-matmul-test
niboshi Dec 4, 2017
be56876
Merge pull request #818 from niboshi/bp-817-fix-matmul-test
okuta Dec 4, 2017
b0f9e66
Merge pull request #805 from toslunar/cast-env-seed
niboshi Dec 4, 2017
75b3175
Merge pull request #822 from niboshi/bp-805-cast-env-seed
kmaehashi Dec 6, 2017
f87a4dd
Merge pull request #574 from unnonouno/remove-randint-dtype
niboshi Oct 14, 2017
50d6f3c
Merge pull request #830 from unnonouno/v2-remove-randint-dtype
niboshi Dec 7, 2017
aa39eb6
Merge pull request #741 from niboshi/fix-random-choice-repro
unnonouno Nov 24, 2017
7cca7e6
Remove unused import
unnonouno Dec 7, 2017
a276e56
Merge pull request #775 from unnonouno/v2-fix-random-choice-repro
niboshi Dec 7, 2017
d543ced
Update to v2.2.0
beam2d Dec 12, 2017
4423b77
Merge pull request #839 from beam2d/update-v2.2.0
kmaehashi Dec 12, 2017
2739453
Merge branch 'master' into clpy
LWisteria Mar 3, 2020
1 change: 1 addition & 0 deletions .gitignore
@@ -19,3 +19,4 @@ htmlcov/
 .idea/
 !ultima/ultima.cpp
 ultima/ultima
+.cache/
2 changes: 1 addition & 1 deletion clpy/_version.py
@@ -1 +1 @@
-__version__ = '2.1.0.1'
+__version__ = '2.2.0'
2 changes: 1 addition & 1 deletion clpy/backend/cuda/compiler.py
@@ -195,7 +195,7 @@ def dump(self, f):
         linum_fmt = '{{:0{}d}} '.format(digits)
         f.write('NVRTC compilation error: {}\n'.format(self))
         f.write('-----\n')
-        f.write('Name: {}\n'.format(' '.join(self.name)))
+        f.write('Name: {}\n'.format(self.name))
         f.write('Options: {}\n'.format(' '.join(self.options)))
         f.write('CUDA source:\n')
         for i, line in enumerate(lines):
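
Aside (not part of the diff): the bug fixed here is that `' '.join(...)` applied to a plain string iterates over its characters, so the dumped name came out letter-by-letter. A minimal illustration with a hypothetical name:

    >>> name = 'kern.cu'              # hypothetical kernel name
    >>> ' '.join(name)                # str.join iterates the characters
    'k e r n . c u'
    >>> 'Name: {}\n'.format(name)     # the fixed line formats the string as-is
    'Name: kern.cu\n'
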
4 changes: 3 additions & 1 deletion clpy/backend/cuda/device.pxd
@@ -1,5 +1,7 @@
 cpdef int get_device_id() except *
-cpdef get_cublas_handle()
+cpdef size_t get_cublas_handle() except *
+cpdef size_t get_cusolver_handle() except *
+cpdef size_t get_cusparse_handle() except *
 
 cdef class Device:
     cdef:
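
Background on the added `except *` (generic Cython behaviour, not specific to ClPy): a cdef/cpdef function with a C return type such as size_t cannot signal a Python exception through its return value, so without an except clause a raised exception is only printed as a warning and the caller carries on; `except *` tells Cython to check the error indicator after every call so the exception propagates normally. A generic sketch with made-up names:

    # sketch.pyx -- illustrative only, not ClPy code
    cdef size_t lookup_swallowed(dict cache, key):
        return cache[key]      # a KeyError here is printed and swallowed;
                               # the caller cannot tell anything went wrong

    cpdef size_t lookup_propagated(dict cache, key) except *:
        return cache[key]      # a KeyError here propagates to the caller
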
21 changes: 12 additions & 9 deletions clpy/backend/cuda/device.pyx
@@ -24,24 +24,27 @@ cdef dict _cusolver_handles = {}
 cdef dict _cusparse_handles = {}
 
 
-cpdef get_cublas_handle():
+cpdef size_t get_cublas_handle() except *:
     dev_id = get_device_id()
-    if dev_id in _cublas_handles:
-        return _cublas_handles[dev_id]
+    ret = _cublas_handles.get(dev_id, None)
+    if ret is not None:
+        return ret
     return Device().cublas_handle
 
 
-cpdef get_cusolver_handle():
+cpdef size_t get_cusolver_handle() except *:
     dev_id = get_device_id()
-    if dev_id in _cusolver_handles:
-        return _cusolver_handles[dev_id]
+    ret = _cusolver_handles.get(dev_id, None)
+    if ret is not None:
+        return ret
     return Device().cusolver_handle
 
 
-cpdef get_cusparse_handle():
+cpdef size_t get_cusparse_handle() except *:
     dev_id = get_device_id()
-    if dev_id in _cusparse_handles:
-        return _cusparse_handles[dev_id]
+    ret = _cusparse_handles.get(dev_id, None)
+    if ret is not None:
+        return ret
     return Device().cusparse_handle
 
 
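
Aside (not part of the patch): besides the `size_t ... except *` return-type change, each lookup is rewritten from a membership test plus subscript into a single `dict.get()` call. A plain-Python sketch of the pattern, with made-up names and values:

    cache = {0: 12345}                   # hypothetical device-id -> handle map

    def make_handle(dev_id):             # made-up fallback; stands in for
        return 67890                     # Device().cublas_handle and friends

    def get_handle(dev_id):
        # One dict lookup instead of an 'in' test followed by a subscript.
        ret = cache.get(dev_id, None)
        if ret is not None:
            return ret
        return make_handle(dev_id)

    print(get_handle(0))                 # 12345 (cached)
    print(get_handle(1))                 # 67890 (fallback)
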
3 changes: 3 additions & 0 deletions clpy/core/core.pxd
@@ -71,6 +71,9 @@ cdef class Indexer:
         readonly Py_ssize_t size
         readonly tuple shape
 
+
+cpdef ndarray ascontiguousarray(ndarray a, dtype=*)
+
 cdef class CArray:
     cdef:
         readonly tuple stride
37 changes: 19 additions & 18 deletions clpy/core/core.pyx
@@ -1888,29 +1888,18 @@ include "reduction.pxi"
 
 cdef _id = 'out0 = in0'
 
-_elementwise_copy = create_ufunc(
+elementwise_copy = create_ufunc(
     'clpy_copy',
     ('?->?', 'b->b', 'B->B', 'h->h', 'H->H', 'i->i', 'I->I', 'l->l', 'L->L',
      'q->q', 'Q->Q', ('e->e', _id), 'f->f', 'd->d', 'F->F', 'D->D'),
-    'out0 = (out0_type)(in0)')
+    'out0 = out0_type(in0)', default_casting='unsafe')
 # complex numbers requires out0 = complex<T>(in0)
 
 
-def elementwise_copy(*args, **kwargs):
-    kwargs['casting'] = 'unsafe'
-    return _elementwise_copy(*args, **kwargs)
-
-
-_elementwise_copy_where = create_ufunc(
+elementwise_copy_where = create_ufunc(
     'clpy_copy_where',
     ('??->?', 'b?->b', 'B?->B', 'h?->h', 'H?->H', 'i?->i', 'I?->I', 'l?->l',
-     'L?->L', 'q?->q', 'Q?->Q', 'f?->f', 'd?->d', 'F?->F', 'D?->D'),
-    'if (in1) out0 = in0')
-
-
-def elementwise_copy_where(*args, **kwargs):
-    kwargs['casting'] = 'unsafe'
-    return _elementwise_copy_where(*args, **kwargs)
+     'L?->L', 'q?->q', 'Q?->Q', 'e?->e', 'f?->f', 'd?->d', 'F?->F', 'D?->D'),
+    'if (in1) out0 = in0', default_casting='unsafe')
 
 
 cdef _divmod_int = string.Template('''
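
Background for the default_casting='unsafe' choice (plain-NumPy illustration, not part of the patch): a copy between arbitrary dtypes needs 'unsafe' casting, which the ufunc machinery would otherwise reject under its 'same_kind' default; the old wrapper functions forced casting='unsafe' on every call, and the new code bakes that default into the ufunc itself:

    import numpy

    src = numpy.array([1.5, 2.5], dtype=numpy.float64)
    dst = numpy.empty(2, dtype=numpy.int32)

    numpy.copyto(dst, src, casting='unsafe')       # allowed; values truncate
    # numpy.copyto(dst, src, casting='same_kind')  # would raise TypeError
    print(dst)                                     # [1 2]
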
@@ -3465,6 +3454,11 @@ cpdef ndarray matmul(ndarray a, ndarray b, ndarray out=None):
     cdef Py_ssize_t i, n, m, ka, kb
     cdef Py_ssize_t batchCount
 
+    orig_a_shape = a.shape
+    orig_b_shape = b.shape
+    if len(orig_a_shape) == 0 or len(orig_b_shape) == 0:
+        raise ValueError('Scalar operands are not allowed, use \'*\' instead')
+
     ret_dtype = numpy.result_type(a.dtype, b.dtype)
     dtype = numpy.find_common_type((ret_dtype, 'f'), ())
 
@@ -3550,9 +3544,16 @@ cpdef ndarray matmul(ndarray a, ndarray b, ndarray out=None):
     *la, ka, n = a.shape
     *lb, m, kb = b.shape
 
-    assert ka == kb
+    if ka != kb:
+        raise ValueError(
+            'shapes ({}) and ({}) not aligned'.format(
+                ','.join([str(_) for _ in orig_a_shape]),
+                ','.join([str(_) for _ in orig_b_shape])))
     for la_, lb_ in zip(la, lb):
-        assert la_ == lb_ or la_ == 1 or lb_ == 1
+        if not (la_ == lb_ or la_ == 1 or lb_ == 1):
+            raise ValueError(
+                'operands could not be broadcast together with '
+                'remapped shapes')
 
     batchCount = 1  # batchCount = numpy.prod(la)
     for i in la:
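
Aside: the two asserts are replaced by ValueError to match NumPy's error behaviour (together with the scalar-operand check added earlier in the function). The batch-dimension condition itself is the usual broadcasting rule applied pairwise to the leading dimensions: each paired dimension must be equal or contain a 1. A tiny stand-alone check of the same rule:

    def batch_dims_compatible(la, lb):
        # la, lb: leading (batch) dimensions of the two operands,
        # assumed to have the same length, as in the loop above.
        return all(x == y or x == 1 or y == 1 for x, y in zip(la, lb))

    print(batch_dims_compatible([5, 1], [5, 7]))   # True  -> batch shape (5, 7)
    print(batch_dims_compatible([5, 2], [5, 7]))   # False -> matmul raises ValueError
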
29 changes: 19 additions & 10 deletions clpy/core/elementwise.pxi
@@ -567,9 +567,11 @@ cdef class ElementwiseKernel:
 
         Args:
             args: Arguments of the kernel.
-            size (int): Range size of the indices. If specified, the variable
-                ``n`` is set to this value. Otherwise, the result of
-                broadcasting is used to determine the value of ``n``.
+            size (int): Range size of the indices. By default, the range size
+                is automatically determined from the result of broadcasting.
+                This parameter must be specified if and only if all ndarrays
+                are `raw` and the range size cannot be determined
+                automatically.
 
         Returns:
             Arrays are returned according to the ``out_params`` argument of the
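
The reworded docstring covers the case where every array argument is declared `raw`, so there is nothing to broadcast against and the launch size must be given explicitly. A sketch of such a call (kernel body and variable names invented for illustration):

    import clpy

    add_raw = clpy.ElementwiseKernel(
        'raw T x, raw T y', 'raw T z',
        'z[i] = x[i] + y[i]', 'add_raw')

    x = clpy.arange(5, dtype=clpy.float32)
    y = clpy.arange(5, dtype=clpy.float32)
    z = clpy.empty_like(x)
    add_raw(x, y, z, size=5)    # size= is required: every argument is raw
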
@@ -761,7 +763,9 @@ class ufunc(object):
         nargs (int): Number of all arguments.
 
     """
-    def __init__(self, name, nin, nout, ops, preamble='', doc=''):
+
+    def __init__(self, name, nin, nout, ops, preamble='', doc='',
+                 default_casting=None):
         # TODO(tomoya.sakai): raw array may be possible for ufunc
         self.name = name
         self.nin = nin
@@ -770,6 +774,10 @@ class ufunc(object):
         self._ops = ops
         self._preamble = preamble
         self.__doc__ = doc
+        if default_casting is None:
+            self._default_casting = 'same_kind'
+        else:
+            self._default_casting = default_casting
         _in_params = tuple(
             ParameterInfo('T in%d' % i, True)
             for i in range(nin))
@@ -799,9 +807,7 @@ class ufunc(object):
         return types
 
     def __call__(self, *args, **kwargs):
-        """__call__(*args, **kwargs)
-
-        Applies the universal function to arguments elementwise.
+        """Applies the universal function to arguments elementwise.
 
         Args:
             args: Input arguments. Each of them can be a :class:`clpy.ndarray`
@@ -821,7 +827,7 @@ class ufunc(object):
         out = kwargs.pop('out', None)
         dtype = kwargs.pop('dtype', None)
         # Note default behavior of casting is 'same_kind' on numpy>=1.10
-        casting = kwargs.pop('casting', 'same_kind')
+        casting = kwargs.pop('casting', self._default_casting)
         if dtype is not None:
             dtype = numpy.dtype(dtype).type
         if kwargs:
@@ -879,7 +885,8 @@ class ufunc(object):
         return ret
 
 
-cpdef create_ufunc(name, ops, routine=None, preamble='', doc=''):
+cpdef create_ufunc(name, ops, routine=None, preamble='', doc='',
+                   default_casting=None):
     _ops = []
     for t in ops:
         if not isinstance(t, tuple):
@@ -897,4 +904,6 @@ cpdef create_ufunc(name, ops, routine=None, preamble='', doc=''):
         out_types = tuple([numpy.dtype(t).type for t in out_types])
         _ops.append((in_types, out_types, rt))
 
-    return ufunc(name, len(_ops[0][0]), len(_ops[0][1]), _ops, preamble, doc)
+    ret = ufunc(name, len(_ops[0][0]), len(_ops[0][1]), _ops, preamble, doc,
+                default_casting=default_casting)
+    return ret
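
Taken together (a pure-Python model, for illustration only): create_ufunc and ufunc now carry a per-ufunc default casting mode, and __call__ falls back to it only when the caller does not pass casting= explicitly:

    def resolve_casting(call_kwargs, default_casting=None):
        # ufunc.__init__: fall back to NumPy's 'same_kind' when unset.
        if default_casting is None:
            default_casting = 'same_kind'
        # ufunc.__call__: an explicit casting= argument still wins.
        return call_kwargs.pop('casting', default_casting)

    print(resolve_casting({}))                                           # same_kind
    print(resolve_casting({}, default_casting='unsafe'))                 # unsafe
    print(resolve_casting({'casting': 'no'}, default_casting='unsafe'))  # no
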
2 changes: 1 addition & 1 deletion clpy/core/fusion.py
@@ -734,7 +734,7 @@ def _create_ufunc(clpy_ufunc, numpy_ufunc):
 
 clip = ufunc(core._clip, math.misc.clip, numpy.clip)
 
-copy = ufunc(core._elementwise_copy,
+copy = ufunc(core.elementwise_copy,
              creation.from_data.copy, numpy.copy)
 
 bitwise_and = _create_ufunc(core.bitwise_and, numpy.bitwise_and)
4 changes: 1 addition & 3 deletions clpy/core/reduction.pxi
@@ -398,9 +398,7 @@ class ReductionKernel(object):
         self.preamble = preamble
 
     def __call__(self, *args, **kwargs):
-        """__call__(*args, **kwargs)
-
-        Compiles and invokes the reduction kernel.
+        """Compiles and invokes the reduction kernel.
 
         The compilation runs only if the kernel is not cached. Note that the
         kernels with different argument dtypes, ndims, or axis are not