← Previous Changeset
Next Changeset →

Changeset f734e7d in sasmodels

Timestamp:

Feb 22, 2015 1:44:54 AM (10 years ago)

Author:

pkienzle

Branches:

master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests

Children:

Parents:

Message:

restructure c code generation for maintainability; extend test harness to allow opencl and ctypes tests

Location:

Files:

: 2 added
: 9 edited

core.py (added)
direct_model.py (modified) (1 diff)
generate.py (modified) (8 diffs)
kernel_template.c (added)
kernelcl.py (modified) (5 diffs)
kerneldll.py (modified) (8 diffs)
kernelpy.py (modified) (4 diffs)
model_test.py (modified) (5 diffs)
models/broad_peak.py (modified) (2 diffs)
models/lamellarPC.py (modified) (2 diffs)
models/lamellarPC_kernel.c (modified) (1 diff)

Legend:

: Unmodified
: Added
: Removed

sasmodels/direct_model.py

-                      r16bc3fc
+                      rf734e7d
 import numpy as np
+from . import models
+from . import weights
+try:
+    from .kernelcl import load_model
+except ImportError,exc:
+    warnings.warn(str(exc))
+    warnings.warn("using ctypes instead")
+    from .kerneldll import load_model
+def load_model_definition(model_name):
+    __import__('sasmodels.models.'+model_name)
+    model_definition = getattr(models, model_name, None)
+    return model_definition
+# load_model is imported above.  It looks like the following
+#def load_model(model_definition, dtype='single'):
+#    if kerneldll:
+#        if source is newer than compiled: compile
+#        load dll
+#        return kernel
+#    elif kernelcl:
+#        compile source on context
+#        return kernel
+def make_kernel(model, q_vectors):
+    """
+    Return a computation kernel from the model definition and the q input.
+    """
+    input = model.make_input(q_vectors)
+    return model(input)
+def get_weights(kernel, pars, name):
+    """
+    Generate the distribution for parameter *name* given the parameter values
+    in *pars*.
+    Searches for "name", "name_pd", "name_pd_type", "name_pd_n", "name_pd_nsigma"
+    """
+    relative = name in kernel.info['partype']['pd-rel']
+    limits = kernel.info['limits']
+    disperser = pars.get(name+'_pd_type', 'gaussian')
+    value = pars.get(name, kernel.info['defaults'][name])
+    npts = pars.get(name+'_pd_n', 0)
+    width = pars.get(name+'_pd', 0.0)
+    nsigma = pars.get(name+'_pd_nsigma', 3.0)
+    v,w = weights.get_weights(
+        disperser, npts, width, nsigma,
+        value, limits[name], relative)
+    return v,w/np.sum(w)
+def call_kernel(kernel, pars):
+    fixed_pars = [pars.get(name, kernel.info['defaults'][name])
+                  for name in kernel.fixed_pars]
+    pd_pars = [get_weights(kernel, pars, name) for name in kernel.pd_pars]
+    return kernel(fixed_pars, pd_pars)
+from .core import load_model_definition, make_kernel, call_kernel
+from .core import load_model_cl as load_model
+if load_model is None:
+    warnings.warn("unable to load opencl; using ctypes instead")
+    from .core import load_model_dll as load_model
 class DirectModel:

sasmodels/generate.py

-                      rf1ecfa92
+                      rf734e7d
 import sys
+import os
+import os.path
+from os.path import abspath, dirname, join as joinpath, exists
 import re
 import numpy as np
+C_KERNEL_TEMPLATE_PATH = joinpath(dirname(__file__), 'kernel_template.c')
 F64 = np.dtype('float64')
 …
 PARTABLE_VALUE_WIDTH = 10
-# Header included before every kernel.
-# This makes sure that the appropriate math constants are defined, and does
-# whatever is required to make the kernel compile as pure C rather than
-# as an OpenCL kernel.
-KERNEL_HEADER = """\
-// GENERATED CODE --- DO NOT EDIT ---
-// Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
-#ifdef __OPENCL_VERSION__
-# define USE_OPENCL
-#endif
-// If opencl is not available, then we are compiling a C function
-// Note: if using a C++ compiler, then define kernel as extern "C"
-#ifndef USE_OPENCL
-#  ifdef __cplusplus
-     #include <cmath>
-     #if defined(_MSC_VER)
-     #define kernel extern "C" __declspec( dllexport )
-     #else
-     #define kernel extern "C"
-     #endif
-     using namespace std;
-     inline void SINCOS(double angle, double &svar, double &cvar)
-       { svar=sin(angle); cvar=cos(angle); }
-#  else
-     #include <math.h>
-     #if defined(_MSC_VER)
-     #define kernel __declspec( dllexport )
-     #else
-     #define kernel
-     #endif
-     #define SINCOS(angle,svar,cvar) do {svar=sin(angle);cvar=cos(angle);} while (0)
-#  endif
-#  define global
-#  define local
-#  define constant const
-#  define powr(a,b) pow(a,b)
-#  define pown(a,b) pow(a,b)
-#else
-#  ifdef USE_SINCOS
-#    define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
-#  else
-#    define SINCOS(angle,svar,cvar) do {svar=sin(angle);cvar=cos(angle);} while (0)
-#  endif
-#endif
-// Standard mathematical constants:
-//   M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
-//   M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
-// OpenCL defines M_constant_F for float constants, and nothing if double
-// is not enabled on the card, which is why these constants may be missing
-#ifndef M_PI
-#  define M_PI 3.141592653589793
-#endif
-#ifndef M_PI_2
-#  define M_PI_2 1.570796326794897
-#endif
-#ifndef M_PI_4
-#  define M_PI_4 0.7853981633974483
-#endif
-// Non-standard pi/180, used for converting between degrees and radians
-#ifndef M_PI_180
-#  define M_PI_180 0.017453292519943295
-#endif
-"""
-# The I(q) kernel and the I(qx, qy) kernel have one and two q parameters
-# respectively, so the template builder will need to do extra work to
-# declare, initialize and pass the q parameters.
-KERNEL_1D = {
-    'fn': "Iq",
-    'q_par_decl': "global const double *q,",
-    'qinit': "const double qi = q[i];",
-    'qcall': "qi",
-    'qwork': ["q"],
+    }
-KERNEL_2D = {
-    'fn': "Iqxy",
-    'q_par_decl': "global const double *qx,\n    global const double *qy,",
-    'qinit': "const double qxi = qx[i];\n    const double qyi = qy[i];",
-    'qcall': "qxi, qyi",
-    'qwork': ["qx", "qy"],
+    }
-# Generic kernel template for the polydispersity loop.
-# This defines the opencl kernel that is available to the host.  The same
-# structure is used for Iq and Iqxy kernels, so extra flexibility is needed
-# for q parameters.  The polydispersity loop is built elsewhere and
-# substituted into this template.
-KERNEL_TEMPLATE = """\
-kernel void %(name)s(
-    %(q_par_decl)s
-    global double *result,
-#ifdef USE_OPENCL
-    global double *loops_g,
-#else
-    const int Nq,
-#endif
-    local double *loops,
-    const double cutoff,
-    %(par_decl)s
+    )
+{
-#ifdef USE_OPENCL
-  // copy loops info to local memory
-  event_t e = async_work_group_copy(loops, loops_g, (%(pd_length)s)*2, 0);
-  wait_group_events(1, &e);
-  int i = get_global_id(0);
-  int Nq = get_global_size(0);
-#endif
-#ifdef USE_OPENCL
-  if (i < Nq)
-#else
-  #pragma omp parallel for
-  for (int i=0; i < Nq; i++)
-#endif
+  {
-    %(qinit)s
-    double ret=0.0, norm=0.0;
-    double vol=0.0, norm_vol=0.0;
-%(loops)s
-    if (vol*norm_vol != 0.0) {
-      ret *= norm_vol/vol;
+    }
-    result[i] = scale*ret/norm+background;
+  }
+}
-"""
-# Polydispersity loop level.
-# This pulls the parameter value and weight from the looping vector in order
-# in preparation for a nested loop.
-LOOP_OPEN="""\
-for (int %(name)s_i=0; %(name)s_i < N%(name)s; %(name)s_i++) {
-  const double %(name)s = loops[2*(%(name)s_i%(offset)s)];
-  const double %(name)s_w = loops[2*(%(name)s_i%(offset)s)+1];\
-"""
-##########################################################
-#                                                        #
-#   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
-#   !!                                              !!   #
-#   !!  KEEP THIS CODE CONSISTENT WITH PYKERNEL.PY  !!   #
-#   !!                                              !!   #
-#   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
-#                                                        #
-##########################################################
-# Polydispersity loop body.
-# This computes the weight, and if it is sufficient, calls the scattering
-# function and adds it to the total.  If there is a volume normalization,
-# it will also be added here.
-LOOP_BODY="""\
-const double weight = %(weight_product)s;
-if (weight > cutoff) {
-  const double I = %(fn)s(%(qcall)s, %(pcall)s);
-  if (I>=0.0) { // scattering cannot be negative
-    ret += weight*I%(sasview_spherical)s;
-    norm += weight;
-    %(volume_norm)s
+  }
-  //else { printf("exclude qx,qy,I:%%g,%%g,%%g\\n",%(qcall)s,I); }
+}
-//else { printf("exclude weight:%%g\\n",weight); }\
-"""
-# Use this when integrating over orientation
-SPHERICAL_CORRECTION="""\
-// Correction factor for spherical integration p(theta) I(q) sin(theta) dtheta
-double spherical_correction = (Ntheta>1 ? fabs(sin(M_PI_180*theta)) : 1.0);\
-"""
-# Use this to reproduce sasview behaviour
-SASVIEW_SPHERICAL_CORRECTION="""\
-// Correction factor for spherical integration p(theta) I(q) sin(theta) dtheta
-double spherical_correction = (Ntheta>1 ? fabs(cos(M_PI_180*theta))*M_PI_2 : 1.0);\
-"""
-# Volume normalization.
-# If there are "volume" polydispersity parameters, then these will be used
-# to call the form_volume function from the user supplied kernel, and accumulate
-# a normalized weight.
-VOLUME_NORM="""const double vol_weight = %(vol_weight)s;
-    vol += vol_weight*form_volume(%(vol_pars)s);
-    norm_vol += vol_weight;\
-"""
-# functions defined as strings in the .py module
-WORK_FUNCTION="""\
-double %(name)s(%(pars)s);
-double %(name)s(%(pars)s)
+{
-%(body)s
-}\
-"""
 # Documentation header for the module, giving the model name, its short
 # description and its parameter table.  The remainder of the doc comes
 …
 %(docs)s
 """
-def indent(s, depth):
-    """
-    Indent a string of text with *depth* additional spaces on each line.
-    """
-    spaces = " "*depth
-    sep = "\n"+spaces
-    return spaces + sep.join(s.split("\n"))
-def kernel_name(info, is_2D):
-    """
-    Name of the exported kernel symbol.
-    """
-    return info['name'] + "_" + ("Iqxy" if is_2D else "Iq")
-def use_single(source):
-    """
-    Convert code from double precision to single precision.
-    """
-    # Convert double keyword to float.  Accept an 'n' parameter for vector
-    # values, where n is 2, 4, 8 or 16. Assume complex numbers are represented
-    # as cdouble which is typedef'd to double2.
-    source = re.sub(r'(^|[^a-zA-Z0-9_]c?)double(([248]|16)?($|[^a-zA-Z0-9_]))',
-                    r'\1float\2', source)
-    # Convert floating point constants to single by adding 'f' to the end.
-    # OS/X driver complains if you don't do this.
-    source = re.sub(r'[^a-zA-Z_](\d*[.]\d+|\d+[.]\d*)([eE][+-]?\d+)?',
-                    r'\g<0>f', source)
-    return source
-def make_kernel(info, is_2D):
-    """
-    Build a kernel call from metadata supplied by the user.
-    *info* is the json object defined in the kernel file.
-    *form* is either "Iq" or "Iqxy".
-    This does not create a complete OpenCL kernel source, only the top
-    level kernel call with polydispersity and a call to the appropriate
-    Iq or Iqxy function.
-    """
-    # If we are building the Iqxy kernel, we need to propagate qx,qy
-    # parameters, otherwise we can
-    dim = "2d" if is_2D else "1d"
-    fixed_pars = info['partype']['fixed-'+dim]
-    pd_pars = info['partype']['pd-'+dim]
-    vol_pars = info['partype']['volume']
-    q_pars = KERNEL_2D if is_2D else KERNEL_1D
-    fn = q_pars['fn']
-    # Build polydispersity loops
-    depth = 4
-    offset = ""
-    loop_head = []
-    loop_end = []
-    for name in pd_pars:
-        subst = { 'name': name, 'offset': offset }
-        loop_head.append(indent(LOOP_OPEN%subst, depth))
-        loop_end.insert(0, (" "*depth) + "}")
-        offset += '+N'+name
-        depth += 2
-    # The volume parameters in the inner loop are used to call the volume()
-    # function in the kernel, with the parameters defined in vol_pars and the
-    # weight product defined in weight.  If there are no volume parameters,
-    # then there will be no volume normalization.
-    if vol_pars:
-        subst = {
-            'vol_weight': "*".join(p+"_w" for p in vol_pars),
-            'vol_pars': ", ".join(vol_pars),
+            }
-        volume_norm = VOLUME_NORM%subst
-    else:
-        volume_norm = ""
-    # Define the inner loop function call
-    # The parameters to the f(q,p1,p2...) call should occur in the same
-    # order as given in the parameter info structure.  This may be different
-    # from the parameter order in the call to the kernel since the kernel
-    # call places all fixed parameters before all polydisperse parameters.
-    fq_pars = [p[0] for p in info['parameters'][len(COMMON_PARAMETERS):]
-               if p[0] in set(fixed_pars+pd_pars)]
-    if False and "theta" in pd_pars:
-        spherical_correction = [indent(SPHERICAL_CORRECTION, depth)]
-        weights = [p+"_w" for p in pd_pars]+['spherical_correction']
-        sasview_spherical = ""
-    elif True and "theta" in pd_pars:
-        spherical_correction = [indent(SASVIEW_SPHERICAL_CORRECTION,depth)]
-        weights = [p+"_w" for p in pd_pars]
-        sasview_spherical = "*spherical_correction"
-    else:
-        spherical_correction = []
-        weights = [p+"_w" for p in pd_pars]
-        sasview_spherical = ""
-    weight_product = "*".join(weights) if len(weights) > 1 else "1.0"
-    subst = {
-        'weight_product': weight_product,
-        'volume_norm': volume_norm,
-        'fn': fn,
-        'qcall': q_pars['qcall'],
-        'pcall': ", ".join(fq_pars), # skip scale and background
-        'sasview_spherical': sasview_spherical,
+        }
-    loop_body = [indent(LOOP_BODY%subst, depth)]
-    loops = "\n".join(loop_head+spherical_correction+loop_body+loop_end)
-    # declarations for non-pd followed by pd pars
-    # e.g.,
-    #     const double sld,
-    #     const int Nradius
-    fixed_par_decl = ",\n    ".join("const double %s"%p for p in fixed_pars)
-    pd_par_decl = ",\n    ".join("const int N%s"%p for p in pd_pars)
-    if fixed_par_decl and pd_par_decl:
-        par_decl = ",\n    ".join((fixed_par_decl, pd_par_decl))
-    elif fixed_par_decl:
-        par_decl = fixed_par_decl
-    else:
-        par_decl = pd_par_decl
-    # Finally, put the pieces together in the kernel.
-    pd_length = "+".join('N'+p for p in pd_pars) if len(pd_pars) > 0 else "0"
-    subst = {
-        # kernel name is, e.g., cylinder_Iq
-        'name': kernel_name(info, is_2D),
-        # to declare, e.g., global double q[],
-        'q_par_decl': q_pars['q_par_decl'],
-        # to declare, e.g., double sld, int Nradius, int Nlength
-        'par_decl': par_decl,
-        # to copy global to local pd pars we need, e.g., Nradius+Nlength
-        'pd_length': pd_length,
-        # the q initializers, e.g., double qi = q[i];
-        'qinit': q_pars['qinit'],
-        # the actual polydispersity loop
-        'loops': loops,
+        }
-    kernel = KERNEL_TEMPLATE%subst
-    # If the working function is defined in the kernel metadata as a
-    # string, translate the string to an actual function definition
-    # and put it before the kernel.
-    if info[fn]:
-        subst = {
-            'name': fn,
-            'pars': ", ".join("double "+p for p in q_pars['qwork']+fq_pars),
-            'body': info[fn],
+            }
-        kernel = "\n".join((WORK_FUNCTION%subst, kernel))
-    return kernel
 def make_partable(pars):
 …
     """
     for path in search_path:
         target = os.path.join(path, filename)
         if os.path.exists(target):
+        target = joinpath(path, filename)
+        if exists(target):
             return target
     raise ValueError("%r not found in %s"%(filename, search_path))
 …
     Return a list of the sources file paths for the module.
     """
-    from os.path import abspath, dirname, join as joinpath
     search_path = [ dirname(info['filename']),
                     abspath(joinpath(dirname(__file__),'models')) ]
     return [_search(search_path, f) for f in info['source']]
+def make_model(info):
+    """
+    Generate the code for the kernel defined by info, using source files
+    found in the given search path.
+    """
+    source = [open(f).read() for f in sources(info)]
+    # If the form volume is defined as a string, then wrap it in a
+    # function definition and place it after the external sources but
+    # before the kernel functions.  If the kernel functions are strings,
+    # they will be translated in the make_kernel call.
+    if info['form_volume']:
+        subst = {
+            'name': "form_volume",
+            'pars': ", ".join("double "+p for p in info['partype']['volume']),
+            'body': info['form_volume'],
+            }
+        source.append(WORK_FUNCTION%subst)
+    kernel_Iq = make_kernel(info, is_2D=False) if not callable(info['Iq']) else ""
+    kernel_Iqxy = make_kernel(info, is_2D=True) if not callable(info['Iqxy']) else ""
+    kernel = "\n\n".join([KERNEL_HEADER]+source+[kernel_Iq, kernel_Iqxy])
+    return kernel
+def use_single(source):
+    """
+    Convert code from double precision to single precision.
+    """
+    # Convert double keyword to float.  Accept an 'n' parameter for vector
+    # values, where n is 2, 4, 8 or 16. Assume complex numbers are represented
+    # as cdouble which is typedef'd to double2.
+    source = re.sub(r'(^|[^a-zA-Z0-9_]c?)double(([248]|16)?($|[^a-zA-Z0-9_]))',
+                    r'\1float\2', source)
+    # Convert floating point constants to single by adding 'f' to the end.
+    # OS/X driver complains if you don't do this.
+    source = re.sub(r'[^a-zA-Z_](\d*[.]\d+|\d+[.]\d*)([eE][+-]?\d+)?',
+                    r'\g<0>f', source)
+    return source
+def kernel_name(info, is_2D):
+    """
+    Name of the exported kernel symbol.
+    """
+    return info['name'] + "_" + ("Iqxy" if is_2D else "Iq")
 def categorize_parameters(pars):
 …
     return partype
+def indent(s, depth):
+    """
+    Indent a string of text with *depth* additional spaces on each line.
+    """
+    spaces = " "*depth
+    sep = "\n"+spaces
+    return spaces + sep.join(s.split("\n"))
+def build_polydispersity_loops(pd_pars):
+    """
+    Build polydispersity loops
+    Returns loop opening and loop closing
+    """
+    LOOP_OPEN="""\
+for (int %(name)s_i=0; %(name)s_i < N%(name)s; %(name)s_i++) {
+  const double %(name)s = loops[2*(%(name)s_i%(offset)s)];
+  const double %(name)s_w = loops[2*(%(name)s_i%(offset)s)+1];\
+"""
+    depth = 4
+    offset = ""
+    loop_head = []
+    loop_end = []
+    for name in pd_pars:
+        subst = { 'name': name, 'offset': offset }
+        loop_head.append(indent(LOOP_OPEN%subst, depth))
+        loop_end.insert(0, (" "*depth) + "}")
+        offset += '+N'+name
+        depth += 2
+    return "\n".join(loop_head), "\n".join(loop_end)
+C_KERNEL_TEMPLATE=None
+def make_model(info):
+    """
+    Generate the code for the kernel defined by info, using source files
+    found in the given search path.
+    """
+    # TODO: need something other than volume to indicate dispersion parameters
+    # No volume normalization despite having a volume parameter.
+    # Thickness is labelled a volume in order to trigger polydispersity.
+    # May want a separate dispersion flag, or perhaps a separate category for
+    # disperse, but not volume.  Volume parameters also use relative values
+    # for the distribution rather than the absolute values used by angular
+    # dispersion.  Need to be careful that necessary parameters are available
+    # for computing volume even if we allow non-disperse volume parameters.
+    # Load template
+    global C_KERNEL_TEMPLATE
+    if C_KERNEL_TEMPLATE is None:
+        with open(C_KERNEL_TEMPLATE_PATH) as fid:
+            C_KERNEL_TEMPLATE = fid.read()
+    # Load additional sources
+    source = [open(f).read() for f in sources(info)]
+    # Prepare defines
+    defines = []
+    partype = info['partype']
+    pd_1d = partype['pd-1d']
+    pd_2d = partype['pd-2d']
+    fixed_1d = partype['fixed-1d']
+    fixed_2d = partype['fixed-1d']
+    iq_parameters = [p[0]
+        for p in info['parameters'][2:] # skip scale, background
+        if p[0] in set(fixed_1d+pd_1d)]
+    iqxy_parameters = [p[0]
+        for p in info['parameters'][2:] # skip scale, background
+        if p[0] in set(fixed_2d+pd_2d)]
+    volume_parameters = [p[0]
+        for p in info['parameters']
+        if p[4]=='volume']
+    # Fill in defintions for volume parameters
+    if volume_parameters:
+        defines.append(('VOLUME_PARAMETERS',
+                        ','.join(volume_parameters)))
+        defines.append(('VOLUME_WEIGHT_PRODUCT',
+                        '*'.join(p+'_w' for p in volume_parameters)))
+    # Generate form_volume function from body only
+    if info['form_volume'] is not None:
+        defines.append(('VOLUME_PARAMETER_DECLARATIONS',
+                        ', '.join('double '+p for p in volume_parameters)))
+        fn = """\
+double form_volume(VOLUME_PARAMETER_DECLARATIONS);
+double form_volume(VOLUME_PARAMETER_DECLARATIONS) {
+    %(body)s
+}
+"""%{'body':info['form_volume']}
+        source.append(fn)
+    # Fill in definitions for Iq parameters
+    defines.append(('IQ_KERNEL_NAME', info['name']+'_Iq'))
+    defines.append(('IQ_PARAMETERS', ', '.join(iq_parameters)))
+    if fixed_1d:
+        defines.append(('IQ_FIXED_PARAMETER_DECLARATIONS',
+                        ', \\\n    '.join('const double %s'%p for p in fixed_1d)))
+    if pd_1d:
+        defines.append(('IQ_WEIGHT_PRODUCT',
+                        '*'.join(p+'_w' for p in pd_1d)))
+        defines.append(('IQ_DISPERSION_LENGTH_DECLARATIONS',
+                        ', \\\n    '.join('const int N%s'%p for p in pd_1d)))
+        defines.append(('IQ_DISPERSION_LENGTH_SUM',
+                        '+'.join('N'+p for p in pd_1d)))
+        open_loops, close_loops = build_polydispersity_loops(pd_1d)
+        defines.append(('IQ_OPEN_LOOPS',
+                        open_loops.replace('\n',' \\\n')))
+        defines.append(('IQ_CLOSE_LOOPS',
+                        close_loops.replace('\n',' \\\n')))
+    if info['Iq'] is not None:
+        defines.append(('IQ_PARAMETER_DECLARATIONS',
+                       ', '.join('double '+p for p in iq_parameters)))
+        fn = """\
+double Iq(double q, IQ_PARAMETER_DECLARATIONS);
+double Iq(double q, IQ_PARAMETER_DECLARATIONS) {
+    %(body)s
+}
+"""%{'body':info['Iq']}
+        source.append(fn)
+    # Fill in definitions for Iqxy parameters
+    defines.append(('IQXY_KERNEL_NAME', info['name']+'_Iqxy'))
+    defines.append(('IQXY_PARAMETERS', ', '.join(iqxy_parameters)))
+    if fixed_2d:
+        defines.append(('IQXY_FIXED_PARAMETER_DECLARATIONS',
+                        ', \\\n    '.join('const double %s'%p for p in fixed_2d)))
+    if pd_2d:
+        defines.append(('IQXY_WEIGHT_PRODUCT',
+                        '*'.join(p+'_w' for p in pd_2d)))
+        defines.append(('IQXY_DISPERSION_LENGTH_DECLARATIONS',
+                        ', \\\n    '.join('const int N%s'%p for p in pd_2d)))
+        defines.append(('IQXY_DISPERSION_LENGTH_SUM',
+                        '+'.join('N'+p for p in pd_2d)))
+        open_loops, close_loops = build_polydispersity_loops(pd_2d)
+        defines.append(('IQXY_OPEN_LOOPS',
+                        open_loops.replace('\n',' \\\n')))
+        defines.append(('IQXY_CLOSE_LOOPS',
+                        close_loops.replace('\n',' \\\n')))
+    if info['Iqxy'] is not None:
+        defines.append(('IQXY_PARAMETER_DECLARATIONS',
+                       ', '.join('double '+p for p in iqxy_parameters)))
+        fn = """\
+double Iqxy(double qx, double qy, IQXY_PARAMETER_DECLARATIONS);
+double Iqxy(double qx, double qy, IQXY_PARAMETER_DECLARATIONS) {
+    %(body)s
+}
+"""%{'body':info['Iqxy']}
+        source.append(fn)
+    # Need to know if we have a theta parameter for Iqxy; it is not there
+    # for the magnetic sphere model, for example, which has a magnetic
+    # orientation but no shape orientation.
+    if 'theta' in pd_2d:
+        defines.append(('IQXY_HAS_THETA', '1'))
+    #for d in defines: print d
+    DEFINES='\n'.join('#define %s %s'%(k,v) for k,v in defines)
+    SOURCES='\n\n'.join(source)
+    return C_KERNEL_TEMPLATE%{
+        'DEFINES':DEFINES,
+        'SOURCES':SOURCES,
+        }
 def make(kernel_module):
     """
 …
     """
     # TODO: allow Iq and Iqxy to be defined in python
-    from os.path import abspath
     #print kernelfile
     info = dict(
 …
     info['defaults'] = dict((p[0],p[2]) for p in info['parameters'])
     source = make_model(info)
+    # Assume if one part of the kernel is python then all parts are.
+    source = make_model(info) if not callable(info['Iq']) else None
     return source, info

sasmodels/kernelcl.py

-                      rf1ecfa92
+                      rf734e7d
 from . import generate
 from .kernelpy import PyInput, PyKernel
+from .kernelpy import PyInput, PyModel
 F64_DEFS = """\
 …
     """
     source, info = generate.make(kernel_module)
+    if callable(info.get('Iq',None)):
+        return PyModel(info)
     ## for debugging, save source to a .cl file, edit it, and reload as model
     #open(info['name']+'.cl','w').write(source)
 …
     def __call__(self, input):
-        # Support pure python kernel call
-        if input.is_2D and callable(self.info['Iqxy']):
-            return PyKernel(self.info['Iqxy'], self.info, input)
-        elif not input.is_2D and callable(self.info['Iq']):
-            return PyKernel(self.info['Iq'], self.info, input)
         if self.dtype != input.dtype:
             raise TypeError("data and kernel have different types")
 …
         ctypes and some may be pure python.
         """
+        # Support pure python kernel call
+        if len(q_vectors) == 1 and callable(self.info['Iq']):
+            return PyInput(q_vectors, dtype=self.dtype)
+        elif callable(self.info['Iqxy']):
+            return PyInput(q_vectors, dtype=self.dtype)
+        else:
+            return GpuInput(q_vectors, dtype=self.dtype)
+        return GpuInput(q_vectors, dtype=self.dtype)
 # TODO: check that we don't need a destructor for buffers which go out of scope
 …
     def __call__(self, pars, pd_pars, cutoff=1e-5):
+    def __call__(self, fixed_pars, pd_pars, cutoff=1e-5):
         real = np.float32 if self.input.dtype == generate.F32 else np.float64
+        fixed = [real(p) for p in pars]
+        cutoff = real(cutoff)
+        loops = np.hstack(pd_pars) if pd_pars else np.empty(0,dtype=self.input.dtype)
+        loops = np.ascontiguousarray(loops.T, self.input.dtype).flatten()
+        Nloops = [np.uint32(len(p[0])) for p in pd_pars]
+        #print "loops",Nloops, loops
+        #import sys; print >>sys.stderr,"opencl eval",pars
+        #print "opencl eval",pars
+        if len(loops) > 2*MAX_LOOPS:
+            raise ValueError("too many polydispersity points")
         device_num = 0
+        queuei = environment().queues[device_num]
         res_bi = self.res_b[device_num]
+        queuei = environment().queues[device_num]
+        loops_bi = self.loops_b[device_num]
+        loops_l = cl.LocalMemory(len(loops.data))
+        cl.enqueue_copy(queuei, loops_bi, loops)
+        #ctx = environment().context
+        #loops_bi = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=loops)
+        args = self.input.q_buffers + [res_bi,loops_bi,loops_l,cutoff] + fixed + Nloops
+        nq = np.uint32(self.input.nq)
+        if pd_pars:
+            cutoff = real(cutoff)
+            loops_N = [np.uint32(len(p[0])) for p in pd_pars]
+            loops = np.hstack(pd_pars) if pd_pars else np.empty(0,dtype=self.input.dtype)
+            loops = np.ascontiguousarray(loops.T, self.input.dtype).flatten()
+            #print "loops",Nloops, loops
+            #import sys; print >>sys.stderr,"opencl eval",pars
+            #print "opencl eval",pars
+            if len(loops) > 2*MAX_LOOPS:
+                raise ValueError("too many polydispersity points")
+            loops_bi = self.loops_b[device_num]
+            cl.enqueue_copy(queuei, loops_bi, loops)
+            loops_l = cl.LocalMemory(len(loops.data))
+            #ctx = environment().context
+            #loops_bi = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=loops)
+            dispersed = [loops_bi, loops_l, cutoff] + loops_N
+        else:
+            dispersed = []
+        fixed = [real(p) for p in fixed_pars]
+        args = self.input.q_buffers + [res_bi, nq] + dispersed + fixed
         self.kernel(queuei, self.input.global_size, None, *args)
         cl.enqueue_copy(queuei, self.res, res_bi)

sasmodels/kerneldll.py

-                      r68d3c1b
+                      rf734e7d
 from . import generate
 from .kernelpy import PyInput, PyKernel
+from .kernelpy import PyInput, PyModel
 from .generate import F32, F64
 …
     if "VCINSTALLDIR" in os.environ:
         # MSVC compiler is available, so use it.
+        # TODO: remove intermediate OBJ file created in the directory
+        # TODO: maybe don't use randomized name for the c file
         COMPILE = "cl /nologo /Ox /MD /W3 /GS- /DNDEBUG /Tp%(source)s /openmp /link /DLL /INCREMENTAL:NO /MANIFEST /OUT:%(output)s"
         # Can't find VCOMP90.DLL (don't know why), so remove openmp support from windows compiler build
 …
     be defined without using too many resources.
     """
-    import tempfile
     source, info = generate.make(kernel_module)
+    if callable(info.get('Iq',None)):
+        return PyModel(info)
     source_files = generate.sources(info) + [info['filename']]
     newest = max(os.path.getmtime(f) for f in source_files)
 …
         status = os.system(command)
         if status != 0:
             print "compile failed.  File is in %r"%filename
+            raise RuntimeError("compile failed.  File is in %r"%filename)
         else:
             ## uncomment the following to keep the generated c file
 …
 IQ_ARGS = [c_void_p, c_void_p, c_int, c_void_p, c_double]
 IQXY_ARGS = [c_void_p, c_void_p, c_void_p, c_int, c_void_p, c_double]
+IQ_ARGS = [c_void_p, c_void_p, c_int]
+IQXY_ARGS = [c_void_p, c_void_p, c_void_p, c_int]
 class DllModel(object):
 …
         self.dll = ct.CDLL(self.dllpath)
+        pd_args_1d = [c_void_p, c_double] + [c_int]*Npd1d if Npd1d else []
+        pd_args_2d= [c_void_p, c_double] + [c_int]*Npd2d if Npd2d else []
         self.Iq = self.dll[generate.kernel_name(self.info, False)]
         self.Iq.argtypes = IQ_ARGS + [c_double]*Nfixed1d + [c_int]*Npd1d
+        self.Iq.argtypes = IQ_ARGS + pd_args_1d + [c_double]*Nfixed1d
         self.Iqxy = self.dll[generate.kernel_name(self.info, True)]
         self.Iqxy.argtypes = IQXY_ARGS + [c_double]*Nfixed2d + [c_int]*Npd2d
+        self.Iqxy.argtypes = IQXY_ARGS + pd_args_2d + [c_double]*Nfixed2d
     def __getstate__(self):
 …
     def __call__(self, input):
-        # Support pure python kernel call
-        if input.is_2D and callable(self.info['Iqxy']):
-            return PyKernel(self.info['Iqxy'], self.info, input)
-        elif not input.is_2D and callable(self.info['Iq']):
-            return PyKernel(self.info['Iq'], self.info, input)
         if self.dll is None: self._load_dll()
         kernel = self.Iqxy if input.is_2D else self.Iq
 …
         self.p_res = self.res.ctypes.data
     def __call__(self, pars, pd_pars, cutoff):
+    def __call__(self, fixed_pars, pd_pars, cutoff):
         real = np.float32 if self.input.dtype == F32 else np.float64
-        fixed = [real(p) for p in pars]
-        cutoff = real(cutoff)
-        loops = np.hstack(pd_pars)
-        loops = np.ascontiguousarray(loops.T, self.input.dtype).flatten()
-        loops_N = [np.uint32(len(p[0])) for p in pd_pars]
         nq = c_int(self.input.nq)
+        p_loops = loops.ctypes.data
+        args = self.input.q_pointers + [self.p_res, nq, p_loops, cutoff] + fixed + loops_N
+        if pd_pars:
+            cutoff = real(cutoff)
+            loops_N = [np.uint32(len(p[0])) for p in pd_pars]
+            loops = np.hstack(pd_pars)
+            loops = np.ascontiguousarray(loops.T, self.input.dtype).flatten()
+            p_loops = loops.ctypes.data
+            dispersed = [p_loops, cutoff] + loops_N
+        else:
+            dispersed = []
+        fixed = [real(p) for p in fixed_pars]
+        args = self.input.q_pointers + [self.p_res, nq] + dispersed + fixed
         #print pars
         self.kernel(*args)

sasmodels/kernelpy.py

-                      r6edb74a
+                      rf734e7d
 from .generate import F32, F64
+class PyModel(object):
+    def __init__(self, info):
+        self.info = info
+    def __call__(self, input):
+        kernel = self.info['Iqxy'] if input.is_2D else self.info['Iq']
+        return PyKernel(kernel, self.info, input)
+    def make_input(self, q_vectors):
+        return PyInput(q_vectors, dtype=F64)
+    def release(self):
+        pass
 class PyInput(object):
 …
     """
     ##########################################################
     #                                                        #
     #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
     #   !!                                              !!   #
     #   !!  KEEP THIS CODE CONSISTENT WITH GENERATE.PY  !!   #
     #   !!                                              !!   #
     #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
     #                                                        #
     ##########################################################
+    ################################################################
+    #                                                              #
+    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
+    #   !!                                                    !!   #
+    #   !!  KEEP THIS CODE CONSISTENT WITH KERNEL_TEMPLATE.C  !!   #
+    #   !!                                                    !!   #
+    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
+    #                                                              #
+    ################################################################
     weight = np.empty(len(pd), 'd')
 …
         stride = np.array([1])
         vol_weight_index = slice(None, None)
+        # keep lint happy
+        fast_value = [None]
+        fast_weight = [None]
     ret = np.zeros_like(args[0])
 …
             # Correction factor for spherical integration p(theta) I(q) sin(theta) dtheta
             #spherical_correction = abs(sin(pi*args[theta_index])) if theta_index>=0 else 1.0
             #spherical_correction = abs(cos(pi*args[theta_index]))*pi/2 if theta_index>=0 else 1.0
             spherical_correction = 1.0
+            spherical_correction = abs(cos(pi*args[theta_index]))*pi/2 if theta_index>=0 else 1.0
+            #spherical_correction = 1.0
             ret += w*I*spherical_correction*positive
             norm += w*positive

sasmodels/model_test.py

-                      r5ca9762
+                      rf734e7d
 """
+import sys
 import unittest
+import warnings
 import numpy as np
+from os.path import basename, dirname, join as joinpath
+from glob import glob
+try:
+    from .kernelcl import load_model
+except ImportError,exc:
+    warnings.warn(str(exc))
+    warnings.warn("using ctypes instead")
+    from .kerneldll import load_model
+def load_kernel(model, dtype='single'):
+    kernel = load_model(model, dtype=dtype)
+    kernel.info['defaults'] = dict((p[0],p[2]) for p in kernel.info['parameters'])
+    return kernel
+def get_weights(model, pars, name):
+    from . import weights
+    relative = name in model.info['partype']['pd-rel']
+    disperser = pars.get(name+"_pd_type", "gaussian")
+    value = pars.get(name, model.info['defaults'][name])
+    width = pars.get(name+"_pd", 0.0)
+    npts = pars.get(name+"_pd_n", 30)
+    nsigma = pars.get(name+"_pd_nsigma", 3.0)
+    v,w = weights.get_weights(
+            disperser, npts, width, nsigma,
+            value, model.info['limits'][name], relative)
+    return v,w/np.sum(w)
+def eval_kernel(kernel, q, pars, cutoff=1e-5):
+    input = kernel.make_input(q)
+    finput = kernel(input)
+    fixed_pars = [pars.get(name, finput.info['defaults'][name])
+                  for name in finput.fixed_pars]
+    pd_pars = [get_weights(finput, pars, p) for p in finput.pd_pars]
+    return finput(fixed_pars, pd_pars, cutoff)
+from .core import list_models, load_model_definition
+from .core import load_model_cl, load_model_dll
+from .core import make_kernel, call_kernel
 def annotate_exception(exc, msg):
 …
             exc.args = (" ".join((str(exc),msg)),)
+def suite():
+    root = dirname(__file__)
+    files = sorted(glob(joinpath(root, 'models', "[a-zA-Z]*.py")))
+    models_names = [basename(f)[:-3] for f in files]
+def suite(loaders, models):
     suite = unittest.TestSuite()
-    for model_name in models_names:
-        module = __import__('sasmodels.models.' + model_name)
-        module = getattr(module, 'models', None)
+        model = getattr(module, model_name, None)
+    if models[0] == 'all':
+        skip = models[1:]
+        models = list_models()
+    else:
+        skip = []
+    for model_name in models:
+        if model_name in skip: continue
+        model_definition = load_model_definition(model_name)
         smoke_tests = [[{},0.1,None],[{},(0.1,0.1),None]]
         tests = smoke_tests + getattr(model, 'tests', [])
+        tests = smoke_tests + getattr(model_definition, 'tests', [])
         if tests:
+        if tests: # in case there are no smoke tests...
             #print '------'
             #print 'found tests in', model_name
             #print '------'
+            kernel = load_kernel(model)
+            suite.addTest(ModelTestCase(model_name, kernel, tests))
+            # if ispy then use the dll loader to call pykernel
+            # don't try to call cl kernel since it will not be
+            # available in some environments.
+            ispy = callable(getattr(model_definition,'Iq', None))
+            # test using opencl if desired
+            if not ispy and ('opencl' in loaders and load_model_cl):
+                test_name = "Model: %s, Kernel: OpenCL"%model_name
+                test = ModelTestCase(test_name, model_definition,
+                                     load_model_cl, tests)
+                print "defining", test_name
+                suite.addTest(test)
+            # test using dll if desired
+            if ispy or ('dll' in loaders and load_model_dll):
+                test_name = "Model: %s, Kernel: dll"%model_name
+                test = ModelTestCase(test_name, model_definition,
+                                     load_model_dll, tests)
+                print "defining", test_name
+                suite.addTest(test)
     return suite
 …
 class ModelTestCase(unittest.TestCase):
     def __init__(self, model_name, kernel, tests):
+    def __init__(self, test_name, definition, loader, tests):
         unittest.TestCase.__init__(self)
+        self.model_name = model_name
+        self.kernel = kernel
+        self.test_name = test_name
+        self.definition = definition
+        self.loader = loader
         self.tests = tests
     def runTest(self):
+        #print '------'
+        #print self.model_name
+        #print '------'
+        print "running", self.test_name
         try:
+            model = self.loader(self.definition)
             for test in self.tests:
+                params = test[0]
+                Q = test[1]
+                I = test[2]
+                pars, Q, I = test
                 if not isinstance(Q, list):
                     Q = [Q]
 …
                 self.assertEqual(len(I), len(Q))
+                Iq = eval_kernel(self.kernel, Q_vectors, params)
+                kernel = make_kernel(model, Q_vectors)
+                Iq = call_kernel(kernel, pars)
                 self.assertGreater(len(Iq), 0)
 …
                 for q, i, iq in zip(Q, I, Iq):
                     if i is None: continue # smoke test --- make sure it runs
                     err = np.abs(i - iq)
                     nrm = np.abs(i)
+                    err = abs(i - iq)
+                    nrm = abs(i)
                     self.assertLess(err * 10**5, nrm, 'q:%s; expected:%s; actual:%s' % (q, i, iq))
         except Exception,exc:
             annotate_exception(exc, '\r\nModel: %s' % self.model_name)
+            annotate_exception(exc, self.test_name)
             raise
 def main():
+    #unittest.main()
+    runner = unittest.TextTestRunner()
+    runner.run(suite())
+    models = sys.argv[1:]
+    if models and models[0] == 'opencl':
+        if load_model_cl is None:
+            print >>sys.stderr, "opencl is not available"
+            sys.exit(1)
+        loaders = ['opencl']
+        models = models[1:]
+    elif models and models[0] == 'dll':
+        # TODO: test if compiler is available?
+        loaders = ['dll']
+        models = models[1:]
+    else:
+        loaders = ['opencl', 'dll']
+    if models:
+        runner = unittest.TextTestRunner()
+        runner.run(suite(loaders, models))
+    else:
+        print >>sys.stderr, "usage: python -m sasmodels.model_test [opencl|dll] model1 model2 ..."
+        print >>sys.stderr, "if model1 is 'all', then all except the remaining models will be tested"
 if __name__ == "__main__":

sasmodels/models/broad_peak.py

-                      rf57d123
+                      rf734e7d
 def form_volume():
     return 1
+#def form_volume():
+#    return 1
 def Iq(q, porod_scale, porod_exp, lorentz_scale, lorentz_length, peak_pos, lorentz_exp):
 …
 Iq.vectorized = True
 def Iqxy(qx, qy, porod_scale, porod_exp, lorentz_scale, lorentz_length, peak_pos, lorentz_exp):
     return Iq(sqrt(qx**2 + qy**2), porod_scale, porod_exp, lorentz_scale, lorentz_length, peak_pos, lorentz_exp)
+def Iqxy(qx, qy, *args):
+    return Iq(sqrt(qx**2 + qy**2), *args)
 # FOR VECTORIZED VERSION, UNCOMMENT THE NEXT LINE

sasmodels/models/lamellarPC.py

-                      rdc02af0
+                      rf734e7d
 source = [ "lamellarPC_kernel.c"]
-# No volume normalization despite having a volume parameter
-# This should perhaps be volume normalized?
 form_volume = """
     return 1.0;
 …
 Iqxy = """
+    // never called since no orientation or magnetic parameters.
+    return -1.0;
+    return Iq(sqrt(qx*qx+qy*qy), IQ_PARAMETERS);
     """

sasmodels/models/lamellarPC_kernel.c

rdc02af0	rf734e7d
28	28	//get the fractional part of Nlayers, to determine the "mixing" of N's
29	29
30		n1 = trunc(Nlayers); //rounds towards zero
	30	n1 = (long)trunc(Nlayers); //rounds towards zero
31	31	n2 = n1 + 1;
32	32	xn = (double)n2 - Nlayers; //fractional contribution of n1

Note: See TracChangeset for help on using the changeset viewer.

Download in other formats: