Source code for pygeode.view

# An interface to describe a 'view' of a grid.
# Consists of a bunch of axes and ranges.
# Used in variable read routines, to simplify the mapping between input and output variables.


# Attributes:
#   axes    - a list of axis objects to use for the view (whole axis, not subsetted)
#   slices  - a list of slices describing what parts of the axes we're actually viewing
#             (can include non-uniform lists of indices in addition to slices)
#   integer_indices - an explicit array of integer indices, indicating which values along the axes have been selected.

[docs]def expand (ind, upper_bound): # {{{ '''Expand a slice or integer into an explicit array of indices''' import numpy as np # already a list? if hasattr(ind,'__len__'): return np.array(ind, dtype='int32') # a single integer index? if isinstance(ind,int): return np.array([ind], dtype='int32') # Otherwise, we should have a slice now assert isinstance(ind,slice), "unknown slicing mechanism - "+repr(ind) # Make sure start, stop, and step are positive integers # ind = fix_slice(ind, upper_bound) # return np.arange(ind.start, ind.stop, ind.step) start, stop, step = ind.start, ind.stop, ind.step if step is None: step = 1 assert step != 0 if start is None: if step > 0: start = 0 else: start = upper_bound-1 if start < 0: start += upper_bound if stop is None: if step > 0: stop = upper_bound else: stop = -1 elif stop < 0: stop += upper_bound return np.arange(start, stop, step, dtype='int32')
# }}}
[docs]def simplify (ind): # {{{ '''Simplify an index list into a slice, if possible does *not* return single integer indices, since that screws up the dimensions when applying these slices to numpy arrays.''' assert hasattr(ind,'__len__'), "not an array of indices" # Single value? if len(ind) == 1: ind = int(ind[0]) return slice(ind,ind+1,1) # otherwise, check the distance between each value - see if it's regular import numpy as np delt = np.unique(np.diff(ind)) if len(delt) != 1 or delt == 0: return ind # irregular indices, can't do anything delt = int(delt[0]) start, stop, step = int(ind[0]), int(ind[-1])+delt, delt # Special case: going in reverse, ending at the start of the array - stop will be just before index 0, i.e. a negative stop value # (because of the way negative indices are wrapped in numpy, this won't behave the way we'd want) # so, for this case only, set 'stop' equal to None, which should tell numpy to go until you can't go anymore if stop < 0: stop = None return slice(start, stop, step)
# }}}
[docs]def contiguate (ind): # {{{ '''how to contiguate(?) the slices without actually returning the contiguous slices (returns what you would pass to slice_into to get the contiguous slices)''' assert hasattr(ind,'__len__'), "not an array of indices" import numpy as np delt = np.diff(ind) # Find all non-contiguous 'jumps' stops = list(np.where(delt!=1)[0] + 1) + [len(ind)] starts = [0] + stops[:-1] return [slice(i,j) for i,j in zip(starts,stops)]
# }}} #TODO: write a set of integer indices as a superposition of slices # ideally, this should return a minimum number of slices to cover all indices # Assume the indices are in order! def indices_to_slices (ind): import numpy as np d = np.diff(ind) assert not np.any (d <= 0) slices = [] #TODO class View: # {{{ def __init__(self, axes, slices=None, force_slices=None, force_integer_indices=None): # {{{ if hasattr(axes,'axes'): axes = axes.axes self.axes = tuple(axes) self.naxes = len(axes) # if no slices given, default to the whole range if slices is None: slices = [slice(None)] * len(axes) # don't allow a generic 'None' as a slice - it causes problems in numpy assert not any(s is None for s in slices) assert len(axes) == len(slices) if force_integer_indices is not None: self.integer_indices = tuple(force_integer_indices) else: self.integer_indices = tuple(expand(sl,len(a)) for sl,a in zip(slices,axes)) if force_slices is not None: self.slices = tuple(force_slices) else: self.slices = tuple(simplify(e) for e in self.integer_indices) self.shape = tuple(len(a) for a in self.integer_indices) self.size = 1 for s in self.shape: self.size *= s # Don't allow empty views? # Remove this if it interferes with normal operations, I just added it # to shorten the stack trace when a bad mapping was taking # assert not any(len(ind) == 0 for ind in self.integer_indices), self.slices # }}} def map_to (self, axes, strict=True, order=[]): # {{{ '''Map one view onto another. Modifies the slices to work with the new list of axes. 'strict' indicates if all the output axes must correspond to an input axes (if not, the default behaviour is to include the entire axis if it's not mentioned in the input view) 'order' returns the ordering of the output axes to get the original axes back (-1 for things that can't be mapped)''' assert isinstance(order,list), "can't output order to a non-list" if hasattr(axes,'axes'): axes = axes.axes # check if a var or view was passed slices = [] order[:] = [-1] * len(self.axes) for out_i,out_a in enumerate(axes): #TODO: remove this special case, let Axis.map_to handle it if out_a in self.axes: in_i = list(self.axes).index(out_a) slices.append(self.slices[in_i]) order[in_i] = out_i continue # Look for a mapping map = None for in_i,in_a in enumerate(self.subaxes()): map = out_a.map_to(in_a) if map is not None: slices.append(simplify(map)) order[in_i] = out_i break if map is not None: continue # found a map # otherwise, nothing of interest was found, so default to a full slice on the axis assert strict is False, "Can't do a strict mapping from %s to %s"%(self.axes, axes) slices.append(slice(None)) # eventually, want: # -> out[:,x,y] = in[[0,1,2,3,4,5,6,7,8,9,10,11,0,1,2,3,4,5,6,7,8,9,10,11,...],x,y] # # so, out_times can map to in_times (climatology) # so, we would have out_time.map_to(in_time) = [0,1,2,3,4,5,6,7,8,9,10,11,0,1,2,3,4,5,6,7,8,9,10,11,...] # assume our view is on the output # view.map_to(in) = [[0,1,2,3,4,5,6,7,8,9,10,11,0,1,2,3,4,5,6,7,8,9,10,11,...],x,y] # = [out_time.map_to(in_time), out_x.map_to(in_x), ...] assert not any(s is None for s in slices) return View(axes, slices = slices) # }}} def subaxis (self, iaxis): '''Returns the specified axis, after selecting the elements that are in the current view.''' iaxis = self.index(iaxis) return self.axes[iaxis].slice [self.slices[iaxis]] def subaxes (self): return tuple(self.subaxis(i) for i in range(self.naxes)) def clip (self): # {{{ '''Return the same view, but with the axes clipped to cover only the selected region (the axes could cover more than the selection) i.e. latitude axis could be from 90S to 90N, but the view region is 45S to 45N Having a larger axis than needed could be useful in some cases, such as when the current view needs to be extended to take more values (i.e., taking a derivative might need to extend latitudes to 46N to 46S to do a finite difference) One useful case for clipping is if you want a 1:1 mapping from the view to an output array (and the output array would only be defined on the view region)''' return View (self.subaxes()) # }}} def get (self, var, pbar=False, strict=True, conform=True): # {{{ '''Applies this view to the given variable, returning the data conformed to the shape of this view. (strict, conform keywords?)''' # from pygeode.progress import FakePBar # assert not isinstance(pbar, FakePBar) import numpy as np from warnings import warn from pygeode.progress import FakePBar if pbar is False or pbar is None: pbar = FakePBar() assert strict in (True, False) assert conform in (True, False) # If we allow the output axes to contain things not found in the input axes, # then it is impossible(?) to conform the array to match the inputs assert not (strict is False and conform is True), "Can't conform an array from an unstrict get" # Map to the var's axes, and read the data # Keep track of how to reverse this operation (if it can be reversed) order = [] newview = self.map_to(var.axes, strict=strict, order=order) # It rubs the lotion on its skin # If the shape is degenerate, then we can just return the empty array here if any(s == 0 for s in newview.shape): # print 'FAIL:' # print 'input var:', repr(var), var.axes # print 'view:', self.axes, self.slices # print 'mapped view:', newview.axes, newview.slices # raise Exception return np.empty([s if o > -1 else 1 for s,o in zip(newview.shape,order)], var.dtype) assert newview.axes == var.axes # Or it gets the hose again # Only request unique elements from the input unique_indices = [np.unique(ind) for ind in newview.integer_indices] if all(np.all(u == ind) for u,ind in zip(unique_indices, newview.integer_indices)): # already unique duplicator = () unique_view = newview else: # uniquify duplicator = [np.searchsorted(u, ind) for u,ind in zip(unique_indices, newview.integer_indices)] # need to massage this into something numpy can handle duplicator = tuple([d.reshape([1]*i+[-1]+[1]*(newview.naxes-i-1)) for i,d in enumerate(duplicator)]) unique_view = View(newview.axes, force_integer_indices = unique_indices) # from pygeode.var import Var if hasattr(var,'values'): # values = var.values[unique_view.slices] # ^^ can't do this if we are slicing by integer indices. # (Refer to issue 6 - https://github.com/pygeode/pygeode/issues/6 ) # Instead, apply the slices one at a time. # (TODO: instead, conform the index array shapes the way duplicator is done # above, but need to have addictional check if we have slices or indices.) values = var.values for i,sl in enumerate(unique_view.slices): values = values[tuple([slice(None)]*i + [sl])] elif hasattr(var,'getview'): # Can we use the progress bar? if 'pbar' in var.getview.__code__.co_varnames: values = var.getview(unique_view, pbar=pbar) else: print('no pbar in', type(var)) values = var.getview(unique_view) elif hasattr(var,'getvalues'): values = np.empty(unique_view.shape, var.dtype) # Loop over contiguous pieces, build the result loop = list(unique_view.loop_contiguous()) for i, (outsl, start, count) in enumerate(loop): values[outsl] = var.getvalues(start, count) pbar.update (100./len(loop) * i) else: raise IOError("can't determine how to extract values from "+repr(var)) pbar.update(100) assert isinstance(values,(np.ndarray,np.number)), "did not receive a valid array from %s"%repr(var) if values.dtype != var.dtype: warn ("%s is supposed to return a %s, but is returning %s"%(repr(var),var.dtype.name,values.dtype.name)) values = np.asarray(values, var.dtype) if not isinstance(values,np.ndarray): warn ("%s is returning a numpy scalar instead of an ndarray - re-wrapping it now"%type(var)) values = np.array(values) # Use these unique elements to generate the expected array, including duplicates values = values[duplicator] #TODO: better error messages assert len(values.shape) == len(var.axes) if strict is True: assert values.shape == newview.shape, "expected shape %s, got shape %s (culprit is %s) view is %s"%(newview.shape,values.shape,var,self) # conform the array to the view shape if conform is True: # print "conforming!" # print "order from %s to %s is %s"%(self.axes,var.axes,order) values = values.transpose([o for o in order if o > -1]) assert len(self.shape) == len(order) values = values.reshape([s if o > -1 else 1 for s,o in zip(self.shape,order)]) # It puts the lotion in the basket # pbar.update(100) # Write-protect the values, just in case they're shared by multiple components # Can't write-protect scalars, though? if values.ndim > 0: values.flags.writeable = False # if values.max() == 0 and values.min() == 0: # warn ("%s chunk is all zeros"%repr(var)) return values # }}} ########################################################################### # View modification functions # def add_axis (self, iaxis, axis, sl): # {{{ '''Add a new axis (and slice) to the view''' iaxis = self.index(iaxis) axes = self.axes[:iaxis] + (axis,) + self.axes[iaxis:] slices = self.slices[:iaxis] + (sl,) + self.slices[iaxis:] return View(axes, slices) # }}} def remove(self, *iaxes): # {{{ ''' Remove specified axes from the view entirely''' ind = [] for iaxis in iaxes: ind.append(self.index(iaxis)) rind = [i for i in range(len(self.slices)) if i not in ind] axes = [self.axes[i] for i in rind] slices = [self.slices[i] for i in rind] integer_indices = [self.integer_indices[i] for i in rind] # return View (axes, slices) return View (axes, force_slices = slices, force_integer_indices = integer_indices) # }}} def replace_axis (self, iaxis, axis, sl=slice(None)): # {{{ iaxis = self.index(iaxis) axes = list(self.axes) axes[iaxis] = axis slices = list(self.slices) slices[iaxis] = sl return View(axes, slices) # }}} def modify_slice (self, iaxis, newslice): # {{{ '''Modify a slice of the view''' iaxis = self.index(iaxis) return View(self.axes, self.slices[:iaxis] + (newslice,) + self.slices[iaxis+1:]) # }}} def unslice(self, *iaxes): # {{{ ''' Remove any slicing on the specified axes''' from pygeode.axis import Axis slices = list(self.slices) for iaxis in iaxes: ind = self.index(iaxis) if ind >= 0: slices[ind] = slice(None) return View (self.axes, slices) # }}} def only_slice(self, *iaxes): # {{{ '''Remove slicing for all but the specified axes''' slices = [slice(None)] * len(self.slices) for iaxis in iaxes: ind = self.index(iaxis) if ind >= 0: slices[ind] = self.slices[ind] return View(self.axes, slices) # }}} #Note: this is very similar to Var.whichaxis(). #Is there some (sensible) way to remove these kinds of redundancies between Vars and Views? # (both have a list of axes associated with them, and methods to lookup / modify them) def index (self, axis): # {{{ ''' Returns index of matching axis if present; -1 otherwise. ''' from pygeode.axis import Axis if isinstance(axis, int): assert 0 <= axis <= len(self.axes) # TODO: need more strict upper bound? return axis # axis object? if isinstance(axis, Axis): try: return list(self.axes).index(axis) except ValueError: return -1 # axis class? for i, a in enumerate(self.axes): if isinstance(a, axis): return i return -1 # }}} ############# # iterators ############# def loop_contiguous(self): # {{{ '''Break a non-contiguous view up into contiguous pieces (so that we don't have to handle it at a lower level) Input: this view Generates: outsl, start, count outsl is the current slice into an array that contains the view in a contiguous piece of memory start, count are corresponding arrays giving the start of the slice & its length. NOTE: These pieces might not fit in memory (see loop_mem for handling that). The only guarantee is that the pieces will be contiguous. If the input view is already contiguous, then the slice will be over the *whole* view.''' #TODO: use itertools.product when everyone has python >= 2.6 from itertools import product #from pygeode.tools import product outslices = [contiguate(e) for e in self.integer_indices] inslices = [[simplify(ind[osl]) for osl in outsl] for ind,outsl in zip(self.integer_indices, outslices)] for outsl, insl in zip(product(*outslices), product(*inslices)): start = [sl.start if isinstance(sl,slice) else sl for sl in insl] count = [sl.stop - sl.start if isinstance(sl,slice) else 1 for sl in insl] yield outsl, start, count # }}} def _loop_mem (self, preserve=None): # {{{ '''Loop over smaller pieces of the view that fit in memory''' from pygeode import MAX_ARRAY_SIZE #TODO: use itertools.product when everyone has python >= 2.6 from itertools import product #from pygeode.tools import product # Determine the largest chunk that can be loaded, given the size constraint maxsize = MAX_ARRAY_SIZE # Get the shape of a single chunk indices = [] for i in reversed(list(range(len(self.axes)))): # Number of values along this axis we can get at a time N = self.shape[i] # total length of this axis n = min(maxsize,N) # amount we can fix in a single chunk # break up the axis into subslices input_indices = self.integer_indices[i] ind = [input_indices[j:min(j+n,N)] for j in range(0,N,n)] # Build the subslices from the last axis to the first indices = [ind] + indices # Take into account the count along this axis when looking at the faster-varying axes maxsize //= n # Loop over all combinations of slices to cover the whole view # (take the cartesian product) for ind in product(*indices): yield View(self.axes, ind) # }}} def loop_mem (self, preserve=None): # {{{ '''Loop over smaller pieces of the view that fit in memory. preserve can optionally be a list of integer indices of axes that should be loaded in their entirety in each chunk. A warning is thrown if this ends up being larger than MAX_ARRAY_SIZE, but not an exception; memory allocation problems may result in this case. ''' from pygeode import MAX_ARRAY_SIZE #TODO: use itertools.product when everyone has python >= 2.6 from itertools import product #from pygeode.tools import product from warnings import warn # Determine the largest chunk that can be loaded, given the size constraint maxsize = MAX_ARRAY_SIZE # Get the shape of a single chunk indices = [[] for s in self.shape] if preserve is not None: for i in preserve: N = self.shape[i] indices[i] = [self.integer_indices[i]] if maxsize < N: warn('Data request involves arrays larger than MAX_ARRAY_SIZE; continuing for now but memory allocation problems may result.') maxsize = 1 else: maxsize //= N others = [i for i in range(len(self.axes)) if i not in preserve] else: others = list(range(len(self.axes))) # Build the subslices from the last axis to the first for i in reversed(others): # Number of values along this axis we can get at a time N = self.shape[i] # total length of this axis n = min(maxsize,N) # amount we can fix in a single chunk # break up the axis into subslices input_indices = self.integer_indices[i] indices[i] = [input_indices[j:min(j+n,N)] for j in range(0,N,n)] # Take into account the count along this axis when looking at the faster-varying axes maxsize //= n # Loop over all combinations of slices to cover the whole view # (take the cartesian product) for ind in product(*indices): yield View(self.axes, ind) # }}} ####################### # string representations ####################### def strtok (self): # {{{ out = [] for a,s in zip(self.axes, self.slices): v = a.values[s] if hasattr(v,'__len__'): if len(v) > 0: out.append(a.name+': '+str(a.formatvalue(v[0]))+' to '+str(a.formatvalue(v[-1]))) else: out.append(a.name+': <none>') else: out.append(a.name+': '+str(a.formatvalue(v))) return out # }}} def __str__ (self): # {{{ return '\n'.join(self.strtok()) # }}} def __repr__ (self): # {{{ return 'View(' + ', '.join(self.strtok()) + ')' # }}} # }}}