"""
Enzo-specific IO functions
"""
#-----------------------------------------------------------------------------
# Copyright (c) 2013, yt Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------
import os
import random
from contextlib import contextmanager
from yt.utilities.io_handler import \
BaseIOHandler, _axis_ids
from yt.utilities.logger import ytLogger as mylog
from yt.geometry.selection_routines import mask_fill, AlwaysSelector
from yt.extern.six import u, b, iteritems
import h5py
import numpy as np
from yt.funcs import *
# Particle fields stored on disk as code-unit densities; they must be
# multiplied by the cell volume (g.dds.prod()) to yield actual masses.
_convert_mass = ("particle_mass","mass")
# Optional per-particle-type overrides for the position dataset name
# template; lookups fall back to "particle_position_%s" (per axis).
_particle_position_names = {}
class IOHandlerPackedHDF5(BaseIOHandler):
    """I/O handler for "packed" Enzo HDF5 output, where each file holds
    multiple grids stored as /Grid%08i groups."""
    _dataset_type = "enzo_packed_3d"
    # Slice applied to on-disk data; subclasses (e.g. the ghost-zone
    # variant) override this to strip padding.
    _base = slice(None)
    # Default on-disk float dtype; refined by _read_field_names when all
    # datasets in a grid share a single dtype.
    _field_dtype = "float64"
def _read_field_names(self, grid):
if grid.filename is None: return []
f = h5py.File(grid.filename, "r")
try:
group = f["/Grid%08i" % grid.id]
except KeyError:
group = f
fields = []
dtypes = set([])
add_io = "io" in grid.ds.particle_types
for name, v in iteritems(group):
# NOTE: This won't work with 1D datasets or references.
# For all versions of Enzo I know about, we can assume all floats
# are of the same size. So, let's grab one.
if not hasattr(v, "shape") or v.dtype == "O":
continue
elif len(v.dims) == 1:
if grid.ds.dimensionality == 1:
fields.append( ("enzo", str(name)) )
elif add_io:
fields.append( ("io", str(name)) )
else:
fields.append( ("enzo", str(name)) )
dtypes.add(v.dtype)
if len(dtypes) == 1:
# Now, if everything we saw was the same dtype, we can go ahead and
# set it here. We do this because it is a HUGE savings for 32 bit
# floats, since our numpy copying/casting is way faster than
# h5py's, for some reason I don't understand. This does *not* need
# to be correct -- it will get fixed later -- it just needs to be
# okay for now.
self._field_dtype = list(dtypes)[0]
f.close()
return fields
    @property
    def _read_exception(self):
        # Exception type(s) that signal "field missing on disk" to the
        # generic read machinery.
        return (KeyError,)
    def _read_particle_coords(self, chunks, ptf):
        """Yield (ptype, (x, y, z)) position arrays for each particle type
        in *ptf*, grid by grid.  Grids in a chunk share one file handle.
        Also records the shapes of multi-dimensional particle fields in
        self._array_fields so callers can size their buffers."""
        chunks = list(chunks)
        for chunk in chunks: # These should be organized by grid filename
            f = None
            for g in chunk.objs:
                if g.filename is None: continue
                if f is None:
                    # Open lazily: one handle per chunk, first grid wins.
                    f = h5py.File(g.filename, "r")
                nap = sum(g.NumberOfActiveParticles.values())
                if g.NumberOfParticles == 0 and nap == 0:
                    continue
                ds = f.get("/Grid%08i" % g.id)
                for ptype, field_list in sorted(ptf.items()):
                    if ptype != "io":
                        # Active particles live in per-type subgroups.
                        if g.NumberOfActiveParticles[ptype] == 0: continue
                        pds = ds.get("Particles/%s" % ptype)
                    else:
                        pds = ds
                    pn = _particle_position_names.get(ptype,
                            r"particle_position_%s")
                    x, y, z = (np.asarray(pds.get(pn % ax).value, dtype="=f8")
                               for ax in 'xyz')
                    for field in field_list:
                        if np.asarray(pds[field]).ndim > 1:
                            # Remember the shape of per-particle arrays.
                            self._array_fields[field] = pds[field].shape
                    yield ptype, (x, y, z)
            if f: f.close()
    def _read_particle_fields(self, chunks, ptf, selector):
        """Yield ((ptype, field), data) arrays for particles that fall
        inside *selector*, reading grid by grid from packed HDF5 files."""
        chunks = list(chunks)
        for chunk in chunks: # These should be organized by grid filename
            f = None
            for g in chunk.objs:
                if g.filename is None: continue
                if f is None:
                    # Open lazily: one handle per chunk, first grid wins.
                    f = h5py.File(g.filename, "r")
                nap = sum(g.NumberOfActiveParticles.values())
                if g.NumberOfParticles == 0 and nap == 0:
                    continue
                ds = f.get("/Grid%08i" % g.id)
                for ptype, field_list in sorted(ptf.items()):
                    if ptype != "io":
                        # Active particles live in per-type subgroups.
                        if g.NumberOfActiveParticles[ptype] == 0: continue
                        pds = ds.get("Particles/%s" % ptype)
                    else:
                        pds = ds
                    pn = _particle_position_names.get(ptype,
                            r"particle_position_%s")
                    x, y, z = (np.asarray(pds.get(pn % ax).value, dtype="=f8")
                               for ax in 'xyz')
                    # Boolean mask of particles inside the selection.
                    mask = selector.select_points(x, y, z, 0.0)
                    if mask is None: continue
                    for field in field_list:
                        data = np.asarray(pds.get(field).value, "=f8")
                        if field in _convert_mass:
                            # On-disk "mass" is a density; multiply by the
                            # cell volume to get an actual mass.
                            data *= g.dds.prod(dtype="f8")
                        yield (ptype, field), data[mask]
            if f: f.close()
def _read_fluid_selection(self, chunks, selector, fields, size):
rv = {}
# Now we have to do something unpleasant
chunks = list(chunks)
if selector.__class__.__name__ == "GridSelector":
if not (len(chunks) == len(chunks[0].objs) == 1):
raise RuntimeError
g = chunks[0].objs[0]
f = h5py.File(u(g.filename), 'r')
if g.id in self._cached_fields:
gf = self._cached_fields[g.id]
rv.update(gf)
if len(rv) == len(fields): return rv
gds = f.get("/Grid%08i" % g.id)
for field in fields:
if field in rv:
self._hits += 1
continue
self._misses += 1
ftype, fname = field
if fname in gds:
rv[(ftype, fname)] = gds.get(fname).value.swapaxes(0,2)
else:
rv[(ftype, fname)] = np.zeros(g.ActiveDimensions)
if self._cache_on:
for gid in rv:
self._cached_fields.setdefault(gid, {})
self._cached_fields[gid].update(rv[gid])
f.close()
return rv
if size is None:
size = sum((g.count(selector) for chunk in chunks
for g in chunk.objs))
for field in fields:
ftype, fname = field
fsize = size
rv[field] = np.empty(fsize, dtype="float64")
ng = sum(len(c.objs) for c in chunks)
mylog.debug("Reading %s cells of %s fields in %s grids",
size, [f2 for f1, f2 in fields], ng)
ind = 0
h5_type = self._field_dtype
for chunk in chunks:
fid = None
for g in chunk.objs:
if g.filename is None: continue
if fid is None:
fid = h5py.h5f.open(b(g.filename), h5py.h5f.ACC_RDONLY)
gf = self._cached_fields.get(g.id, {})
data = np.empty(g.ActiveDimensions[::-1], dtype=h5_type)
data_view = data.swapaxes(0,2)
nd = 0
for field in fields:
if field in gf:
nd = g.select(selector, gf[field], rv[field], ind)
self._hits += 1
continue
self._misses += 1
ftype, fname = field
try:
node = "/Grid%08i/%s" % (g.id, fname)
dg = h5py.h5d.open(fid, b(node))
except KeyError:
if fname == "Dark_Matter_Density": continue
raise
dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data)
if self._cache_on:
self._cached_fields.setdefault(g.id, {})
# Copy because it's a view into an empty temp array
self._cached_fields[g.id][field] = data_view.copy()
nd = g.select(selector, data_view, rv[field], ind) # caches
ind += nd
if fid: fid.close()
return rv
@contextmanager
[docs] def preload(self, chunk, fields, max_size):
if len(fields) == 0:
yield self
return
old_cache_on = self._cache_on
old_cached_fields = self._cached_fields
self._cached_fields = cf = {}
self._cache_on = True
for gid in old_cached_fields:
# Will not copy numpy arrays, which is good!
cf[gid] = old_cached_fields[gid].copy()
self._hits = self._misses = 0
self._cached_fields = self._read_chunk_data(chunk, fields)
mylog.debug("(1st) Hits = % 10i Misses = % 10i",
self._hits, self._misses)
self._hits = self._misses = 0
yield self
mylog.debug("(2nd) Hits = % 10i Misses = % 10i",
self._hits, self._misses)
self._cached_fields = old_cached_fields
self._cache_on = old_cache_on
# Randomly remove some grids from the cache. Note that we're doing
# this on a grid basis, not a field basis. Performance will be
# slightly non-deterministic as a result of this, but it should roughly
# be statistically alright, assuming (as we do) that this will get
# called during largely unbalanced stuff.
if len(self._cached_fields) > max_size:
to_remove = random.sample(self._cached_fields.keys(),
len(self._cached_fields) - max_size)
mylog.debug("Purging from cache %s", len(to_remove))
for k in to_remove:
self._cached_fields.pop(k)
else:
mylog.warning("Cache size % 10i (max % 10i)",
len(self._cached_fields), max_size)
    def _read_chunk_data(self, chunk, fields):
        """Bulk-read *fields* for every grid in *chunk* and return a dict
        keyed by grid id (fluid fields), plus any particle-field entries.
        Used by preload() to warm the field cache."""
        fid = fn = None
        rv = {}
        mylog.debug("Preloading fields %s", fields)
        # Split into particles and non-particles
        fluid_fields, particle_fields = [], []
        for ftype, fname in fields:
            if ftype in self.ds.particle_types:
                particle_fields.append((ftype, fname))
            else:
                fluid_fields.append((ftype, fname))
        if len(particle_fields) > 0:
            # Particle reads go through the generic selection machinery
            # with a selector that accepts every particle.
            selector = AlwaysSelector(self.ds)
            rv.update(self._read_particle_selection(
                [chunk], selector, particle_fields))
        if len(fluid_fields) == 0: return rv
        h5_type = self._field_dtype
        for g in chunk.objs:
            rv[g.id] = gf = {}
            if g.id in self._cached_fields:
                rv[g.id].update(self._cached_fields[g.id])
            if g.filename is None: continue
            elif g.filename != fn:
                # Moved on to a different file: close the previous handle.
                if fid is not None: fid.close()
                fid = None
            if fid is None:
                # Low-level HDF5 open; reused across consecutive grids in
                # the same file.
                fid = h5py.h5f.open(b(g.filename), h5py.h5f.ACC_RDONLY)
                fn = g.filename
            # Buffer in on-disk (z, y, x) order; data_view presents it
            # transposed to (x, y, z).
            data = np.empty(g.ActiveDimensions[::-1], dtype=h5_type)
            data_view = data.swapaxes(0,2)
            for field in fluid_fields:
                if field in gf:
                    self._hits += 1
                    continue
                self._misses += 1
                ftype, fname = field
                try:
                    node = "/Grid%08i/%s" % (g.id, fname)
                    dg = h5py.h5d.open(fid, b(node))
                except KeyError:
                    # Dark matter density is legitimately absent from
                    # grids with no dark matter.
                    if fname == "Dark_Matter_Density": continue
                    raise
                dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data)
                # Copy: the buffer is reused for the next field/grid.
                gf[field] = data_view.copy()
        if fid: fid.close()
        if self._cache_on:
            for gid in rv:
                self._cached_fields.setdefault(gid, {})
                self._cached_fields[gid].update(rv[gid])
        return rv
class IOHandlerPackedHDF5GhostZones(IOHandlerPackedHDF5):
    """Packed Enzo HDF5 I/O for runs whose grids were written with ghost
    zones; _base is set to slices that strip the padding."""
    _dataset_type = "enzo_packed_3d_gz"

    def __init__(self, *args, **kwargs):
        # BUG FIX: the superclass name was misspelled
        # ("IOHandlerPackgedHDF5GhostZones"), so instantiating this class
        # raised NameError.  Use the correctly spelled class name.
        super(IOHandlerPackedHDF5GhostZones, self).__init__(*args, **kwargs)
        NGZ = self.ds.parameters.get("NumberOfGhostZones", 3)
        # Per-axis slices that drop NGZ ghost cells from each side.
        self._base = (slice(NGZ, -NGZ),
                      slice(NGZ, -NGZ),
                      slice(NGZ, -NGZ))

    def _read_raw_data_set(self, grid, field):
        """Return the full on-disk block for *field*, ghost zones included,
        transposed from disk (z, y, x) order to (x, y, z)."""
        f = h5py.File(grid.filename, "r")
        ds = f["/Grid%08i/%s" % (grid.id, field)][:].swapaxes(0,2)
        f.close()
        return ds
class IOHandlerInMemory(BaseIOHandler):
    """I/O handler for inline (in-situ) Enzo runs: fields are read straight
    from the embedded enzo module's in-memory grid dictionaries."""
    _dataset_type = "enzo_inline"

    def __init__(self, ds, ghost_zones=3):
        self.ds = ds
        # Imported here because the enzo module only exists inside an
        # inline Enzo process.
        import enzo
        self.enzo = enzo
        # Live references to Enzo's current and previous-timestep data.
        self.grids_in_memory = enzo.grid_data
        self.old_grids_in_memory = enzo.old_grid_data
        # Slices stripping the ghost zones that surround each grid patch.
        self.my_slice = (slice(ghost_zones,-ghost_zones),
                        slice(ghost_zones,-ghost_zones),
                        slice(ghost_zones,-ghost_zones))
        BaseIOHandler.__init__(self, ds)
def _read_field_names(self, grid):
fields = []
add_io = "io" in grid.ds.particle_types
for name, v in self.grids_in_memory[grid.id].items():
# NOTE: This won't work with 1D datasets or references.
if not hasattr(v, "shape") or v.dtype == "O":
continue
elif v.ndim == 1:
if grid.ds.dimensionality == 1:
fields.append( ("enzo", str(name)) )
elif add_io:
fields.append( ("io", str(name)) )
else:
fields.append( ("enzo", str(name)) )
return fields
def _read_fluid_selection(self, chunks, selector, fields, size):
rv = {}
# Now we have to do something unpleasant
chunks = list(chunks)
if selector.__class__.__name__ == "GridSelector":
if not (len(chunks) == len(chunks[0].objs) == 1):
raise RuntimeError
g = chunks[0].objs[0]
for ftype, fname in fields:
rv[(ftype, fname)] = self.grids_in_memory[grid.id][fname].swapaxes(0,2)
return rv
if size is None:
size = sum((g.count(selector) for chunk in chunks
for g in chunk.objs))
for field in fields:
ftype, fname = field
fsize = size
rv[field] = np.empty(fsize, dtype="float64")
ng = sum(len(c.objs) for c in chunks)
mylog.debug("Reading %s cells of %s fields in %s grids",
size, [f2 for f1, f2 in fields], ng)
ind = 0
for chunk in chunks:
for g in chunk.objs:
# We want a *hard error* here.
#if g.id not in self.grids_in_memory: continue
for field in fields:
ftype, fname = field
data_view = self.grids_in_memory[g.id][fname][self.my_slice].swapaxes(0,2)
nd = g.select(selector, data_view, rv[field], ind)
ind += nd
assert(ind == fsize)
return rv
def _read_particle_coords(self, chunks, ptf):
chunks = list(chunks)
for chunk in chunks: # These should be organized by grid filename
for g in chunk.objs:
if g.id not in self.grids_in_memory: continue
nap = sum(g.NumberOfActiveParticles.values())
if g.NumberOfParticles == 0 and nap == 0: continue
for ptype, field_list in sorted(ptf.items()):
x, y, z = self.grids_in_memory[g.id]['particle_position_x'], \
self.grids_in_memory[g.id]['particle_position_y'], \
self.grids_in_memory[g.id]['particle_position_z']
yield ptype, (x, y, z)
    def _read_particle_fields(self, chunks, ptf, selector):
        """Yield ((ptype, field), data) for in-memory particles that fall
        inside *selector*."""
        chunks = list(chunks)
        for chunk in chunks: # These should be organized by grid filename
            for g in chunk.objs:
                if g.id not in self.grids_in_memory: continue
                nap = sum(g.NumberOfActiveParticles.values())
                if g.NumberOfParticles == 0 and nap == 0: continue
                for ptype, field_list in sorted(ptf.items()):
                    x, y, z = self.grids_in_memory[g.id]['particle_position_x'], \
                              self.grids_in_memory[g.id]['particle_position_y'], \
                              self.grids_in_memory[g.id]['particle_position_z']
                    # Boolean mask of particles inside the selection.
                    mask = selector.select_points(x, y, z, 0.0)
                    if mask is None: continue
                    for field in field_list:
                        data = self.grids_in_memory[g.id][field]
                        if field in _convert_mass:
                            # Convert code-unit density to mass.  Use an
                            # out-of-place multiply: this array aliases
                            # live Enzo data and must not be mutated.
                            data = data * g.dds.prod(dtype="f8")
                        yield (ptype, field), data[mask]
class IOHandlerPacked2D(IOHandlerPackedHDF5):
    """Packed Enzo HDF5 I/O for 2D datasets; data are padded to 3D with a
    length-1 z axis so the generic machinery applies."""
    _dataset_type = "enzo_packed_2d"
    # The generic particle reader path is disabled for 2D data.
    _particle_reader = False
def _read_data_set(self, grid, field):
f = h5py.File(grid.filename, "r")
ds = f["/Grid%08i/%s" % (grid.id, field)][:]
f.close()
return ds.transpose()[:,:,None]
    def modify(self, field):
        # No post-read modification is needed for 2D data;
        # _read_data_set already returns it in the final layout.
        pass
    def _read_fluid_selection(self, chunks, selector, fields, size):
        """Read selected cells of 2D *fields*, padding each grid's data to
        3D (length-1 z axis) so the generic selection code works."""
        rv = {}
        # Now we have to do something unpleasant
        chunks = list(chunks)
        if selector.__class__.__name__ == "GridSelector":
            # Fast path: the selection is exactly one whole grid.
            if not (len(chunks) == len(chunks[0].objs) == 1):
                raise RuntimeError
            g = chunks[0].objs[0]
            f = h5py.File(g.filename, 'r')
            gds = f.get("/Grid%08i" % g.id)
            for ftype, fname in fields:
                rv[(ftype, fname)] = np.atleast_3d(gds.get(fname).value)
            f.close()
            return rv
        if size is None:
            size = sum((g.count(selector) for chunk in chunks
                        for g in chunk.objs))
        for field in fields:
            ftype, fname = field
            fsize = size
            rv[field] = np.empty(fsize, dtype="float64")
        ng = sum(len(c.objs) for c in chunks)
        mylog.debug("Reading %s cells of %s fields in %s grids",
                    size, [f2 for f1, f2 in fields], ng)
        ind = 0
        for chunk in chunks:
            f = None
            for g in chunk.objs:
                if f is None:
                    # Open lazily: one handle per chunk, first grid wins.
                    #print "Opening (count) %s" % g.filename
                    f = h5py.File(g.filename, "r")
                gds = f.get("/Grid%08i" % g.id)
                if gds is None:
                    # Some files keep datasets at the root rather than in
                    # a per-grid group.
                    gds = f
                for field in fields:
                    ftype, fname = field
                    ds = np.atleast_3d(gds.get(fname).value.transpose())
                    nd = g.select(selector, ds, rv[field], ind) # caches
                    ind += nd
            f.close()
        return rv
class IOHandlerPacked1D(IOHandlerPackedHDF5):
    """Packed Enzo HDF5 I/O for 1D datasets; data are padded to 3D with
    two trailing length-1 axes."""
    _dataset_type = "enzo_packed_1d"
    # The generic particle reader path is disabled for 1D data.
    _particle_reader = False

    def _read_data_set(self, grid, field):
        """Read one 1D *field* for *grid*, returned with shape (n, 1, 1).

        Uses a context manager so the file handle is released even if the
        dataset lookup raises (the previous code leaked it on error).
        """
        with h5py.File(grid.filename, "r") as f:
            ds = f["/Grid%08i/%s" % (grid.id, field)][:]
        return ds.transpose()[:,None,None]

    def modify(self, field):
        # No post-read modification is needed for 1D data.
        pass