Source code for pyCactus.CactusField

"""CactusField and supporting functions."""
import os
import time as pytime

import numpy as np
import pandas as pd

from common_utils import load_data, get_file_time

class CactusField(object):
    """Class for reading CACTUS field data from CSV files.

    Grid node locations X,Y,Z are assumed to be the same for all timesteps.

    Attributes
    ----------
    filenames : list
        Filenames (str) of field data files.
    num_times : int
        Number of timesteps of field data.
    times : list
        List of times (float) of field data.
    fdict : dict
        A dictionary of `{time : filename}`.
    grid_dims : dict
        A dictionary of grid dimensions.
    """

    def __init__(self, filenames, read_headers=True, read_grid_dims=True):
        """Initialize the instance."""
        self.filenames = filenames
        self.num_times = len(filenames)
        self.times = []
        self.fdict = {}

        # read the file headers for times
        if read_headers:
            tic = pytime.time()
            self.read_file_headers()
            print 'Read %d field data headers in %2.2f s' %\
                (len(self.times), pytime.time() - tic)

        # get the grid dimensions by reading the first file
        # (note that this isn't necessarily the first timestep, just the
        # first file returned by glob)
        if read_grid_dims and self.filenames:
            tic = pytime.time()
            self.grid_dims = self.read_grid_dims(self.filenames[0])
            print 'Read grid dimensions in %2.2f s' % (pytime.time() - tic)
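
    # A minimal usage sketch (hypothetical glob pattern; the field data
    # filenames depend on the CACTUS case name):
    #
    #   import glob
    #   field = CactusField(glob.glob('./output/*_FieldData_*.csv'))
    #
    # glob returns files in arbitrary order, which is why the constructor
    # reads every header to build the time-sorted `times` and `fdict`.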

    def read_file_headers(self):
        """Read the file headers and store the times to instance variables.

        Gets the times from the headers of each data file in the instance
        `filenames` attribute and stores them to the instance variables
        `self.times` and `self.fdict`.

        Returns
        -------
        times : numpy.ndarray
            Sorted array of times (float) of field data.
        fdict : dict
            A dictionary of `{time : fname}`.
        """
        # the name of the column containing time info
        time_col_name = 'Normalized Time (-)'

        self.fdict = {}
        self.times = []

        for fname in self.filenames:
            time = get_file_time(fname, time_col_name)
            self.times.append(time)
            self.fdict[time] = fname

        # sort the times
        self.times = np.sort(np.array(self.times))

        return self.times, self.fdict
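
    # For example, if two data files report 'Normalized Time (-)' values of
    # 0.50 and 0.25 (illustrative values), read_file_headers() leaves
    # self.times == array([0.25, 0.5]) and self.fdict mapping each time back
    # to its filename, so later lookups by time avoid re-reading headers.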

    def read_grid_dims(self, filename):
        """Read the grid dimensions from a single data file.

        Returns
        -------
        grid_dims : dict
            Dictionary of grid dimensions.
        """
        # load the data and extract the grid dimensions
        df = load_data(filename)
        _, grid_dims = self.fielddata_from_df(df)

        return grid_dims
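
    # The returned dictionary mirrors the `grid_dims` dict assembled in
    # fielddata_from_df(), e.g. (illustrative values only):
    #
    #   {'nx': 49, 'ny': 25, 'nz': 25,
    #    'dx': 0.25, 'dy': 0.25, 'dz': 0.25,
    #    'xlim': [-2.0, 10.0], 'ylim': [-3.0, 3.0], 'zlim': [-3.0, 3.0]}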

    def get_df_inst(self, time=None, fname=None):
        """Get the data from a specified time or filename.

        Either the time or the filename must be specified.

        Parameters
        ----------
        time : Optional[float]
            The time at which to extract the dataframe.
        fname : Optional[str]
            The filename to read (defaults to self.fdict[time]).

        Returns
        -------
        df_inst : pandas.DataFrame
            DataFrame of the data at that time.
        """
        if (time is None) and (fname is None):
            raise ValueError('Must specify either the time or the filename '
                             'of the desired data.')

        if time is not None:
            # if the time is specified, look up the corresponding filename
            fname = self.fdict[time]

        # read the CSV data file
        df_inst = load_data(fname)

        return df_inst
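
    # Usage sketch: either call returns the same DataFrame, assuming 1.0 is
    # one of the stored times (hypothetical value):
    #
    #   df = field.get_df_inst(time=1.0)
    #   df = field.get_df_inst(fname=field.fdict[1.0])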

    def fielddata_from_df(self, df):
        """Return dictionaries of the field data variables and grid dimensions.

        Takes a dataframe containing field data at a single timestep and
        returns a dictionary of the data as np.arrays, keyed by a
        descriptive variable name.

        Parameters
        ----------
        df : pandas.DataFrame

        Returns
        -------
        grid_data : dict
            Dictionary of np.arrays containing the data.
        grid_dims : dict
            Dictionary of grid dimensions.
        """
        # column names
        x_col_name = 'X/R (-)'
        y_col_name = 'Y/R (-)'
        z_col_name = 'Z/R (-)'
        u_col_name = 'U/Uinf (-)'
        v_col_name = 'V/Uinf (-)'
        w_col_name = 'W/Uinf (-)'
        ufs_col_name = 'Ufs/Uinf (-)'
        vfs_col_name = 'Vfs/Uinf (-)'
        wfs_col_name = 'Wfs/Uinf (-)'

        # extract columns
        x = df.loc[:, x_col_name].values
        y = df.loc[:, y_col_name].values
        z = df.loc[:, z_col_name].values
        u = df.loc[:, u_col_name].values
        v = df.loc[:, v_col_name].values
        w = df.loc[:, w_col_name].values

        # extract freestream velocity data if it is present
        has_vel_fs = (ufs_col_name in df and
                      vfs_col_name in df and
                      wfs_col_name in df)
        if has_vel_fs:
            ufs = df.loc[:, ufs_col_name].values
            vfs = df.loc[:, vfs_col_name].values
            wfs = df.loc[:, wfs_col_name].values

        # grid extents
        xmin = x.min()
        xmax = x.max()
        ymin = y.min()
        ymax = y.max()
        zmin = z.min()
        zmax = z.max()

        # number of grid points in each direction
        nx = len(np.unique(x))
        ny = len(np.unique(y))
        nz = len(np.unique(z))

        # grid spacing (uniform grid assumed: extent divided by the number
        # of node-to-node intervals)
        dx = (xmax - xmin)/(nx - 1)
        dy = (ymax - ymin)/(ny - 1)
        dz = (zmax - zmin)/(nz - 1)

        xlim = [xmin, xmax]
        ylim = [ymin, ymax]
        zlim = [zmin, zmax]

        # reshape to 3-D structured numpy arrays
        # (note that in Python, the final index is the fastest changing)
        X = np.float32(np.reshape(x, [nz, ny, nx]))
        Y = np.float32(np.reshape(y, [nz, ny, nx]))
        Z = np.float32(np.reshape(z, [nz, ny, nx]))
        U = np.float32(np.reshape(u, [nz, ny, nx]))
        V = np.float32(np.reshape(v, [nz, ny, nx]))
        W = np.float32(np.reshape(w, [nz, ny, nx]))

        if has_vel_fs:
            Ufs = np.float32(np.reshape(ufs, [nz, ny, nx]))
            Vfs = np.float32(np.reshape(vfs, [nz, ny, nx]))
            Wfs = np.float32(np.reshape(wfs, [nz, ny, nx]))

        # store data and dimensions as dicts
        grid_data = {'X': X, 'Y': Y, 'Z': Z,
                     'U': U, 'V': V, 'W': W}

        if has_vel_fs:
            grid_data['Ufs'] = Ufs
            grid_data['Vfs'] = Vfs
            grid_data['Wfs'] = Wfs

        grid_dims = {'nx': nx, 'ny': ny, 'nz': nz,
                     'dx': dx, 'dy': dy, 'dz': dz,
                     'xlim': xlim, 'ylim': ylim, 'zlim': zlim}

        return grid_data, grid_dims
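
    # The reshapes above assume the CSV rows are ordered with x varying
    # fastest, then y, then z (C order), i.e. flat row index
    # r = xi + nx*(yi + ny*zi). A single node value can then be recovered
    # from the structured arrays as, e.g. (hypothetical `field` and `df`):
    #
    #   grid_data, grid_dims = field.fielddata_from_df(df)
    #   u_node = grid_data['U'][zi, yi, xi]   # U/Uinf at node (xi, yi, zi)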

    def write_vtk_series(self, path, name, print_status=True):
        """Write the field data to a time series of VTK files.

        Data is written as VTK structured data (.vts), with the velocity
        data stored as a vector field. Also writes a ParaView collection
        file (.pvd) which contains the normalized time of each timestep.

        Parameters
        ----------
        path : str
            The path to which to write the VTK files.
        name : str
            The prefix of the VTK filenames.
        print_status : bool
            True to print the status of VTK conversion, False to suppress.

        Returns
        -------
        data_filenames : list
            List of the VTK filenames.
        pvd_filename : str
            .pvd collection filename.
        """
        from pyevtk.hl import gridToVTK          # evtk module
        import xml.etree.cElementTree as ET      # xml module

        # set the collection filename
        collection_fname = name + ".pvd"

        # set up blank list of the vtk filenames
        data_filenames = []

        # set up XML tree for the PVD collection file
        root = ET.Element("VTKFile")
        root.set("type", "Collection")
        collection = ET.SubElement(root, "Collection")

        # write the VTK files
        for i, time in enumerate(np.sort(self.times)):
            # get the system time (for elapsed time)
            t_start = pytime.time()

            # get the filename containing the data at the current time
            fname = self.fdict[time]

            # base name of data file
            vtk_name = name + '_' + str(i)

            # read the CSV data file
            df_inst = self.get_df_inst(time=time)
            grid_data, grid_dims = self.fielddata_from_df(df_inst)

            # unpack the grid data
            X = grid_data['X']
            Y = grid_data['Y']
            Z = grid_data['Z']
            U = grid_data['U']
            V = grid_data['V']
            W = grid_data['W']

            # save velocity field as a tuple
            velocity = (U, V, W)

            # create dictionary of point data
            pointData = {'velocity': velocity}

            # check if the file has freestream velocity data
            if 'Ufs' in grid_data and \
                    'Vfs' in grid_data and \
                    'Wfs' in grid_data:
                # get the freestream velocity data
                Ufs = grid_data['Ufs']
                Vfs = grid_data['Vfs']
                Wfs = grid_data['Wfs']

                # save as a tuple and append to the point data dictionary
                pointData['velocity_fs'] = (Ufs, Vfs, Wfs)

            # write the structured grid file
            data_filename = gridToVTK(
                os.path.abspath(os.path.join(path, vtk_name)),
                X, Y, Z, pointData=pointData)

            # append filename to list
            data_filenames.append(data_filename)

            # add elements to XML tree for the PVD collection file
            dataset = ET.SubElement(collection, "DataSet")
            dataset.set("timestep", str(time))
            dataset.set("file", os.path.basename(data_filename))

            # print status message
            elapsed_time = pytime.time() - t_start
            if print_status:
                print 'Converted: ' + fname + ' -->\n\t\t\t' + \
                    data_filename + ' in %2.2f s\n' % (elapsed_time)

        # write the collection file
        tree = ET.ElementTree(root)
        pvd_filename = os.path.abspath(os.path.join(path, collection_fname))
        tree.write(pvd_filename, xml_declaration=True)

        if print_status:
            print 'Wrote ParaView collection file: ' + pvd_filename

        return data_filenames, pvd_filename
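
    # Usage sketch (hypothetical output directory; requires the optional
    # pyevtk package):
    #
    #   vts_files, pvd_file = field.write_vtk_series('./vtk', 'field')
    #
    # Opening 'field.pvd' in ParaView then loads the whole series, with each
    # frame labeled by its normalized time.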

    def field_time_average(self, ti_start=-5, ti_end=-1):
        """Return the time-averaged field data over a specified timestep range.

        Averaging is done over the time range self.times[ti_start:ti_end];
        following Python slice semantics, the timestep at ti_end is excluded.
        Assumes that the data files contain freestream velocity columns.

        Parameters
        ----------
        ti_start : Optional[int]
            The timestep index at which to start averaging (default -5).
        ti_end : Optional[int]
            The timestep index at which to end averaging, exclusive
            (default -1).

        Returns
        -------
        data_dict_mean : dict
            A dictionary of grid coordinates and time-averaged data.
        """
        # number of timesteps
        num_times = len(self.times[ti_start:ti_end])

        # sum fields
        for ti, time in enumerate(self.times[ti_start:ti_end]):
            df_inst = self.get_df_inst(time=time)
            grid_data, grid_dims = self.fielddata_from_df(df_inst)

            if ti == 0:
                # on the first timestep, save the grid data and initialize
                # variables
                X = grid_data['X']
                Y = grid_data['Y']
                Z = grid_data['Z']
                U = grid_data['U']
                V = grid_data['V']
                W = grid_data['W']
                Ufs = grid_data['Ufs']
                Vfs = grid_data['Vfs']
                Wfs = grid_data['Wfs']
            else:
                # on subsequent timesteps, accumulate the fields
                U = U + grid_data['U']
                V = V + grid_data['V']
                W = W + grid_data['W']
                Ufs = Ufs + grid_data['Ufs']
                Vfs = Vfs + grid_data['Vfs']
                Wfs = Wfs + grid_data['Wfs']

        # then divide by the number of steps to get the average
        U = U/num_times
        V = V/num_times
        W = W/num_times
        Ufs = Ufs/num_times
        Vfs = Vfs/num_times
        Wfs = Wfs/num_times

        data_dict_mean = {'t': self.times[ti_start:ti_end],
                          'X': X, 'Y': Y, 'Z': Z,
                          'U': U, 'V': V, 'W': W,
                          'Ufs': Ufs, 'Vfs': Vfs, 'Wfs': Wfs}

        return data_dict_mean
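
    # Usage sketch: with the defaults, averaging spans times[-5:-1], i.e.
    # four timesteps ending just before the last one:
    #
    #   avg = field.field_time_average()
    #   U_mean = avg['U']   # time-averaged U/Uinf, shape (nz, ny, nx)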

    def pointdata_time_series(self, p_list, ti_start=0, ti_end=-1):
        """Extract a time series of data at a single point or points.

        Loops through the time data to extract a time series of data at a
        specified point or points. Uses the nearest grid node to each
        requested point to avoid interpolation. Assumes that the data files
        contain freestream velocity columns.

        Parameters
        ----------
        p_list : list
            List of points, where each point is a tuple of length 3.
        ti_start : Optional[int]
            The timestep index at which to start (default 0).
        ti_end : Optional[int]
            The timestep index at which to end, exclusive (default -1).

        Returns
        -------
        data_dict : dict
            Dictionary of the time series velocity data::

                't'   : times
                'u'   : x-velocity
                'v'   : y-velocity
                'w'   : z-velocity
                'ufs' : freestream x-velocity
                'vfs' : freestream y-velocity
                'wfs' : freestream z-velocity

        p_nearest : list
            List of the nearest grid points to those in `p_list` (the points
            at which data was actually returned).
        """
        # get the grid from the first timestep
        df_inst = self.get_df_inst(time=self.times[0])
        grid_data, grid_dims = self.fielddata_from_df(df_inst)

        # extract grid coordinates
        X = grid_data['X']
        Y = grid_data['Y']
        Z = grid_data['Z']

        # initialize empty lists
        kji_nearest = []
        p_nearest = []

        # loop through each point
        for p in p_list:
            xp, yp, zp = p

            # compute squared distance from the point to each grid node
            R = np.power(X-xp, 2) + np.power(Y-yp, 2) + np.power(Z-zp, 2)

            # find the indices of the node where R is a minimum
            zi, yi, xi = np.unravel_index(R.argmin(), R.shape)

            # add this index to the list of indices
            kji_nearest.append((zi, yi, xi))

            # get the actual coordinates of the nearest node and add them
            # to the list of nearest points
            p_nearest.append((X[zi, yi, xi],
                              Y[zi, yi, xi],
                              Z[zi, yi, xi]))

        # preallocate arrays
        num_times = len(self.times[ti_start:ti_end])
        num_points = len(p_list)

        u = np.zeros([num_points, num_times])
        v = np.zeros([num_points, num_times])
        w = np.zeros([num_points, num_times])
        ufs = np.zeros([num_points, num_times])
        vfs = np.zeros([num_points, num_times])
        wfs = np.zeros([num_points, num_times])

        # loop through the files and extract data
        for ti, time in enumerate(self.times[ti_start:ti_end]):
            # get the dataframe for the current time
            df_inst = self.get_df_inst(time=time)

            # extract data from the dataframe
            grid_data, grid_dims = self.fielddata_from_df(df_inst)

            for pi, coords in enumerate(kji_nearest):
                # extract data at the point and store it in the arrays
                u[pi, ti] = (grid_data['U'])[coords]
                v[pi, ti] = (grid_data['V'])[coords]
                w[pi, ti] = (grid_data['W'])[coords]
                ufs[pi, ti] = (grid_data['Ufs'])[coords]
                vfs[pi, ti] = (grid_data['Vfs'])[coords]
                wfs[pi, ti] = (grid_data['Wfs'])[coords]

        data_dict = {'t': self.times[ti_start:ti_end],
                     'u': u, 'v': v, 'w': w,
                     'ufs': ufs, 'vfs': vfs, 'wfs': wfs}

        return data_dict, p_nearest
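
    # Usage sketch: sample velocity histories at two probe locations
    # (hypothetical coordinates, normalized by rotor radius R):
    #
    #   probes = [(1.0, 0.0, 0.0), (2.0, 0.0, 0.0)]
    #   data, p_used = field.pointdata_time_series(probes)
    #   u0 = data['u'][0, :]   # x-velocity history at the node nearest probes[0]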