• Facebook
  • Twitter
  • Reddit
  • StumbleUpon
  • Digg
  • email

# read_bins.py
 
# By Nathan C. Hearn
#    July 2, 2008
#
# Reader for output from mass_bins
 
 
def find_array_location(value_array, target_value, reverse_direction=False) :
 
    lower_index = None
    value_lower_upper_fraction = None
 
    ## Assumes value array is monotonically increasing (rev_dir == False)
    ## or decreasing (rev_dir == True)
 
    num_elems = len(value_array)
 
    if (num_elems > 1) :
 
        lo_index = int(0)
        hi_index = int(num_elems - 1)
 
        min_value = None
        max_value = None
 
        if (not reverse_direction) :
 
            min_value = value_array[lo_index]
            max_value = value_array[hi_index]
 
        else :
 
            min_value = value_array[hi_index]
            max_value = value_array[lo_index]
 
        if ((not (target_value < min_value))
            and (not (target_value > max_value))) :
 
            while (lo_index < (hi_index - 1)) :
 
                test_index = (lo_index + hi_index) / 2
 
                test_value = value_array[test_index]
 
                if (not reverse_direction) :
 
                    if (test_value > target_value) :
                        hi_index = test_index
                    else :
                        lo_index = test_index
 
                else :
 
                    if (test_value < target_value) :
                        hi_index = test_index
                    else :
                        lo_index = test_index
 
            lower_index = lo_index
 
            lo_value = value_array[lo_index]
            hi_value = value_array[hi_index]
 
            value_lower_upper_fraction \
                = (target_value - lo_value) / (hi_value - lo_value)
 
    return lower_index, value_lower_upper_fraction
 
 
class BoundsAccum :
 
    def __init__(self, positive_only=False) :
 
        self.__positive_only = positive_only
 
        self.__found_bound = False
 
        self.__min_bound = None
        self.__max_bound = None
 
 
    def found_bound(self) :
        return self.__found_bound
 
    def get_min_bound(self) :
        return self.__min_bound
 
    def get_max_bound(self) :
        return self.__max_bound
 
 
    def add_data(self, value) :
 
        if ((not self.__positive_only) or (value > 0.0)) :
 
            if (self.__found_bound) :
 
                if (value < self.__min_bound) :
                    self.__min_bound = value
                elif (value > self.__max_bound) :
                    self.__max_bound = value
 
            else :
 
                self.__min_bound = value
                self.__max_bound = value
 
                self.__found_bound = True
 
 
class MassBins :
 
    def __init__(self, dataset_name, min_value, max_value, bin_value_list,
                 use_log_scale=False) :
 
        self.__dataset_name = dataset_name
 
        self.__min_value = min_value
        self.__max_value = max_value
 
        self.__bin_values = list(bin_value_list)
 
        self.__log_scale = use_log_scale
 
        self.__num_bins = len(self.__bin_values)
 
        self.__filename_list = list()
 
        self.__sim_times_list = list()
 
        self.__total_mass_list = list()
 
        self.__mass_below_list = list()
        self.__mass_above_list = list()
 
        self.__bin_masses_list = list()
 
        self.__bin_min_radii_list = list()
        self.__bin_max_radii_list = list()
 
        for bin_index in range(self.__num_bins) :
 
            self.__bin_masses_list.append(list())
 
            self.__bin_min_radii_list.append(list())
            self.__bin_max_radii_list.append(list())
 
        self.__num_entries = int(0)
 
 
    def get_dataset_name(self) :
        return self.__dataset_name
 
    def get_num_entries(self) :
        return self.__num_entries
 
    def get_min_value(self) :
        return self.__min_value
 
    def get_max_value(self) :
        return self.__max_value
 
    def get_num_bins(self) :
        return self.__num_bins
 
    def get_bin_values(self) :
        return self.__bin_values
 
    def get_filename(self, entry_index) :
        return self.__filename_list[entry_index]
 
    def get_sim_time(self, entry_index) :
        return self.__sim_times_list[entry_index]
 
    def get_total_mass(self, entry_index) :
        return self.__total_mass_list[entry_index]
 
    def get_mass_below(self, entry_index) :
        return self.__mass_below_list[entry_index]
 
    def get_mass_above(self, entry_index) :
        return self.__mass_above_list[entry_index]
 
    def get_bin_masses(self, entry_index) :
 
        ret_val = list()
 
        for bin_index in range(self.__num_bins) :
            ret_val.append(self.__bin_masses_list[bin_index][entry_index])
 
        return ret_val
 
    def get_sum_bin_masses(self, entry_index) :
 
        ret_val = None
 
        if (self.__num_bins > 0) :
 
            ret_val = float(0.0)
 
            bin_masses = self.get_bin_masses(entry_index)
 
            for mass in bin_masses :
                ret_val += mass
 
        return ret_val
 
 
    def get_bin_min_radii(self, entry_index) :
 
        min_radii = list()
 
        for bin_index in range(self.__num_bins) :
            min_radii.append(self.__bin_min_radii_list[bin_index][entry_index])
 
        return min_radii
 
    def get_bin_max_radii(self, entry_index) :
 
        max_radii = list()
 
        for bin_index in range(self.__num_bins) :
            max_radii.append(self.__bin_max_radii_list[bin_index][entry_index])
 
        return max_radii
 
 
    def get_bin_masses_enclosed_below(self, entry_index) :
 
        ret_val = list()
 
        accum_mass = self.__mass_below_list[entry_index]
 
        bin_masses = self.get_bin_masses(entry_index)
 
        for index in range(self.__num_bins) :
 
            mass = bin_masses[index]
 
            accum_mass += mass
 
            ret_val.append(accum_mass)
 
        return ret_val
 
 
    def get_bin_masses_enclosed_above(self, entry_index) :
 
        ret_val = list()
 
        accum_mass = self.__mass_above_list[entry_index]
 
        bin_masses = self.get_bin_masses(entry_index)
 
        for index in range((self.__num_bins - 1), -1, -1) :
 
            mass = bin_masses[index]
 
            accum_mass += mass
 
            ret_val.insert(0, accum_mass)
 
        return ret_val
 
 
    def get_value_from_mass_above(self, entry_index, target_mass) :
 
        ret_val = None
 
        if (target_mass > 0.0) :
 
            masses_enclosed = self.get_bin_masses_enclosed_above(entry_index)
 
            lo_index, value_lo_hi_fract \
                = find_array_location(masses_enclosed, target_mass, True)
 
            if (lo_index is not None) :
 
                lo_value = self.__bin_values[lo_index]
                hi_value = self.__bin_values[lo_index + 1]
 
                ret_val = (value_lo_hi_fract * hi_value) \
                          + ((1.0 - value_lo_hi_fract) * lo_value)
 
        return ret_val
 
 
    def get_value_from_mass_below(self, entry_index, target_mass) :
 
        ret_val = None
 
        if (target_mass > 0.0) :
 
            masses_enclosed = self.get_bin_masses_enclosed_below(entry_index)
 
            lo_index, value_lo_hi_fract \
                = find_array_location(masses_enclosed, target_mass, False)
 
            if (lo_index is not None) :
 
                lo_value = self.__bin_values[lo_index]
                hi_value = self.__bin_values[lo_index + 1]
 
                ret_val = (value_lo_hi_fract * hi_value) \
                          + ((1.0 - value_lo_hi_fract) * lo_value)
 
        return ret_val
 
 
    def get_global_min_max_mass(self, positive_only=False) :
 
        accum = BoundsAccum(positive_only)
 
        for mass in self.__mass_below_list :
            accum.add_data(mass)
 
        for mass in self.__mass_above_list :
            accum.add_data(mass)
 
        for bin_masses in self.__bin_masses_list :
            for mass in bin_masses :
                accum.add_data(mass)
 
        min_bound = accum.get_min_bound()
        max_bound = accum.get_max_bound()
 
        return min_bound, max_bound
 
 
    def get_global_min_max_radii(self) :
 
        accum = BoundsAccum()
 
        for bin_min_radii in self.__bin_min_radii_list :
            for radius in bin_min_radii :
                if (radius is not None) :
                    accum.add_data(radius)
 
        for bin_max_radii in self.__bin_max_radii_list :
            for radius in bin_max_radii :
                if (radius is not None) :
                    accum.add_data(radius)
 
        min_radius = accum.get_min_bound()
        max_radius = accum.get_max_bound()
 
        return min_radius, max_radius
 
 
    def add_data(self, filename, sim_time, total_mass, mass_below, mass_above,
                 bin_masses, min_radii=None, max_radii=None) :
 
        num_bins = len(bin_masses)
 
        if (num_bins != self.__num_bins) :
            raise RuntimeError("Bin count mismatch")
 
        self.__filename_list.append(str(filename))
 
        self.__sim_times_list.append(sim_time)
 
        self.__total_mass_list.append(total_mass)
 
        self.__mass_below_list.append(mass_below)
        self.__mass_above_list.append(mass_above)
 
        for bin_index in range(num_bins) :
 
            self.__bin_masses_list[bin_index].append(bin_masses[bin_index])
 
            min_rad = None
 
            if (min_radii is not None) :
                min_rad = min_radii[bin_index]
 
            self.__bin_min_radii_list[bin_index].append(min_rad)
 
            max_rad = None
 
            if (max_radii is not None) :
                max_rad = max_radii[bin_index]
 
            self.__bin_max_radii_list[bin_index].append(max_rad)
 
        self.__num_entries += 1
 
 
def read_next_line(f, skip_comments=True) :
 
    ret_val = None
 
    finished = False
 
    while (not finished) :
 
        line = f.readline()
 
        if (line == "") :
 
            finished = True
 
        else :
 
            if (skip_comments) :
                line = line.split("#")[0]
 
            line = line.strip()
 
            if (line != "") :
 
                ret_val = line
                finished = True
 
    return ret_val
 
 
def read_bins(filename) :
 
    f = open(filename, "r")
 
    at_eof = False
 
    ## Read the header
 
    dataset = None
 
    min_value = None
    max_value = None
 
    num_bins = None
 
    log_scale = None
 
    bin_min_values = None
    bin_center_values = None
 
    compute_radii = None
 
    in_header = True
 
    while (in_header and (not at_eof)) :
 
        current_line = read_next_line(f)
 
        if (current_line is None) :
 
            at_eof = True
 
        elif (current_line == "Data") :
 
            in_header = False
 
        else :
 
            words = current_line.split()
 
            num_words = len(words)
 
            keyword = words[0]
 
            if (keyword == "Dataset") :
 
                if (num_words != 2) :
                    raise RuntimeError("Dataset requires a value")
 
                dataset = words[1]
 
            elif (keyword == "MinValue") :
 
                if (num_words != 2) :
                    raise RuntimeError("MinValue requires a value")
 
                min_value = float(words[1])
 
            elif (keyword == "MaxValue") :
 
                if (num_words != 2) :
                    raise RuntimeError("MaxValue requires a value")
 
                max_value = float(words[1])
 
            elif (keyword == "NumBins") :
 
                if (num_words != 2) :
                    raise RuntimeError("NumBins requries a value")
 
                num_bins = int(words[1])
 
            elif (keyword == "LogScale") :
 
                if (num_words != 2) :
                    raise RuntimeError("Logscale requires a value")
 
                if (words[1] == "true") :
                    log_scale = True
                else :
                    log_scale = False
 
            elif (keyword == "BinMinValues") :
 
                if (num_words != 2) :
                    raise RuntimeError("BinMinValues requires "
                                       + "comma-separated list")
 
                val_string_list = words[1].split(",")
 
                bin_min_values = list()
 
                for val_string in val_string_list :
                    bin_min_values.append(float(val_string))
 
            elif (keyword == "BinCenterValues") :
 
                if (num_words != 2) :
                    raise RuntimeError("BinCenterValues requires "
                                       + "comma-separated list")
 
                val_string_list = words[1].split(",")
 
                bin_center_values = list()
 
                for val_string in val_string_list :
                    bin_center_values.append(float(val_string))
 
            elif (keyword == "ComputeRadii") :
 
                if (num_words != 2) :
                    raise RuntimeError("Compute radii requires a value")
 
                if (words[1] == "true") :
                    compute_radii = True
                else :
                    compute_radii = False
 
 
    if (in_header) :
        raise RuntimeError("End of file reached in header")
 
 
    if (dataset is None) :
        raise RuntimeError("Dataset value not read")
 
    if (min_value is None) :
        raise RuntimeError("Min value not read")
 
    if (max_value is None) :
        raise RuntimeError("Max value not read")
 
    if (num_bins is None) :
        raise RuntimeError("Number of bins not read")
 
    if (log_scale is None) :
        raise RuntimeError("Logscale state not read")
 
    if (bin_min_values is None) :
        raise RuntimeError("Bin min values not read")
 
    if (len(bin_min_values) != num_bins) :
        raise RuntimeError("Incompatible bin min values array")
 
    if (bin_center_values is None) :
        raise RuntimeError("Bin center values not read")
 
    if (len(bin_center_values) != num_bins) :
        raise RuntimeError("Incompatible bin center values array")
 
    if (compute_radii is None) :
        raise RuntimeError("Compute radii state not read")
 
 
    ## Process the column headings
 
    data_file_index = None
 
    sim_time_index = None
 
    total_mass_index = None
 
    mass_below_index = None
    mass_above_index = None
 
    bin_masses_index_list = list()
    bin_min_radius_index_list = list()
    bin_max_radius_index_list = list()
 
    for bin_index in range(num_bins) :
        bin_masses_index_list.append(None)
        bin_min_radius_index_list.append(None)
        bin_max_radius_index_list.append(None)
 
 
    column_line = read_next_line(f)
 
    if (column_line is None) :
        raise RuntimeError("End of file reached before column headings")
 
    column_words = column_line.split()
 
    num_columns = len(column_words)
 
    ## WE ASSUME THAT THE BIN VALUES ARE CONSEQUTIVE AND INCREASING WITH
    ## COLUMN INDEX!!
 
    for col_index in range(num_columns) :
 
        word = column_words[col_index]
 
        if (word == "File_Name") :
 
            data_file_index = col_index
 
        elif (word == "Sim_Time") :
 
            sim_time_index = col_index
 
        elif (word == "Total_Mass") :
 
            total_mass_index = col_index
 
        elif (word == "Mass_Below_Range") :
 
            mass_below_index = col_index
 
        elif (word == "Mass_Above_Range") :
 
            mass_above_index = col_index
 
        elif (word[:9] == "Mass_Bin_") :
 
            bin_index = int(word[9:])
 
            bin_masses_index_list[bin_index] = col_index
 
        elif (word[:15] == "Min_Radius_Bin_") :
 
            bin_index = int(word[15:])
 
            bin_min_radius_index_list[bin_index] = col_index
 
        elif (word[:15] == "Max_Radius_Bin_") :
 
            bin_index = int(word[15:])
 
            bin_max_radius_index_list[bin_index] = col_index
 
 
    if (data_file_index is None) :
        raise RuntimeError("Data file column not found")
 
    if (sim_time_index is None) :
        raise RuntimeError("Sim time column not found")
 
    if (total_mass_index is None) :
        raise RuntimeError("Total mass column not found")
 
    if (mass_below_index is None) :
        raise RuntimeError("Mass below range column not found")
 
    if (mass_above_index is None) :
        raise RuntimeError("Mass above range column not found")
 
    for bin_index in range(num_bins) :
 
        if (bin_masses_index_list[bin_index] is None) :
            raise RuntimeError("Bin mass column not found")
 
        if (compute_radii) :
 
            if (bin_min_radius_index_list[bin_index] is None) :
                raise RuntimeError("Bin min radius column not found")
 
            if (bin_max_radius_index_list[bin_index] is None) :
                raise RuntimeError("Bin max radius column not found")
 
 
    ## Set up the MassBins object
 
    mass_bins = MassBins(dataset, min_value, max_value, bin_center_values,
                         log_scale)
 
 
    ## Process the data
 
    while (not at_eof) :
 
        line = read_next_line(f)
 
        if (line is None) :
 
            at_eof = True
 
        else :
 
            line_words = line.split()
 
            if (len(line_words) != num_columns) :
                raise RuntimeError("Column count mismatch")
 
            data_file_name = line_words[data_file_index]
 
            sim_time = float(line_words[sim_time_index])
 
            total_mass = float(line_words[total_mass_index])
 
            mass_below = float(line_words[mass_below_index])
            mass_above = float(line_words[mass_above_index])
 
            bin_masses = list()
 
            bin_min_radii = None
            bin_max_radii = None
 
            if (compute_radii) :
                bin_min_radii = list()
                bin_max_radii = list()
 
            for bin_index in range(num_bins) :
 
                bin_mass_index = bin_masses_index_list[bin_index]
 
                bin_mass = float(line_words[bin_mass_index])
 
                bin_masses.append(bin_mass)
 
                if (compute_radii) :
 
                    bin_min_rad_index = bin_min_radius_index_list[bin_index]
                    bin_max_rad_index = bin_max_radius_index_list[bin_index]
 
                    bin_min_rad_str = line_words[bin_min_rad_index]
 
                    bin_min_rad = None
 
                    if (bin_min_rad_str != "NA") :
                        bin_min_rad = float(bin_min_rad_str)
 
                    bin_max_rad_str = line_words[bin_max_rad_index]
 
                    bin_max_rad = None
 
                    if (bin_max_rad_str != "NA") :
                        bin_max_rad = float(bin_max_rad_str)
 
                    bin_min_radii.append(bin_min_rad)
                    bin_max_radii.append(bin_max_rad)
 
            mass_bins.add_data(data_file_name, sim_time, total_mass,
                               mass_below, mass_above, bin_masses,
                               bin_min_radii, bin_max_radii)
 
    return mass_bins, compute_radii