Source code for data_parser

import numpy as np
import matplotlib.pyplot as plt
import mumpce_py
import mumpce_py.cantera_utils as mpct
import tqdm
import mpc_parallel

# Character repeated to build the underline of the top-level title of each
# generated reStructuredText page (see the ``* len(...)`` uses below).
main_title_punc = '*'

def find_x_in_comp_string(comp_string,species):
    '''Takes a Cantera composition string and finds the number of moles of a particular species in that string.

    The string is split on commas into ``name:value`` entries. The species name
    is compared exactly (ignoring surrounding whitespace), so that looking up
    ``H2`` is never satisfied by an ``H2O`` entry.

    :param comp_string: The Cantera composition string being parsed, e.g. ``'CH4:1.0,O2:2.0'``
    :param species: The species name whose value is to be found
    :type comp_string: str
    :type species: str
    :returns: val, the number of moles of the desired species
    :raises ValueError: if the species is not present in the composition string
    '''
    target = species.strip()
    for comp in comp_string.split(','):
        name, separator, amount = comp.partition(':')
        # Entries without a ':' carry no value and cannot match
        if separator and name.strip() == target:
            return float(amount)
    # Fail with an explicit message instead of an unbound-local error
    raise ValueError(
        'species {!r} not found in composition string {!r}'.format(species, comp_string)
    )

def find_data_sets(df):
    """Takes a pandas.DataFrame object containing mumpce_py.cantera_utils compatible measurements and returns the data sources and data sets in the DataFrame.

    In the Small Hydrocarbon Database, measurements are named according to the
    convention source_setidXX, where 'source' is a string that uniquely
    identifies the origin of the data (e.g. paper, online database, etc),
    setid is a unique identifier for each set within that source, and XX is a
    unique identifier for each measurement within a set. This function will
    return all of the unique source strings and source_set strings.

    :param df: The pandas DataFrame to be parsed; its ``ID`` column is read
    :type df: pandas.DataFrame
    :returns: sources,sets, the data sources and data sets contained within the dataframe.
    """
    # Find all of the IDs that are in this DataFrame
    ids = df.ID.values

    # 'Source' is the text before the first underscore. Taking it per ID
    # (rather than indexing a 2-D array of split IDs) keeps this working when
    # IDs contain different numbers of underscores or when there are no IDs.
    source_names = [ident.split('_')[0] for ident in ids]

    # 'Source_Set' is the ID with the two-character measurement suffix removed
    set_names = [ident[:-2] for ident in ids]

    sources = np.unique(source_names)  # each unique Source
    sets = np.unique(set_names)        # each unique Source_Set
    return sources,sets
def make_ign_docs(primary_fuel_name,sources,project_list):
    '''Creates the documentation for the ignition delay times in the database

    For every entry of `sources` the per-source page (and, through it, the
    per-set pages) is generated, and one table-of-contents line of the form
    ``source + primary_fuel_name`` is collected. The collected lines are then
    handed to :py:func:`make_main_ign_page`, which writes the fuel's main page.

    :param primary_fuel_name: The English name of the fuel for which the documentation page is being created
    :param sources: The list of sources associated with this fuel
    :param project_list: The list of mumpce Projects containing ignition delay information
    :returns: main_contents, the table of contents for the primary fuel main page
    '''
    toc_lines = []
    for source in sources:
        # Writes <source><primary_fuel_name>.rst and the pages of its sets
        make_ign_subpage(source,primary_fuel_name,project_list)
        # Every entry, including the first, is preceded by a newline and one space
        toc_lines.append('\n ' + source + primary_fuel_name)
    main_contents = ''.join(toc_lines)

    make_main_ign_page(primary_fuel_name,main_contents)
    return main_contents
def make_ign_subpage(source,primary_fuel_name,project_list):
    '''Creates the documentation page for a single data source

    This function takes a data source and creates the main documentation page
    for that source, written to ``<source><primary_fuel_name>.rst``. It also
    creates the individual subpages for each data set associated with that
    source by calling :py:func:`make_ign_doc_page` on every matching project.

    :param source: The source for which the main page and subpages will be created. A project belongs to the source when `source` occurs in its name.
    :param primary_fuel_name: The English name of the fuel for which the documentation page is being created
    :param project_list: The list of mumpce Projects containing ignition delay information
    :returns: contents, the table of contents for the data source main page
    :raises IndexError: if no project in `project_list` matches `source`
    '''
    # Boilerplate for the source page.
    # NOTE(review): the template was collapsed onto a single line in the
    # reviewed text, which would turn the entire page into one rst comment.
    # The line structure below is reconstructed from rst syntax (comment,
    # title block, toctree with indented options) - confirm against the
    # generated pages.
    rst_datasource = '''.. data header title

{}

.. toctree::
   :maxdepth: 2
   :caption: Contents:

{}
'''
    # toctree entries must share the indentation of the directive options
    toc_entry_prefix = '\n   '
    set_TOC_format = '{} <{}>'

    #Get projects associated with this project source
    sets_for_this_source = [project for project in project_list if source in project.name]

    #Get the data source description from the first measurement of the first project
    source_name = sets_for_this_source[0][0].comment

    #Build the title for this source page
    title_underline = main_title_punc * len(source_name)
    docname = '\n'.join((source_name,title_underline))

    #Build the TOC for this data source, one line per project
    toc_lines = []
    for project in sets_for_this_source:
        #Make the doc page for this project and get the project identifier
        setname = make_ign_doc_page(project)
        set_ID = setname.split('_')[1]
        set_title = 'Set ' + set_ID.capitalize()

        #Make the line associated with this project in the table of contents
        toc_lines.append(toc_entry_prefix + set_TOC_format.format(set_title,setname))
    contents = ''.join(toc_lines)

    datasource_contents = rst_datasource.format(docname,contents)

    datafile = source + primary_fuel_name + '.rst'
    with open(datafile,'w') as f:
        f.write(datasource_contents)

    return contents
def make_fls_docs(primary_fuel_name,primary_fuel,project_dict):
    '''Creates the documentation for the laminar flame speeds in the database

    The per-condition pages are produced by :py:func:`make_fls_subpage`, whose
    return value is the table of contents. That table is then passed to
    :py:func:`make_main_fls_page`, which writes the fuel's main page.

    :param primary_fuel_name: The English name of the fuel for which the documentation page is being created
    :param primary_fuel: The Cantera name of the fuel
    :param project_dict: The dictionary of mumpce Projects containing laminar flame speed information
    :returns: main_contents, the table of contents for the primary fuel main page
    '''
    toc = make_fls_subpage(primary_fuel,project_dict)
    make_main_fls_page(primary_fuel_name,toc)
    return toc
def make_fls_subpage(primary_fuel,project_dict):
    '''Creates one flame speed documentation page per entry of `project_dict`

    Each key is indexed as ``key[0]`` (used as the nominal pressure) and
    ``key[1]`` (a string whose text after the first comma, with a trailing
    ``]`` removed, is parsed as the integer upper temperature of a bin). The
    upper temperature selects the nominal temperature label: below 325 -> 300,
    below 375 -> 360, below 425 -> 400, otherwise 470.

    :param primary_fuel: The Cantera name of the fuel
    :param project_dict: The dictionary of mumpce Projects containing laminar flame speed information
    :returns: contents, the table of contents listing the generated pages
    '''
    toc_lines = []
    for key in project_dict:
        projects_at_condition = project_dict[key]

        # Upper edge of the temperature bin, e.g. the '325' of '..., 325]'
        upper_edge_text = key[1].split(',')[1]
        end_T = int(upper_edge_text.rstrip(']'))
        print(end_T)

        if end_T >= 425:
            nominal_T = '470'
        elif end_T >= 375:
            nominal_T = '400'
        elif end_T >= 325:
            nominal_T = '360'
        else:
            nominal_T = '300'

        nominal_pres = key[0]
        page_name = make_fls_doc_page(primary_fuel,nominal_pres,nominal_T,projects_at_condition)
        # Every entry, including the first, is preceded by a newline and one space
        toc_lines.append('\n ' + page_name)
    contents = ''.join(toc_lines)
    return contents
def make_main_page(primary_fuel_name,data_source_table,main_rstcontents):
    '''Fills the index page boilerplate with a title and a table of contents.

    The title is the capitalized primary fuel name on one line followed by an
    underline of ``main_title_punc`` characters as long as the fuel name. The
    title fills the first ``{}`` of the boilerplate and the data source table
    the second. Nothing is written to disk here.

    :param primary_fuel_name: The English name of the primary fuel. This will be the title of the documentation page
    :param data_source_table: The table of contents that will be inserted into the documentation page
    :param main_rstcontents: A string representing the index page boilerplate, with two ``{}`` placeholders.
    :returns: main_source_contents, the information that will be written to the documentation file
    '''
    #Build the document title: heading line plus matching underline
    underline = main_title_punc * len(primary_fuel_name)
    heading = primary_fuel_name.capitalize()
    title_block = heading + '\n' + underline

    #Load the title and the data source table into the boilerplate
    return main_rstcontents.format(title_block,data_source_table)
def make_main_ign_page(primary_fuel_name,data_source_table):
    '''Creates the top level index page for a set of ignition delay time projects

    This function takes a primary fuel name and a table of sources, builds the
    page text with :py:func:`make_main_page`, prints it together with the file
    name, and writes it to ``<primary_fuel_name>_ign.rst``.

    :param primary_fuel_name: The English name of the primary fuel
    :param data_source_table: The table of data sources that will go into the index file, one entry per line
    :type primary_fuel_name: str
    :type data_source_table: str
    :returns: main_source_contents, the text written to the index file
    '''
    #Title page boilerplate.
    # NOTE(review): the template was collapsed onto a single line in the
    # reviewed text; the line structure below is reconstructed from rst syntax
    # (title block followed by a toctree with indented options) - confirm
    # against the generated pages.
    main_rstcontents = '''
{}

.. toctree::
   :maxdepth: 2
   :caption: Contents:

{}
'''
    # Re-indent the entries to the indentation of the directive options so
    # the toctree parses however the caller indented the table.
    toc_entries = [line.strip() for line in data_source_table.splitlines() if line.strip()]
    toc_block = '\n'.join('   ' + entry for entry in toc_entries)

    main_source_contents = make_main_page(primary_fuel_name,toc_block,main_rstcontents)
    print(main_source_contents)

    main_source_file = primary_fuel_name + '_ign.rst'
    print(main_source_file)
    with open(main_source_file,'w') as f:
        f.write(main_source_contents)

    return main_source_contents
def make_main_fls_page(primary_fuel_name,data_source_table):
    '''Creates the top level index page for a set of laminar flame speed projects

    This function takes a primary fuel name and a table of pages, builds the
    page text with :py:func:`make_main_page`, prints it together with the file
    name, and writes it to ``<primary_fuel_name>_flame.rst``.

    :param primary_fuel_name: The English name of the primary fuel
    :param data_source_table: The table of data sources that will go into the index file, one entry per line
    :type primary_fuel_name: str
    :type data_source_table: str
    :returns: main_source_contents, the text written to the index file
    '''
    #Title page boilerplate.
    # NOTE(review): the template was collapsed onto a single line in the
    # reviewed text; the line structure below is reconstructed from rst syntax
    # (title block followed by a toctree with indented options) - confirm
    # against the generated pages.
    main_rstcontents = '''
{}

.. toctree::
   :maxdepth: 1
   :caption: Contents:

{}
'''
    # Re-indent the entries to the indentation of the directive options so
    # the toctree parses however the caller indented the table.
    toc_entries = [line.strip() for line in data_source_table.splitlines() if line.strip()]
    toc_block = '\n'.join('   ' + entry for entry in toc_entries)

    main_source_contents = make_main_page(primary_fuel_name,toc_block,main_rstcontents)
    print(main_source_contents)

    main_source_file = primary_fuel_name + '_flame.rst'
    print(main_source_file)
    with open(main_source_file,'w') as f:
        f.write(main_source_contents)

    return main_source_contents
def load_projects(df):
    """Takes a pandas.DataFrame object containing mumpce_py.cantera_utils compatible measurements and returns a list of mumpce.Project objects built from measurements in the DataFrame.

    This function will use :py:func:`find_data_sets` to break the DataFrame
    into sources and sets. For each set, the saved project of that name is
    loaded if it exists; otherwise a new Project is built from the rows of the
    DataFrame that belong to the set, and saved.

    In the Small Hydrocarbon Database, measurements are named according to the
    convention source_setidXX, where 'source' is a string that uniquely
    identifies the origin of the data (e.g. paper, online database, etc),
    setid is a unique identifier for each set within that source, and XX is a
    unique identifier for each measurement within a set.

    :param df: The pandas DataFrame to be parsed
    :type df: pandas.DataFrame
    :returns: project_list, the list of Projects contained within the dataframe.
    """
    project_list = []
    _sources, sets = find_data_sets(df)
    ids = df.ID.values

    for data_set in sets:
        try:
            #Try to load the project from the expected save file
            project = mumpce_py.load_project(data_set)
        except FileNotFoundError:
            #The project doesn't exist, so create it and its measurement list
            print('Project {} does not exist, creating it'.format(data_set))
            # A row belongs to the set when its ID minus the two-character
            # measurement suffix equals the set name. This is the rule that
            # defines the set names, and unlike a substring test it does not
            # also select rows of a set whose name merely contains this one.
            in_this_set = [ident[:-2] == data_set for ident in ids]
            measurement_list = mpct.measurement_initialize_pd(df.iloc[in_this_set])
            project = mumpce_py.Project(measurement_list=measurement_list,name=data_set)
            project.save()
        project_list.append(project)

    return project_list
def run_project_parallel(project_to_run):
    '''Check to see if the Measurements in a Project have been evaluated and, if they haven't, evaluate them. Returns the Project.

    This is the parallel version. The name and current model value of every
    measurement are printed first. If any measurement still has a model value
    of ``None``, the whole project is evaluated with
    ``mpc_parallel.evaluate_parallel`` using 5 workers and the returned
    ``(measurement name, value)`` pairs are stored back on the measurements.
    The project is saved before it is returned.

    :param project_to_run: The Project whose measurements are to be evaluated
    :returns: project_to_run, the same Project object
    '''
    for meas in project_to_run:
        print (meas.name, meas.model_value)

    # Inspect every measurement rather than only the first one, so an empty
    # project does not raise and a partly evaluated project is completed.
    needs_evaluation = any(meas.model_value is None for meas in project_to_run)
    if needs_evaluation:
        values = mpc_parallel.evaluate_parallel(project_to_run,nWorkers=5)
        #Load model values back into the project
        for meas_name,val in values:
            project_to_run[meas_name].model_value = val

    project_to_run.save()
    return project_to_run
def run_project(project_to_run):
    '''Check to see if the Measurements in a Project have been evaluated and, if they haven't, evaluate them. Returns the Project.

    The name and current model value of every measurement are printed first.
    Every measurement whose model value is still ``None`` is then evaluated
    under a notebook progress bar labelled with the project name. If no
    measurement needs evaluating, a message is printed instead. The project is
    saved before it is returned.

    :param project_to_run: The Project whose measurements are to be evaluated
    :returns: project_to_run, the same Project object
    '''
    for meas in project_to_run:
        print (meas.name, meas.model_value)

    # Inspect every measurement rather than only the first one, so an empty
    # project does not raise and a partly evaluated project is completed.
    pending = [meas for meas in project_to_run if meas.model_value is None]
    if pending:
        # NOTE(review): tqdm_notebook is reported as deprecated in recent tqdm
        # releases in favour of tqdm.notebook.tqdm - confirm before upgrading.
        for meas in tqdm.tqdm_notebook(pending,desc=project_to_run.name):
            meas.evaluate()
    else:
        print(project_to_run.name, ' Value has been calculated')

    project_to_run.save()
    return project_to_run
def run_project_sensitivity(project_to_run):
    '''Check to see if the Measurements in a Project have been evaluated for sensitivity analysis and, if they haven't, evaluate them. Returns the Project.

    Every measurement whose ``sensitivity_list`` is still ``None`` has
    ``evaluate_sensitivity()`` called on it under a notebook progress bar
    labelled with the project name. If no measurement needs evaluating, a
    message is printed instead. The project is saved before it is returned.

    :param project_to_run: The Project whose measurements are to be evaluated
    :returns: project_to_run, the same Project object
    '''
    # Inspect every measurement rather than only the first one, so an empty
    # project does not raise and a partly evaluated project is completed.
    pending = [meas for meas in project_to_run if meas.sensitivity_list is None]
    if pending:
        # NOTE(review): tqdm_notebook is reported as deprecated in recent tqdm
        # releases in favour of tqdm.notebook.tqdm - confirm before upgrading.
        for meas in tqdm.tqdm_notebook(pending,desc=project_to_run.name):
            meas.evaluate_sensitivity()
    else:
        print(project_to_run.name, ' Sensitivity has been calculated')

    project_to_run.save()
    return project_to_run
def make_fls_plot(primary_fuel,pres,nominal_T,project_list):
    '''Creates a plot of the data for laminar flame speeds at a particular nominal condition associated with a list of :py:class:`Project` objects

    The x value of every point is the amount of `primary_fuel` read from the
    measurement's initial composition string. The model curve is taken from
    the first project in the list only; the measured values of every project
    are added as a scatter series labelled with the project name. The figure
    is saved as ``<primary_fuel>_<pres>_<nominal_T>K_plot.png`` at 300 dpi.

    :param primary_fuel: The Cantera name of the fuel
    :param pres: The nominal pressure, used only in the file name
    :param nominal_T: The nominal temperature, used only in the file name
    :param project_list: The list of Projects to plot
    :returns: fig,figname, the matplotlib figure and the name of the saved image file
    '''
    #Figure name
    figname = '{}_{}_{}K_plot.png'.format(primary_fuel,pres,nominal_T)

    fig,ax = plt.subplots()

    #Extract calculation data for plot
    project_to_run = project_list[0]
    phis = np.array([find_x_in_comp_string(meas.model.initial.composition,primary_fuel) for meas in project_to_run])
    md_v = np.array([meas.model_value for meas in project_to_run])
    # Draw the model curve in order of increasing x (as make_ign_plot does) so
    # the line does not double back when measurements are stored unsorted.
    order = np.argsort(phis)
    ax.plot(phis[order],md_v[order],'r',label='Model')

    #Extract experimental data for plot
    for project in project_list:
        phis = [find_x_in_comp_string(meas.model.initial.composition,primary_fuel) for meas in project]
        ex_v = np.array([meas.value for meas in project])
        ax.scatter(phis,ex_v,label=project.name)

    ax.legend(loc='lower center')
    fig.savefig(figname,dpi=300)

    return fig,figname


def make_data_plot(project):
    '''Alias of :py:func:`make_ign_plot`; returns its ``fig,figname`` result unchanged.'''
    return make_ign_plot(project)


def make_ign_plot(project):
    '''Creates a plot of the data for a single ignition delay set associated with a :py:class:`Project`

    Measured values (black markers) and model values (red line) are plotted
    against ``1000 / T`` on a logarithmic y axis, sorted by the x value. Both
    are passed through ``exp`` before plotting (presumably they are stored as
    natural logarithms - TODO confirm). The figure is saved as
    ``<project.name>_plot.png`` at 300 dpi.

    :param project: The Project to plot
    :returns: fig,figname, the matplotlib figure and the name of the saved image file
    '''
    figname = project.name + '_plot.png'

    temps = np.array([1000/meas.model.initial.T for meas in project])
    ex_v = np.array([meas.value for meas in project])
    md_v = np.array([meas.model_value for meas in project])

    indices = np.argsort(temps)

    fig,ax = plt.subplots()
    ax.scatter(temps[indices],np.exp(ex_v)[indices],color='k')
    ax.plot(temps[indices],np.exp(md_v)[indices],'r')
    ax.set_yscale('log')
    ax.set_xlabel('1000 / T')
    ax.set_ylabel(r'$\tau$')

    # Label the figure with the first measurement's name, without its first
    # four and last two characters
    fig.text(0.2,0.8,project[0].name[4:-2])
    fig.savefig(figname,dpi=300)

    return fig,figname
def make_fls_doc_page(primary_fuel,nominal_pres,nominal_T,project_list):
    '''Creates the documentation page for laminar flame speeds at a particular nominal condition associated with a list of :py:class:`Project` objects

    For every project the page gets a section titled with the comment of the
    project's first measurement, followed by a table with one row per
    measurement: the amount of `primary_fuel` in the initial composition, the
    initial pressure divided by 1000, and the initial temperature. The plot
    made by :py:func:`make_fls_plot` is referenced at the end. The page is
    written to ``<primary_fuel>_<nominal_pres>_<nominal_T>K_plot.rst``.

    :param primary_fuel: The Cantera name of the fuel
    :param nominal_pres: The nominal pressure, used in the title and file names
    :param nominal_T: The nominal temperature, used in the title and file names
    :param project_list: The list of Projects measured at this condition
    :returns: listname, the page name (the file name without ``.rst``)
    '''
    #This is the default format string that will be used to make the restructured text file.
    # NOTE(review): the template was collapsed onto a single line in the
    # reviewed text, which would turn the entire page into one rst comment.
    # The line structure below is reconstructed from rst syntax - confirm
    # against the generated pages.
    rststring = '''.. data title

{}

Composition
===========

.. composition tables

{}

Plot
----

.. image:: {}
'''
    # Format strings to build the condition table (an rst simple table)
    condition_line_fmt = '{:^5.2f} {:^10.1f} {:^8.0f}'
    condition_head_fmt = '{:5} {:10} {:8}'
    condition_table_outer = condition_head_fmt.format('=====','==========','========')
    condition_header = condition_head_fmt.format(' Phi ','Pres (kPa)','Temp (K)')

    #Build the document title
    doc_title_text = '{} atm pressure, {} K nominal temperature'.format(nominal_pres,nominal_T)
    title_underline = main_title_punc * len(doc_title_text)
    doctitle = '\n'.join((doc_title_text,title_underline))

    #Build the page name
    listname = '{}_{}_{}K_plot'.format(primary_fuel,nominal_pres,nominal_T)

    source_punc = '-'

    # Lines of the composition section; the leading '' makes it start with a newline
    composition_lines = ['']
    for project in project_list:
        #Get the data source for this project and build the section title
        source = project[0].comment
        source_header = '\n'.join((source,source_punc * len(source)))

        #Build the condition table for this project
        table_lines = [condition_table_outer,condition_header,condition_table_outer]
        for meas in project:
            #Extract the temperature, pressure, and fuel amount from this measurement
            phi = find_x_in_comp_string(meas.model.initial.composition,primary_fuel)
            temp = meas.model.initial.T
            pres = meas.model.initial.P/1000 # convert to kPa
            table_lines.append(condition_line_fmt.format(phi,pres,temp))
        table_lines.append(condition_table_outer)
        condition_table = '\n'.join(table_lines)

        # A blank line separates the section title from the table and the
        # table from whatever follows
        composition_lines.extend((source_header,'',condition_table,''))
    composition_data = '\n'.join(composition_lines)

    #Make the flame speed plot; only the image file name is needed here
    _fig,figname = make_fls_plot(primary_fuel,nominal_pres,nominal_T,project_list)

    rstcontents = rststring.format(doctitle,composition_data,figname)

    filename = listname + '.rst'
    with open(filename,'w') as f:
        f.write(rstcontents)

    return listname
def make_doc_page(project):
    '''Alias of :py:func:`make_ign_doc_page`; returns its result unchanged.'''
    page_name = make_ign_doc_page(project)
    return page_name
def make_ign_doc_page(project):
    '''Creates the documentation page for a single ignition delay set associated with a :py:class:`Project`

    The page contains a title built from the comment of the project's first
    measurement and the set identifier (the text between the first and second
    underscore of the project name), a composition table built from the first
    measurement's initial composition string, a table of the initial
    temperature and pressure of every measurement, and the plot made by
    :py:func:`make_data_plot`. The page is written to ``<project.name>.rst``.

    :param project: The Project to document
    :returns: the project name, which is also the page name
    '''
    # NOTE(review): the template was collapsed onto a single line in the
    # reviewed text, which would turn the entire page into one rst comment.
    # The line structure below is reconstructed from rst syntax - confirm
    # against the generated pages.
    rststring = '''.. data title

{}

.. data source

{}

Composition
-----------

.. composition table

{}

Data Table
----------

.. data table

{}

Plot
----

.. image:: {}
'''
    # Format strings for the two rst simple tables
    component_line_fmt = '{:9} {:6}'
    component_table_outer = component_line_fmt.format('=========','======')

    condition_line_fmt = '{:^8.0f} {:^10.1f}'
    condition_head_fmt = '{:8} {:10}'
    condition_table_outer = condition_head_fmt.format('========','==========')
    condition_header = condition_head_fmt.format('Temp (K)','Pres (kPa)')

    # Title: "<comment of first measurement>: Set <Id>"
    setname = project.name
    set_ID = setname.split('_')[1]
    source = project[0].comment + ': Set ' + set_ID.capitalize()
    title_underline = main_title_punc * len(source)
    doctitle = '\n'.join((source,title_underline))

    # Composition table: one row per 'name:fraction' entry, shown as a percentage
    component_header = component_line_fmt.format('Comp','Frac %')
    component_lines = [component_table_outer,component_header,component_table_outer]
    for component in project[0].model.initial.composition.split(','):
        component_split = component.split(':')
        component_name = component_split[0]
        component_frac = float(component_split[1])
        component_percent = '{:4.1f} %'.format(component_frac * 100)
        component_lines.append(component_line_fmt.format(component_name,component_percent))
    component_lines.append(component_table_outer)
    component_table = '\n'.join(component_lines)

    # Condition table: one row per measurement
    condition_lines = [condition_table_outer,condition_header,condition_table_outer]
    for meas in project:
        temp = meas.model.initial.T
        pres = meas.model.initial.P/1000 # convert to kPa
        condition_lines.append(condition_line_fmt.format(temp,pres))
    condition_lines.append(condition_table_outer)
    condition_table = '\n'.join(condition_lines)

    # Only the image file name is needed here
    _fig,figname = make_data_plot(project)

    rstcontents = rststring.format(doctitle,source,component_table,condition_table,figname)

    filename = project.name + '.rst'
    with open(filename,'w') as f:
        f.write(rstcontents)

    return project.name