Source code for data_parser

import numpy as np
import matplotlib.pyplot as plt
import mumpce_py
import mumpce_py.cantera_utils as mpct
import tqdm
import mpc_parallel

# Character repeated to build the underline of the top-level title on each generated reST page
main_title_punc = '*'

def find_x_in_comp_string(comp_string,species):
    '''Takes a Cantera composition string and finds the number of moles of a particular species in that string.
    
    The string is parsed as comma-separated ``name:value`` entries, e.g. ``'CH4:1.0,O2:2.0,N2:7.52'``. Species names are compared exactly (after stripping surrounding whitespace), so asking for ``H2`` never picks up the entry for ``H2O``. The first matching entry is returned.
    
    :param comp_string: The Cantera composition string being parsed
    :param species: The species name whose value is to be found
    :type comp_string: str
    :type species: str
    :returns: val, the number of moles of the desired species
    :raises ValueError: if the species does not appear in the composition string
    '''
    wanted = species.strip()
    for comp in comp_string.split(','):
        comp_split = comp.split(':')
        #Entries without a ':' carry no value and cannot match
        if len(comp_split) > 1 and comp_split[0].strip() == wanted:
            return float(comp_split[1])
    raise ValueError('Species {!r} not found in composition string {!r}'.format(species,comp_string))

def find_data_sets(df):
    """Takes a pandas.DataFrame object containing mumpce_py.cantera_utils compatible measurements and returns the data sources and data sets in the DataFrame.
    
    In the Small Hydrocarbon Database, measurements are named according to the convention source_setidXX, where 'source' is a string that uniquely identifies the origin of the data (e.g. paper, online database, etc), setid is a unique identifier for each set within that source, and XX is a unique identifier for each measurement within a set. This function will return all of the unique source strings and source_set strings.
    
    The source is taken as everything before the first underscore of an ID, and the set as the ID with its last two characters removed. IDs may contain different numbers of underscores.
    
    :param df: The pandas DataFrame to be parsed. Must have an ``ID`` column of strings
    :type df: pandas.DataFrame
    :returns: sources,sets, the data sources and data sets contained within the dataframe, each as a sorted numpy array of unique strings.
    
    """
    
    ids = df.ID.values #Find all of the IDs that are in this DataFrame
    
    #Only the first '_'-separated field is needed for the source. Taking it per ID
    #(rather than slicing a 2-D array of all the fields) keeps this working when
    #the IDs do not all contain the same number of underscores.
    source_names = [ident.split('_')[0] for ident in ids]
    
    #Strip the two-character measurement identifier to get Source_Set
    set_names = [ident[:-2] for ident in ids]
    
    sources = np.unique(source_names) #Find each unique Source
    sets = np.unique(set_names) #Find each unique Source_Set
    
    return sources,sets

def make_ign_docs(primary_fuel_name,sources,project_list):
    '''Creates the documentation for the ignition delay times in the database
    
    This function takes a primary fuel name and creates the main documentation page for that fuel. It will also make the subpages for each of the experimental sets described in `sources`. In addition, the function will return the table of contents for the primary fuel's main documentation page.
    
    Each table-of-contents entry is ``source + primary_fuel_name``, which is the name (without the ``.rst`` extension) of the page written by :py:func:`make_ign_subpage` for that source.
    
    :param primary_fuel_name: The English name of the fuel for which the documentation page is being created
    :param sources: The list of sources associated with this fuel
    :param project_list: The list of mumpce Projects containing ignition delay information
    :returns: main_contents, the table of contents for the primary fuel main page
    '''
    toc_entries = []
    
    for source in sources:
        #Write the page for this source, then reference it from the fuel's main page
        make_ign_subpage(source,primary_fuel_name,project_list)
        toc_entries.append(source+primary_fuel_name)
    
    #Every entry goes on its own line, indented three spaces so that it sits inside the toctree directive
    main_contents = ''.join('\n   ' + entry for entry in toc_entries)
        
    make_main_ign_page(primary_fuel_name,main_contents)
    
    return main_contents

def make_ign_subpage(source,primary_fuel_name,project_list):
    '''Creates the documentation page for a single data source
    
    This function takes a data source and creates the main documentation page for that source. It also creates the individual subpages for each data set associated with that source.
    
    The page is written to ``source + primary_fuel_name + '.rst'`` in the current working directory. The page title is the ``comment`` of the first measurement of the first matching project.
    
    :param source: The source for which the main page and subpages will be created.
    :param primary_fuel_name: The English name of the fuel for which the documentation page is being created
    :param project_list: The list of mumpce Projects containing ignition delay information
    :returns: contents, the table of contents for the data source main page
    :raises IndexError: if no project in ``project_list`` has ``source`` in its name
    '''
    #boilerplate for the subpages
    rst_datasource = (
        '.. data header title\n'
        '\n'
        '{}\n'
        '\n'
        '\n'
        '.. toctree::\n'
        '   :maxdepth: 2\n'
        '   :caption: Contents:\n'
        '{}\n'
        '\n'
    )
    
    set_TOC_format = '{} <{}>'
    
    #Get projects associated with this project source
    #NOTE(review): this is a substring test, so a source whose name is contained in
    #another source's name will also pick up that other source's projects - confirm
    #that source names are never substrings of one another.
    sets_for_this_source = [project for project in project_list if source in project.name]

    #Get the data source for this project
    first_project = sets_for_this_source[0]
    source_name = first_project[0].comment

    #Build the title for this source page
    title_underline = main_title_punc * len(source_name)
    docname = '\n'.join((source_name,title_underline))

    #Start building the TOC for this data source
    toc_lines = []
    for project in sets_for_this_source:
        #Make the doc page for this project and get the project identifier
        setname = make_ign_doc_page(project)
        set_ID = setname.split('_')[1]
        set_title = 'Set ' + set_ID.capitalize()

        #Make the line associated with this project in the table of contents
        toc_lines.append(set_TOC_format.format(set_title,setname))

    #Each TOC line starts on a new line, indented three spaces to sit inside the toctree directive
    contents = ''.join('\n   ' + line for line in toc_lines)

    datasource_contents = rst_datasource.format(docname,contents)
    datafile = source + primary_fuel_name + '.rst'
    
    #Sphinx reads its sources as UTF-8, so do not depend on the platform default encoding
    with open(datafile,'w',encoding='utf-8') as f:
        f.write(datasource_contents)
            
    return contents

def make_fls_docs(primary_fuel_name,primary_fuel,project_dict):
    '''Creates the documentation for the laminar flame speeds in the database
    
    This function takes a primary fuel name and creates the main documentation page for that fuel. It will also make the subpages for each of the nominal conditions that are the keys of `project_dict`. In addition, the function will return the table of contents for the primary fuel's main documentation page.
    
    :param primary_fuel_name: The English name of the fuel for which the documentation page is being created
    :param primary_fuel: The Cantera name of the fuel
    :param project_dict: The dictionary of mumpce Projects containing laminar flame speed information
    :returns: main_contents, the table of contents for the primary fuel main page
    '''
    #Writing the subpages produces the table of contents for the main page
    main_contents = make_fls_subpage(primary_fuel,project_dict)
        
    make_main_fls_page(primary_fuel_name,main_contents)
    
    return main_contents


def make_fls_subpage(primary_fuel,project_dict):   
    '''Creates the documentation pages for every nominal condition in a dictionary of flame speed projects
    
    Each key of `project_dict` is indexed as ``key[0]``, used as the nominal pressure, and ``key[1]``, a string of comma-separated temperature bin ends whose second entry ends in ``]`` (for example ``'(325, 375]'``). The upper bin end selects the nominal temperature: below 325 gives 300, below 375 gives 360, below 425 gives 400, and anything else gives 470. One page is written per key with :py:func:`make_fls_doc_page`.
    
    :param primary_fuel: The Cantera name of the fuel
    :param project_dict: The dictionary of mumpce Projects containing laminar flame speed information
    :returns: contents, the table of contents listing the page written for each key
    '''
    contents = ''
    
    for key in project_dict:
        project_list = project_dict[key]
        nominal_pres = key[0]
    
        #The upper end of the temperature bin decides the nominal temperature
        bin_ends = key[1].split(',')
        end_T = int(bin_ends[1].rstrip(']'))

        print(end_T)

        if end_T < 325:
            nominal_T = str(300)
        elif end_T < 375:
            nominal_T = str(360)
        elif end_T < 425:
            nominal_T = str(400)
        else:
            nominal_T = str(470)
        
        page_name = make_fls_doc_page(primary_fuel,nominal_pres,nominal_T,project_list)
        
        #Each entry starts on a new line, indented three spaces to sit inside a toctree directive
        contents = '\n   '.join((contents,page_name))

    return contents

def make_main_page(primary_fuel_name,data_source_table,main_rstcontents):
    '''Creates the table of contents for the top level index page.
    
    This function accepts a boilerplate restructured text string and inserts the primary fuel name as the title and the data source table as the table of contents. It returns the properly-formatted restructured text.
    
    :param primary_fuel_name: The English name of the primary fuel. This will be the title of the documentation page
    :param data_source_table: The table of contents that will go into the index file. This will be inserted into the documentation page as its table of contents
    :param main_rstcontents: A string representing the index page boilerplate. It must contain two ``{}`` fields: the first receives the title, the second the table of contents
    :returns: main_source_contents, the information that will be written to the documentation file
    '''

    #Build the document title. The underline is measured against the capitalized
    #text that is actually written, so it always matches the title's length.
    title_text = primary_fuel_name.capitalize()
    primary_fuel_underline = main_title_punc * len(title_text)
    primary_fuel_title = '\n'.join((title_text,primary_fuel_underline))
    
    #Load the data source table and title into the boilerplate and return it to the main function
    main_source_contents = main_rstcontents.format(primary_fuel_title,data_source_table)
    
    return main_source_contents

def make_main_ign_page(primary_fuel_name,data_source_table):
    '''Creates the top level index page for a set of ignition delay time projects
    
    This function takes a primary fuel name and a table of sources and creates the top-level index page for that fuel. The page is written to ``primary_fuel_name + '_ign.rst'`` in the current working directory; the page text and the file name are also printed.
    
    :param primary_fuel_name: The English name of the primary fuel
    :param data_source_table: The table of data sources that will go into the index file
    :type primary_fuel_name: str
    :type data_source_table: str
    :returns: main_source_contents, the text that was written to the index file
    '''
    #Title page boilerplate. Written line by line so that the trailing spaces
    #after 'Contents:' are visible and survive editing.
    main_rstcontents = (
        '\n'
        '\n'
        '{}\n'
        '\n'
        '.. toctree::\n'
        '   :maxdepth: 2\n'
        '   :caption: Contents:  \n'
        '{}\n'
        '\n'
    )
    
    main_source_contents = make_main_page(primary_fuel_name,data_source_table,main_rstcontents)
    print(main_source_contents)

    main_source_file = primary_fuel_name + '_ign.rst'
    print(main_source_file)

    #Sphinx reads its sources as UTF-8, so do not depend on the platform default encoding
    with open(main_source_file,'w',encoding='utf-8') as f:
        f.write(main_source_contents)
    return main_source_contents

def make_main_fls_page(primary_fuel_name,data_source_table):
    '''Creates the top level index page for a set of laminar flame speed projects
    
    This function takes a primary fuel name and a table of sources and creates the top-level index page for that fuel. The page is written to ``primary_fuel_name + '_flame.rst'`` in the current working directory; the page text and the file name are also printed.
    
    :param primary_fuel_name: The English name of the primary fuel
    :param data_source_table: The table of data sources that will go into the index file
    :type primary_fuel_name: str
    :type data_source_table: str
    :returns: main_source_contents, the text that was written to the index file
    '''
    #Title page boilerplate. Written line by line so that the trailing spaces
    #after 'Contents:' are visible and survive editing.
    main_rstcontents = (
        '\n'
        '\n'
        '{}\n'
        '\n'
        '.. toctree::\n'
        '   :maxdepth: 1\n'
        '   :caption: Contents:  \n'
        '{}\n'
        '\n'
    )
    
    main_source_contents = make_main_page(primary_fuel_name,data_source_table,main_rstcontents)
    print(main_source_contents)

    main_source_file = primary_fuel_name + '_flame.rst'
    print(main_source_file)

    #Sphinx reads its sources as UTF-8, so do not depend on the platform default encoding
    with open(main_source_file,'w',encoding='utf-8') as f:
        f.write(main_source_contents)
    return main_source_contents

def load_projects(df):
    """Takes a pandas.DataFrame object containing mumpce_py.cantera_utils compatible measurements and returns a list of mumpce.Project objects built from measurements in the DataFrame.
    
    This function will use :py:func:`find_data_sets` to break the DataFrame into sources and sets. Each set will then be built into its own Project, and each Measurement in the Project will be a measurement from that data set.
    
    A Project is first loaded from its save file with ``mumpce_py.load_project``. If that raises ``FileNotFoundError``, the Project is built from the rows of the DataFrame belonging to the set and then saved.
    
    In the Small Hydrocarbon Database, measurements are named according to the convention source_setidXX, where 'source' is a string that uniquely identifies the origin of the data (e.g. paper, online database, etc), setid is a unique identifier for each set within that source, and XX is a unique identifier for each measurement within a set.
    
    :param df: The pandas DataFrame to be parsed
    :type df: pandas.DataFrame
    :returns: project_list, the list of Projects contained within the dataframe.
    
    """
    project_list = []
    _sources,sets = find_data_sets(df)
    ids = df.ID.values
    
    for data_set in sets:
        try: #Try to load the project from the expected save file
            project = mumpce_py.load_project(data_set)
        except FileNotFoundError: #The project doesn't exist, so create it and its measurement list
            print('Project {} does not exist, creating it'.format(data_set))
            #A row belongs to this set when its ID, minus the two-character measurement
            #identifier, equals the set name. This is the same rule find_data_sets uses;
            #a substring test would also pull in rows from any set whose name merely
            #contains this one (e.g. 'src_a1' inside 'src_a10').
            in_this_set = [ident[:-2] == data_set for ident in ids]
            measurement_list = mpct.measurement_initialize_pd(df.iloc[in_this_set])
            project = mumpce_py.Project(measurement_list=measurement_list,name=data_set) 
            project.save()
        
        project_list.append(project)
    
    return project_list
        
def run_project_parallel(project_to_run,nWorkers=5):
    '''Check to see if the Measurements in a Project have been evaluated and, if they haven't, evaluate them. Returns the Project. This is the parallel version.
    
    The name and current model value of every Measurement are printed first. Only the first Measurement is inspected to decide whether the Project needs to be run: if its ``model_value`` is None, the whole Project is evaluated with ``mpc_parallel.evaluate_parallel``. The Project is saved before it is returned, whether or not it was run.
    
    :param project_to_run: The Project to be evaluated
    :param nWorkers: The number of workers passed to ``mpc_parallel.evaluate_parallel``. Defaults to 5
    :returns: project_to_run, the same Project that was passed in
    '''
    for meas in project_to_run:
        print (meas.name, meas.model_value)
        
    if project_to_run[0].model_value is None: #If the model value is blank, run the project
        values = mpc_parallel.evaluate_parallel(project_to_run,nWorkers=nWorkers)
        for meas_name,val in values:
            project_to_run[meas_name].model_value = val #Load model values back into the project
    project_to_run.save()
    return project_to_run

def run_project(project_to_run):
    '''Check to see if the Measurements in a Project have been evaluated and, if they haven't, evaluate them. Returns the Project.
    
    The name and current model value of every Measurement are printed first. Only the first Measurement is inspected to decide whether the Project needs to be run: if its ``model_value`` is None, every Measurement is evaluated in turn behind a progress bar. The Project is saved before it is returned, whether or not it was run.
    
    :param project_to_run: The Project to be evaluated
    :returns: project_to_run, the same Project that was passed in
    '''
    for meas in project_to_run:
        print (meas.name, meas.model_value)
        
    if project_to_run[0].model_value is None:
        #NOTE(review): recent tqdm releases deprecate tqdm.tqdm_notebook in favour of
        #tqdm.notebook.tqdm - confirm against the installed version before switching.
        for meas in tqdm.tqdm_notebook(project_to_run,desc=project_to_run.name):
            meas.evaluate()
    else:
        print(project_to_run.name, ' Value has been calculated')
    project_to_run.save()
    return project_to_run

def run_project_sensitivity(project_to_run):
    '''Check to see if the Measurements in a Project have been evaluated for sensitivity analysis and, if they haven't, evaluate them. Returns the Project.
    
    Only the first Measurement is inspected to decide whether the Project needs to be run: if its ``sensitivity_list`` is None, ``evaluate_sensitivity`` is called on every Measurement in turn behind a progress bar. The Project is saved before it is returned, whether or not it was run.
    
    :param project_to_run: The Project to be evaluated
    :returns: project_to_run, the same Project that was passed in
    '''
    if project_to_run[0].sensitivity_list is None:
        #NOTE(review): recent tqdm releases deprecate tqdm.tqdm_notebook in favour of
        #tqdm.notebook.tqdm - confirm against the installed version before switching.
        for meas in tqdm.tqdm_notebook(project_to_run,desc=project_to_run.name):
            meas.evaluate_sensitivity()
    else:
        print(project_to_run.name, ' Sensitivity has been calculated')
    project_to_run.save()
    return project_to_run

def make_fls_plot(primary_fuel,pres,nominal_T,project_list):
    '''Creates a plot of the data for laminar flame speeds at a particular nominal condition associated with a list of :py:class:`Project` objects
    
    The x value of every point is the amount of `primary_fuel` in the measurement's initial composition string. The model curve is drawn in red from the model values of the first Project only, sorted by x so that the line does not double back on itself. The experimental values of every Project are drawn as a scatter series labelled with the Project name. The figure is saved to ``'{primary_fuel}_{pres}_{nominal_T}K_plot.png'`` at 300 dpi.
    
    :param primary_fuel: The Cantera name of the fuel
    :param pres: The nominal pressure, used only to build the figure file name
    :param nominal_T: The nominal temperature, used only to build the figure file name
    :param project_list: The list of Projects to be plotted. Must contain at least one Project
    :returns: fig,figname, the matplotlib figure and the name of the file it was saved to
    '''
    #Figure name
    figname_fmt = '{}_{}_{}K_plot.png'
    figname = figname_fmt.format(primary_fuel,pres,nominal_T)
    
    fig,ax = plt.subplots()
    
    #Extract calculation data for plot
    project_to_run = project_list[0]
    phis = np.array([find_x_in_comp_string(meas.model.initial.composition,primary_fuel) for meas in project_to_run])
    md_v = np.array([meas.model_value for meas in project_to_run])
    #A line plot connects points in the order given, so order them by x first
    order = np.argsort(phis,kind='stable')
    ax.plot(phis[order],md_v[order],'r',label='Model')
    
    #Extract experimental data for plot
    for project in project_list:
        phis = [find_x_in_comp_string(meas.model.initial.composition,primary_fuel) for meas in project]
        ex_v = np.array([meas.value for meas in project])
        ax.scatter(phis,ex_v,label=project.name)
    ax.legend(loc='lower center')
    
    fig.savefig(figname,dpi=300)
    return fig,figname

def make_data_plot(project):
    '''Alias for :py:func:`make_ign_plot`; passes `project` through and returns its result unchanged.
    '''
    return make_ign_plot(project)

def make_ign_plot(project):
    '''Creates a plot of the data for a single ignition delay set associated with a :py:class:`Project`
    
    The x axis is 1000 divided by the initial temperature of each measurement. The exponential of the measured values is drawn as black points and the exponential of the model values as a red line, both ordered by x, on a logarithmic y axis. The figure is saved to ``project.name + '_plot.png'`` at 300 dpi.
    
    :param project: The Project whose measurements are to be plotted
    :returns: fig,figname, the matplotlib figure and the name of the file it was saved to
    '''
    figname = project.name + '_plot.png'
    
    inverse_temps = np.array([1000/meas.model.initial.T for meas in project])
    measured = np.array([meas.value for meas in project])
    modelled = np.array([meas.model_value for meas in project])
    
    #Order everything by 1000/T so the model line is drawn left to right
    order = np.argsort(inverse_temps)
    sorted_temps = inverse_temps[order]
    
    fig,ax = plt.subplots()
    ax.scatter(sorted_temps,np.exp(measured)[order],color='k')
    ax.plot(sorted_temps,np.exp(modelled)[order],'r')
    ax.set_yscale('log')
    
    ax.set_xlabel('1000 / T')
    ax.set_ylabel(r'$\tau$')
    
    #Annotate the figure with the first measurement's name, minus its first four and last two characters
    annotation = project[0].name[4:-2]
    fig.text(0.2,0.8,annotation)
    
    fig.savefig(figname,dpi=300)
    
    return fig,figname

def make_fls_doc_page(primary_fuel,nominal_pres,nominal_T,project_list):
    '''Creates the documentation page for laminar flame speeds at a particular nominal condition associated with a list of :py:class:`Project` objects
    
    The page contains a title, one section per Project (headed by the ``comment`` of the Project's first measurement) holding a table of the fuel amount, pressure and temperature of every measurement, and the plot made by :py:func:`make_fls_plot`. It is written to ``'{primary_fuel}_{nominal_pres}_{nominal_T}K_plot.rst'`` in the current working directory.
    
    :param primary_fuel: The Cantera name of the fuel
    :param nominal_pres: The nominal pressure, used in the page title and file name
    :param nominal_T: The nominal temperature, used in the page title and file name
    :param project_list: The list of Projects to be documented on this page
    :returns: listname, the name of the page that was written, without the ``.rst`` extension
    '''
    #This is the default format string that will be used to make the restructured text file.
    #Written line by line so that the trailing spaces in the template are visible and survive editing.
    rststring = (
        '.. data title\n'
        '\n'
        '{} \n'
        '\n'
        '\n'
        'Composition\n'
        '===========\n'
        '.. composition tables\n'
        '\n'
        '{} \n'
        ' \n'
        '\n'
        'Plot\n'
        '----\n'
        '\n'
        '.. image:: {}\n'
        '\n'
        '    '
    )
    # Format strings to build the composition table
    condition_line_fmt = '{:^5.2f}  {:^10.1f}  {:^8.0f}'
    condition_head_fmt = '{:5}  {:10}  {:8}'
    condition_table_outer = condition_head_fmt.format('=====','==========','========',)
    condition_header = condition_head_fmt.format(' Phi ','Pres (kPa)','Temp (K)')
    
    #The title of the page
    doc_title_fmt = '{} atm pressure, {} K nominal temperature'
    
    #Build the document title    
    doc_title_text = doc_title_fmt.format(nominal_pres,nominal_T)
    title_underline = main_title_punc * len(doc_title_text)
    doctitle = '\n'.join((doc_title_text,title_underline))
    
    #Build the filename
    docfile_fmt = '{}_{}_{}K_plot'
    listname = docfile_fmt.format(primary_fuel,nominal_pres,nominal_T)
    
    source_punc = '-'
    #Blank composition table
    composition_data = ''
    
    for project in project_list:
        
        #Get the data source for this project and build the section title
        source = project[0].comment
        source_underline = source_punc * len(source)
        source_header = '\n'.join((source,source_underline))
        
        #Build the header for the composition table
        composition_data = '\n'.join((composition_data,source_header,''))
        
        table_lines = [condition_table_outer,condition_header,condition_table_outer]
        
        for meas in project:
            #Extract the temperature, pressure, and equivalence ratio data from this measurement
            phi = find_x_in_comp_string(meas.model.initial.composition,primary_fuel)
            temp = meas.model.initial.T
            pres = meas.model.initial.P/1000 # convert to kPa, 
            
            #Build this line of the condition table
            table_lines.append(condition_line_fmt.format(phi,pres,temp))
        table_lines.append(condition_table_outer)
        condition_table = '\n'.join(table_lines)
        #Add the composition table to  composition data
        composition_data = '\n'.join((composition_data,condition_table,''))    
    
    #Make the flame speed plot. The figure object itself is not needed here and is
    #left open, so an interactive session can still display it.
    _fig,figname = make_fls_plot(primary_fuel,nominal_pres,nominal_T,project_list)
    
    rstcontents = rststring.format(doctitle,composition_data,figname)
    
    filename = listname + '.rst'
    
    #Sphinx reads its sources as UTF-8, so do not depend on the platform default encoding
    with open(filename,'w',encoding='utf-8') as f:
        f.write(rstcontents)
        
    return listname
    

def make_doc_page(project):
    '''Alias for :py:func:`make_ign_doc_page`; passes `project` through and returns its result unchanged.
    '''
    return make_ign_doc_page(project)

def make_ign_doc_page(project):
    '''Creates the documentation page for a single ignition delay set associated with a :py:class:`Project`
    
    The page contains a title built from the ``comment`` of the first measurement and the set identifier (the second ``_``-separated field of the Project name), a table of the initial composition of the first measurement, a table of the initial temperature and pressure of every measurement, and the plot made by :py:func:`make_data_plot`. It is written to ``project.name + '.rst'`` in the current working directory.
    
    :param project: The Project to be documented. Its name must contain at least one underscore
    :returns: the name of the Project, which is also the name of the page without the ``.rst`` extension
    '''
    
    #Page boilerplate. Written line by line so that the trailing spaces in the
    #template are visible and survive editing.
    rststring = (
        '.. data title\n'
        '\n'
        '{} \n'
        '\n'
        '.. data source\n'
        '\n'
        '{} \n'
        '\n'
        'Composition\n'
        '-----------\n'
        '.. composition table\n'
        '\n'
        '{} \n'
        '\n'
        'Data Table\n'
        '----------\n'
        '\n'
        '.. data table\n'
        '\n'
        '{} \n'
        '\n'
        'Plot\n'
        '----\n'
        '\n'
        '.. image:: {}\n'
        '\n'
        '    '
    )
    
    condition_line_fmt = '{:^8.0f}  {:^10.1f}'
    condition_head_fmt = '{:8}  {:10}'
    condition_table_outer = condition_head_fmt.format('========','==========')
    condition_header = condition_head_fmt.format('Temp (K)','Pres (kPa)')
    
    #Build the page title from the data source and the set identifier
    setname = project.name
    set_ID = setname.split('_')[1]
    source = project[0].comment + ': Set ' + set_ID.capitalize()
    title_underline = main_title_punc * len(source)
    doctitle = '\n'.join((source,title_underline))
    
    #Parse the composition of the first measurement into (name, fraction) pairs
    pj_comp = project[0].model.initial.composition
    components = []
    for component in pj_comp.split(','):
        component_split = component.split(':')
        components.append((component_split[0].strip(),float(component_split[1])))
    
    #A reST simple table is malformed when the text of a column other than the last
    #runs past its border, so widen the name column beyond the usual nine characters
    #whenever a species name needs it.
    name_width = max([9] + [len(name) for name,_frac in components])
    component_line_fmt = '{{:{}}}  {{:6}}'.format(name_width)
    component_table_outer = component_line_fmt.format('=' * name_width,'======')
    component_header = component_line_fmt.format('Comp','Frac %')
    
    component_lines = [component_table_outer,component_header,component_table_outer]
    for component_name,component_frac in components:
        component_percent = '{:4.1f} %'.format(component_frac * 100)
        component_lines.append(component_line_fmt.format(component_name,component_percent))
    component_lines.append(component_table_outer)
    component_table = '\n'.join(component_lines)
    
    #Build the table of initial conditions, one line per measurement
    condition_lines = [condition_table_outer,condition_header,condition_table_outer]
    for meas in project:
        temp = meas.model.initial.T
        pres = meas.model.initial.P/1000 # convert to kPa, 
        condition_lines.append(condition_line_fmt.format(temp,pres))
    condition_lines.append(condition_table_outer)
    condition_table = '\n'.join(condition_lines)
    
    #Make the plot. The figure object itself is not needed here and is left open,
    #so an interactive session can still display it.
    _fig,figname = make_data_plot(project)
    
    rstcontents = rststring.format(doctitle,source,component_table,condition_table,figname)
    
    filename = project.name + '.rst'
    
    #Sphinx reads its sources as UTF-8, so do not depend on the platform default encoding
    with open(filename,'w',encoding='utf-8') as f:
        f.write(rstcontents)
    
    return project.name
Source code for data_parser

Table Of Contents

Search