#!/usr/bin/perl -w

#    sequence.pl: Automated sequence analysis of atmospheric oxidation pathways
#    Copyright (C) 2009 Max Planck Gesellschaft
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>
#
#    Author: Tim Butler <tim.butler@mpic.de>
#    Citation: Butler, T.M.: Automated sequence analysis of atmospheric
#              oxidation pathways, Geoscientific Model Development Discussions,
#              (volume no.), (pages), 2009
#
#    If your use of this code contributes to any published work, please
#    cite the above mentioned discussion paper
#    (available at http://www.geoscientific-model-development.net/),
#    or preferably the final revised paper in GMD, should there be one.

use strict;
use warnings;
use Getopt::Std;
use Scalar::Util qw(looks_like_number);

# Usage: sequence.pl -f min_fraction
#
# The user must specify the numerical value of min_fraction on the
# command line. Other information, such as the root species to use, and
# the locations of files specifying alternative reaction rate and
# chemical mechanism information should also be read/set here.

# Getopt::Std::getopt() stores the argument of -f in the package
# variable $opt_f, which therefore has to be declared under strict.
our $opt_f;
getopt('f');

# min_fraction is the pruning threshold used by sequence(): a pathway is
# abandoned once its rate, relative to the root destruction rate, drops
# below this value. It has to be a strictly positive number, otherwise
# the recursion through cyclic pathways is never pruned.
our $min_fraction = $opt_f;
die "Must specify a minimum fraction (usage: $0 -f min_fraction)\n"
    unless defined $min_fraction;
die "Minimum fraction must be a positive number, got '$min_fraction'\n"
    unless looks_like_number($min_fraction) && $min_fraction > 0;

# The species whose oxidation pathways are followed.
our $root_species = 'CH4';

# Echo the run configuration on standard error so that it does not mix
# with the results written to standard output.
warn "Root species: $root_species\n";
warn "Min fraction: $min_fraction\n";

# Map from each species to the reactions which consume it, derived from
# Table 1 of the GMD(D) paper cited in the file header. The reactions of
# a species are visited by sequence() in the order listed here. To use
# the sequence algorithm on other chemical systems, add code here to
# parse your own chemical reaction specification file.
our %consuming = (
    CH3O2NO2 => [ qw(R10) ],
    CH3O2    => [ qw(R02 R03 R09 R04 R05 R06 R07 R08) ],
    CH3OOH   => [ qw(R14 R15 R16) ],
    CH4      => [ qw(R01) ],
    HCHO     => [ qw(R18 R19 R20 R21) ],
    CH3NO3   => [ qw(R12 R13) ],
    CH3OH    => [ qw(R17) ],
    CH3O     => [ qw(R11) ],
);

# Map from each reaction to the species it affects (reactants as well as
# products), derived from Table 1 of the GMD(D) paper cited in the file
# header. The species of a reaction are processed by sequence() in the
# order listed here. To use the sequence algorithm on other chemical
# systems, add code here to parse your own chemical reaction
# specification file.
our %effect_list = (
    R01 => [ qw(CH3O2 OH CH4) ],
    R02 => [ qw(CH3O2 NO2 CH3O NO) ],
    R03 => [ qw(CH3O2 CH3NO3 NO) ],
    R04 => [ qw(CH3O2 NO2 NO3 CH3O) ],
    R05 => [ qw(CH3OOH CH3O2 HO2) ],
    R06 => [ qw(CH3O2 CH3O) ],
    R07 => [ qw(CH3O2 HCHO) ],
    R08 => [ qw(CH3O2 CH3OH) ],
    R09 => [ qw(CH3O2NO2 CH3O2 NO2) ],
    R10 => [ qw(CH3O2NO2 CH3O2 NO2) ],
    R11 => [ qw(HO2 HCHO CH3O) ],
    R12 => [ qw(NO2 HCHO CH3NO3 OH) ],
    R13 => [ qw(NO2 CH3O hv CH3NO3) ],
    R14 => [ qw(CH3O2 CH3OOH OH) ],
    R15 => [ qw(CH3OOH HCHO) ],
    R16 => [ qw(CH3OOH OH CH3O hv) ],
    R17 => [ qw(HO2 HCHO OH CH3OH) ],
    R18 => [ qw(HO2 CO HCHO OH) ],
    R19 => [ qw(HO2 CO HCHO hv) ],
    R20 => [ qw(CO HCHO H2 hv) ],
    R21 => [ qw(HNO3 HO2 CO HCHO NO3) ],
);

# Stoichiometric coefficient of every species in every reaction, derived
# from Table 1 of the GMD(D) paper cited in the file header: negative
# for species the reaction consumes, positive for species it produces.
# To use the sequence algorithm on other chemical systems, add code here
# to parse your own chemical reaction specification file.
our %stoichiometry = (
    R01 => { CH3O2 => 1,     CH4 => -1,    OH => -1 },
    R02 => { CH3O2 => -1,    NO => -1,     NO2 => 1,     CH3O => 1 },
    R03 => { CH3O2 => -1,    NO => -1,     CH3NO3 => 1 },
    R04 => { CH3O2 => -1,    NO2 => 1,     NO3 => -1,    CH3O => 1 },
    R05 => { CH3OOH => 1,    CH3O2 => -1,  HO2 => -1 },
    R06 => { CH3O2 => -1,    CH3O => 1 },
    R07 => { CH3O2 => -1,    HCHO => 1 },
    R08 => { CH3O2 => -1,    CH3OH => 1 },
    R09 => { CH3O2NO2 => 1,  CH3O2 => -1,  NO2 => -1 },
    R10 => { CH3O2NO2 => -1, CH3O2 => 1,   NO2 => 1 },
    R11 => { HO2 => 1,       HCHO => 1,    CH3O => -1 },
    R12 => { NO2 => 1,       HCHO => 1,    CH3NO3 => -1, OH => -1 },
    R13 => { hv => -1,       NO2 => 1,     CH3NO3 => -1, CH3O => 1 },
    R14 => { CH3O2 => 1,     CH3OOH => -1, OH => -1 },
    R15 => { CH3OOH => -1,   HCHO => 1 },
    R16 => { CH3OOH => -1,   hv => -1,     OH => 1,      CH3O => 1 },
    R17 => { HO2 => 1,       HCHO => 1,    CH3OH => -1,  OH => -1 },
    R18 => { HO2 => 1,       CO => 1,      HCHO => -1,   OH => -1 },
    R19 => { HO2 => 2,       hv => -1,     CO => 1,      HCHO => -1 },
    R20 => { hv => -1,       CO => 1,      HCHO => -1,   H2 => 1 },
    R21 => { HNO3 => 1,      HO2 => 1,     CO => 1,      NO3 => -1, HCHO => -1 },
);

# Set of intermediate species: sequence() only recurses into species
# flagged here. Please see the GMD(D) paper cited in the file header for
# a discussion of the list of intermediate species.
our %is_intermediate = map { $_ => 1 } qw(
    CH3O2NO2
    CH3O2
    CH3NO3
    HCHO
    CH3OOH
    CH3OH
    CH3O
);

# Rate of each reaction, keyed by reaction name. The numbers are taken
# from the BASE model run described in the GMD(D) paper cited in the
# file header. To use other data, you must add code here which can read
# your particular model output format.
our %reaction_rates = (
    R01 => '2131474.5',
    R02 => '1979585.375',
    R03 => '1981.56701660156',
    R04 => '173.879776000977',
    R05 => '350439.46875',
    R06 => '4002.5048828125',
    R07 => '4063.14868164062',
    R08 => '4063.14868164062',
    R09 => '2985512.75',
    R10 => '2985849.0',
    R11 => '2011548.875',
    R12 => '1636.45642089844',
    R13 => '343.086853027344',
    R14 => '212371.25',
    R15 => '111774.34375',
    R16 => '27310.56640625',
    R17 => '4074.32275390625',
    R18 => '1241057.875',
    R19 => '329493.53125',
    R20 => '559558.25',
    R21 => '12.1550312042236',
);

# Total production rate of each species, keyed by species name. The
# numbers are taken from the BASE model run described in the GMD(D)
# paper cited in the file header. To use other data, you must add code
# here which can read your particular model output format.
our %total_production = (
    CH4      => '2131474.5',
    CH3O2    => '5329694.75',
    CH3O2NO2 => '2985512.75',
    CH3O     => '2011415.41291809',
    CH3OOH   => '350439.46875',
    CH3OH    => '4063.14868164062',
    CH3NO3   => '1981.56701660156',
    HCHO     => '2133097.14660645',
);

# Root destruction rate: accumulate the rates of all reactions listed in
# %consuming for the root species. sequence() divides by this value when
# deciding whether a pathway is still worth following.
our $root_destruction = 0;
foreach my $reaction (@{ $consuming{$root_species} }) {
    $root_destruction += $reaction_rates{$reaction};
}

# Start the analysis at the root species, feeding in its total
# production rate. sequence() accumulates its results in the
# %contribution associative array (species => reaction => rate).
our %contribution;
my $root_production = $total_production{$root_species};
sequence($root_species, $root_production);

# Write the contents of the %contribution associative array to standard
# output: one paragraph per species, consisting of the species name
# followed by a tab-indented "reaction: rate" line for every reaction
# with a non-zero contribution, and a terminating blank line.
#
# Species are written in alphabetical order and reactions in ascending
# order of contribution, with the reaction name as a tie-breaker, so
# that the output is identical from run to run (plain hash order is
# not). Species without any non-zero contribution are skipped.
foreach my $species (sort keys %contribution) {
        my $rate_of = $contribution{$species};
        my @reactions =
                sort { $rate_of->{$a} <=> $rate_of->{$b} or $a cmp $b }
                grep { $rate_of->{$_} != 0 } keys %{$rate_of};
        next if @reactions == 0;
        print "$species\n";
        print "\t$_: $rate_of->{$_}\n" for @reactions;
        print "\n";
}

# Recursive implementation of the sequence algorithm described in the
# GMD(D) paper cited in the file header.
#
# Arguments:
#   $species - name of the species currently being followed
#   $rate    - rate at which $species is produced along the pathway
#              being followed
#
# For every reaction consuming $species, the reaction rate is scaled by
# the share of the species' total production represented by $rate. The
# scaled rate, multiplied by the stoichiometric coefficient, is added to
# $contribution{affected species}{reaction} for each species the
# reaction affects, and the algorithm is then applied to each affected
# intermediate species. Returns nothing.
sub sequence {
    my $species = shift;
    my $rate    = shift;

    # Prune pathways which have become insignificant relative to the
    # destruction rate of the root species. Negative rates (species
    # consumed by a reaction) also end here whenever $min_fraction is
    # positive.
    if ($rate / $root_destruction < $min_fraction) {
        return;
    }

    # Share of the total production of $species carried by this pathway
    my $share = $rate / $total_production{$species};

    for my $reaction (@{ $consuming{$species} }) {
        my $scaled_rate = $reaction_rates{$reaction} * $share;

        for my $affected (@{ $effect_list{$reaction} }) {
            my $delta = $stoichiometry{$reaction}{$affected} * $scaled_rate;
            $contribution{$affected}{$reaction} += $delta;

            # Only intermediate species are followed any further
            if ($is_intermediate{$affected}) {
                sequence($affected, $delta);
            }
        }
    }

    return;
}
