<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing with OASIS Tables v3.0 20080202//EN" "journalpub-oasis3.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:oasis="http://docs.oasis-open.org/ns/oasis-exchange/table" xml:lang="en" dtd-version="3.0" article-type="research-article"><?xmltex \makeatother\@nolinetrue\makeatletter?><?xmltex \bartext{Development and technical paper}?>
  <front>
    <journal-meta><journal-id journal-id-type="publisher">GMD</journal-id><journal-title-group>
    <journal-title>Geoscientific Model Development</journal-title>
    <abbrev-journal-title abbrev-type="publisher">GMD</abbrev-journal-title><abbrev-journal-title abbrev-type="nlm-ta">Geosci. Model Dev.</abbrev-journal-title>
  </journal-title-group><issn pub-type="epub">1991-9603</issn><publisher>
    <publisher-name>Copernicus Publications</publisher-name>
    <publisher-loc>Göttingen, Germany</publisher-loc>
  </publisher></journal-meta>
    <article-meta>
      <article-id pub-id-type="doi">10.5194/gmd-15-7791-2022</article-id><title-group><article-title>Development of a regional feature selection-based machine learning system (RFSML v1.0) for air pollution forecasting over China</article-title><alt-title>Development of RFSML for air pollution forecasting over China</alt-title>
      </title-group><?xmltex \runningtitle{Development of RFSML for air pollution forecasting over China}?><?xmltex \runningauthor{L.~Fang et al.}?>
      <contrib-group>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>Fang</surname><given-names>Li</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="yes" rid="aff1">
          <name><surname>Jin</surname><given-names>Jianbing</given-names></name>
          <email>jianbing.jin@nuist.edu.cn</email>
        <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0002-2868-9343">https://orcid.org/0000-0002-2868-9343</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff2">
          <name><surname>Segers</surname><given-names>Arjo</given-names></name>
          
        <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0002-1319-0195">https://orcid.org/0000-0002-1319-0195</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff3 aff4">
          <name><surname>Lin</surname><given-names>Hai Xiang</given-names></name>
          
        <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0002-1653-4854">https://orcid.org/0000-0002-1653-4854</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>Pang</surname><given-names>Mijie</given-names></name>
          
        <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0001-9773-0488">https://orcid.org/0000-0001-9773-0488</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff5">
          <name><surname>Xiao</surname><given-names>Cong</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff4">
          <name><surname>Deng</surname><given-names>Tuo</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="yes" rid="aff1">
          <name><surname>Liao</surname><given-names>Hong</given-names></name>
          <email>hongliao@nuist.edu.cn</email>
        </contrib>
        <aff id="aff1"><label>1</label><institution>Jiangsu Key Laboratory of Atmospheric Environment Monitoring and Pollution Control, Jiangsu Collaborative Innovation Center of Atmospheric Environment and Equipment Technology, School of Environmental Science and Engineering,<?xmltex \hack{\break}?> Nanjing University of Information Science and Technology, Nanjing, Jiangsu, China</institution>
        </aff>
        <aff id="aff2"><label>2</label><institution>TNO, Department of Climate, Air and Sustainability, Utrecht, the Netherlands</institution>
        </aff>
        <aff id="aff3"><label>3</label><institution>Institute of Environmental Sciences, Leiden University, Leiden, the Netherlands</institution>
        </aff>
        <aff id="aff4"><label>4</label><institution>Delft Institute of Applied Mathematics, Delft University of Technology, Delft, the Netherlands</institution>
        </aff>
        <aff id="aff5"><label>5</label><institution>Key Laboratory of Petroleum Engineering, Ministry of Education, China University of Petroleum, Beijing, China</institution>
        </aff>
      </contrib-group>
      <author-notes><corresp id="corr1">Jianbing Jin (jianbing.jin@nuist.edu.cn) and Hong Liao (hongliao@nuist.edu.cn)</corresp></author-notes><pub-date><day>24</day><month>October</month><year>2022</year></pub-date>
      
      <volume>15</volume>
      <issue>20</issue>
      <fpage>7791</fpage><lpage>7807</lpage>
      <history>
        <date date-type="received"><day>16</day><month>May</month><year>2022</year></date>
           <date date-type="accepted"><day>20</day><month>September</month><year>2022</year></date>
           <date date-type="rev-recd"><day>2</day><month>September</month><year>2022</year></date>
           <date date-type="rev-request"><day>6</day><month>July</month><year>2022</year></date>
      </history>
      <permissions>
        <copyright-statement>Copyright: © 2022 Li Fang et al.</copyright-statement>
        <copyright-year>2022</copyright-year>
      <license license-type="open-access"><license-p>This work is licensed under the Creative Commons Attribution 4.0 International License. To view a copy of this licence, visit <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link></license-p></license></permissions><self-uri xlink:href="https://gmd.copernicus.org/articles/gmd-15-7791-2022.html">This article is available from https://gmd.copernicus.org/articles/gmd-15-7791-2022.html</self-uri><self-uri xlink:href="https://gmd.copernicus.org/articles/gmd-15-7791-2022.pdf">The full text article is available as a PDF file from https://gmd.copernicus.org/articles/gmd-15-7791-2022.pdf</self-uri>
      <abstract><title>Abstract</title>

      <p id="d1e177">With the explosive growth of atmospheric data, machine learning models have achieved great success in air pollution forecasting because of their higher computational efficiency than the traditional chemical transport models. However, in previous studies, new prediction algorithms have only been tested at stations or in a small region; a large-scale air quality forecasting model remains lacking to date. Huge dimensionality also means that redundant input data may lead to increased complexity and therefore the over-fitting of machine learning models. Feature selection is a key topic in machine learning development, but it has not yet been explored in atmosphere-related applications. In this work, a regional feature selection-based machine learning (RFSML) system was developed, which is capable of predicting air quality in the short term with high accuracy at the national scale. Ensemble-Shapley additive global importance analysis is combined with the RFSML system to extract significant regional features and eliminate redundant variables at an affordable computational expense. The significance of the regional features is also explained physically. Compared with a standard machine learning system fed with relative features, the RFSML system driven by the selected key features results in superior interpretability, less training time, and more accurate predictions. This study also provides insights into the difference in interpretability among machine learning models (i.e., random forest, gradient boosting, and multi-layer perceptron models).</p>
  </abstract>
    </article-meta>
  </front>
<body>
      

<sec id="Ch1.S1" sec-type="intro">
  <label>1</label><title>Introduction</title>
      <p id="d1e189">With ongoing economic development and modern industrialization, the subsequent air pollution poses serious threats to resident health <xref ref-type="bibr" rid="bib1.bibx46 bib1.bibx42" id="paren.1"/>. After tobacco and high blood pressure, air pollution has ranked third in risk factors for death and disability in China over the past few decades <xref ref-type="bibr" rid="bib1.bibx55" id="paren.2"/>. The primary air pollutants in China are particulate matter (PM), sulfur dioxide (<inline-formula><mml:math id="M1" display="inline"><mml:mrow class="chem"><mml:msub><mml:mi mathvariant="normal">SO</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula>), carbon monoxide (CO), nitrogen oxides (<inline-formula><mml:math id="M2" display="inline"><mml:mrow class="chem"><mml:msub><mml:mi mathvariant="normal">NO</mml:mi><mml:mi>x</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>) and ozone (<inline-formula><mml:math id="M3" display="inline"><mml:mrow class="chem"><mml:msub><mml:mi mathvariant="normal">O</mml:mi><mml:mn mathvariant="normal">3</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula>) <xref ref-type="bibr" rid="bib1.bibx70" id="paren.3"/>. PM<inline-formula><mml:math id="M4" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> or respirable PM in air with an aerodynamic diameter below 2.5 <inline-formula><mml:math id="M5" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">µ</mml:mi><mml:mi mathvariant="normal">m</mml:mi></mml:mrow></mml:math></inline-formula> is the primary air pollutant, and it has attracted considerable attention from researchers <xref ref-type="bibr" rid="bib1.bibx81" id="paren.4"/>.
Exposure to either long-term or short-term PM<inline-formula><mml:math id="M6" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> is related to respiratory symptoms, lung disease, cardiovascular disease, premature death, and other adverse health effects <xref ref-type="bibr" rid="bib1.bibx60 bib1.bibx16" id="paren.5"/>.
<xref ref-type="bibr" rid="bib1.bibx7" id="text.6"/> and <xref ref-type="bibr" rid="bib1.bibx69" id="text.7"/> reported that PM<inline-formula><mml:math id="M7" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> pollution in winter, particularly in northern China, is severe. It accounted for 15.5 % (1.7 million) of all deaths in China in 2015, despite an improvement in air quality since 2013.
In recent studies, the global exposure mortality model has estimated that 140 200 premature deaths from 2015 to 2019 can be attributed to long-term exposure to PM<inline-formula><mml:math id="M8" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> <xref ref-type="bibr" rid="bib1.bibx26" id="paren.8"/>. An accurate air quality forecast (e.g., forecasting PM<inline-formula><mml:math id="M9" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula>) is therefore valuable to policy makers and health professionals for epidemiological control <xref ref-type="bibr" rid="bib1.bibx79" id="paren.9"/>. In addition, it can provide an early warning for residents, particularly for children, the elderly, and people with respiratory or cardiovascular problems <xref ref-type="bibr" rid="bib1.bibx28" id="paren.10"/>.</p>
      <p id="d1e313">The development of an air pollution forecasting model is possible, as atmospheric chemistry and physical rules have been explored and are now understood in depth <xref ref-type="bibr" rid="bib1.bibx73" id="paren.11"/>. In addition, our ever-increasing computational power can support the complex and heavy computational tasks required for this type of model <xref ref-type="bibr" rid="bib1.bibx62" id="paren.12"/>.
Deterministic models, such as chemical transport models (CTMs), and data-driven methods are commonly employed in forecasting <xref ref-type="bibr" rid="bib1.bibx13 bib1.bibx78" id="paren.13"/>. In several studies, air pollution forecasting has been performed using mainstream air quality CTMs, such as the Weather Research and Forecasting model with Chemistry <xref ref-type="bibr" rid="bib1.bibx23 bib1.bibx86" id="paren.14"/>, Community Multiscale Air Quality model <xref ref-type="bibr" rid="bib1.bibx47" id="paren.15"/>, and GEOS-Chem <xref ref-type="bibr" rid="bib1.bibx5 bib1.bibx33" id="paren.16"/>.
These CTMs can reproduce real atmospheric situations <xref ref-type="bibr" rid="bib1.bibx31 bib1.bibx67" id="paren.17"/>; however, they exhibit several shortcomings. One of the most serious limitations is the high uncertainty in emission inventories <xref ref-type="bibr" rid="bib1.bibx29" id="paren.18"/>, which is a great challenge given the variety of contributing sources, complexity of the spatial–temporal profiles, and lack of reliable in situ measurements <xref ref-type="bibr" rid="bib1.bibx40" id="paren.19"/>. Additionally, an ideal deterministic model requires a detailed and thorough understanding of physical and chemical processes in the atmosphere <xref ref-type="bibr" rid="bib1.bibx72" id="paren.20"/> and an enormous computational capacity to resolve fine-scale variabilities. Therefore, CTMs alone have failed to meet the requirements for an effective air quality early warning system.</p>
      <p id="d1e347">In contrast to CTMs, data-driven methods that do not require a profound knowledge of the complex composition or structure of the atmosphere are also widely utilized in atmospheric modeling <xref ref-type="bibr" rid="bib1.bibx88 bib1.bibx77" id="paren.21"/>. Many of these methods have been employed for air pollution forecasting, including multiple linear regression <xref ref-type="bibr" rid="bib1.bibx64" id="paren.22"/>, nonlinear regression models such as principal component regression <xref ref-type="bibr" rid="bib1.bibx66" id="paren.23"/>, hidden Markov models <xref ref-type="bibr" rid="bib1.bibx74" id="paren.24"/>, support vector machine <xref ref-type="bibr" rid="bib1.bibx1" id="paren.25"/>, and artificial neural networks <xref ref-type="bibr" rid="bib1.bibx19" id="paren.26"/>. Of these methods, machine learning models have gained the greatest popularity because of their capacity to learn complex and nonlinear relationships by assimilating “big” training datasets <xref ref-type="bibr" rid="bib1.bibx51 bib1.bibx39" id="paren.27"/>. Machine learning has brought great opportunities and challenges to the geophysical research community <xref ref-type="bibr" rid="bib1.bibx80" id="paren.28"/>.</p>
      <p id="d1e375">With the explosive growth of data in earth science, the superiority of machine learning for massive data applications has become increasingly prominent <xref ref-type="bibr" rid="bib1.bibx62" id="paren.29"/>. The most representative example is to perform predictions for a target site using a machine learning model trained via a long-term series of in situ historical measurements. The China Ministry of Environmental Protection (MEP) has established many ground-based stations measuring the primary pollutants since 2013 <xref ref-type="bibr" rid="bib1.bibx81" id="paren.30"/>. At present, the monitoring network comprises more than 1500 field stations covering all of China, as can be seen in Fig. <xref ref-type="fig" rid="Ch1.F1"/>. The richness of air quality observations from the monitoring network provides valuable training data and stimulates the development of machine learning air quality forecasting in China <xref ref-type="bibr" rid="bib1.bibx78" id="paren.31"/>.
Previous studies on air pollution forecasting in China have utilized various machine learning models with the ground-based MEP air quality dataset. For example, <xref ref-type="bibr" rid="bib1.bibx41" id="text.32"/> utilized a long short-term memory (LSTM) neural network extended model to predict PM<inline-formula><mml:math id="M10" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> concentrations at a maximum forecast horizon of 24 h for air quality monitoring stations in Beijing, China. <xref ref-type="bibr" rid="bib1.bibx76" id="text.33"/> proposed a composite prediction system based on an LSTM neural network to predict daily PM<inline-formula><mml:math id="M11" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> <inline-formula><mml:math id="M12" display="inline"><mml:mo>/</mml:mo></mml:math></inline-formula> PM<inline-formula><mml:math id="M13" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">10</mml:mn></mml:msub></mml:math></inline-formula> in Wuhan. <xref ref-type="bibr" rid="bib1.bibx84" id="text.34"/> established a hybrid model, integrating deep learning with multi-task learning, to predict hourly PM<inline-formula><mml:math id="M14" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> concentrations in three different districts of Lanzhou. <xref ref-type="bibr" rid="bib1.bibx36" id="text.35"/> utilized four machine learning models to develop an air quality forecasting system that can automatically find the best model and hyperparameter combination for the next 3 d air quality forecast in seven megacities of China.
These works are highly valuable in exploring novel methods of air quality prediction relative to conventional CTMs.
To the best of our knowledge, the aforementioned studies on air pollution forecasting solely focused on a few monitoring stations, typical cities, or small regions, while national-level air quality predictions remain lacking.
The challenges in national-level forecasting include substantial temporal and spatial variances <xref ref-type="bibr" rid="bib1.bibx70" id="paren.36"/> in air pollution and enormous computational power requirements.</p>
      <p id="d1e450">The curse of dimensionality is a common obstruction in modeling; i.e., an increasing amount of input data leads to rapidly increasing complexity, and prediction algorithms are susceptible to over-fitting <xref ref-type="bibr" rid="bib1.bibx63" id="paren.37"/>.
Therefore, considerable research has focused on reducing the dimensionality of input data by selecting only significant variables and eliminating redundancy.
The methods developed in this research can be classified into three categories: the filter method (e.g., a correlation matrix using the Pearson correlation coefficient), the wrapper method (e.g., recursive feature elimination), and the embedded method (e.g., Lasso regularization) <xref ref-type="bibr" rid="bib1.bibx10" id="paren.38"/>.
These methods can reduce the adverse effects of irregular variables or noise while retaining prediction performance <xref ref-type="bibr" rid="bib1.bibx25" id="paren.39"/>. They also save computing resources for model training. However, in previous studies on air quality forecasting, filter methods such as Pearson correlation coefficients or the maximal information coefficient (MIC) <xref ref-type="bibr" rid="bib1.bibx38" id="paren.40"/> were commonly utilized for input selection. These input selection methods can help improve the performance of machine learning models; however, they all have serious limitations.
For example, universal meteorological variables that highly correlate with PM<inline-formula><mml:math id="M15" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> in a large region of China are difficult to find using Pearson correlation coefficients because these coefficients can vary substantially both spatially and temporally <xref ref-type="bibr" rid="bib1.bibx81" id="paren.41"/>.
MIC is the most employed method for capturing linear and nonlinear correlations between variable pairs <xref ref-type="bibr" rid="bib1.bibx12" id="paren.42"/>. However, it cannot consider relevance and redundancy simultaneously <xref ref-type="bibr" rid="bib1.bibx71" id="paren.43"/>. Furthermore, MIC is computationally intensive <xref ref-type="bibr" rid="bib1.bibx8" id="paren.44"/>.</p>
      <p id="d1e487">Machine learning algorithms are often considered “black box” models that learn the input–output relationship from immense training samples <xref ref-type="bibr" rid="bib1.bibx9" id="paren.45"/>. Many researchers have devoted enormous efforts to developing and implementing tools to interpret machine learning models. Among these tools, game-theoretic formulations of feature significance are the most widely utilized because they can capture the interactions among features <xref ref-type="bibr" rid="bib1.bibx65" id="paren.46"/>, and they may be the only solution satisfying the four “favorable and fair” axioms <xref ref-type="bibr" rid="bib1.bibx21" id="paren.47"/>. Several scholars have conducted in-depth studies on distinguishing feature significance based on the Shapley value <xref ref-type="bibr" rid="bib1.bibx65" id="paren.48"/>. For example, <xref ref-type="bibr" rid="bib1.bibx57" id="text.49"/> utilized the Shapley additive explanation (SHAP) approach <xref ref-type="bibr" rid="bib1.bibx48" id="paren.50"/> to interpret multiple machine learning models and found that most of the models have similar features. <xref ref-type="bibr" rid="bib1.bibx22" id="text.51"/> successfully interpreted the feature contributions of the guideless irregular dew-point cooler on the predicted parameters based on SHAP.
In addition to SHAP, which explains individual predictions, <xref ref-type="bibr" rid="bib1.bibx15" id="text.52"/> proposed a novel method that can explain model behavior across the entire dataset (global interpretability), called Shapley additive global importance (SAGE). SHAP and SAGE both utilize the Shapley value; however, compared with SHAP, SAGE can simultaneously eliminate larger subsets of redundant features <xref ref-type="bibr" rid="bib1.bibx15" id="paren.53"/>. Additionally, SAGE extracts features from the conditional distribution instead of the marginal distribution because the latter may lead to breaking feature dependencies and producing unlikely feature combinations <xref ref-type="bibr" rid="bib1.bibx48" id="paren.54"/>. Furthermore, investigating the feature importance based on model performance <xref ref-type="bibr" rid="bib1.bibx35" id="paren.55"/> has been verified as a meaningful and effective approach for interpreting data-driven models and is popular in computer science <xref ref-type="bibr" rid="bib1.bibx2" id="paren.56"/>. However, this method has rarely been applied to air quality forecasting using machine learning tools.</p>
      <p id="d1e528">In the present study, the first version of a regional feature selection-based machine learning system (RFSML v1.0) is developed. The system can predict short-term air quality with high accuracy in China. In this study, the RFSML system predicts the primary air pollutant (PM<inline-formula><mml:math id="M16" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula>) concentration over every target site from the China MEP air quality monitoring network by learning its implicit trend from long-term series records.
This method can be extended to other airborne pollutant predictions in future studies.
SAGE analysis is adopted to interpret valuable features and exclude redundant inputs to avoid over-fitting the model during training. Because the SAGE calculations are more time-consuming than the model training, as explained in Sect. <xref ref-type="sec" rid="Ch1.S3.SS1"/>,
they are not repeated for every target site but are performed only at a limited number of ensemble sites that are randomly selected in a given region. Five densely populated regions of China were defined according to the air pollution patterns; these regions are consistent with the Clean Air Action target regions released by the Chinese State Council, as discussed in Sect. <xref ref-type="sec" rid="Ch1.S2.SS2.SSS3"/>.
The top three critical features in the ensemble SAGE calculations were utilized as the input features for the implicit trend model training for each site. The robustness of the regional feature selection was tested over three widely utilized machine learning models, i.e., random forest (RF), gradient boosting (GB), and multi-layer perceptron (MLP) models, and four forecasting horizons (6, 12, 18, and 24 h).</p>
      <p id="d1e544">The remainder of the paper is organized as follows: the composition of the data used in this study and the pre-processing method are introduced in Sect. <xref ref-type="sec" rid="Ch1.S2"/>. Then, the three machine learning models and their hyperparameter choices utilized in this study are described. The principles of SAGE and the details of the SAGE ranking-based regional feature selection are described at the end of Sect. <xref ref-type="sec" rid="Ch1.S2"/>.
In Sect. <xref ref-type="sec" rid="Ch1.S3"/>, the computational costs of SAGE and machine learning model training are detailed. Then, the results of feature selection in each region are presented and analyzed. The prediction performance of RFSML is evaluated and compared with that of the standard machine learning process. Finally, the conclusions and future prospects are provided in Sect. <xref ref-type="sec" rid="Ch1.S4"/>.</p>
</sec>
<sec id="Ch1.S2">
  <label>2</label><title>Model, data, and methods</title>
      <p id="d1e563">The components of the RFSML method that are used to forecast PM<inline-formula><mml:math id="M17" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> concentrations are described in the following sections.</p>
<sec id="Ch1.S2.SS1">
  <label>2.1</label><title>Model domain and data</title>
      <p id="d1e582">The RFSML system forecasts air pollution levels in the vicinity of a monitoring station. This forecasting uses machine learning by examining the variability in the available station datasets.
In 2019, the monitoring network consisted of 1588 stations at the locations displayed in Fig. <xref ref-type="fig" rid="Ch1.F1"/>.
Because the station network is dense, pollution-level forecasting can be performed for nearly any location in eastern China.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F1" specific-use="star"><?xmltex \currentcnt{1}?><?xmltex \def\figurename{Figure}?><label>Figure 1</label><caption><p id="d1e589">Locations of environmental monitoring stations in the study area in 2019 (blue pentagons). Red rectangles represent the five primary megacity clusters in China.</p></caption>
          <?xmltex \igopts{width=341.433071pt}?><graphic xlink:href="https://gmd.copernicus.org/articles/15/7791/2022/gmd-15-7791-2022-f01.png"/>

        </fig>

      <p id="d1e598">The input data for machine learning consisted of hourly averaged air pollutant measurements (e.g., PM<inline-formula><mml:math id="M18" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula>, CO, <inline-formula><mml:math id="M19" display="inline"><mml:mrow class="chem"><mml:msub><mml:mi mathvariant="normal">SO</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="M20" display="inline"><mml:mrow class="chem"><mml:msub><mml:mi mathvariant="normal">NO</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula>) from the Chinese MEP monitoring network, meteorological reanalysis data from ERA5-Land <xref ref-type="bibr" rid="bib1.bibx54" id="paren.57"/>, atmospheric composition data from the Copernicus Atmosphere Monitoring Service (CAMS) reanalysis <xref ref-type="bibr" rid="bib1.bibx32" id="paren.58"/> provided by the European Centre for Medium-Range Weather Forecasts (ECMWF), and emission data from the Multi-resolution Emission Inventory for China (MEIC), with time factors applied at an hourly resolution.
The input data are summarized in Table <xref ref-type="table" rid="Ch1.T1"/>. The variables in the datasets are correlated with and may drive the PM<inline-formula><mml:math id="M21" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> concentration and are therefore useful predictors.</p>

<?xmltex \floatpos{t}?><table-wrap id="Ch1.T1" specific-use="star"><?xmltex \currentcnt{1}?><label>Table 1</label><caption><p id="d1e654">Summary of empirical input variables.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="5">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="right"/>
     <oasis:colspec colnum="3" colname="col3" align="left"/>
     <oasis:colspec colnum="4" colname="col4" align="left"/>
     <oasis:colspec colnum="5" colname="col5" align="left"/>
     <oasis:thead>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Type</oasis:entry>
         <oasis:entry colname="col2">Number</oasis:entry>
         <oasis:entry colname="col3">Spatial resolution</oasis:entry>
         <oasis:entry colname="col4">Temporal resolution</oasis:entry>
         <oasis:entry colname="col5">Source</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>
         <oasis:entry colname="col1">Ground observation</oasis:entry>
         <oasis:entry colname="col2">4</oasis:entry>
         <oasis:entry colname="col3">Monitoring station</oasis:entry>
         <oasis:entry colname="col4">Hourly</oasis:entry>
         <oasis:entry colname="col5">Monitoring station</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Atmospheric composition</oasis:entry>
         <oasis:entry colname="col2">4</oasis:entry>
         <oasis:entry colname="col3"><inline-formula><mml:math id="M22" display="inline"><mml:mrow><mml:mn mathvariant="normal">0.75</mml:mn><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup><mml:mo>×</mml:mo><mml:mn mathvariant="normal">0.75</mml:mn><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col4">3-Hourly</oasis:entry>
         <oasis:entry colname="col5">CAMS global reanalysis</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Meteorology</oasis:entry>
         <oasis:entry colname="col2">8</oasis:entry>
         <oasis:entry colname="col3"><inline-formula><mml:math id="M23" display="inline"><mml:mrow><mml:mn mathvariant="normal">0.1</mml:mn><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup><mml:mo>×</mml:mo><mml:mn mathvariant="normal">0.1</mml:mn><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col4">Hourly</oasis:entry>
         <oasis:entry colname="col5">ERA5-Land hourly data</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">MEIC</oasis:entry>
         <oasis:entry colname="col2">9</oasis:entry>
         <oasis:entry colname="col3"><inline-formula><mml:math id="M24" display="inline"><mml:mrow><mml:mn mathvariant="normal">0.25</mml:mn><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup><mml:mo>×</mml:mo><mml:mn mathvariant="normal">0.25</mml:mn><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col4">Monthly</oasis:entry>
         <oasis:entry colname="col5">MEIC v1.3</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Time factor</oasis:entry>
         <oasis:entry colname="col2">2</oasis:entry>
         <oasis:entry colname="col3"/>
         <oasis:entry colname="col4">Hourly</oasis:entry>
         <oasis:entry colname="col5"/>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table></table-wrap>

      <p id="d1e839">Data from 2018 and 2019 were used in the experiments. The first 15 690 h (from 1 January 2018 to 15 October 2019) was used for model training and cross-validation, and the actual tests were performed using the remaining 1824 h of data from 15 October to 30 December 2019.
Our RFSML system can of course operate in a rolling way; additional forecasts in a less polluted period, April 2020, are performed with the models similarly trained using the recent 2-year data.</p>
<sec id="Ch1.S2.SS1.SSS1">
  <label>2.1.1</label><title>Air pollutant observations</title>
      <p id="d1e849">The observed air pollutant concentrations at the stations were used as inputs (<inline-formula><mml:math id="M25" display="inline"><mml:mrow class="chem"><mml:msub><mml:mi mathvariant="normal">NO</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M26" display="inline"><mml:mrow class="chem"><mml:msub><mml:mi mathvariant="normal">SO</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula>, and CO) and target variables (PM<inline-formula><mml:math id="M27" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula>) in the model training.
The available time series of PM<inline-formula><mml:math id="M28" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">10</mml:mn></mml:msub></mml:math></inline-formula> observations was missing many values and was therefore excluded from the model.
Additionally, <inline-formula><mml:math id="M29" display="inline"><mml:mrow class="chem"><mml:msub><mml:mi mathvariant="normal">O</mml:mi><mml:mn mathvariant="normal">3</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula> observations were excluded because these data exhibit a diurnal cycle that substantially differs from the PM<inline-formula><mml:math id="M30" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> target concentrations.</p>
      <p id="d1e913">Missing data occurred for each of the studied pollutants because of equipment failure, incorrect sensor readings, and improper operation.
For the PM<inline-formula><mml:math id="M31" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> time series, approximately 14.6 % of the observations were missing on average, as illustrated in Fig. <xref ref-type="fig" rid="Ch1.F2"/>a.
However, an uninterrupted time series is necessary for model training and rolling forecasting.
In studies such as <xref ref-type="bibr" rid="bib1.bibx61" id="text.59"/> and <xref ref-type="bibr" rid="bib1.bibx49" id="text.60"/>, it was shown that the observations from surrounding monitoring stations can be utilized to insert suitable values for missing data through imputation. Data imputation tools, such as cubic interpolation, have gained popularity for enhancing monotone data <xref ref-type="bibr" rid="bib1.bibx20" id="paren.61"/>.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F2" specific-use="star"><?xmltex \currentcnt{2}?><?xmltex \def\figurename{Figure}?><label>Figure 2</label><caption><p id="d1e938">Missing fraction of <bold>(a)</bold> original and <bold>(b)</bold> KNN-interpolated PM<inline-formula><mml:math id="M32" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> data. Dots and triangles denote the locations of air quality monitoring stations, and dot colors represent the missing data rate of each monitoring station. Black triangles indicate monitoring stations with over 20 % missing data, or with over 15 % missing data after KNN interpolation, which were excluded from the model. </p></caption>
            <?xmltex \igopts{width=455.244094pt}?><graphic xlink:href="https://gmd.copernicus.org/articles/15/7791/2022/gmd-15-7791-2022-f02.png"/>

          </fig>

      <p id="d1e963">In this study, the K-nearest neighbor (KNN) classification method <xref ref-type="bibr" rid="bib1.bibx85" id="paren.62"/> and cubic imputation were combined to create an uninterrupted time series.
The KNN algorithm is illustrated as Algorithm S1 in the Supplement. The KNN algorithm was implemented using the following steps:
<list list-type="order"><list-item>
      <p id="d1e971">Monitoring stations with over 20 % missing data were excluded from the training and prediction because such large amounts of missing data cannot be expected to be filled with sufficient accuracy.</p></list-item><list-item>
      <p id="d1e975">For each station, the number of monitoring stations within a radius of 0.8<inline-formula><mml:math id="M33" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> was calculated (following the empirical choices suggested in <xref ref-type="bibr" rid="bib1.bibx34" id="altparen.63"/>). If fewer than three surrounding stations were available, the station was excluded from the training and forecasting. If three or four surrounding stations were found, these were all selected, while four stations were selected randomly if more than four surrounding stations were found.</p></list-item><list-item>
      <p id="d1e991">For each target station, a geographic inverse distance weighting technique <xref ref-type="bibr" rid="bib1.bibx4" id="paren.64"/> was used to estimate the missing values using the observed values from the surrounding stations.</p></list-item></list></p>
      <p id="d1e997">After KNN interpolation, the amount of missing data in the PM<inline-formula><mml:math id="M34" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> time series was reduced to approximately 4.5 %, as illustrated in Fig. <xref ref-type="fig" rid="Ch1.F2"/>b.</p>
      <p id="d1e1011">Because there were cases where nearby stations and the target station simultaneously exhibited missing data, some instances of missing values remained after KNN interpolation.
Therefore, cubic imputation <xref ref-type="bibr" rid="bib1.bibx37" id="paren.65"/> was employed to insert values for the remaining missing data. Outliers generated by the cubic imputation were replaced with the minimum or maximum of the original series.
A total of 1263 monitoring stations exhibited no missing data after applying interpolation. Both the mean and standard deviation of the homogenized time series were similar to those of the original data, as illustrated in Fig. S1 in the Supplement.</p>
</sec>
<sec id="Ch1.S2.SS1.SSS2">
  <label>2.1.2</label><title>Air pollutant forecast product and meteorological variables</title>
      <p id="d1e1025">The CAMS reanalysis <xref ref-type="bibr" rid="bib1.bibx32" id="paren.66"/> provides three-dimensional simulations of the atmospheric composition obtained by combining a global atmospheric chemistry model and observations. Therefore, it is expected to surpass pure model-based prediction accuracy. Selected concentrations of trace gases and aerosols from the CAMS reanalysis were inputs for the PM<inline-formula><mml:math id="M35" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> predictor. The PM<inline-formula><mml:math id="M36" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> simulations in this dataset were also used as a benchmark for the RFSML prediction.</p>
      <p id="d1e1049">We obtained the 3-hourly reanalysis data of four air pollutant concentrations (pm2p5, no2, so2, and co), which correspond to the four ground-observed pollutants mentioned above, over China from 2018 to 2019. The 3 h temporal resolution of the CAMS reanalysis data is first interpolated to a 1 h resolution by cubic imputation. Then continuous time series of features at the monitoring stations are extracted from the interpolated 1 h data at a resolution of <inline-formula><mml:math id="M37" display="inline"><mml:mrow><mml:mn mathvariant="normal">0.75</mml:mn><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup><mml:mo>×</mml:mo><mml:mn mathvariant="normal">0.75</mml:mn><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:mrow></mml:math></inline-formula> using the nearest mapping.</p>
      <p id="d1e1072">Meteorological variables, as illustrated in Table <xref ref-type="table" rid="Ch1.T2"/>, were obtained from ERA5-Land data
<xref ref-type="bibr" rid="bib1.bibx14" id="paren.67"/> at a horizontal resolution of <inline-formula><mml:math id="M38" display="inline"><mml:mrow><mml:mn mathvariant="normal">0.1</mml:mn><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup><mml:mo>×</mml:mo><mml:mn mathvariant="normal">0.1</mml:mn><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:mrow></mml:math></inline-formula> and an hourly temporal resolution for 2018 and 2019. The
data are available from the Climate Data Store via <ext-link xlink:href="https://doi.org/10.24381/cds.e2161bac" ext-link-type="DOI">10.24381/cds.e2161bac</ext-link> <xref ref-type="bibr" rid="bib1.bibx53" id="paren.68"/>.
The time series of meteorological variables used for the machine learning are extracted from this product using the nearest mapping method.</p>

<?xmltex \floatpos{t}?><table-wrap id="Ch1.T2"><?xmltex \currentcnt{2}?><label>Table 2</label><caption><p id="d1e1110">Summary of meteorological variables obtained from the ERA5-Land dataset.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="3">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="left"/>
     <oasis:colspec colnum="3" colname="col3" align="left"/>
     <oasis:thead>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Meteorology</oasis:entry>
         <oasis:entry colname="col2">Long name</oasis:entry>
         <oasis:entry colname="col3">Unit</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>
         <oasis:entry colname="col1">u10</oasis:entry>
         <oasis:entry colname="col2">10 m <inline-formula><mml:math id="M39" display="inline"><mml:mi>u</mml:mi></mml:math></inline-formula> component of wind</oasis:entry>
         <oasis:entry colname="col3"><inline-formula><mml:math id="M40" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">m</mml:mi><mml:mspace linebreak="nobreak" width="0.125em"/><mml:msup><mml:mi mathvariant="normal">s</mml:mi><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula></oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">v10</oasis:entry>
         <oasis:entry colname="col2">10 m <inline-formula><mml:math id="M41" display="inline"><mml:mi>v</mml:mi></mml:math></inline-formula> component of wind</oasis:entry>
         <oasis:entry colname="col3"><inline-formula><mml:math id="M42" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">m</mml:mi><mml:mspace linebreak="nobreak" width="0.125em"/><mml:msup><mml:mi mathvariant="normal">s</mml:mi><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula></oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">d2m</oasis:entry>
         <oasis:entry colname="col2">2 m dew-point temperature</oasis:entry>
         <oasis:entry colname="col3">K</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">t2m</oasis:entry>
         <oasis:entry colname="col2">2 m temperature</oasis:entry>
         <oasis:entry colname="col3">K</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">skt</oasis:entry>
         <oasis:entry colname="col2">Skin temperature</oasis:entry>
         <oasis:entry colname="col3">K</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">sp</oasis:entry>
         <oasis:entry colname="col2">Surface pressure</oasis:entry>
         <oasis:entry colname="col3">Pa</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">tp</oasis:entry>
         <oasis:entry colname="col2">Total precipitation</oasis:entry>
         <oasis:entry colname="col3">m</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">str</oasis:entry>
         <oasis:entry colname="col2">Surface net thermal radiation</oasis:entry>
         <oasis:entry colname="col3"><inline-formula><mml:math id="M43" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">J</mml:mi><mml:mspace width="0.125em" linebreak="nobreak"/><mml:msup><mml:mi mathvariant="normal">m</mml:mi><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">2</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula></oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table></table-wrap>

</sec>
<sec id="Ch1.S2.SS1.SSS3">
  <label>2.1.3</label><title>Emission inventory</title>
      <p id="d1e1306">MEIC, the most popular anthropogenic emission inventory in China <xref ref-type="bibr" rid="bib1.bibx40" id="paren.69"/>, has been validated to provide consistent aerosol precursor loading for satellite observations <xref ref-type="bibr" rid="bib1.bibx17" id="paren.70"/>. It has been widely employed to quantify the air pollution in multiple atmospheric chemistry models. The latest inventory of 2017 from MEIC version 1.3 was obtained via <uri>http://meicmodel.org/index.html</uri> (last access: October 2021) for use in this study. Based on the emission source height distribution, 24 h distribution, and version 2 of the Regional Acid Deposition Model <xref ref-type="bibr" rid="bib1.bibx87" id="paren.71"/> chemical reaction scheme, the original monthly emission data were processed into hourly emission rates. Considering their correlation with PM<inline-formula><mml:math id="M44" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula>, nine pollutant species were selected as machine learning predictor inputs, as displayed in Table <xref ref-type="table" rid="Ch1.T3"/>.</p>

<?xmltex \floatpos{t}?><table-wrap id="Ch1.T3"><?xmltex \currentcnt{3}?><label>Table 3</label><caption><p id="d1e1336">Summary of emission inventory variables.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="2">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="left"/>
     <oasis:thead>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">MEIC</oasis:entry>
         <oasis:entry colname="col2">Full name</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>
         <oasis:entry colname="col1">E_CO</oasis:entry>
         <oasis:entry colname="col2">CO</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">E_ECI</oasis:entry>
         <oasis:entry colname="col2">Elemental carbon PM<inline-formula><mml:math id="M45" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> nuclei mode</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">E_HCHO</oasis:entry>
         <oasis:entry colname="col2">Formaldehyde</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">E_NH3</oasis:entry>
         <oasis:entry colname="col2"><inline-formula><mml:math id="M46" display="inline"><mml:mrow class="chem"><mml:msub><mml:mi mathvariant="normal">NH</mml:mi><mml:mn mathvariant="normal">3</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">E_NO2</oasis:entry>
         <oasis:entry colname="col2"><inline-formula><mml:math id="M47" display="inline"><mml:mrow class="chem"><mml:msub><mml:mi mathvariant="normal">NO</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">E_ORGJ</oasis:entry>
         <oasis:entry colname="col2">Organic PM<inline-formula><mml:math id="M48" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> accumulation mode</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">E_PM25J</oasis:entry>
         <oasis:entry colname="col2">Unspeciated primary PM<inline-formula><mml:math id="M49" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> accumulation mode</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">E_PM_10</oasis:entry>
         <oasis:entry colname="col2">Unspeciated primary PM<inline-formula><mml:math id="M50" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">10</mml:mn></mml:msub></mml:math></inline-formula></oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">E_SO2</oasis:entry>
         <oasis:entry colname="col2"><inline-formula><mml:math id="M51" display="inline"><mml:mrow class="chem"><mml:msub><mml:mi mathvariant="normal">SO</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table></table-wrap>

</sec>
</sec>
<sec id="Ch1.S2.SS2">
  <label>2.2</label><title>The RFSML system </title>
<sec id="Ch1.S2.SS2.SSS1">
  <label>2.2.1</label><title>System framework</title>
      <p id="d1e1523">Figure <xref ref-type="fig" rid="Ch1.F3"/> displays the framework of the proposed RFSML and a standard machine learning system. Note that a standard machine learning system refers to a machine learning system without any feature selection. Standard machine learning is conducted as follows. First, all observations and datasets related to PM<inline-formula><mml:math id="M52" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> are collected, and then the missing values are interpolated into the original dataset. Next, the appropriate machine learning model is selected, and the continuous data time series is reformed into the required input structure. The model is then trained repeatedly until the appropriate hyperparameters are obtained, and finally, predictions are made with the trained model.
Given an input <inline-formula><mml:math id="M53" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="bold-italic">x</mml:mi><mml:mi>n</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> that consists of individual features <inline-formula><mml:math id="M54" display="inline"><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mn mathvariant="normal">1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mi mathvariant="normal">…</mml:mi><mml:mo>,</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>n</mml:mi></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula>, a predictor <inline-formula><mml:math id="M55" display="inline"><mml:mi mathvariant="script">F</mml:mi></mml:math></inline-formula> is utilized in a supervised learning task to predict the target variable <inline-formula><mml:math id="M56" display="inline"><mml:mi>y</mml:mi></mml:math></inline-formula>. A time series regression, such as rolling forecast, can be expressed as follows:
              <disp-formula id="Ch1.E1" content-type="numbered"><label>1</label><mml:math id="M57" display="block"><mml:mtable class="split" rowspacing="0.2ex" displaystyle="true" columnalign="right left"><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo stretchy="false" mathvariant="normal">^</mml:mo></mml:mover><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi mathvariant="script">F</mml:mi><mml:mo mathsize="1.1em">(</mml:mo><mml:msubsup><mml:mi>x</mml:mi><mml:mn mathvariant="normal">1</mml:mn><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mi mathvariant="normal">…</mml:mi><mml:mo>,</mml:mo><?xmltex \hack{\,}?></mml:mrow></mml:mtd><mml:mtd><mml:mrow><mml:msubsup><mml:mi>x</mml:mi><mml:mn mathvariant="normal">1</mml:mn><mml:mi>t</mml:mi></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>x</mml:mi><mml:mn mathvariant="normal">2</mml:mn><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mi mathvariant="normal">…</mml:mi><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd/><mml:mtd><mml:mrow><mml:msubsup><mml:mi>x</mml:mi><mml:mn mathvariant="normal">2</mml:mn><mml:mi>t</mml:mi></mml:msubsup><mml:mo>,</mml:mo><mml:mi mathvariant="normal">…</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="normal">…</mml:mi><mml:mo>,</mml:mo><mml:msubsup><mml:mi>x</mml:mi><mml:mi>n</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mi 
mathvariant="normal">…</mml:mi><mml:mo>,</mml:mo><mml:msubsup><mml:mi>x</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi></mml:msubsup><mml:mo mathsize="1.1em">)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
            where, at any instant <inline-formula><mml:math id="M58" display="inline"><mml:mi>t</mml:mi></mml:math></inline-formula>, the input vector storing <inline-formula><mml:math id="M59" display="inline"><mml:mi>n</mml:mi></mml:math></inline-formula> individual features in the previous <inline-formula><mml:math id="M60" display="inline"><mml:mrow><mml:msub><mml:mi>t</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> h is utilized to forecast the target PM<inline-formula><mml:math id="M61" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> concentration <inline-formula><mml:math id="M62" display="inline"><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo mathvariant="normal" stretchy="false">^</mml:mo></mml:mover></mml:math></inline-formula> with a horizon of <inline-formula><mml:math id="M63" display="inline"><mml:mi>h</mml:mi></mml:math></inline-formula> h. The forecast predictor <inline-formula><mml:math id="M64" display="inline"><mml:mi mathvariant="script">F</mml:mi></mml:math></inline-formula> represents the machine learning model (RF, GB, or MLP) trained using the historical data.
Details on the selection of <inline-formula><mml:math id="M65" display="inline"><mml:mrow><mml:msub><mml:mi>t</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="M66" display="inline"><mml:mi>h</mml:mi></mml:math></inline-formula> are provided in Sect. <xref ref-type="sec" rid="Ch1.S2.SS2.SSS2"/>.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F3" specific-use="star"><?xmltex \currentcnt{3}?><?xmltex \def\figurename{Figure}?><label>Figure 3</label><caption><p id="d1e1825">RFSML versus standard machine learning system framework.</p></caption>
            <?xmltex \igopts{width=341.433071pt}?><graphic xlink:href="https://gmd.copernicus.org/articles/15/7791/2022/gmd-15-7791-2022-f03.png"/>

          </fig>

      <p id="d1e1834">As mentioned before, some features are redundant, and a feature subset can provide sufficient predictive power and less noise for <inline-formula><mml:math id="M67" display="inline"><mml:mi mathvariant="script">F</mml:mi></mml:math></inline-formula>. Thus, the proposed RFSML utilized SAGE to obtain the optimal feature subsets. Considering the computational efficiency, we divided the total national air quality monitoring stations into six types, and air quality monitoring stations were randomly selected from each type for feature selection. Given any feature subset <inline-formula><mml:math id="M68" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="bold-italic">x</mml:mi><mml:mi>s</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mo mathvariant="italic">{</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mn mathvariant="normal">1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mi mathvariant="normal">…</mml:mi><mml:mo>,</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>s</mml:mi></mml:msub><mml:mo mathvariant="italic">}</mml:mo></mml:mrow></mml:math></inline-formula>, the machine learning models can be described as follows:
              <disp-formula id="Ch1.E2" content-type="numbered"><label>2</label><mml:math id="M69" display="block"><mml:mtable rowspacing="0.2ex" class="split" displaystyle="true" columnalign="right left"><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo mathvariant="normal" stretchy="false">^</mml:mo></mml:mover><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi mathvariant="script">F</mml:mi><mml:mo mathsize="1.1em">(</mml:mo><mml:msubsup><mml:mi>x</mml:mi><mml:mn mathvariant="normal">1</mml:mn><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mi mathvariant="normal">…</mml:mi><mml:mo>,</mml:mo><?xmltex \hack{\,}?></mml:mrow></mml:mtd><mml:mtd><mml:mrow><mml:msubsup><mml:mi>x</mml:mi><mml:mn mathvariant="normal">1</mml:mn><mml:mi>t</mml:mi></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>x</mml:mi><mml:mn mathvariant="normal">2</mml:mn><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mi mathvariant="normal">…</mml:mi><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd/><mml:mtd><mml:mrow><mml:msubsup><mml:mi>x</mml:mi><mml:mn mathvariant="normal">2</mml:mn><mml:mi>t</mml:mi></mml:msubsup><mml:mo>,</mml:mo><mml:mi mathvariant="normal">…</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="normal">…</mml:mi><mml:mo>,</mml:mo><mml:msubsup><mml:mi>x</mml:mi><mml:mi>s</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mi 
mathvariant="normal">…</mml:mi><mml:mo>,</mml:mo><mml:msubsup><mml:mi>x</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi></mml:msubsup><mml:mo mathsize="1.1em">)</mml:mo><mml:mo>.</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula></p>
</sec>
<sec id="Ch1.S2.SS2.SSS2">
  <label>2.2.2</label><title>Machine learning models</title>
      <p id="d1e2039">Different machine learning algorithms have been used to forecast PM<inline-formula><mml:math id="M70" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> because they can provide promising approaches to handle complex nonlinear relationships. Each algorithm exhibits advantages and drawbacks. Of the machine learning models, typical boosting (e.g., GB) and bagging (e.g., RF) algorithms are widely applied in regression analysis using a set of decision trees. Additionally, artificial neural network models (e.g., MLP) that are composed of many processing elements can successfully perform nonlinear mapping. Thus, to evaluate the robustness of the feature selection, all of the prediction algorithms mentioned above were tested in the present study.</p>
      <p id="d1e2051">The original data in Fig. <xref ref-type="fig" rid="Ch1.F3"/> were converted into a 27-dimensional matrix (<inline-formula><mml:math id="M71" display="inline"><mml:mrow><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mn mathvariant="normal">27</mml:mn></mml:mrow></mml:math></inline-formula>) after preprocessing.
On the basis of the auto-correlation and partial auto-correlation results, a time step <inline-formula><mml:math id="M72" display="inline"><mml:mrow><mml:msub><mml:mi>t</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mn mathvariant="normal">9</mml:mn></mml:mrow></mml:math></inline-formula> h was selected for the forecast. The prediction horizon <inline-formula><mml:math id="M73" display="inline"><mml:mi>h</mml:mi></mml:math></inline-formula> spans from 1 to 24 h.
Then, the matrix was converted into supervised learning based on <inline-formula><mml:math id="M74" display="inline"><mml:mrow><mml:msub><mml:mi>t</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="M75" display="inline"><mml:mi>h</mml:mi></mml:math></inline-formula>.
The model hyperparameters (Table <xref ref-type="table" rid="Ch1.T4"/>) were determined for each prediction algorithm using 10-fold cross-validation and then fit to each prediction algorithm. Note that “None” for the max depth of RF means “nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples” in scikit-learn <xref ref-type="bibr" rid="bib1.bibx58" id="paren.72"/>.</p>

<?xmltex \floatpos{t}?><table-wrap id="Ch1.T4" specific-use="star"><?xmltex \currentcnt{4}?><label>Table 4</label><caption><p id="d1e2117">Summary of the models' hyperparameters.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="6">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="left" colsep="1"/>
     <oasis:colspec colnum="3" colname="col3" align="left"/>
     <oasis:colspec colnum="4" colname="col4" align="left" colsep="1"/>
     <oasis:colspec colnum="5" colname="col5" align="left"/>
     <oasis:colspec colnum="6" colname="col6" align="left"/>
     <oasis:thead>
       <oasis:row rowsep="1">
         <oasis:entry namest="col1" nameend="col2" align="center" colsep="1">GB </oasis:entry>
         <oasis:entry namest="col3" nameend="col4" align="center" colsep="1">RF </oasis:entry>
         <oasis:entry namest="col5" nameend="col6" align="center">MLP </oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Hyperparameter</oasis:entry>
         <oasis:entry colname="col2">Final value</oasis:entry>
         <oasis:entry colname="col3">Hyperparameter</oasis:entry>
         <oasis:entry colname="col4">Final value</oasis:entry>
         <oasis:entry colname="col5">Hyperparameter</oasis:entry>
         <oasis:entry colname="col6">Final value</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"><inline-formula><mml:math id="M76" display="inline"><mml:mi>N</mml:mi></mml:math></inline-formula> estimators</oasis:entry>
         <oasis:entry colname="col2">100</oasis:entry>
         <oasis:entry colname="col3"><inline-formula><mml:math id="M77" display="inline"><mml:mi>N</mml:mi></mml:math></inline-formula> estimators</oasis:entry>
         <oasis:entry colname="col4">100</oasis:entry>
         <oasis:entry colname="col5">Neurons in hidden layer</oasis:entry>
         <oasis:entry colname="col6">100</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Max depth</oasis:entry>
         <oasis:entry colname="col2">3</oasis:entry>
         <oasis:entry colname="col3">Max depth</oasis:entry>
         <oasis:entry colname="col4">None</oasis:entry>
         <oasis:entry colname="col5">Activation</oasis:entry>
         <oasis:entry colname="col6">ReLU</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Loss</oasis:entry>
         <oasis:entry colname="col2">Mean squared error</oasis:entry>
         <oasis:entry colname="col3">Loss</oasis:entry>
         <oasis:entry colname="col4">Mean squared error</oasis:entry>
         <oasis:entry colname="col5">Loss</oasis:entry>
         <oasis:entry colname="col6">Mean squared error</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Learning rate</oasis:entry>
         <oasis:entry colname="col2">0.1</oasis:entry>
         <oasis:entry colname="col3">Min sample leaf</oasis:entry>
         <oasis:entry colname="col4">1</oasis:entry>
         <oasis:entry colname="col5">Solver</oasis:entry>
         <oasis:entry colname="col6">Adam</oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table></table-wrap>

</sec>
<sec id="Ch1.S2.SS2.SSS3">
  <label>2.2.3</label><title>SAGE-based regional feature selection</title>
      <p id="d1e2286">Many methods have been utilized to investigate the significance of features for machine learning models. The game-theoretic method based on the Shapley value is the most widely adopted.
Unlike SHAP, a well-known method for explaining individual predictions, SAGE explains model behavior across the entire dataset. Global model interpretability helps us understand the distribution of target outcomes based on the features <xref ref-type="bibr" rid="bib1.bibx52" id="paren.73"/>, which is useful for finding the typical features of each region. There are two outstanding added values of SAGE <xref ref-type="bibr" rid="bib1.bibx15" id="paren.74"/>. The first is its ability to remove large subsets of features because only removing individual features gives too little significance to features with sufficient proxies, such as in permutation tests. The other advantage of SAGE is its ability to select notable features from their conditional distribution instead of their marginal distribution, reducing unlikely feature combinations.</p>
      <p id="d1e2295">Given the function <inline-formula><mml:math id="M78" display="inline"><mml:mrow><mml:msub><mml:mi>W</mml:mi><mml:mi mathvariant="script">F</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, which represents the predictive power of a machine learning model <inline-formula><mml:math id="M79" display="inline"><mml:mi mathvariant="script">F</mml:mi></mml:math></inline-formula> with subsets of features <inline-formula><mml:math id="M80" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="bold-italic">x</mml:mi><mml:mi>s</mml:mi></mml:msub><mml:mo>⊆</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">x</mml:mi><mml:mi>n</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, the SAGE algorithm can be written as follows:

                  <disp-formula specific-use="gather" content-type="numbered"><mml:math id="M81" display="block"><mml:mtable displaystyle="true"><mml:mlabeledtr id="Ch1.E3"><mml:mtd><mml:mtext>3</mml:mtext></mml:mtd><mml:mtd><mml:mrow><mml:mstyle class="stylechange" displaystyle="true"/><mml:msub><mml:mi>W</mml:mi><mml:mi mathvariant="script">F</mml:mi></mml:msub><mml:mo>(</mml:mo><mml:mi>S</mml:mi><mml:mo>)</mml:mo><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:mi mathvariant="double-struck">E</mml:mi><mml:mo mathsize="1.1em">[</mml:mo><mml:mi mathvariant="normal">ℓ</mml:mi><mml:mo>(</mml:mo><mml:mi mathvariant="double-struck">E</mml:mi><mml:mo>[</mml:mo><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo stretchy="false" mathvariant="normal">^</mml:mo></mml:mover><mml:mo>|</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">x</mml:mi><mml:mi>s</mml:mi></mml:msub><mml:mo>]</mml:mo><mml:mo>,</mml:mo><mml:mi>y</mml:mi><mml:mo>)</mml:mo><mml:mo mathsize="1.1em">]</mml:mo></mml:mrow></mml:mtd></mml:mlabeledtr><mml:mlabeledtr id="Ch1.E4"><mml:mtd><mml:mtext>4</mml:mtext></mml:mtd><mml:mtd><mml:mrow><mml:mstyle class="stylechange" displaystyle="true"/><mml:mtable class="split" rowspacing="0.2ex" displaystyle="true" columnalign="right left"><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mi mathvariant="italic">ϕ</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>(</mml:mo><mml:msub><mml:mi>W</mml:mi><mml:mi mathvariant="script">F</mml:mi></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:mtd><mml:mtd><mml:mrow><mml:mo>=</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd/><mml:mtd><mml:mrow><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mn mathvariant="normal">1</mml:mn><mml:mi>n</mml:mi></mml:mfrac></mml:mstyle><mml:munder><mml:mo movablelimits="false">∑</mml:mo><mml:mrow><mml:mi>S</mml:mi><mml:mo>⊆</mml:mo><mml:mi>N</mml:mi><mml:mo>\</mml:mo><mml:mo mathvariant="italic">{</mml:mo><mml:mi>i</mml:mi><mml:mo 
mathvariant="italic">}</mml:mo></mml:mrow></mml:munder><mml:msup><mml:mfenced close=")" open="("><mml:mstyle displaystyle="true"><mml:mfrac linethickness="0"><mml:mrow><mml:mi>n</mml:mi><mml:mo>-</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mi>S</mml:mi><mml:mo>|</mml:mo></mml:mrow></mml:mfrac></mml:mstyle></mml:mfenced><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow></mml:msup><mml:mo mathsize="1.1em">[</mml:mo><mml:msub><mml:mi>W</mml:mi><mml:mi mathvariant="script">F</mml:mi></mml:msub><mml:mo>(</mml:mo><mml:mi>S</mml:mi><mml:mo>∪</mml:mo><mml:mo mathvariant="italic">{</mml:mo><mml:mi>i</mml:mi><mml:mo mathvariant="italic">}</mml:mo><mml:mo>)</mml:mo><mml:mo>-</mml:mo><mml:msub><mml:mi>W</mml:mi><mml:mi mathvariant="script">F</mml:mi></mml:msub><mml:mo>(</mml:mo><mml:mi>S</mml:mi><mml:mo>)</mml:mo><mml:mo mathsize="1.1em">]</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:mtd></mml:mlabeledtr></mml:mtable></mml:math></disp-formula>

              where <inline-formula><mml:math id="M82" display="inline"><mml:mi mathvariant="normal">ℓ</mml:mi></mml:math></inline-formula> means the loss function that measures the root mean squared error (RMSE) or mean absolute error (MAE); <inline-formula><mml:math id="M83" display="inline"><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo stretchy="false" mathvariant="normal">^</mml:mo></mml:mover></mml:math></inline-formula> is the prediction from <inline-formula><mml:math id="M84" display="inline"><mml:mi mathvariant="script">F</mml:mi></mml:math></inline-formula>; <inline-formula><mml:math id="M85" display="inline"><mml:mi>y</mml:mi></mml:math></inline-formula> represents the target variable; sets <inline-formula><mml:math id="M86" display="inline"><mml:mi>S</mml:mi></mml:math></inline-formula> and <inline-formula><mml:math id="M87" display="inline"><mml:mi>N</mml:mi></mml:math></inline-formula> store <inline-formula><mml:math id="M88" display="inline"><mml:mrow><mml:mo mathvariant="italic">{</mml:mo><mml:mn mathvariant="normal">1</mml:mn><mml:mo>,</mml:mo><mml:mn mathvariant="normal">2</mml:mn><mml:mo>,</mml:mo><mml:mn mathvariant="normal">3</mml:mn><mml:mo>,</mml:mo><mml:mi mathvariant="normal">…</mml:mi><mml:mo>,</mml:mo><mml:mi>s</mml:mi><mml:mo mathvariant="italic">}</mml:mo></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="M89" display="inline"><mml:mrow><mml:mo mathvariant="italic">{</mml:mo><mml:mn mathvariant="normal">1</mml:mn><mml:mo>,</mml:mo><mml:mn mathvariant="normal">2</mml:mn><mml:mo>,</mml:mo><mml:mn mathvariant="normal">3</mml:mn><mml:mo>,</mml:mo><mml:mi mathvariant="normal">…</mml:mi><mml:mo>,</mml:mo><mml:mi>n</mml:mi><mml:mo mathvariant="italic">}</mml:mo></mml:mrow></mml:math></inline-formula>, respectively;
<inline-formula><mml:math id="M90" display="inline"><mml:mi>i</mml:mi></mml:math></inline-formula> is each single variable where <inline-formula><mml:math id="M91" display="inline"><mml:mrow><mml:mi>i</mml:mi><mml:mo>∈</mml:mo><mml:mi>N</mml:mi></mml:mrow></mml:math></inline-formula>; and <inline-formula><mml:math id="M92" display="inline"><mml:mi>n</mml:mi></mml:math></inline-formula> is the length of <inline-formula><mml:math id="M93" display="inline"><mml:mi>N</mml:mi></mml:math></inline-formula>. <inline-formula><mml:math id="M94" display="inline"><mml:mrow><mml:msub><mml:mi>W</mml:mi><mml:mi mathvariant="script">F</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> increases with a decline in the loss function for any subset <inline-formula><mml:math id="M95" display="inline"><mml:mrow><mml:mi>S</mml:mi><mml:mo>⊆</mml:mo><mml:mi>N</mml:mi></mml:mrow></mml:math></inline-formula> (note the minus sign in front of the loss function in Eq. <xref ref-type="disp-formula" rid="Ch1.E3"/>). Equation (<xref ref-type="disp-formula" rid="Ch1.E4"/>) represents the Shapley value that is the weighted average of the incremental changes from adding <inline-formula><mml:math id="M96" display="inline"><mml:mi>i</mml:mi></mml:math></inline-formula> to subsets <inline-formula><mml:math id="M97" display="inline"><mml:mrow><mml:mi>S</mml:mi><mml:mo>⊆</mml:mo><mml:mi>N</mml:mi><mml:mo>\</mml:mo><mml:mo mathvariant="italic">{</mml:mo><mml:mi>i</mml:mi><mml:mo mathvariant="italic">}</mml:mo></mml:mrow></mml:math></inline-formula> <xref ref-type="bibr" rid="bib1.bibx15" id="paren.75"/>. 
The more a feature contributes to the prediction from <inline-formula><mml:math id="M98" display="inline"><mml:mi mathvariant="script">F</mml:mi></mml:math></inline-formula>, the larger the positive values <inline-formula><mml:math id="M99" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">ϕ</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>(</mml:mo><mml:msub><mml:mi>W</mml:mi><mml:mi mathvariant="script">F</mml:mi></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> would become.</p>
      <p id="d1e2738">The computational costs of the SAGE analysis over machine learning models including RF, GB, and MLP are presented in Sect. <xref ref-type="sec" rid="Ch1.S3.SS1"/>. They are much more expensive than the model training and therefore cannot be repeated over all sites. Meanwhile, air pollution in nearby monitoring stations has inherent similarities because its forcing factors, i.e., meteorological and emission variables, are closely related in a given region.
As in <xref ref-type="bibr" rid="bib1.bibx81" id="text.76"/>, all the available sites were partitioned into six categories in the present study: the North China Plain (NCP; 34–41<inline-formula><mml:math id="M100" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> N, 113–119<inline-formula><mml:math id="M101" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> E), Yangtze River Delta (YRD; 30–33<inline-formula><mml:math id="M102" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> N, 119–122<inline-formula><mml:math id="M103" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> E), Pearl River Delta (PRD; 21.5–24<inline-formula><mml:math id="M104" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> N, 112–115.5<inline-formula><mml:math id="M105" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> E), Sichuan Basin (SCB; 28.5–31.5<inline-formula><mml:math id="M106" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> N, 103.5–107<inline-formula><mml:math id="M107" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> E), Fenwei Plain (FWP; 33–35<inline-formula><mml:math id="M108" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> N, 106.25–111.25<inline-formula><mml:math id="M109" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> E; 35–37<inline-formula><mml:math id="M110" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> N, 108.75–113.75<inline-formula><mml:math id="M111" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> E), and the remainder of China. 
The locations of these regions can be found in Fig. <xref ref-type="fig" rid="Ch1.F1"/>. Therefore, we propose the regional feature selection in which SAGE is only implemented in limited ensemble sites that are randomly selected in a given region, and the selected features would be used for model training and prediction at each regional site.</p>
      <p id="d1e2858">The framework of regional feature selection, as illustrated by Algorithm <xref ref-type="other" rid="Ch1.Prog1"/>, is as follows. A total of 15 ensemble stations are randomly selected in each of the six regions. Taking NCP as an example, the significance of the features of the ensemble monitoring stations with four prediction horizons (6, 12, 18, and 24 h) and three prediction algorithms is analyzed using SAGE algorithms. Then, the outcomes of the ensemble-SAGE model are ranked, as displayed in the heatmap in Fig. <xref ref-type="fig" rid="Ch1.F4"/>.
The heatmap highlights the significant features. PM<inline-formula><mml:math id="M112" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula>, CO, and v10 typically exhibit higher ranks in the 15 random monitoring stations and four prediction horizons.
The heatmaps of the ensemble-SAGE analyses of the other five regions
can be found in Figs. S2–S18 in the Supplement.
The feature significance in different regions is ranked by the sum of the SAGE values in the ensemble monitoring stations and four prediction horizons, as displayed in Fig. <xref ref-type="fig" rid="Ch1.F5"/>. The feature selection based on the ensemble-SAGE analysis concerning Fig. <xref ref-type="fig" rid="Ch1.F5"/> is explained in Sect. <xref ref-type="sec" rid="Ch1.S3.SS2"/>.
Note that we also tried random ensemble numbers 10 and 20 in NCP key feature extraction using the MLP model at several prediction horizons. The choice of 15 is shown to give the most robust result with a minimum computation cost, and it is therefore used for all regional feature selections in this study.</p><?xmltex \floatpos{t}?><boxed-text content-type="algorithm" position="float" id="Ch1.Prog1"><?xmltex \currentcnt{1}?><label>Algorithm 1</label><caption><p id="d1e2883">Regional feature selection.</p></caption><disp-quote content-type="algorithmic" specific-use="numbering{0}"><list>

    <list-item>

      <p id="d1e2890" specific-use="STATE"><bold>Input:</bold>  data {site<inline-formula><mml:math id="M113" display="inline"><mml:msub><mml:mi/><mml:mi mathvariant="normal">d</mml:mi></mml:msub></mml:math></inline-formula>}<inline-formula><mml:math id="M114" display="inline"><mml:mrow><mml:msubsup><mml:mi/><mml:mrow><mml:mi mathvariant="normal">d</mml:mi><mml:mo>=</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow><mml:mi>D</mml:mi></mml:msubsup></mml:mrow></mml:math></inline-formula>, region <inline-formula><mml:math id="M115" display="inline"><mml:mi>z</mml:mi></mml:math></inline-formula>, machine learning model <inline-formula><mml:math id="M116" display="inline"><mml:mi mathvariant="script">F</mml:mi></mml:math></inline-formula>, predicting horizon <inline-formula><mml:math id="M117" display="inline"><mml:mi>h</mml:mi></mml:math></inline-formula>, SAGE algorithm</p>
              </list-item>
            </list></disp-quote>
            <disp-quote content-type="algorithmic" specific-use="numbering{1}"><list>

    <list-item>

      <p id="d1e2948" specific-use="STATE">Initialize <inline-formula><mml:math id="M118" display="inline"><mml:mrow><mml:mi>h</mml:mi><mml:mo>=</mml:mo><mml:mo>[</mml:mo><mml:mn mathvariant="normal">6</mml:mn><mml:mo>,</mml:mo><mml:mn mathvariant="normal">12</mml:mn><mml:mo>,</mml:mo><mml:mn mathvariant="normal">18</mml:mn><mml:mo>,</mml:mo><mml:mn mathvariant="normal">24</mml:mn><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula>, ensemble size = 15</p>
              </list-item>

    <list-item>

      <p id="d1e2982" specific-use="FOR"><bold>for</bold> <inline-formula><mml:math id="M119" display="inline"><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn mathvariant="normal">1</mml:mn><mml:mtext mathvariant="bold"> to </mml:mtext><mml:mtext>len</mml:mtext><mml:mo>(</mml:mo><mml:mi>z</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> <bold>do</bold> <list>
    <list-item>
      <p id="d1e3019" specific-use="STATE">Find all sites (<inline-formula><mml:math id="M120" display="inline"><mml:mrow><mml:msub><mml:mi>D</mml:mi><mml:mi>r</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>) in <inline-formula><mml:math id="M121" display="inline"><mml:mrow><mml:msub><mml:mi>z</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> from {site<inline-formula><mml:math id="M122" display="inline"><mml:msub><mml:mi/><mml:mi mathvariant="normal">d</mml:mi></mml:msub></mml:math></inline-formula>}<inline-formula><mml:math id="M123" display="inline"><mml:mrow><mml:msubsup><mml:mi/><mml:mrow><mml:mi mathvariant="normal">d</mml:mi><mml:mo>=</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow><mml:mi>D</mml:mi></mml:msubsup></mml:mrow></mml:math></inline-formula></p></list-item>
    <list-item>
      <p id="d1e3071" specific-use="STATE">Select ensemble sites from <inline-formula><mml:math id="M124" display="inline"><mml:mrow><mml:msub><mml:mi>D</mml:mi><mml:mi>r</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> randomly</p></list-item>
    <list-item>
      <p id="d1e3087" specific-use="FOR"><bold>for</bold> <inline-formula><mml:math id="M125" display="inline"><mml:mrow><mml:mi>e</mml:mi><mml:mo>=</mml:mo><mml:mn mathvariant="normal">1</mml:mn><mml:mtext mathvariant="bold"> to </mml:mtext></mml:mrow></mml:math></inline-formula> ensemble size  <bold>do</bold> <list>
    <list-item>
      <p id="d1e3112" specific-use="FOR"><bold>for</bold> <inline-formula><mml:math id="M126" display="inline"><mml:mrow><mml:mi>f</mml:mi><mml:mo>=</mml:mo><mml:mn mathvariant="normal">1</mml:mn><mml:mtext mathvariant="bold"> to </mml:mtext><mml:mtext>len</mml:mtext><mml:mo>(</mml:mo><mml:mi mathvariant="script">F</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula>  <bold>do</bold> <list>
    <list-item>
      <p id="d1e3145" specific-use="FOR"><bold>for</bold> <inline-formula><mml:math id="M127" display="inline"><mml:mrow><mml:mi>g</mml:mi><mml:mo>=</mml:mo><mml:mn mathvariant="normal">1</mml:mn><mml:mtext mathvariant="bold"> to </mml:mtext><mml:mtext>len</mml:mtext><mml:mo>(</mml:mo><mml:mi>h</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula>  <bold>do</bold> <list>
    <list-item>
      <p id="d1e3178" specific-use="STATE">Employ SAGE algorithm</p></list-item>
    <list-item>
      <p id="d1e3183" specific-use="STATE">Rank importance of input (<inline-formula><mml:math id="M128" display="inline"><mml:mi>A</mml:mi></mml:math></inline-formula>) for each <inline-formula><mml:math id="M129" display="inline"><mml:mi>h</mml:mi></mml:math></inline-formula>, <inline-formula><mml:math id="M130" display="inline"><mml:mi mathvariant="script">F</mml:mi></mml:math></inline-formula> and <inline-formula><mml:math id="M131" display="inline"><mml:mi>z</mml:mi></mml:math></inline-formula></p></list-item></list></p></list-item>
    <list-item>
      <p id="d1e3215" specific-use="ENDFOR"><bold>end</bold> <bold>for</bold></p></list-item>
    <list-item>
      <p id="d1e3224" specific-use="STATE">Re-rank <inline-formula><mml:math id="M132" display="inline"><mml:mi>A</mml:mi></mml:math></inline-formula>'s importance (<inline-formula><mml:math id="M133" display="inline"><mml:mi>B</mml:mi></mml:math></inline-formula>) for each <inline-formula><mml:math id="M134" display="inline"><mml:mi mathvariant="script">F</mml:mi></mml:math></inline-formula> and <inline-formula><mml:math id="M135" display="inline"><mml:mi>z</mml:mi></mml:math></inline-formula></p></list-item></list></p></list-item>
    <list-item>
      <p id="d1e3256" specific-use="ENDFOR"><bold>end</bold> <bold>for</bold></p></list-item></list></p></list-item>
    <list-item>
      <p id="d1e3265" specific-use="ENDFOR"><bold>end</bold> <bold>for</bold></p></list-item>
    <list-item>
      <p id="d1e3274" specific-use="STATE">Re-rank <inline-formula><mml:math id="M136" display="inline"><mml:mi>B</mml:mi></mml:math></inline-formula>'s importance (<inline-formula><mml:math id="M137" display="inline"><mml:mi>C</mml:mi></mml:math></inline-formula>) for each <inline-formula><mml:math id="M138" display="inline"><mml:mi>z</mml:mi></mml:math></inline-formula></p></list-item>
    <list-item>
      <p id="d1e3300" specific-use="STATE">Take the three most important variables as features for each <inline-formula><mml:math id="M139" display="inline"><mml:mi>z</mml:mi></mml:math></inline-formula></p></list-item></list></p>
              </list-item>

    <list-item>

      <p id="d1e3312" specific-use="ENDFOR"><bold>end</bold> <bold>for</bold></p>
              </list-item>
            </list></disp-quote></boxed-text>

      <?xmltex \floatpos{t}?><fig id="Ch1.F4" specific-use="star"><?xmltex \currentcnt{4}?><?xmltex \def\figurename{Figure}?><label>Figure 4</label><caption><p id="d1e3323">Heatmap of all empirical features with 15 random monitoring stations in NCP and four prediction horizons. The circle, diamond, square, and triangle represent the 6, 12, 18, and 24 h prediction horizons, respectively. The heatmap is based on the SAGE analysis ranking of features for the model trained by MLP. The warmer the row color, the more significant the corresponding feature.</p></caption>
            <?xmltex \igopts{width=384.112205pt}?><graphic xlink:href="https://gmd.copernicus.org/articles/15/7791/2022/gmd-15-7791-2022-f04.png"/>

          </fig>

      <?xmltex \floatpos{t}?><fig id="Ch1.F5" specific-use="star"><?xmltex \currentcnt{5}?><?xmltex \def\figurename{Figure}?><label>Figure 5</label><caption><p id="d1e3334">Heatmap of empirical features for six regions with three machine learning models. Each column represents the rearrangement of the sum of 15 monitoring stations and four prediction horizons. Black vertical lines are used to distinguish each region. The warmer the row color, the more critical the corresponding feature.</p></caption>
            <?xmltex \igopts{width=341.433071pt}?><graphic xlink:href="https://gmd.copernicus.org/articles/15/7791/2022/gmd-15-7791-2022-f05.png"/>

          </fig>

</sec>
</sec>
</sec>
<sec id="Ch1.S3">
  <label>3</label><title>Results and discussion</title>
<sec id="Ch1.S3.SS1">
  <label>3.1</label><title>Computational complexity analysis</title>
      <p id="d1e3360">Instead of performing feature selection for every forecast model independently,
our proposed ensemble-SAGE analysis successfully interprets the important regional features for PM<inline-formula><mml:math id="M140" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> prediction with substantially lower computational complexity.
In addition, the regional feature selection improves the forecast accuracy and saves significant computing power for the machine learning model training by excluding redundant inputs and speeding up the model convergence. In this study, all computations concerning the SAGE-based feature selection and machine learning model training were conducted on several nodes configured with <inline-formula><mml:math id="M141" display="inline"><mml:mrow><mml:mn mathvariant="normal">4</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">16</mml:mn></mml:mrow></mml:math></inline-formula>-core 2.1 GHz Intel Xeon E5-2620 v4 CPUs and with 64 GB of memory.</p>
      <p id="d1e3384">The computational cost of SAGE varies significantly, with average times of 7353, 3891, and 3325 s when using GB, RF, and MLP, respectively. The maximum time costs reach 61 209, 65 127, and 23 931 s with GB, RF, and MLP, respectively. Thus, using SAGE for each PM<inline-formula><mml:math id="M142" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> forecasting model with different air quality monitoring stations, prediction horizons, and machine learning models is time-consuming. As illustrated in Table <xref ref-type="table" rid="Ch1.T5"/>, the time cost for the three machine learning model training sessions is greatly reduced using the inputs from SAGE-based regional feature selection.</p>

<?xmltex \floatpos{t}?><table-wrap id="Ch1.T5" specific-use="star"><?xmltex \currentcnt{5}?><label>Table 5</label><caption><p id="d1e3401">Summary of mean and maximum time costs of machine learning model training.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="5">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="right"/>
     <oasis:colspec colnum="3" colname="col3" align="right"/>
     <oasis:colspec colnum="4" colname="col4" align="right"/>
     <oasis:colspec colnum="5" colname="col5" align="right"/>
     <oasis:thead>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Time (s)</oasis:entry>
         <oasis:entry colname="col2">Mean_standardML</oasis:entry>
         <oasis:entry colname="col3">Mean_RFSML</oasis:entry>
         <oasis:entry colname="col4">Max_standardML</oasis:entry>
         <oasis:entry colname="col5">Max_RFSML</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>
         <oasis:entry colname="col1">GB</oasis:entry>
         <oasis:entry colname="col2">91.024</oasis:entry>
         <oasis:entry colname="col3">12.57</oasis:entry>
         <oasis:entry colname="col4">93.952</oasis:entry>
         <oasis:entry colname="col5">13.751</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">RF</oasis:entry>
         <oasis:entry colname="col2">291.466</oasis:entry>
         <oasis:entry colname="col3">36.854</oasis:entry>
         <oasis:entry colname="col4">319.75</oasis:entry>
         <oasis:entry colname="col5">42.907</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">MLP</oasis:entry>
         <oasis:entry colname="col2">5.592</oasis:entry>
         <oasis:entry colname="col3">3.553</oasis:entry>
         <oasis:entry colname="col4">18.316</oasis:entry>
         <oasis:entry colname="col5">12.237</oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table></table-wrap>

</sec>
<sec id="Ch1.S3.SS2">
  <label>3.2</label><title>Regional feature selection analysis</title>
      <p id="d1e3506">The results of the SAGE-based regional feature selection concerning the three machine learning models, six partitioned regions, and four forecasting horizons are discussed in this section. Taking the NCP as an example, critical features that govern the performance of PM<inline-formula><mml:math id="M143" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> forecasting vary across stations and prediction horizons, as illustrated in Fig. <xref ref-type="fig" rid="Ch1.F4"/>. However, PM<inline-formula><mml:math id="M144" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula>, CO, and v10 (features) play an overwhelmingly positive role in MLP-based PM<inline-formula><mml:math id="M145" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> forecasting for most selected stations and predicting horizons. This result indicates that these three features suit all of the stations in the NCP.
SAGE analysis heatmaps for other regions or using different prediction algorithms can be found in Figs. S2–S18. Consistently critical features can be easily extracted from the five megacity cluster regions using our selection method. However, they are difficult to extract from the remaining area of China. This area does not have universal key features because its stations are spread widely across China and therefore exhibit substantially different air quality patterns. An improved station clustering method can help solve this issue and will be explored in our future research.</p>
      <p id="d1e3538">To extract the important robust features that fit all stations in a given region, we summed and ranked the SAGE analysis values in the ensemble monitoring stations and prediction horizons. The ensemble-SAGE ranking is displayed in Fig. <xref ref-type="fig" rid="Ch1.F5"/>. There are consistent, crucial features in the six cluster regions, regardless of the prediction algorithm or horizon.
PM<inline-formula><mml:math id="M146" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> is the most critical feature for predicting its trend at a particular region and with a particular prediction algorithm.
In addition to PM<inline-formula><mml:math id="M147" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula>, two variables from CAMS reanalysis, co and pm2p5, are critical across all regions. This result suggests that the forecast of these variables from CAMS reanalysis can help capture the varying trend in the machine learning models, even though the predictions are different from the actual values.
By contrast, time factors (week and hour information) are the least important features for short-term prediction. This result is consistent with that of <xref ref-type="bibr" rid="bib1.bibx27" id="text.77"/>, where no distinct weekday/weekend difference was observed for PM<inline-formula><mml:math id="M148" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> in the NCP and PRD.
<?xmltex \hack{\newpage}?>
Considering their generality and robustness, we selected the top three critical features for each region, as illustrated in Table <xref ref-type="table" rid="Ch1.T6"/>.
Note that the ensemble-SAGE analysis selected different key features in different regions.
In the NCP, the simulation of CO from CAMS reanalysis, which is a representative air pollutant, includes valuable information other than CO observations. This result implies that local precursor emissions are a major contributor to PM pollution <xref ref-type="bibr" rid="bib1.bibx24" id="paren.78"/>, and non-point source pollution may be more favorable for PM<inline-formula><mml:math id="M149" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> forecasting.
Additionally, v10, which represents regional transmission, is a critical feature for PM<inline-formula><mml:math id="M150" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> forecasting in the NCP, PRD, and YRD. This result indicates that regional transmission plays a vital role in these three regions <xref ref-type="bibr" rid="bib1.bibx11 bib1.bibx45" id="paren.79"/>. This finding is consistent with findings reported in recent studies. <xref ref-type="bibr" rid="bib1.bibx83" id="text.80"/> found that the anomalously high, normalized, and near-surface meridional wind is typically the primary cause of the severe haze in the NCP using a chemical transport model. <xref ref-type="bibr" rid="bib1.bibx30" id="text.81"/> illustrated that regional transport accounts for over half of PM<inline-formula><mml:math id="M151" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> under the polluted northerly airflow in winter. <xref ref-type="bibr" rid="bib1.bibx50" id="text.82"/> discovered that the regional PM<inline-formula><mml:math id="M152" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> pollution in winter is primarily from northern and eastern China using a trajectory model. However, v10 is less significant in the SCB. This result is because of the blocking effect of the plateau terrain on the northeasterly winds <xref ref-type="bibr" rid="bib1.bibx68" id="paren.83"/>; hence, winds are frequently static, particularly in winter and autumn <xref ref-type="bibr" rid="bib1.bibx43" id="paren.84"/>. By contrast, d2m and tp are crucial features for hourly PM<inline-formula><mml:math id="M153" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> forecasting in the SCB. 
This finding may be because polluted weather patterns are typically associated with higher relative humidity in that area, and tp, representing rainfall, is vital to eliminate air pollution in a basin <xref ref-type="bibr" rid="bib1.bibx82" id="paren.85"/>.</p>

<?xmltex \floatpos{t}?><table-wrap id="Ch1.T6"><?xmltex \currentcnt{6}?><label>Table 6</label><caption><p id="d1e3652">Summary of selected features.</p></caption><oasis:table frame="topbot"><?xmltex \begin{scaleboxenv}{.92}[.92]?><oasis:tgroup cols="7">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="left"/>
     <oasis:colspec colnum="3" colname="col3" align="left"/>
     <oasis:colspec colnum="4" colname="col4" align="left"/>
     <oasis:colspec colnum="5" colname="col5" align="left"/>
     <oasis:colspec colnum="6" colname="col6" align="left"/>
     <oasis:colspec colnum="7" colname="col7" align="left"/>
     <oasis:thead>
       <oasis:row rowsep="1">

         <oasis:entry colname="col1">Region</oasis:entry>

         <oasis:entry colname="col2">NCP</oasis:entry>

         <oasis:entry colname="col3">PRD</oasis:entry>

         <oasis:entry colname="col4">SCB</oasis:entry>

         <oasis:entry colname="col5">YRD</oasis:entry>

         <oasis:entry colname="col6">FWP</oasis:entry>

         <oasis:entry colname="col7">REST</oasis:entry>

       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>

         <oasis:entry colname="col1" morerows="2">Feature</oasis:entry>

         <oasis:entry colname="col2">PM<inline-formula><mml:math id="M154" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col3">PM<inline-formula><mml:math id="M155" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col4">PM<inline-formula><mml:math id="M156" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col5">PM<inline-formula><mml:math id="M157" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col6">PM<inline-formula><mml:math id="M158" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col7">PM<inline-formula><mml:math id="M159" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula></oasis:entry>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col2">v10</oasis:entry>

         <oasis:entry colname="col3">v10</oasis:entry>

         <oasis:entry colname="col4">d2m</oasis:entry>

         <oasis:entry colname="col5">v10</oasis:entry>

         <oasis:entry colname="col6">d2m</oasis:entry>

         <oasis:entry colname="col7">co</oasis:entry>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col2">co</oasis:entry>

         <oasis:entry colname="col3">pm2p5</oasis:entry>

         <oasis:entry colname="col4">tp</oasis:entry>

         <oasis:entry colname="col5">pm2p5</oasis:entry>

         <oasis:entry colname="col6">co</oasis:entry>

         <oasis:entry colname="col7">pm2p5</oasis:entry>

       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup><?xmltex \end{scaleboxenv}?></oasis:table></table-wrap>

</sec>
<sec id="Ch1.S3.SS3">
  <label>3.3</label><title>Performance of RFSML</title>
      <p id="d1e3831">This section presents the forecasting skill of the proposed RFSML system driven by regional features selected by the ensemble-SAGE-based model. The results are also compared with those of a standard machine learning forecasting model and fourth-generation ECMWF global reanalysis data. The latter is referred to as the benchmark of chemical transport models. To highlight the improvements by using the selected key features, the regional performance which represents the average of the forecasting performance in all sites of the given region is introduced.</p>
      <p id="d1e3834">Figure <xref ref-type="fig" rid="Ch1.F6"/> displays the time series of the simulated PM<inline-formula><mml:math id="M160" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> for the three forecasting systems (MLP model at a prediction horizon of 12 h) versus observational data.
Each subplot represents a random monitoring station in the corresponding five megacity cluster regions. The subplots illustrate the typical behaviors observed for the other monitoring stations, machine learning models, and prediction horizons.
Both the standard machine learning and RFSML models outperform the simple CTM. This result indicates that the machine learning algorithms are superior in air pollution prediction <xref ref-type="bibr" rid="bib1.bibx59" id="paren.86"/>. Additionally, PM<inline-formula><mml:math id="M161" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> predictions with selected key features perform better than the standard machine learning forecast that uses all related features. The GB and RF machine learning models used in this study also show steady improvements.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F6" specific-use="star"><?xmltex \currentcnt{6}?><?xmltex \def\figurename{Figure}?><label>Figure 6</label><caption><p id="d1e3862">Time series of test time in five megacity cluster regions. The black dots and red pentacles represent original and interpolated PM<inline-formula><mml:math id="M162" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> respectively. The solid lines in light gray, light blue, and dark violet represent prediction of CAMS reanalysis, the standard machine learning system and RFSML respectively. Panels <bold>(a)</bold>–<bold>(e)</bold> represent a random site in NCP, YRD, PRD, SCB, and FWP respectively. Note that those are predictions 12 h in advance, which are in parallel with CAMS reanalysis's predicting horizon, and the machine learning model used here is MLP.</p></caption>
          <?xmltex \igopts{width=426.791339pt}?><graphic xlink:href="https://gmd.copernicus.org/articles/15/7791/2022/gmd-15-7791-2022-f06.png"/>

        </fig>

      <p id="d1e3887">Both the RFSML and standard machine learning predictions typically underestimate high PM<inline-formula><mml:math id="M163" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> concentrations as the prediction horizon increases. This underestimation can be ascribed to three primary possible reasons. First, the correct features are difficult to obtain, and unsuitable features can bring significant bias and noise into the prediction algorithms. Second, the construction of our prediction algorithm network may be insufficiently complex or deep to determine the actual relationship between features and the PM<inline-formula><mml:math id="M164" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula>. However, considering the purpose of a real-time forecast, the time to forecast, which is closely related to the complexity of the prediction algorithm network, cannot be too long. Third, considering our test period only included late autumn and early winter of 2019, the training and validation periods only included autumn and winter of 2018, which is too short for a prediction algorithm to learn the complex relationship for hourly PM<inline-formula><mml:math id="M165" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> forecasting. Seasonal training and validation may obtain satisfactory outcomes for a particular seasonal forecast <xref ref-type="bibr" rid="bib1.bibx3" id="paren.87"/>.</p>
      <p id="d1e3920">Figure <xref ref-type="fig" rid="Ch1.F7"/> displays the spatial distribution of the RMSEs (columns a and b) and MAEs (columns c and d) of the PM<inline-formula><mml:math id="M166" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> forecast for all stations either using the standard machine learning or RFSML system at a forecasting horizon of 12 h.
The RMSEs and MAEs significantly decreased when using the selected key features for all three machine learning models, particularly in regions with severe PM<inline-formula><mml:math id="M167" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> pollution, e.g., the NCP and FWP. This consistent improvement also occurs when the forecast horizon changes to 6, 18, and 24 h, and the results are illustrated in Figs. S19–S21 in the Supplement.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F7" specific-use="star"><?xmltex \currentcnt{7}?><?xmltex \def\figurename{Figure}?><label>Figure 7</label><caption><p id="d1e3945">Spatial distribution of RMSEs and MAEs at a prediction horizon of 12 h. Panels <bold>(a)</bold> and <bold>(c)</bold> are results of standard machine learning system, while panels <bold>(b)</bold> and <bold>(d)</bold> are results of RFSML. The cooler the color tone, the lower the RMSEs and MAEs and thus the better the prediction performance.</p></caption>
          <?xmltex \igopts{width=483.69685pt}?><graphic xlink:href="https://gmd.copernicus.org/articles/15/7791/2022/gmd-15-7791-2022-f07.png"/>

        </fig>

      <p id="d1e3966">A modified Taylor diagram <xref ref-type="bibr" rid="bib1.bibx75" id="paren.88"/> is plotted in Fig. <xref ref-type="fig" rid="Ch1.F8"/> to show the overall outcome. RFSML forecasts with selected features typically exhibit a lower RMSE and higher <inline-formula><mml:math id="M168" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> than the standard forecasts. The best improvement is obtained when the deep learning (MLP) model is used, while forecasts with the selected new features in the RF model are not significantly improved and are even not as good as forecasts that use all features.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F8" specific-use="star"><?xmltex \currentcnt{8}?><?xmltex \def\figurename{Figure}?><label>Figure 8</label><caption><p id="d1e3983">A modified Taylor diagram that illustrates RMSE and correlation coefficient values in six regions. The black and red colors represent forecasts with standard machine learning models and RFSML. Round, diamond, and fork shapes represent RF, MLP, and GB respectively. The transparency of markers indicates the four prediction horizons, where the transparency increases as the forecast hours increase.</p></caption>
          <?xmltex \igopts{width=455.244094pt}?><graphic xlink:href="https://gmd.copernicus.org/articles/15/7791/2022/gmd-15-7791-2022-f08.png"/>

        </fig>

      <p id="d1e3993">This result can be explained by the characteristics of the two types of prediction algorithms. RF increases the diversity of the trees through the bootstrapped aggregation of several regression trees (bagging) <xref ref-type="bibr" rid="bib1.bibx6" id="paren.89"/>. It has the advantage of maintaining low bias because tree-based methods with bagging can reduce the variance of an estimated prediction function. Some uninformative features can be ignored through bagging; i.e., RF reduces the high variance by growing the individual trees to a deep level and then making their predictions, typically through averaging <xref ref-type="bibr" rid="bib1.bibx44" id="paren.90"/>. By contrast, MLP, which implements the global approximation strategy <xref ref-type="bibr" rid="bib1.bibx56" id="paren.91"/>, may face problems of multicollinearity and noise caused by uninformative features.</p>
      <p id="d1e4005">The RMSE increases and <inline-formula><mml:math id="M169" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> declines with an increase in the prediction horizons across all regions and machine learning models in general. The average coefficient of determination (<inline-formula><mml:math id="M170" display="inline"><mml:mrow><mml:msup><mml:mi>R</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msup></mml:mrow></mml:math></inline-formula>) of the 24 h forecast (the maximum horizon set in this study) based on the three machine learning models increases from 0.47 to 0.65 in the NCP, from 0.41 to 0.52 in the PRD, from 0.62 to 0.67 in the SCB, from 0.44 to 0.57 in the YRD, and from 0.62 to 0.65 in the FWP when using the ensemble-SAGE analysis-based feature selection. These results indicate that the RFSML system can provide the operational PM<inline-formula><mml:math id="M171" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> forecast with a maximum horizon of 24 h.</p>
      <p id="d1e4035">To further confirm the predictive capability in a rolling way, we make forecasts over a less polluted month, April 2020. Specific results can be found in Table S1 in the Supplement. Steady improvement of predicting performance is still achieved by RFSML. Time series, as given in Fig. S22 in the Supplement, show results similar to those in the main text, i.e., RFSML has better predictive ability than standard machine learning. As is illustrated in Figs. S23 and S24 in the Supplement, RFSML has both lower RMSE and MAE values than standard machine learning, which implies the advantage of RFSML.</p>
</sec>
</sec>
<sec id="Ch1.S4" sec-type="conclusions">
  <label>4</label><title>Conclusions and future work</title>
      <p id="d1e4047">Machine learning models have been successfully utilized in air quality forecasts worldwide because of their high computational efficiency and accuracy. However, substantial room for improvement remains. In this study, we developed the RFSML v1.0 system, which can predict national air quality with high accuracy in real time in China.</p>
      <p id="d1e4050">In a standard machine learning system, all related features are typically utilized in model training and prediction. However, the high dimensionality and redundant input data may lead to increased complexity and machine learning model over-fitting. To overcome this obstacle, we combined an ensemble-SAGE analysis with our RFSML system. This method extracts the key features in a given region at an affordable extra cost, and the significance of these regional selected features is explained physically. Compared with the standard machine learning system that was fed with all related features, the RFSML system driven by the selected key features resulted in superior interpretability, less training time, and more accurate predictions. Statistically, the average RMSE and MAE of predictions were reduced from 24.74 and 16.54 <inline-formula><mml:math id="M172" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">µ</mml:mi><mml:mi mathvariant="normal">g</mml:mi><mml:mspace width="0.125em" linebreak="nobreak"/><mml:msup><mml:mi mathvariant="normal">m</mml:mi><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> to 21.54 and 13.7 <inline-formula><mml:math id="M173" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">µ</mml:mi><mml:mi mathvariant="normal">g</mml:mi><mml:mspace linebreak="nobreak" width="0.125em"/><mml:msup><mml:mi mathvariant="normal">m</mml:mi><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, respectively, with RFSML. Additionally, <inline-formula><mml:math id="M174" display="inline"><mml:mrow><mml:msup><mml:mi>R</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msup></mml:mrow></mml:math></inline-formula> increased from 0.6 to 0.7, and the average forecasting model training cost was reduced from 129.36 to 17.66 s.
Among the three machine learning models studied, the prediction performance of RFSML with MLP exhibited the greatest increase, with <inline-formula><mml:math id="M175" display="inline"><mml:mrow><mml:msup><mml:mi>R</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msup></mml:mrow></mml:math></inline-formula> increasing from 0.55 to 0.72. By contrast, RF exhibited the least improvement, with <inline-formula><mml:math id="M176" display="inline"><mml:mrow><mml:msup><mml:mi>R</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msup></mml:mrow></mml:math></inline-formula> increasing from 0.61 to 0.66. In addition, RF and GB were more robust than MLP for certain underlying uninformative features, while MLP was more susceptible to over-fitting. Meanwhile, RFSML provides only predictions over the air quality monitoring sites where historical data are available for machine learning model training, instead of a gridded forecast. A Bayesian-theory-based prediction fusion is being explored now to extend the RFSML forecast available at single stations to a gridded one.</p>
      <p id="d1e4124">The six-region partition used here was empirical and not based on science. Additionally, stations in a given region may exhibit different air quality patterns, particularly in the “remainder” region. Therefore, our ensemble-SAGE analysis does not always select the representative feature, limiting the interpretability and prediction ability of the machine learning model. A more scientific station partition, such as spatial clustering, will be explored in future studies.</p>
      <p id="d1e4127">Based on the results of this study, the RFSML system can accurately predict air quality in the short term at the national scale; this renders it valuable for health professionals and policy makers in terms of providing early warning to population categories more susceptible to air pollution (e.g., children, elderly, and people with respiratory or cardiovascular issues) and reducing and regulating air pollution.</p>
</sec>

      
      </body>
    <back><notes notes-type="codedataavailability"><title>Code and data availability</title>

      <p id="d1e4135">The ground-based air quality monitoring observations are from the network established by the China Ministry of Environmental Protection and accessible via <uri>https://quotsoft.net/air/</uri> (last access: June 2022). The measurements used in this study are also archived on Zenodo (<ext-link xlink:href="https://doi.org/10.5281/zenodo.6551820" ext-link-type="DOI">10.5281/zenodo.6551820</ext-link>, <xref ref-type="bibr" rid="bib1.bibx18" id="altparen.92"/>). The RFSML algorithm is in the Python environment and is archived on Zenodo (<ext-link xlink:href="https://doi.org/10.5281/zenodo.6551850" ext-link-type="DOI">10.5281/zenodo.6551850</ext-link>, <xref ref-type="bibr" rid="bib1.bibx18" id="altparen.93"/>).</p>
  </notes><app-group>
        <supplementary-material position="anchor"><p id="d1e4153">The supplement related to this article is available online at: <inline-supplementary-material xlink:href="https://doi.org/10.5194/gmd-15-7791-2022-supplement" xlink:title="pdf">https://doi.org/10.5194/gmd-15-7791-2022-supplement</inline-supplementary-material>.</p></supplementary-material>
        </app-group><notes notes-type="authorcontribution"><title>Author contributions</title>

      <p id="d1e4162">JJ conceived the study and designed the RFSML system.
LF wrote the code of RFSML and carried out the prediction and evaluation.
HXL, AS, CX, TD, and HL provided useful comments on the paper.
LF prepared the manuscript with contributions from JJ and all other co-authors.</p>
  </notes><notes notes-type="competinginterests"><title>Competing interests</title>

      <p id="d1e4168">The contact author has declared that none of the authors has any competing interests.</p>
  </notes><notes notes-type="disclaimer"><title>Disclaimer</title>

      <p id="d1e4174">Publisher's note: Copernicus Publications remains neutral with regard to jurisdictional claims in published maps and institutional affiliations.</p>
  </notes><notes notes-type="financialsupport"><title>Financial support</title>

      <p id="d1e4180">This work is supported by the National Natural Science Foundation of China (grant nos. 42105109 and 42021004) and the Natural Science Foundation of Jiangsu Province (grant nos. BK20210664 and BK20220031).</p>
  </notes><notes notes-type="reviewstatement"><title>Review statement</title>

      <p id="d1e4186">This paper was edited by Augustin Colette and reviewed by two anonymous referees.</p>
  </notes><ref-list>
    <title>References</title>

      <ref id="bib1.bibx1"><?xmltex \def\ref@label{{Abu Awad et al.(2017){Abu Awad}, Koutrakis, Coull, and Schwartz}}?><label>Abu Awad et al.(2017)Abu Awad, Koutrakis, Coull, and Schwartz</label><?label ABUAWAD2017427?><mixed-citation>Abu Awad, Y., Koutrakis, P., Coull, B. A., and Schwartz, J.: A spatio-temporal prediction model based on support vector machine regression: Ambient Black Carbon in three New England States, Environ. Res., 159, 427–434, <ext-link xlink:href="https://doi.org/10.1016/j.envres.2017.08.039" ext-link-type="DOI">10.1016/j.envres.2017.08.039</ext-link>, 2017.</mixed-citation></ref>
      <ref id="bib1.bibx2"><?xmltex \def\ref@label{{Altmann et al.(2010)Altmann, Tolo\c{s}i, Sander, and Lengauer}}?><label>Altmann et al.(2010)Altmann, Toloşi, Sander, and Lengauer</label><?label Altmann2010?><mixed-citation>Altmann, A., Toloşi, L., Sander, O., and Lengauer, T.: Permutation importance: a corrected feature importance measure, Bioinformatics, 26, 1340–1347, <ext-link xlink:href="https://doi.org/10.1093/bioinformatics/btq134" ext-link-type="DOI">10.1093/bioinformatics/btq134</ext-link>, 2010.</mixed-citation></ref>
      <ref id="bib1.bibx3"><?xmltex \def\ref@label{{Bai et al.(2019)Bai, Li, Zeng, Li, and Zhang}}?><label>Bai et al.(2019)Bai, Li, Zeng, Li, and Zhang</label><?label bai2019224?><mixed-citation>Bai, Y., Li, Y., Zeng, B., Li, C., and Zhang, J.: Hourly PM<inline-formula><mml:math id="M177" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> concentration forecast using stacked autoencoder model with emphasis on seasonality, J. Clean. Prod., 224, 739–750, 2019.</mixed-citation></ref>
      <ref id="bib1.bibx4"><?xmltex \def\ref@label{{Bartier and Keller(1996)}}?><label>Bartier and Keller(1996)</label><?label BARTIER1996795?><mixed-citation>Bartier, P. M. and Keller, C.: Multivariate interpolation to incorporate thematic surface data using inverse distance weighting (IDW), Comput. Geosci., 22, 795–799, <ext-link xlink:href="https://doi.org/10.1016/0098-3004(96)00021-0" ext-link-type="DOI">10.1016/0098-3004(96)00021-0</ext-link>, 1996.</mixed-citation></ref>
      <ref id="bib1.bibx5"><?xmltex \def\ref@label{{Bey et al.(2001)Bey, Jacob, Yantosca, Logan, Field, Fiore, Li, Liu, Mickley, and Schultz}}?><label>Bey et al.(2001)Bey, Jacob, Yantosca, Logan, Field, Fiore, Li, Liu, Mickley, and Schultz</label><?label Bey2001JD000807?><mixed-citation>Bey, I., Jacob, D. J., Yantosca, R. M., Logan, J. A., Field, B. D., Fiore, A. M., Li, Q., Liu, H. Y., Mickley, L. J., and Schultz, M. G.: Global modeling of tropospheric chemistry with assimilated meteorology: Model description and evaluation, J. Geophys. Res.-Atmos., 106, 23073–23095, <ext-link xlink:href="https://doi.org/10.1029/2001JD000807" ext-link-type="DOI">10.1029/2001JD000807</ext-link>, 2001.</mixed-citation></ref>
      <ref id="bib1.bibx6"><?xmltex \def\ref@label{{Brokamp et al.(2017)Brokamp, Jandarov, Rao, LeMasters, and Ryan}}?><label>Brokamp et al.(2017)Brokamp, Jandarov, Rao, LeMasters, and Ryan</label><?label BROKAMP20171?><mixed-citation>Brokamp, C., Jandarov, R., Rao, M., LeMasters, G., and Ryan, P.: Exposure assessment models for elemental components of particulate matter in an urban environment: A comparison of regression and random forest approaches, Atmos. Environ., 151, 1–11, <ext-link xlink:href="https://doi.org/10.1016/j.atmosenv.2016.11.066" ext-link-type="DOI">10.1016/j.atmosenv.2016.11.066</ext-link>, 2017.</mixed-citation></ref>
      <ref id="bib1.bibx7"><?xmltex \def\ref@label{{Burnett et al.(2018)Burnett, Chen, Szyszkowicz, Fann, Hubbell, Pope, Apte, Brauer, Cohen, Weichenthal et al.}}?><label>Burnett et al.(2018)Burnett, Chen, Szyszkowicz, Fann, Hubbell, Pope, Apte, Brauer, Cohen, Weichenthal et al.</label><?label burnett2018global?><mixed-citation>
Burnett, R., Chen, H., Szyszkowicz, M., Fann, N., Hubbell, B., Pope, C. A., Apte, J. S., Brauer, M., Cohen, A.,
Weichenthal, S., Coggins, J., Di, Q., Brunekreef, B., Frostad, J., Lim, S. S., Kan, H., Walker, K. D., Thurston, G. D., Hayes,
R. B., Lim, C. C., Turner, M. C., Jerrett, M., Krewski, D., Gapstur, S. M., Diver, W. R., Ostro, B., Goldberg, D., Crouse, D.
L., Martin, R. V., Peters, P., Pinault, L., Tjepkema, M., van Donkelaar, A., Villeneuve, P. J., Miller, A. B., Yin, P., Zhou,
M., Wang, L., Janssen, N. A. H., Marra, M., Atkinson, R. W., Tsang, H., Thach, T. Q., Cannon, J. B., Allen, R. T., Hart, J.
E., Laden, F., Cesaroni, G., Forastiere, F., Weinmayr, G., Jaensch, A., Nagel, G., Concin, H., and Spadaro, J. V.: Global estimates of mortality associated with long-term exposure to outdoor fine particulate matter, P. Natl. Acad. Sci. USA, 115, 9592–9597, 2018.</mixed-citation></ref>
      <ref id="bib1.bibx8"><?xmltex \def\ref@label{{Cao et al.(2021)Cao, Chen, Chen, Zhang, and Yuan}}?><label>Cao et al.(2021)Cao, Chen, Chen, Zhang, and Yuan</label><?label cao2021improved?><mixed-citation>Cao, D., Chen, Y., Chen, J., Zhang, H., and Yuan, Z.: An improved algorithm for the maximal information coefficient and its application, Roy. Soc. Open Sci., 8, 201424, <ext-link xlink:href="https://doi.org/10.1098/rsos.201424" ext-link-type="DOI">10.1098/rsos.201424</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bibx9"><?xmltex \def\ref@label{{Casalicchio et al.(2019)Casalicchio, Molnar, and Bischl}}?><label>Casalicchio et al.(2019)Casalicchio, Molnar, and Bischl</label><?label Casalicchio2019?><mixed-citation>Casalicchio, G., Molnar, C., and Bischl, B.: Visualizing the Feature Importance for Black Box Models, in: Machine Learning and Knowledge Discovery in Databases, edited by: Berlingerio, M., Bonchi, F., Gärtner, T., Hurley, N., and Ifrim, G., Springer International Publishing, Cham, 655–670, <ext-link xlink:href="https://doi.org/10.1007/978-3-030-10925-7_40" ext-link-type="DOI">10.1007/978-3-030-10925-7_40</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bibx10"><?xmltex \def\ref@label{{Chandrashekar and Sahin(2014)}}?><label>Chandrashekar and Sahin(2014)</label><?label CHANDRASHEKAR201416?><mixed-citation>Chandrashekar, G. and Sahin, F.: A survey on feature selection methods, Comput. Electr. Eng., 40, 16–28, <ext-link xlink:href="https://doi.org/10.1016/j.compeleceng.2013.11.024" ext-link-type="DOI">10.1016/j.compeleceng.2013.11.024</ext-link>, 2014.</mixed-citation></ref>
      <ref id="bib1.bibx11"><?xmltex \def\ref@label{{Chen et al.(2017)Chen, Liu, Lang, Zhou, Wei, Wang, and Guo}}?><label>Chen et al.(2017)Chen, Liu, Lang, Zhou, Wei, Wang, and Guo</label><?label CHEN2017280?><mixed-citation>Chen, D., Liu, X., Lang, J., Zhou, Y., Wei, L., Wang, X., and Guo, X.: Estimating the contribution of regional transport to PM<inline-formula><mml:math id="M178" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> air pollution in a rural area on the North China Plain, Sci. Total Environ., 583, 280–291, <ext-link xlink:href="https://doi.org/10.1016/j.scitotenv.2017.01.066" ext-link-type="DOI">10.1016/j.scitotenv.2017.01.066</ext-link>, 2017.</mixed-citation></ref>
      <ref id="bib1.bibx12"><?xmltex \def\ref@label{{Chen et al.(2016)Chen, Zeng, Luo, and Yuan}}?><label>Chen et al.(2016)Chen, Zeng, Luo, and Yuan</label><?label chen2016new?><mixed-citation>Chen, Y., Zeng, Y., Luo, F., and Yuan, Z.: A new algorithm to optimize maximal information coefficient, PloS one, 11, e0157567,  <ext-link xlink:href="https://doi.org/10.1371/journal.pone.0157567" ext-link-type="DOI">10.1371/journal.pone.0157567</ext-link>, 2016.</mixed-citation></ref>
      <ref id="bib1.bibx13"><?xmltex \def\ref@label{{Cobourn(2010)}}?><label>Cobourn(2010)</label><?label COBOURN20103015?><mixed-citation>Cobourn, W. G.: An enhanced PM<inline-formula><mml:math id="M179" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> air quality forecast model based on nonlinear regression and back-trajectory concentrations, Atmos. Environ., 44, 3015–3023, <ext-link xlink:href="https://doi.org/10.1016/j.atmosenv.2010.05.009" ext-link-type="DOI">10.1016/j.atmosenv.2010.05.009</ext-link>, 2010.</mixed-citation></ref>
      <ref id="bib1.bibx14"><?xmltex \def\ref@label{{Copernicus Climate Change Service (C3S)}(2017)}?><label>Copernicus Climate Change Service (C3S)(2017)</label><?label era?><mixed-citation>Copernicus Climate Change Service (C3S): ERA5: Fifth generation of ECMWF atmospheric reanalyses of the global climate. Copernicus Climate Change Service Climate Data Store (CDS), <uri>https://cds.climate.copernicus.eu/cdsapp#!/home</uri> (last access: June 2022), 2017.</mixed-citation></ref>
      <ref id="bib1.bibx15"><?xmltex \def\ref@label{{Covert et al.(2020)Covert, Lundberg, and Lee}}?><label>Covert et al.(2020)Covert, Lundberg, and Lee</label><?label Covert2020_c7bf0b7c?><mixed-citation>Covert, I., Lundberg, S. M., and Lee, S.-I.: Understanding Global Feature Contributions With Additive Importance Measures, in: Advances in Neural Information Processing Systems, vol. 33, edited by: Larochelle, H., Ranzato, M., Hadsell, R., Balcan, M. F., and Lin, H., Curran Associates, Inc., 17212–17223, <uri>https://proceedings.neurips.cc/paper/2020/file/c7bf0b7c1a86d5eb3be2c722cf2cf746-Paper.pdf</uri> (last access: June 2022), 2020.</mixed-citation></ref>
      <ref id="bib1.bibx16"><?xmltex \def\ref@label{{Di et al.(2019)Di, Amini, Shi, Kloog, Silvern, Kelly, Sabath, Choirat, Koutrakis, Lyapustin, Wang, Mickley, and Schwartz}}?><label>Di et al.(2019)Di, Amini, Shi, Kloog, Silvern, Kelly, Sabath, Choirat, Koutrakis, Lyapustin, Wang, Mickley, and Schwartz</label><?label DI2019104909?><mixed-citation>Di, Q., Amini, H., Shi, L., Kloog, I., Silvern, R., Kelly, J., Sabath, M. B., Choirat, C., Koutrakis, P., Lyapustin, A., Wang, Y., Mickley, L. J., and Schwartz, J.: An ensemble-based model of PM<inline-formula><mml:math id="M180" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> concentration across the contiguous United States with high spatiotemporal resolution, Environ. Int., 130, 104909, <ext-link xlink:href="https://doi.org/10.1016/j.envint.2019.104909" ext-link-type="DOI">10.1016/j.envint.2019.104909</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bibx17"><?xmltex \def\ref@label{{Fan et al.(2018)Fan, Liu, Ma, Zhang, Li, Jiang, Zhang, Zhao, Yang, Wu, and Wang}}?><label>Fan et al.(2018)Fan, Liu, Ma, Zhang, Li, Jiang, Zhang, Zhao, Yang, Wu, and Wang</label><?label acp-18-1395-2018?><mixed-citation>Fan, T., Liu, X., Ma, P.-L., Zhang, Q., Li, Z., Jiang, Y., Zhang, F., Zhao, C., Yang, X., Wu, F., and Wang, Y.: Emission or atmospheric processes? An attempt to attribute the source of large bias of aerosols in eastern China simulated by global climate models, Atmos. Chem. Phys., 18, 1395–1417, <ext-link xlink:href="https://doi.org/10.5194/acp-18-1395-2018" ext-link-type="DOI">10.5194/acp-18-1395-2018</ext-link>, 2018.</mixed-citation></ref>
      <ref id="bib1.bibx18"><?xmltex \def\ref@label{Fang(2022)}?><label>Fang(2022)</label><?label fang?><mixed-citation>Fang, L.: The ground observations for RFSML, Zenodo [data set and code], <ext-link xlink:href="https://doi.org/10.5281/zenodo.6551820" ext-link-type="DOI">10.5281/zenodo.6551820</ext-link>, 2022.</mixed-citation></ref>
      <ref id="bib1.bibx19"><?xmltex \def\ref@label{{Fernando et al.(2012)Fernando, Mammarella, Grandoni, Fedele, {Di Marco}, Dimitrova, and Hyde}}?><label>Fernando et al.(2012)Fernando, Mammarella, Grandoni, Fedele, Di Marco, Dimitrova, and Hyde</label><?label FERNANDO201262?><mixed-citation>Fernando, H., Mammarella, M., Grandoni, G., Fedele, P., Di Marco, R., Dimitrova, R., and Hyde, P.: Forecasting PM<inline-formula><mml:math id="M181" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">10</mml:mn></mml:msub></mml:math></inline-formula> in metropolitan areas: Efficacy of neural networks, Environ. Pollut., 163, 62–67, <ext-link xlink:href="https://doi.org/10.1016/j.envpol.2011.12.018" ext-link-type="DOI">10.1016/j.envpol.2011.12.018</ext-link>, 2012.</mixed-citation></ref>
      <ref id="bib1.bibx20"><?xmltex \def\ref@label{{Fritsch and Carlson(1980)}}?><label>Fritsch and Carlson(1980)</label><?label fritsch1980monotone?><mixed-citation>
Fritsch, F. N. and Carlson, R. E.: Monotone piecewise cubic interpolation, SIAM J. Numer. Anal., 17, 238–246, 1980.</mixed-citation></ref>
      <ref id="bib1.bibx21"><?xmltex \def\ref@label{{Fryer et al.(2021)Fryer, Str{\"{u}}mke, and Nguyen}}?><label>Fryer et al.(2021)Fryer, Strümke, and Nguyen</label><?label Daniel2021-2102-10936?><mixed-citation>Fryer, D. V., Strümke, I., and Nguyen, H.: Shapley values for feature selection: The good, the bad, and the axioms, arXiv [preprint], <ext-link xlink:href="https://doi.org/10.48550/arXiv.2102.10936" ext-link-type="DOI">10.48550/arXiv.2102.10936</ext-link>, 22 February 2021.</mixed-citation></ref>
      <ref id="bib1.bibx22"><?xmltex \def\ref@label{{{Golizadeh Akhlaghi} et al.(2021){Golizadeh Akhlaghi}, Aslansefat, Zhao, Sadati, Badiei, Xiao, Shittu, Fan, and Ma}}?><label>Golizadeh Akhlaghi et al.(2021)Golizadeh Akhlaghi, Aslansefat, Zhao, Sadati, Badiei, Xiao, Shittu, Fan, and Ma</label><?label GOLIZADEHAKHLAGHI2021116062?><mixed-citation>Golizadeh Akhlaghi, Y., Aslansefat, K., Zhao, X., Sadati, S., Badiei, A., Xiao, X., Shittu, S., Fan, Y., and Ma, X.: Hourly performance forecast of a dew point cooler using explainable Artificial Intelligence and evolutionary optimisations by 2050, Appl. Energ., 281, 116062, <ext-link xlink:href="https://doi.org/10.1016/j.apenergy.2020.116062" ext-link-type="DOI">10.1016/j.apenergy.2020.116062</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bibx23"><?xmltex \def\ref@label{{Grell et al.(2005)Grell, Peckham, Schmitz, McKeen, Frost, Skamarock, and Eder}}?><label>Grell et al.(2005)Grell, Peckham, Schmitz, McKeen, Frost, Skamarock, and Eder</label><?label GRELL20056957?><mixed-citation>Grell, G. A., Peckham, S. E., Schmitz, R., McKeen, S. A., Frost, G., Skamarock, W. C., and Eder, B.: Fully coupled “online” chemistry within the WRF model, Atmos. Environ., 39, 6957–6975, <ext-link xlink:href="https://doi.org/10.1016/j.atmosenv.2005.04.027" ext-link-type="DOI">10.1016/j.atmosenv.2005.04.027</ext-link>, 2005.</mixed-citation></ref>
      <ref id="bib1.bibx24"><?xmltex \def\ref@label{{Guo et al.(2016)Guo, He, Liu, Miao, Liu, and Zhai}}?><label>Guo et al.(2016)Guo, He, Liu, Miao, Liu, and Zhai</label><?label GUO2016311?><mixed-citation>Guo, J., He, J., Liu, H., Miao, Y., Liu, H., and Zhai, P.: Impact of various emission control schemes on air quality using WRF-Chem during APEC China 2014, Atmos. Environ., 140, 311–319, <ext-link xlink:href="https://doi.org/10.1016/j.atmosenv.2016.05.046" ext-link-type="DOI">10.1016/j.atmosenv.2016.05.046</ext-link>, 2016.</mixed-citation></ref>
      <ref id="bib1.bibx25"><?xmltex \def\ref@label{{Guyon and Elisseeff(2003)}}?><label>Guyon and Elisseeff(2003)</label><?label guyon2003introduction?><mixed-citation>
Guyon, I. and Elisseeff, A.: An introduction to variable and feature selection, J. Mach. Learn. Res., 3, 1157–1182, 2003.</mixed-citation></ref>
      <ref id="bib1.bibx26"><?xmltex \def\ref@label{{Hao et al.(2021)Hao, Li, Wang, Liao, Yin, Hu, Wei, and Dang}}?><label>Hao et al.(2021)Hao, Li, Wang, Liao, Yin, Hu, Wei, and Dang</label><?label HAO2021118118?><mixed-citation>Hao, X., Li, J., Wang, H., Liao, H., Yin, Z., Hu, J., Wei, Y., and Dang, R.: Long-term health impact of PM<inline-formula><mml:math id="M182" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> under whole-year COVID-19 lockdown in China, Environ. Pollut., 290, 118118, <ext-link xlink:href="https://doi.org/10.1016/j.envpol.2021.118118" ext-link-type="DOI">10.1016/j.envpol.2021.118118</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bibx27"><?xmltex \def\ref@label{{Hu et al.(2014)Hu, Wang, Ying, and Zhang}}?><label>Hu et al.(2014)Hu, Wang, Ying, and Zhang</label><?label HU2014598?><mixed-citation>Hu, J., Wang, Y., Ying, Q., and Zhang, H.: Spatial and temporal variability of PM<inline-formula><mml:math id="M183" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> and PM<inline-formula><mml:math id="M184" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">10</mml:mn></mml:msub></mml:math></inline-formula> over the North China Plain and the Yangtze River Delta, China, Atmos. Environ., 95, 598–609, <ext-link xlink:href="https://doi.org/10.1016/j.atmosenv.2014.07.019" ext-link-type="DOI">10.1016/j.atmosenv.2014.07.019</ext-link>, 2014.</mixed-citation></ref>
      <ref id="bib1.bibx28"><?xmltex \def\ref@label{{Hu et al.(2017)Hu, Li, Huang, Ying, Zhang, Zhao, Wang, and Zhang}}?><label>Hu et al.(2017)Hu, Li, Huang, Ying, Zhang, Zhao, Wang, and Zhang</label><?label hu-201717?><mixed-citation>Hu, J., Li, X., Huang, L., Ying, Q., Zhang, Q., Zhao, B., Wang, S., and Zhang, H.: Ensemble prediction of air quality using the WRF/CMAQ model system for health effect studies in China, Atmos. Chem. Phys., 17, 13103–13118, <ext-link xlink:href="https://doi.org/10.5194/acp-17-13103-2017" ext-link-type="DOI">10.5194/acp-17-13103-2017</ext-link>, 2017.</mixed-citation></ref>
      <ref id="bib1.bibx29"><?xmltex \def\ref@label{{Huang et al.(2021)Huang, Liu, Yang, Xing, Zhang, Bian, Li, Sahu, Wang, and Liu}}?><label>Huang et al.(2021)Huang, Liu, Yang, Xing, Zhang, Bian, Li, Sahu, Wang, and Liu</label><?label Huang202114?><mixed-citation>Huang, L., Liu, S., Yang, Z., Xing, J., Zhang, J., Bian, J., Li, S., Sahu, S. K., Wang, S., and Liu, T.-Y.: Exploring deep learning for air pollutant emission estimation, Geosci. Model Dev., 14, 4641–4654, <ext-link xlink:href="https://doi.org/10.5194/gmd-14-4641-2021" ext-link-type="DOI">10.5194/gmd-14-4641-2021</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bibx30"><?xmltex \def\ref@label{{Huang et al.(2018)Huang, Zou, He, Hu, Pr\'{e}v\^{o}t, and Zhang}}?><label>Huang et al.(2018)Huang, Zou, He, Hu, Prévôt, and Zhang</label><?label Huang-18-11563-2018?><mixed-citation>Huang, X.-F., Zou, B.-B., He, L.-Y., Hu, M., Prévôt, A. S. H., and Zhang, Y.-H.: Exploration of PM<inline-formula><mml:math id="M185" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> sources on the regional scale in the Pearl River Delta based on ME-2 modeling, Atmos. Chem. Phys., 18, 11563–11580, <ext-link xlink:href="https://doi.org/10.5194/acp-18-11563-2018" ext-link-type="DOI">10.5194/acp-18-11563-2018</ext-link>, 2018.</mixed-citation></ref>
      <ref id="bib1.bibx31"><?xmltex \def\ref@label{{Hutzell and Luecken(2008)}}?><label>Hutzell and Luecken(2008)</label><?label HUTZELL2008164?><mixed-citation>Hutzell, W. T. and Luecken, D. J.: Fate and transport of emissions for several trace metals over the United States, Sci. Total Environ., 396, 164–179, <ext-link xlink:href="https://doi.org/10.1016/j.scitotenv.2008.02.020" ext-link-type="DOI">10.1016/j.scitotenv.2008.02.020</ext-link>, 2008.</mixed-citation></ref>
      <ref id="bib1.bibx32"><?xmltex \def\ref@label{{Inness et al.(2019)Inness, Ades, Agust\'{\i}-Panareda, Barr\'{e}, Benedictow, Blechschmidt, Dominguez, Engelen, Eskes, Flemming, Huijnen, Jones, Kipling, Massart, Parrington, Peuch, Razinger, Remy, Schulz, and Suttie}}?><label>Inness et al.(2019)Inness, Ades, Agustí-Panareda, Barré, Benedictow, Blechschmidt, Dominguez, Engelen, Eskes, Flemming, Huijnen, Jones, Kipling, Massart, Parrington, Peuch, Razinger, Remy, Schulz, and Suttie</label><?label Inness-3515-2019?><mixed-citation>Inness, A., Ades, M., Agustí-Panareda, A., Barré, J., Benedictow, A., Blechschmidt, A.-M., Dominguez, J. J., Engelen, R., Eskes, H., Flemming, J., Huijnen, V., Jones, L., Kipling, Z., Massart, S., Parrington, M., Peuch, V.-H., Razinger, M., Remy, S., Schulz, M., and Suttie, M.: The CAMS reanalysis of atmospheric composition, Atmos. Chem. Phys., 19, 3515–3556, <ext-link xlink:href="https://doi.org/10.5194/acp-19-3515-2019" ext-link-type="DOI">10.5194/acp-19-3515-2019</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bibx33"><?xmltex \def\ref@label{{Jeong and Park(2018)}}?><label>Jeong and Park(2018)</label><?label JEONG2018885?><mixed-citation>Jeong, J. I. and Park, R. J.: Efficacy of dust aerosol forecasts for East Asia using the adjoint of GEOS-Chem with ground-based observations, Environ. Pollut., 234, 885–893, <ext-link xlink:href="https://doi.org/10.1016/j.envpol.2017.12.025" ext-link-type="DOI">10.1016/j.envpol.2017.12.025</ext-link>, 2018.</mixed-citation></ref>
      <ref id="bib1.bibx34"><?xmltex \def\ref@label{{Jin et al.(2019)Jin, Lin, Segers, Xie, and Heemink}}?><label>Jin et al.(2019)Jin, Lin, Segers, Xie, and Heemink</label><?label Jin2019Machine?><mixed-citation>Jin, J., Lin, H. X., Segers, A., Xie, Y., and Heemink, A.: Machine learning for observation bias correction with application to dust storm data assimilation, Atmos. Chem. Phys., 19, 10009–10026, <ext-link xlink:href="https://doi.org/10.5194/acp-19-10009-2019" ext-link-type="DOI">10.5194/acp-19-10009-2019</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bibx35"><?xmltex \def\ref@label{{Jothi et al.(2021)Jothi, Husain, and Rashid}}?><label>Jothi et al.(2021)Jothi, Husain, and Rashid</label><?label JOTHI2021103?><mixed-citation>Jothi, N., Husain, W., and Rashid, N. A.: Predicting generalized anxiety disorder among women using Shapley value, J. Infect. Public Heal., 14, 103–108, <ext-link xlink:href="https://doi.org/10.1016/j.jiph.2020.02.042" ext-link-type="DOI">10.1016/j.jiph.2020.02.042</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bibx36"><?xmltex \def\ref@label{{Ke et al.(2021)Ke, Gong, He, Zhang, Cui, Wang, Mo, Zhou, and Zhang}}?><label>Ke et al.(2021)Ke, Gong, He, Zhang, Cui, Wang, Mo, Zhou, and Zhang</label><?label KE2021151204?><mixed-citation>Ke, H., Gong, S., He, J., Zhang, L., Cui, B., Wang, Y., Mo, J., Zhou, Y., and Zhang, H.: Development and application of an automated air quality forecasting system based on machine learning, Sci. Total Environ., 806, 151204, <ext-link xlink:href="https://doi.org/10.1016/j.scitotenv.2021.151204" ext-link-type="DOI">10.1016/j.scitotenv.2021.151204</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bibx37"><?xmltex \def\ref@label{{Kincaid et al.(2009)Kincaid, Kincaid, and Cheney}}?><label>Kincaid et al.(2009)Kincaid, Kincaid, and Cheney</label><?label kincaid2009numerical?><mixed-citation>
Kincaid, D., Kincaid, D. R., and Cheney, E. W.: Numerical analysis: mathematics of scientific computing, vol. 2, American Mathematical Soc., ISBN 978-0-8218-4788-6, 2009.</mixed-citation></ref>
      <ref id="bib1.bibx38"><?xmltex \def\ref@label{{Kinney and Atwal(2014)}}?><label>Kinney and Atwal(2014)</label><?label kinney2014equitability?><mixed-citation>
Kinney, J. B. and Atwal, G. S.: Equitability, mutual information, and the maximal information coefficient, P. Natl. Acad. Sci. USA, 111, 3354–3359, 2014.</mixed-citation></ref>
      <ref id="bib1.bibx39"><?xmltex \def\ref@label{{Leufen et al.(2021)Leufen, Kleinert, and Schultz}}?><label>Leufen et al.(2021)Leufen, Kleinert, and Schultz</label><?label gmd-14-1553-2021?><mixed-citation>Leufen, L. H., Kleinert, F., and Schultz, M. G.: MLAir (v1.0) – a tool to enable fast and flexible machine learning on air data time series, Geosci. Model Dev., 14, 1553–1574, <ext-link xlink:href="https://doi.org/10.5194/gmd-14-1553-2021" ext-link-type="DOI">10.5194/gmd-14-1553-2021</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bibx40"><?xmltex \def\ref@label{{M.~Li et al.(2017)Li, Liu, Geng, Hong, Liu, Song, Tong, Zheng, Cui, Man, Zhang, and He}}?><label>M. Li et al.(2017)Li, Liu, Geng, Hong, Liu, Song, Tong, Zheng, Cui, Man, Zhang, and He</label><?label Li2017834?><mixed-citation>Li, M., Liu, H., Geng, G., Hong, C., Liu, F., Song, Y., Tong, D., Zheng, B., Cui, H., Man, H., Zhang, Q., and He, K.: Anthropogenic emission inventories in China: a review, Natl. Sci. Rev., 4, 834–866, <ext-link xlink:href="https://doi.org/10.1093/nsr/nwx150" ext-link-type="DOI">10.1093/nsr/nwx150</ext-link>, 2017.</mixed-citation></ref>
      <ref id="bib1.bibx41"><?xmltex \def\ref@label{{X.~Li et al.(2017)Li, Peng, Yao, Cui, Hu, You, and Chi}}?><label>X. Li et al.(2017)Li, Peng, Yao, Cui, Hu, You, and Chi</label><?label LI2017997?><mixed-citation>Li, X., Peng, L., Yao, X., Cui, S., Hu, Y., You, C., and Chi, T.: Long short-term memory neural network for air pollutant concentration predictions: Method development and evaluation, Environ. Pollut., 231, 997–1004, <ext-link xlink:href="https://doi.org/10.1016/j.envpol.2017.08.114" ext-link-type="DOI">10.1016/j.envpol.2017.08.114</ext-link>, 2017.</mixed-citation></ref>
      <ref id="bib1.bibx42"><?xmltex \def\ref@label{{Li et al.(2014)Li, Ma, {van der Kuijp}, Yuan, and Huang}}?><label>Li et al.(2014)Li, Ma, van der Kuijp, Yuan, and Huang</label><?label LI2014843?><mixed-citation>Li, Z., Ma, Z., van der Kuijp, T. J., Yuan, Z., and Huang, L.: A review of soil heavy metal pollution from mines in China: Pollution and health risk assessment, Sci. Total Environ., 468–469, 843–853, <ext-link xlink:href="https://doi.org/10.1016/j.scitotenv.2013.08.090" ext-link-type="DOI">10.1016/j.scitotenv.2013.08.090</ext-link>, 2014.</mixed-citation></ref>
      <ref id="bib1.bibx43"><?xmltex \def\ref@label{{Liao et al.(2017)Liao, Wang, Ai, Gui, Duan, Zhao, Zhang, Jiang, and Sun}}?><label>Liao et al.(2017)Liao, Wang, Ai, Gui, Duan, Zhao, Zhang, Jiang, and Sun</label><?label LIAO20171056?><mixed-citation>Liao, T., Wang, S., Ai, J., Gui, K., Duan, B., Zhao, Q., Zhang, X., Jiang, W., and Sun, Y.: Heavy pollution episodes, transport pathways and potential sources of PM<inline-formula><mml:math id="M186" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> during the winter of 2013 in Chengdu (China), Sci. Total Environ., 584–585, 1056–1065, <ext-link xlink:href="https://doi.org/10.1016/j.scitotenv.2017.01.160" ext-link-type="DOI">10.1016/j.scitotenv.2017.01.160</ext-link>, 2017.</mixed-citation></ref>
      <ref id="bib1.bibx44"><?xmltex \def\ref@label{{Liaw and Wiener(2002)}}?><label>Liaw and Wiener(2002)</label><?label liaw2002?><mixed-citation>
Liaw, A. and Wiener, M.: Classification and regression by randomForest, R News, 2, 18–22, 2002.</mixed-citation></ref>
      <ref id="bib1.bibx45"><?xmltex \def\ref@label{{Liu et al.(2017)Liu, He, Guo, Miao, Yin, Wang, Xu, Liu, Yan, Li, and Zhai}}?><label>Liu et al.(2017)Liu, He, Guo, Miao, Yin, Wang, Xu, Liu, Yan, Li, and Zhai</label><?label LIU2017235?><mixed-citation>Liu, H., He, J., Guo, J., Miao, Y., Yin, J., Wang, Y., Xu, H., Liu, H., Yan, Y., Li, Y., and Zhai, P.: The blue skies in Beijing during APEC 2014: A quantitative assessment of emission control efficiency and meteorological influence, Atmos. Environ., 167, 235–244, <ext-link xlink:href="https://doi.org/10.1016/j.atmosenv.2017.08.032" ext-link-type="DOI">10.1016/j.atmosenv.2017.08.032</ext-link>, 2017.</mixed-citation></ref>
      <ref id="bib1.bibx46"><?xmltex \def\ref@label{{Liu and Diamond(2005)}}?><label>Liu and Diamond(2005)</label><?label Liu2005?><mixed-citation>Liu, J. and Diamond, J.: China's environment in a globalizing world, Nature, 435, 1179–1186, <ext-link xlink:href="https://doi.org/10.1038/4351179a" ext-link-type="DOI">10.1038/4351179a</ext-link>, 2005.</mixed-citation></ref>
      <ref id="bib1.bibx47"><?xmltex \def\ref@label{{Liu et al.(2018)Liu, Lau, Sandbrink, and Fung}}?><label>Liu et al.(2018)Liu, Lau, Sandbrink, and Fung</label><?label Liu20184175?><mixed-citation>Liu, T., Lau, A. K. H., Sandbrink, K., and Fung, J. C. H.: Time Series Forecasting of Air Quality Based On Regional Numerical Modeling in Hong Kong, J. Geophys. Res.-Atmos., 123, 4175–4196, <ext-link xlink:href="https://doi.org/10.1002/2017JD028052" ext-link-type="DOI">10.1002/2017JD028052</ext-link>, 2018.</mixed-citation></ref>
      <ref id="bib1.bibx48"><?xmltex \def\ref@label{{Lundberg and Lee(2017{\natexlab{a}})}}?><label>Lundberg and Lee(2017a)</label><?label NIPS2017_7062?><mixed-citation>Lundberg, S. M. and Lee, S.-I.: A Unified Approach to Interpreting Model Predictions, <uri>http://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions.pdf</uri> (last access: June 2022), 2017.</mixed-citation></ref>
      <ref id="bib1.bibx49"><?xmltex \def\ref@label{{J.~Ma et al.(2019)Ma, Ding, Gan, Lin, and Wan}}?><label>J. Ma et al.(2019)Ma, Ding, Gan, Lin, and Wan</label><?label ma2019?><mixed-citation>Ma, J., Ding, Y., Gan, V. J. L., Lin, C., and Wan, Z.: Spatiotemporal Prediction of PM<inline-formula><mml:math id="M187" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> Concentrations at Different Time Granularities Using IDW-BLSTM, IEEE Access, 7, 107897–107907, <ext-link xlink:href="https://doi.org/10.1109/ACCESS.2019.2932445" ext-link-type="DOI">10.1109/ACCESS.2019.2932445</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bibx50"><?xmltex \def\ref@label{{T.~Ma et al.(2019)Ma, Duan, He, Qin, Tong, Geng, Liu, Li, Yang, Ye, Xu, Zhang, and Ma}}?><label>T. Ma et al.(2019)Ma, Duan, He, Qin, Tong, Geng, Liu, Li, Yang, Ye, Xu, Zhang, and Ma</label><?label MA20198?><mixed-citation>Ma, T., Duan, F., He, K., Qin, Y., Tong, D., Geng, G., Liu, X., Li, H., Yang, S., Ye, S., Xu, B., Zhang, Q., and Ma, Y.: Air pollution characteristics and their relationship with emissions and meteorology in the Yangtze River Delta region during 2014–2016, J. Environ. Sci.-China, 83, 8–20, <ext-link xlink:href="https://doi.org/10.1016/j.jes.2019.02.031" ext-link-type="DOI">10.1016/j.jes.2019.02.031</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bibx51"><?xmltex \def\ref@label{{Masih(2019)}}?><label>Masih(2019)</label><?label masih2019machine?><mixed-citation>
Masih, A.: Machine learning algorithms in air quality modeling, Global Journal of Environmental Science and Management, 5, 515–534, 2019.</mixed-citation></ref>
      <ref id="bib1.bibx52"><?xmltex \def\ref@label{{Molnar(2020)}}?><label>Molnar(2020)</label><?label molnar2019?><mixed-citation>
Molnar, C.: Interpretable Machine Learning, Lulu.com, 2020.</mixed-citation></ref>
      <ref id="bib1.bibx53"><?xmltex \def\ref@label{Muñoz Sabater(2021)}?><label>Muñoz Sabater(2021)</label><?label data?><mixed-citation>Muñoz Sabater, J.: ERA5-Land hourly data from 1950 to 1980, Copernicus Climate Change Service (C3S)
Climate Data Store (CDS) [data set], <ext-link xlink:href="https://doi.org/10.24381/cds.e2161bac" ext-link-type="DOI">10.24381/cds.e2161bac</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bibx54"><?xmltex \def\ref@label{{Mu\~{n}oz Sabater et al.(2021)Mu\~{n}oz Sabater, Dutra, Agust\'{\i}-Panareda, Albergel, Arduini, Balsamo, Boussetta, Choulga, Harrigan, Hersbach, Martens, Miralles, Piles, Rodr\'{\i}guez-Fern\'{a}ndez, Zsoter, Buontempo, and Th\'{e}paut}}?><label>Muñoz Sabater et al.(2021)Muñoz Sabater, Dutra, Agustí-Panareda, Albergel, Arduini, Balsamo, Boussetta, Choulga, Harrigan, Hersbach, Martens, Miralles, Piles, Rodríguez-Fernández, Zsoter, Buontempo, and Thépaut</label><?label essd-13-4349-2021?><mixed-citation>Muñoz-Sabater, J., Dutra, E., Agustí-Panareda, A., Albergel, C., Arduini, G., Balsamo, G., Boussetta, S., Choulga, M., Harrigan, S., Hersbach, H., Martens, B., Miralles, D. G., Piles, M., Rodríguez-Fernández, N. J., Zsoter, E., Buontempo, C., and Thépaut, J.-N.: ERA5-Land: a state-of-the-art global reanalysis dataset for land applications, Earth Syst. Sci. Data, 13, 4349–4383, <ext-link xlink:href="https://doi.org/10.5194/essd-13-4349-2021" ext-link-type="DOI">10.5194/essd-13-4349-2021</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bibx55"><?xmltex \def\ref@label{{Murray et al.(2020)Murray, Aravkin, Zheng, Abbafati, Abbas, Abbasi-Kangevari, Abd-Allah, Abdelalim, Abdollahi, Abdollahpour et al.}}?><label>Murray et al.(2020)Murray, Aravkin, Zheng, Abbafati, Abbas, Abbasi-Kangevari, Abd-Allah, Abdelalim, Abdollahi, Abdollahpour et al.</label><?label murray2020global?><mixed-citation>
Murray, C. J., Aravkin, A. Y., Zheng, P., Abbafati, C., Abbas, K. M., Abbasi-Kangevari, M., Abd-Allah, F., Abdelalim, A., Abdollahi, M., Abdollahpour, I., and GBD 2019 Risk Factors Collaborators: Global burden of 87 risk factors in 204 countries and territories, 1990–2019: a systematic analysis for the Global Burden of Disease Study 2019, Lancet, 396, 1223–1249, 2020.</mixed-citation></ref>
      <ref id="bib1.bibx56"><?xmltex \def\ref@label{{Osowski et al.(2004)Osowski, Siwek, and Markiewicz}}?><label>Osowski et al.(2004)Osowski, Siwek, and Markiewicz</label><?label osowski2004mlp?><mixed-citation>
Osowski, S., Siwek, K., and Markiewicz, T.: MLP and SVM networks – a comparative study, in: Proceedings of the 6th Nordic Signal Processing Symposium, 2004, NORSIG 2004, Espoo, Finland, 11–11 June 2004, 37–40, IEEE, ISBN 951-22-7065-X, 2004.</mixed-citation></ref>
      <ref id="bib1.bibx57"><?xmltex \def\ref@label{{Park and Park(2021)}}?><label>Park and Park(2021)</label><?label PARK2021107744?><mixed-citation>Park, H. and Park, D. Y.: Comparative analysis on predictability of natural ventilation rate based on machine learning algorithms, Build. Environ., 195, 107744, <ext-link xlink:href="https://doi.org/10.1016/j.buildenv.2021.107744" ext-link-type="DOI">10.1016/j.buildenv.2021.107744</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bibx58"><?xmltex \def\ref@label{{Pedregosa et al.(2011)Pedregosa, Varoquaux, Gramfort, Michel, Thirion, Grisel, Blondel, Prettenhofer, Weiss, Dubourg, Vanderplas, Passos, Cournapeau, Brucher, Perrot, and Duchesnay}}?><label>Pedregosa et al.(2011)Pedregosa, Varoquaux, Gramfort, Michel, Thirion, Grisel, Blondel, Prettenhofer, Weiss, Dubourg, Vanderplas, Passos, Cournapeau, Brucher, Perrot, and Duchesnay</label><?label scikit-learn?><mixed-citation>
Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., Thirion, B., Grisel, O., Blondel, M., Prettenhofer, P., Weiss, R., Dubourg, V., Vanderplas, J., Passos, A., Cournapeau, D., Brucher, M., Perrot, M., and Duchesnay, E.: Scikit-learn: Machine Learning in Python, J. Mach. Learn. Res., 12, 2825–2830, 2011.</mixed-citation></ref>
      <ref id="bib1.bibx59"><?xmltex \def\ref@label{{P\'{e}rez et al.(2000)P\'{e}rez, Trier, and Reyes}}?><label>Pérez et al.(2000)Pérez, Trier, and Reyes</label><?label PEREZ20001189?><mixed-citation>Pérez, P., Trier, A., and Reyes, J.: Prediction of PM<inline-formula><mml:math id="M188" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> concentrations several hours in advance using neural networks in Santiago, Chile, Atmos. Environ., 34, 1189–1196, <ext-link xlink:href="https://doi.org/10.1016/S1352-2310(99)00316-7" ext-link-type="DOI">10.1016/S1352-2310(99)00316-7</ext-link>, 2000.</mixed-citation></ref>
      <ref id="bib1.bibx60"><?xmltex \def\ref@label{{Pui et al.(2014)Pui, Chen, and Zuo}}?><label>Pui et al.(2014)Pui, Chen, and Zuo</label><?label PUI20141?><mixed-citation>Pui, D. Y., Chen, S.-C., and Zuo, Z.: PM<inline-formula><mml:math id="M189" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> in China: Measurements, sources, visibility and health effects, and mitigation, Particuology, 13, 1–26, <ext-link xlink:href="https://doi.org/10.1016/j.partic.2013.11.001" ext-link-type="DOI">10.1016/j.partic.2013.11.001</ext-link>, 2014.</mixed-citation></ref>
      <ref id="bib1.bibx61"><?xmltex \def\ref@label{{Qin et al.(2019)Qin, Cen, and Guo}}?><label>Qin et al.(2019)Qin, Cen, and Guo</label><?label Qin2019?><mixed-citation>Qin, Z., Cen, C., and Guo, X.: Prediction of Air Quality Based on KNN-LSTM, J. Phys. Conf. Ser., 1237, 042030, <ext-link xlink:href="https://doi.org/10.1088/1742-6596/1237/4/042030" ext-link-type="DOI">10.1088/1742-6596/1237/4/042030</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bibx62"><?xmltex \def\ref@label{{Reichstein et al.(2019)Reichstein, Camps-Valls, Stevens, Jung, Denzler, Carvalhais, and Prabhat}}?><label>Reichstein et al.(2019)Reichstein, Camps-Valls, Stevens, Jung, Denzler, Carvalhais, and Prabhat</label><?label Reichstein2019DeepLA?><mixed-citation>
Reichstein, M., Camps-Valls, G., Stevens, B., Jung, M., Denzler, J., Carvalhais, N., and Prabhat: Deep learning and process understanding for data-driven Earth system science, Nature, 566, 195–204, 2019.</mixed-citation></ref>
      <ref id="bib1.bibx63"><?xmltex \def\ref@label{{Rodriguez-Galiano et al.(2012)Rodriguez-Galiano, Chica-Olmo, Abarca-Hernandez, Atkinson, and Jeganathan}}?><label>Rodriguez-Galiano et al.(2012)Rodriguez-Galiano, Chica-Olmo, Abarca-Hernandez, Atkinson, and Jeganathan</label><?label RODRIGUEZGALIANO201293?><mixed-citation>Rodriguez-Galiano, V., Chica-Olmo, M., Abarca-Hernandez, F., Atkinson, P., and Jeganathan, C.: Random Forest classification of Mediterranean land cover using multi-seasonal imagery and multi-seasonal texture, Remote Sens. Environ., 121, 93–107, <ext-link xlink:href="https://doi.org/10.1016/j.rse.2011.12.003" ext-link-type="DOI">10.1016/j.rse.2011.12.003</ext-link>, 2012.</mixed-citation></ref>
      <ref id="bib1.bibx64"><?xmltex \def\ref@label{{Sawaragi et al.(1979)Sawaragi, Soeda, Tamura, Yoshimura, Ohe, Chujo, and Ishihara}}?><label>Sawaragi et al.(1979)Sawaragi, Soeda, Tamura, Yoshimura, Ohe, Chujo, and Ishihara</label><?label SAWARAGI1979441?><mixed-citation>Sawaragi, Y., Soeda, T., Tamura, H., Yoshimura, T., Ohe, S., Chujo, Y., and Ishihara, H.: Statistical prediction of air pollution levels using non-physical models, Automatica, 15, 441–451, <ext-link xlink:href="https://doi.org/10.1016/0005-1098(79)90018-9" ext-link-type="DOI">10.1016/0005-1098(79)90018-9</ext-link>, 1979.</mixed-citation></ref>
      <ref id="bib1.bibx65"><?xmltex \def\ref@label{{Shapley(1952)}}?><label>Shapley(1952)</label><?label shapley?><mixed-citation>Shapley, L. S.: A Value for N-Person Games, RAND Corporation, Santa Monica, CA, <ext-link xlink:href="https://doi.org/10.7249/P0295" ext-link-type="DOI">10.7249/P0295</ext-link>, 1952.</mixed-citation></ref>
      <ref id="bib1.bibx66"><?xmltex \def\ref@label{{Shishegaran et al.(2020)Shishegaran, Saeedi, Kumar, and Ghiasinejad}}?><label>Shishegaran et al.(2020)Shishegaran, Saeedi, Kumar, and Ghiasinejad</label><?label SHISHEGARAN2020120825?><mixed-citation>Shishegaran, A., Saeedi, M., Kumar, A., and Ghiasinejad, H.: Prediction of air quality in Tehran by developing the nonlinear ensemble model, J. Clean. Prod., 259, 120825, <ext-link xlink:href="https://doi.org/10.1016/j.jclepro.2020.120825" ext-link-type="DOI">10.1016/j.jclepro.2020.120825</ext-link>, 2020.</mixed-citation></ref>
      <ref id="bib1.bibx67"><?xmltex \def\ref@label{{Shtein et al.(2020)Shtein, Kloog, Schwartz, Silibello, Michelozzi, Gariazzo, Viegi, Forastiere, Karnieli, and Just}}?><label>Shtein et al.(2020)Shtein, Kloog, Schwartz, Silibello, Michelozzi, Gariazzo, Viegi, Forastiere, Karnieli, and Just</label><?label Shtein2020120?><mixed-citation>Shtein, A., Kloog, I., Schwartz, J., Silibello, C., Michelozzi, P., Gariazzo, C., Viegi, G., Forastiere, F., Karnieli, A., and Just, A.: Estimating Daily PM<inline-formula><mml:math id="M190" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> and PM<inline-formula><mml:math id="M191" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">10</mml:mn></mml:msub></mml:math></inline-formula> over Italy Using an Ensemble Model, Environ. Sci. Technol., 54, 120–128, <ext-link xlink:href="https://doi.org/10.1021/acs.est.9b04279" ext-link-type="DOI">10.1021/acs.est.9b04279</ext-link>, 2020.</mixed-citation></ref>
      <ref id="bib1.bibx68"><?xmltex \def\ref@label{{Shu et al.(2021)Shu, Liu, Zhao, Xia, Wang, Cao, Wang, Zhang, Zheng, Shen, Luo, and Li}}?><label>Shu et al.(2021)Shu, Liu, Zhao, Xia, Wang, Cao, Wang, Zhang, Zheng, Shen, Luo, and Li</label><?label Shu-21-9253-2021?><mixed-citation>Shu, Z., Liu, Y., Zhao, T., Xia, J., Wang, C., Cao, L., Wang, H., Zhang, L., Zheng, Y., Shen, L., Luo, L., and Li, Y.: Elevated 3D structures of PM<inline-formula><mml:math id="M192" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> and impact of complex terrain-forcing circulations on heavy haze pollution over Sichuan Basin, China, Atmos. Chem. Phys., 21, 9253–9268, <ext-link xlink:href="https://doi.org/10.5194/acp-21-9253-2021" ext-link-type="DOI">10.5194/acp-21-9253-2021</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bibx69"><?xmltex \def\ref@label{{Song et al.(2017{\natexlab{a}})Song, He, Wu, Jin, Chen, Li, Ren, Zhang, and Mao}}?><label>Song et al.(2017a)Song, He, Wu, Jin, Chen, Li, Ren, Zhang, and Mao</label><?label SONG2017575?><mixed-citation>Song, C., He, J., Wu, L., Jin, T., Chen, X., Li, R., Ren, P., Zhang, L., and Mao, H.: Health burden attributable to ambient PM<inline-formula><mml:math id="M193" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> in China, Environ. Pollut., 223, 575–586, <ext-link xlink:href="https://doi.org/10.1016/j.envpol.2017.01.060" ext-link-type="DOI">10.1016/j.envpol.2017.01.060</ext-link>, 2017a.</mixed-citation></ref>
      <ref id="bib1.bibx70"><?xmltex \def\ref@label{{Song et al.(2017{\natexlab{b}})Song, Wu, Xie, He, Chen, Wang, Lin, Jin, Wang, Liu, Dai, Liu, Wang, and Mao}}?><label>Song et al.(2017b)Song, Wu, Xie, He, Chen, Wang, Lin, Jin, Wang, Liu, Dai, Liu, Wang, and Mao</label><?label SONG2017334?><mixed-citation>Song, C., Wu, L., Xie, Y., He, J., Chen, X., Wang, T., Lin, Y., Jin, T., Wang, A., Liu, Y., Dai, Q., Liu, B., Wang, Y., and Mao, H.: Air pollution in China: Status and spatiotemporal variations, Environ. Pollut., 227, 334–347, <ext-link xlink:href="https://doi.org/10.1016/j.envpol.2017.04.075" ext-link-type="DOI">10.1016/j.envpol.2017.04.075</ext-link>, 2017b.</mixed-citation></ref>
      <ref id="bib1.bibx71"><?xmltex \def\ref@label{{Sun et al.(2018)Sun, Li, Dai, Song, and Lang}}?><label>Sun et al.(2018)Sun, Li, Dai, Song, and Lang</label><?label SUN2018606?><mixed-citation>Sun, G., Li, J., Dai, J., Song, Z., and Lang, F.: Feature selection for IoT based on maximal information coefficient, Future Gener. Comp. Sy., 89, 606–616, <ext-link xlink:href="https://doi.org/10.1016/j.future.2018.05.060" ext-link-type="DOI">10.1016/j.future.2018.05.060</ext-link>, 2018.</mixed-citation></ref>
      <ref id="bib1.bibx72"><?xmltex \def\ref@label{{Sun and Li(2020{\natexlab{a}})}}?><label>Sun and Li(2020a)</label><?label SUN2020110?><mixed-citation>Sun, W. and Li, Z.: Hourly PM<inline-formula><mml:math id="M194" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> concentration forecasting based on feature extraction and stacking-driven ensemble model for the winter of the Beijing-Tianjin-Hebei area, Atmos. Pollut. Res., 11, 110–121, <ext-link xlink:href="https://doi.org/10.1016/j.apr.2020.02.022" ext-link-type="DOI">10.1016/j.apr.2020.02.022</ext-link>, 2020a.</mixed-citation></ref>
      <ref id="bib1.bibx73"><?xmltex \def\ref@label{{Sun and Li(2020{\natexlab{b}})}}?><label>Sun and Li(2020b)</label><?label SUN2020121442?><mixed-citation>Sun, W. and Li, Z.: Hourly PM<inline-formula><mml:math id="M195" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> concentration forecasting based on mode decomposition-recombination technique and ensemble learning approach in severe haze episodes of China, J. Clean. Prod., 263, 121442, <ext-link xlink:href="https://doi.org/10.1016/j.jclepro.2020.121442" ext-link-type="DOI">10.1016/j.jclepro.2020.121442</ext-link>, 2020b.</mixed-citation></ref>
      <ref id="bib1.bibx74"><?xmltex \def\ref@label{{Sun et al.(2013)Sun, Zhang, Palazoglu, Singh, Zhang, and Liu}}?><label>Sun et al.(2013)Sun, Zhang, Palazoglu, Singh, Zhang, and Liu</label><?label SUN201393?><mixed-citation>Sun, W., Zhang, H., Palazoglu, A., Singh, A., Zhang, W., and Liu, S.: Prediction of 24-hour-average PM<inline-formula><mml:math id="M196" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> concentrations using a hidden Markov model with different emission distributions in Northern California, Sci. Total Environ., 443, 93–103, <ext-link xlink:href="https://doi.org/10.1016/j.scitotenv.2012.10.070" ext-link-type="DOI">10.1016/j.scitotenv.2012.10.070</ext-link>, 2013.</mixed-citation></ref>
      <ref id="bib1.bibx75"><?xmltex \def\ref@label{{Taylor(2005)}}?><label>Taylor(2005)</label><?label taylor2005?><mixed-citation>Taylor, K. E.: Taylor diagram primer, Work. Pap., 1–4, <uri>https://www.atmos.albany.edu/daes/atmclasses/atm401/spring_2016/ppts_pdfs/Taylor_diagram_primer.pdf</uri> (last
access: October 2022), 2005.</mixed-citation></ref>
      <ref id="bib1.bibx76"><?xmltex \def\ref@label{{Wu et al.(2020)Wu, Wang, He, and Wu}}?><label>Wu et al.(2020)Wu, Wang, He, and Wu</label><?label Wu202013?><mixed-citation>Wu, X., Wang, Y., He, S., and Wu, Z.: PM<inline-formula><mml:math id="M197" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> / PM<inline-formula><mml:math id="M198" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">10</mml:mn></mml:msub></mml:math></inline-formula> ratio prediction based on a long short-term memory neural network in Wuhan, China, Geosci. Model Dev., 13, 1499–1511, <ext-link xlink:href="https://doi.org/10.5194/gmd-13-1499-2020" ext-link-type="DOI">10.5194/gmd-13-1499-2020</ext-link>, 2020.</mixed-citation></ref>
      <ref id="bib1.bibx77"><?xmltex \def\ref@label{{Xi et al.(2015)Xi, Wei, Xiaoguang, Yijie, Xinxin, Wenjun, and Jin}}?><label>Xi et al.(2015)Xi, Wei, Xiaoguang, Yijie, Xinxin, Wenjun, and Jin</label><?label Xi2015176?><mixed-citation>Xi, X., Wei, Z., Xiaoguang, R., Yijie, W., Xinxin, B., Wenjun, Y., and Jin, D.: A comprehensive evaluation of air pollution prediction improvement by a machine learning method, in: 2015 IEEE International Conference on Service Operations And Logistics, And Informatics (SOLI), Yasmine Hammamet, Tunisia, 15–17 November 2015, 176–181, <ext-link xlink:href="https://doi.org/10.1109/SOLI.2015.7367615" ext-link-type="DOI">10.1109/SOLI.2015.7367615</ext-link>, 2015.</mixed-citation></ref>
      <ref id="bib1.bibx78"><?xmltex \def\ref@label{{Xu et al.(2021)Xu, Jin, Wang, Segers, Deng, and Lin}}?><label>Xu et al.(2021)Xu, Jin, Wang, Segers, Deng, and Lin</label><?label XU2021118022?><mixed-citation>Xu, M., Jin, J., Wang, G., Segers, A., Deng, T., and Lin, H. X.: Machine learning based bias correction for numerical chemical transport models, Atmos. Environ., 248, 118022, <ext-link xlink:href="https://doi.org/10.1016/j.atmosenv.2020.118022" ext-link-type="DOI">10.1016/j.atmosenv.2020.118022</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bibx79"><?xmltex \def\ref@label{{Xue et al.(2019)Xue, Zhu, Zheng, Liu, Li, and Zhang}}?><label>Xue et al.(2019)Xue, Zhu, Zheng, Liu, Li, and Zhang</label><?label XUE2019430?><mixed-citation>Xue, T., Zhu, T., Zheng, Y., Liu, J., Li, X., and Zhang, Q.: Change in the number of PM<inline-formula><mml:math id="M199" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula>-attributed deaths in China from 2000 to 2010: Comparison between estimations from census-based epidemiology and pre-established exposure-response functions, Environ. Int., 129, 430–437, <ext-link xlink:href="https://doi.org/10.1016/j.envint.2019.05.067" ext-link-type="DOI">10.1016/j.envint.2019.05.067</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bibx80"><?xmltex \def\ref@label{{Yu and Ma(2021)}}?><label>Yu and Ma(2021)</label><?label Yu2021RG000742?><mixed-citation>Yu, S. and Ma, J.: Deep Learning for Geophysics: Current and Future Trends, Rev. Geophys., 59, e2021RG000742, <ext-link xlink:href="https://doi.org/10.1029/2021RG000742" ext-link-type="DOI">10.1029/2021RG000742</ext-link>, 2021.
</mixed-citation></ref><?xmltex \hack{\newpage}?>
      <ref id="bib1.bibx81"><?xmltex \def\ref@label{{Zhai et al.(2019)Zhai, Jacob, Wang, Shen, Li, Zhang, Gui, Zhao, and Liao}}?><label>Zhai et al.(2019)Zhai, Jacob, Wang, Shen, Li, Zhang, Gui, Zhao, and Liao</label><?label Zhai-11031-2019?><mixed-citation>Zhai, S., Jacob, D. J., Wang, X., Shen, L., Li, K., Zhang, Y., Gui, K., Zhao, T., and Liao, H.: Fine particulate matter (PM<inline-formula><mml:math id="M200" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula>) trends in China, 2013–2018: separating contributions from anthropogenic emissions and meteorology, Atmos. Chem. Phys., 19, 11031–11041, <ext-link xlink:href="https://doi.org/10.5194/acp-19-11031-2019" ext-link-type="DOI">10.5194/acp-19-11031-2019</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bibx82"><?xmltex \def\ref@label{{Zhan et al.(2019)Zhan, Xie, Fang, Wang, Wu, Lu, Li, Chen, Zhuang, Li, Zhang, Gao, Ren, and Zhao}}?><label>Zhan et al.(2019)Zhan, Xie, Fang, Wang, Wu, Lu, Li, Chen, Zhuang, Li, Zhang, Gao, Ren, and Zhao</label><?label ZHAN201934?><mixed-citation>Zhan, C., Xie, M., Fang, D., Wang, T., Wu, Z., Lu, H., Li, M., Chen, P., Zhuang, B., Li, S., Zhang, Z., Gao, D., Ren, J., and Zhao, M.: Synoptic weather patterns and their impacts on regional particle pollution in the city cluster of the Sichuan Basin, China, Atmos. Environ., 208, 34–47, <ext-link xlink:href="https://doi.org/10.1016/j.atmosenv.2019.03.033" ext-link-type="DOI">10.1016/j.atmosenv.2019.03.033</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bibx83"><?xmltex \def\ref@label{{Zhang et al.(2018)Zhang, Ma, Zhao, Liu, Wang, Jia, and Zhang}}?><label>Zhang et al.(2018)Zhang, Ma, Zhao, Liu, Wang, Jia, and Zhang</label><?label ZHANG20181308?><mixed-citation>Zhang, Q., Ma, Q., Zhao, B., Liu, X., Wang, Y., Jia, B., and Zhang, X.: Winter haze over North China Plain from 2009 to 2016: Influence of emission and meteorology, Environ. Pollut., 242, 1308–1318, <ext-link xlink:href="https://doi.org/10.1016/j.envpol.2018.08.019" ext-link-type="DOI">10.1016/j.envpol.2018.08.019</ext-link>, 2018.</mixed-citation></ref>
      <ref id="bib1.bibx84"><?xmltex \def\ref@label{{Zhang et al.(2020)Zhang, Wu, Wang, Sun, and Liu}}?><label>Zhang et al.(2020)Zhang, Wu, Wang, Sun, and Liu</label><?label ZHANG2020122722?><mixed-citation>Zhang, Q., Wu, S., Wang, X., Sun, B., and Liu, H.: A PM<inline-formula><mml:math id="M201" display="inline"><mml:msub><mml:mi/><mml:mn mathvariant="normal">2.5</mml:mn></mml:msub></mml:math></inline-formula> concentration prediction model based on multi-task deep learning for intensive air quality monitoring stations, J. Clean. Prod., 275, 122722, <ext-link xlink:href="https://doi.org/10.1016/j.jclepro.2020.122722" ext-link-type="DOI">10.1016/j.jclepro.2020.122722</ext-link>, 2020.</mixed-citation></ref>
      <ref id="bib1.bibx85"><?xmltex \def\ref@label{{Zhang(2012)}}?><label>Zhang(2012)</label><?label ZHANG20122541?><mixed-citation>Zhang, S.: Nearest neighbor selection for iteratively kNN imputation, J. Syst. Software, 85, 2541–2552, <ext-link xlink:href="https://doi.org/10.1016/j.jss.2012.05.073" ext-link-type="DOI">10.1016/j.jss.2012.05.073</ext-link>, 2012.</mixed-citation></ref>
      <ref id="bib1.bibx86"><?xmltex \def\ref@label{{Zhou et al.(2017)Zhou, Xu, Xie, Chang, Gao, Gu, and Zhou}}?><label>Zhou et al.(2017)Zhou, Xu, Xie, Chang, Gao, Gu, and Zhou</label><?label ZHOU201794?><mixed-citation>Zhou, G., Xu, J., Xie, Y., Chang, L., Gao, W., Gu, Y., and Zhou, J.: Numerical air quality forecasting over eastern China: An operational application of WRF-Chem, Atmos. Environ., 153, 94–108, <ext-link xlink:href="https://doi.org/10.1016/j.atmosenv.2017.01.020" ext-link-type="DOI">10.1016/j.atmosenv.2017.01.020</ext-link>, 2017.</mixed-citation></ref>
      <ref id="bib1.bibx87"><?xmltex \def\ref@label{{Zimmermann and Poppe(1996)}}?><label>Zimmermann and Poppe(1996)</label><?label ZIMMERMANN19961255?><mixed-citation>Zimmermann, J. and Poppe, D.: A supplement for the RADM2 chemical mechanism: The photooxidation of isoprene, Atmos. Environ., 30, 1255–1269, <ext-link xlink:href="https://doi.org/10.1016/1352-2310(95)00417-3" ext-link-type="DOI">10.1016/1352-2310(95)00417-3</ext-link>, 1996.</mixed-citation></ref>
      <ref id="bib1.bibx88"><?xmltex \def\ref@label{{Ziomas et al.(1995)Ziomas, Melas, Zerefos, Bais, and Paliatsos}}?><label>Ziomas et al.(1995)Ziomas, Melas, Zerefos, Bais, and Paliatsos</label><?label ZIOMAS19953703?><mixed-citation>Ziomas, I. C., Melas, D., Zerefos, C. S., Bais, A. F., and Paliatsos, A. G.: Forecasting peak pollutant levels from meteorological variables, Atmos. Environ., 29, 3703–3711, <ext-link xlink:href="https://doi.org/10.1016/1352-2310(95)00131-H" ext-link-type="DOI">10.1016/1352-2310(95)00131-H</ext-link>, 1995.</mixed-citation></ref>

  </ref-list></back>
    <!--<article-title-html>Development of a regional feature selection-based machine learning system (RFSML v1.0) for air pollution forecasting over China</article-title-html>
<abstract-html/>
<ref-html id="bib1.bib1"><label>Abu Awad et al.(2017)Abu Awad, Koutrakis, Coull, and Schwartz</label><mixed-citation>
Abu Awad, Y., Koutrakis, P., Coull, B. A., and Schwartz, J.: A spatio-temporal prediction model based on support vector machine regression: Ambient Black Carbon in three New England States, Environ. Res., 159, 427–434, <a href="https://doi.org/10.1016/j.envres.2017.08.039" target="_blank">https://doi.org/10.1016/j.envres.2017.08.039</a>, 2017.
</mixed-citation></ref-html>
<ref-html id="bib1.bib2"><label>Altmann et al.(2010)Altmann, Toloşi, Sander, and Lengauer</label><mixed-citation>
Altmann, A., Toloşi, L., Sander, O., and Lengauer, T.: Permutation importance: a corrected feature importance measure, Bioinformatics, 26, 1340–1347, <a href="https://doi.org/10.1093/bioinformatics/btq134" target="_blank">https://doi.org/10.1093/bioinformatics/btq134</a>, 2010.
</mixed-citation></ref-html>
<ref-html id="bib1.bib3"><label>Bai et al.(2019)Bai, Li, Zeng, Li, and Zhang</label><mixed-citation>
Bai, Y., Li, Y., Zeng, B., Li, C., and Zhang, J.: Hourly PM<sub>2.5</sub> concentration forecast using stacked autoencoder model with emphasis on seasonality, J. Clean. Prod., 224, 739–750, 2019.
</mixed-citation></ref-html>
<ref-html id="bib1.bib4"><label>Bartier and Keller(1996)</label><mixed-citation>
Bartier, P. M. and Keller, C.: Multivariate interpolation to incorporate thematic surface data using inverse distance weighting (IDW), Comput. Geosci., 22, 795–799, <a href="https://doi.org/10.1016/0098-3004(96)00021-0" target="_blank">https://doi.org/10.1016/0098-3004(96)00021-0</a>, 1996.
</mixed-citation></ref-html>
<ref-html id="bib1.bib5"><label>Bey et al.(2001)Bey, Jacob, Yantosca, Logan, Field, Fiore, Li, Liu, Mickley, and Schultz</label><mixed-citation>
Bey, I., Jacob, D. J., Yantosca, R. M., Logan, J. A., Field, B. D., Fiore, A. M., Li, Q., Liu, H. Y., Mickley, L. J., and Schultz, M. G.: Global modeling of tropospheric chemistry with assimilated meteorology: Model description and evaluation, J. Geophys. Res.-Atmos., 106, 23073–23095, <a href="https://doi.org/10.1029/2001JD000807" target="_blank">https://doi.org/10.1029/2001JD000807</a>, 2001.
</mixed-citation></ref-html>
<ref-html id="bib1.bib6"><label>Brokamp et al.(2017)Brokamp, Jandarov, Rao, LeMasters, and Ryan</label><mixed-citation>
Brokamp, C., Jandarov, R., Rao, M., LeMasters, G., and Ryan, P.: Exposure assessment models for elemental components of particulate matter in an urban environment: A comparison of regression and random forest approaches, Atmos. Environ., 151, 1–11, <a href="https://doi.org/10.1016/j.atmosenv.2016.11.066" target="_blank">https://doi.org/10.1016/j.atmosenv.2016.11.066</a>, 2017.
</mixed-citation></ref-html>
<ref-html id="bib1.bib7"><label>Burnett et al.(2018)Burnett, Chen, Szyszkowicz, Fann, Hubbell, Pope, Apte, Brauer, Cohen, Weichenthal et al.</label><mixed-citation>
Burnett, R., Chen, H., Szyszkowicz, M., Fann, N., Hubbell, B., Pope, C. A., Apte, J. S., Brauer, M., Cohen, A.,
Weichenthal, S., Coggins, J., Di, Q., Brunekreef, B., Frostad, J., Lim, S. S., Kan, H., Walker, K. D., Thurston, G. D., Hayes,
R. B., Lim, C. C., Turner, M. C., Jerrett, M., Krewski, D., Gapstur, S. M., Diver, W. R., Ostro, B., Goldberg, D., Crouse, D.
L., Martin, R. V., Peters, P., Pinault, L., Tjepkema, M., van Donkelaar, A., Villeneuve, P. J., Miller, A. B., Yin, P., Zhou,
M., Wang, L., Janssen, N. A. H., Marra, M., Atkinson, R. W., Tsang, H., Thach, T. Q., Cannon, J. B., Allen, R. T., Hart, J.
E., Laden, F., Cesaroni, G., Forastiere, F., Weinmayr, G., Jaensch, A., Nagel, G., Concin, H., and Spadaro, J. V.: Global estimates of mortality associated with long-term exposure to outdoor fine particulate matter, P. Natl. Acad. Sci. USA, 115, 9592–9597, 2018.
</mixed-citation></ref-html>
<ref-html id="bib1.bib8"><label>Cao et al.(2021)Cao, Chen, Chen, Zhang, and Yuan</label><mixed-citation>
Cao, D., Chen, Y., Chen, J., Zhang, H., and Yuan, Z.: An improved algorithm for the maximal information coefficient and its application, Roy. Soc. Open Sci., 8, 201424, <a href="https://doi.org/10.1098/rsos.201424" target="_blank">https://doi.org/10.1098/rsos.201424</a>, 2021.
</mixed-citation></ref-html>
<ref-html id="bib1.bib9"><label>Casalicchio et al.(2019)Casalicchio, Molnar, and Bischl</label><mixed-citation>
Casalicchio, G., Molnar, C., and Bischl, B.: Visualizing the Feature Importance for Black Box Models, in: Machine Learning and Knowledge Discovery in Databases, edited by: Berlingerio, M., Bonchi, F., Gärtner, T., Hurley, N., and Ifrim, G., Springer International Publishing, Cham, 655–670, <a href="https://doi.org/10.1007/978-3-030-10925-7_40" target="_blank">https://doi.org/10.1007/978-3-030-10925-7_40</a>, 2019.
</mixed-citation></ref-html>
<ref-html id="bib1.bib10"><label>Chandrashekar and Sahin(2014)</label><mixed-citation>
Chandrashekar, G. and Sahin, F.: A survey on feature selection methods, Comput. Electr. Eng., 40, 16–28, <a href="https://doi.org/10.1016/j.compeleceng.2013.11.024" target="_blank">https://doi.org/10.1016/j.compeleceng.2013.11.024</a>, 2014.
</mixed-citation></ref-html>
<ref-html id="bib1.bib11"><label>Chen et al.(2017)Chen, Liu, Lang, Zhou, Wei, Wang, and Guo</label><mixed-citation>
Chen, D., Liu, X., Lang, J., Zhou, Y., Wei, L., Wang, X., and Guo, X.: Estimating the contribution of regional transport to PM<sub>2.5</sub> air pollution in a rural area on the North China Plain, Sci. Total Environ., 583, 280–291, <a href="https://doi.org/10.1016/j.scitotenv.2017.01.066" target="_blank">https://doi.org/10.1016/j.scitotenv.2017.01.066</a>, 2017.
</mixed-citation></ref-html>
<ref-html id="bib1.bib12"><label>Chen et al.(2016)Chen, Zeng, Luo, and Yuan</label><mixed-citation>
Chen, Y., Zeng, Y., Luo, F., and Yuan, Z.: A new algorithm to optimize maximal information coefficient, PLoS ONE, 11, e0157567, <a href="https://doi.org/10.1371/journal.pone.0157567" target="_blank">https://doi.org/10.1371/journal.pone.0157567</a>, 2016.
</mixed-citation></ref-html>
<ref-html id="bib1.bib13"><label>Cobourn(2010)</label><mixed-citation>
Cobourn, W. G.: An enhanced PM<sub>2.5</sub> air quality forecast model based on nonlinear regression and back-trajectory concentrations, Atmos. Environ., 44, 3015–3023, <a href="https://doi.org/10.1016/j.atmosenv.2010.05.009" target="_blank">https://doi.org/10.1016/j.atmosenv.2010.05.009</a>, 2010.
</mixed-citation></ref-html>
<ref-html id="bib1.bib14"><label>Copernicus Climate Change Service (C3S)(2017)</label><mixed-citation>
Copernicus Climate Change Service (C3S): ERA5: Fifth generation of ECMWF atmospheric reanalyses of the global climate. Copernicus Climate Change Service Climate Data Store (CDS), <a href="https://cds.climate.copernicus.eu/cdsapp#!/home" target="_blank">https://cds.climate.copernicus.eu/cdsapp#!/home</a> (last access: June 2022), 2017.
</mixed-citation></ref-html>
<ref-html id="bib1.bib15"><label>Covert et al.(2020)Covert, Lundberg, and Lee</label><mixed-citation>
Covert, I., Lundberg, S. M., and Lee, S.-I.: Understanding Global Feature Contributions With Additive Importance Measures, in: Advances in Neural Information Processing Systems, vol. 33, edited by: Larochelle, H., Ranzato, M., Hadsell, R., Balcan, M. F., and Lin, H., Curran Associates, Inc., 17212–17223, <a href="https://proceedings.neurips.cc/paper/2020/file/c7bf0b7c1a86d5eb3be2c722cf2cf746-Paper.pdf" target="_blank">https://proceedings.neurips.cc/paper/2020/file/c7bf0b7c1a86d5eb3be2c722cf2cf746-Paper.pdf</a> (last access: June 2022), 2020.
</mixed-citation></ref-html>
<ref-html id="bib1.bib16"><label>Di et al.(2019)Di, Amini, Shi, Kloog, Silvern, Kelly, Sabath, Choirat, Koutrakis, Lyapustin, Wang, Mickley, and Schwartz</label><mixed-citation>
Di, Q., Amini, H., Shi, L., Kloog, I., Silvern, R., Kelly, J., Sabath, M. B., Choirat, C., Koutrakis, P., Lyapustin, A., Wang, Y., Mickley, L. J., and Schwartz, J.: An ensemble-based model of PM<sub>2.5</sub> concentration across the contiguous United States with high spatiotemporal resolution, Environ. Int., 130, 104909, <a href="https://doi.org/10.1016/j.envint.2019.104909" target="_blank">https://doi.org/10.1016/j.envint.2019.104909</a>, 2019.
</mixed-citation></ref-html>
<ref-html id="bib1.bib17"><label>Fan et al.(2018)Fan, Liu, Ma, Zhang, Li, Jiang, Zhang, Zhao, Yang, Wu, and Wang</label><mixed-citation>
Fan, T., Liu, X., Ma, P.-L., Zhang, Q., Li, Z., Jiang, Y., Zhang, F., Zhao, C., Yang, X., Wu, F., and Wang, Y.: Emission or atmospheric processes? An attempt to attribute the source of large bias of aerosols in eastern China simulated by global climate models, Atmos. Chem. Phys., 18, 1395–1417, <a href="https://doi.org/10.5194/acp-18-1395-2018" target="_blank">https://doi.org/10.5194/acp-18-1395-2018</a>, 2018.
</mixed-citation></ref-html>
<ref-html id="bib1.bib18"><label>Fang(2022)</label><mixed-citation>
Fang, L.: The ground observations for RFSML, Zenodo [data set and code], <a href="https://doi.org/10.5281/zenodo.6551820" target="_blank">https://doi.org/10.5281/zenodo.6551820</a>, 2022.
</mixed-citation></ref-html>
<ref-html id="bib1.bib19"><label>Fernando et al.(2012)Fernando, Mammarella, Grandoni, Fedele, Di Marco, Dimitrova, and Hyde</label><mixed-citation>
Fernando, H., Mammarella, M., Grandoni, G., Fedele, P., Di Marco, R., Dimitrova, R., and Hyde, P.: Forecasting PM<sub>10</sub> in metropolitan areas: Efficacy of neural networks, Environ. Pollut., 163, 62–67, <a href="https://doi.org/10.1016/j.envpol.2011.12.018" target="_blank">https://doi.org/10.1016/j.envpol.2011.12.018</a>, 2012.
</mixed-citation></ref-html>
<ref-html id="bib1.bib20"><label>Fritsch and Carlson(1980)</label><mixed-citation>
Fritsch, F. N. and Carlson, R. E.: Monotone piecewise cubic interpolation, SIAM J. Numer. Anal., 17, 238–246, 1980.
</mixed-citation></ref-html>
<ref-html id="bib1.bib21"><label>Fryer et al.(2021)Fryer, Strümke, and Nguyen</label><mixed-citation>
Fryer, D. V., Strümke, I., and Nguyen, H.: Shapley values for feature selection: The good, the bad, and the axioms, arXiv [preprint], <a href="https://doi.org/10.48550/arXiv.2102.10936" target="_blank">https://doi.org/10.48550/arXiv.2102.10936</a>, 22 February 2021.
</mixed-citation></ref-html>
<ref-html id="bib1.bib22"><label>Golizadeh Akhlaghi et al.(2021)Golizadeh Akhlaghi, Aslansefat, Zhao, Sadati, Badiei, Xiao, Shittu, Fan, and Ma</label><mixed-citation>
Golizadeh Akhlaghi, Y., Aslansefat, K., Zhao, X., Sadati, S., Badiei, A., Xiao, X., Shittu, S., Fan, Y., and Ma, X.: Hourly performance forecast of a dew point cooler using explainable Artificial Intelligence and evolutionary optimisations by 2050, Appl. Energ., 281, 116062, <a href="https://doi.org/10.1016/j.apenergy.2020.116062" target="_blank">https://doi.org/10.1016/j.apenergy.2020.116062</a>, 2021.
</mixed-citation></ref-html>
<ref-html id="bib1.bib23"><label>Grell et al.(2005)Grell, Peckham, Schmitz, McKeen, Frost, Skamarock, and Eder</label><mixed-citation>
Grell, G. A., Peckham, S. E., Schmitz, R., McKeen, S. A., Frost, G., Skamarock, W. C., and Eder, B.: Fully coupled “online” chemistry within the WRF model, Atmos. Environ., 39, 6957–6975, <a href="https://doi.org/10.1016/j.atmosenv.2005.04.027" target="_blank">https://doi.org/10.1016/j.atmosenv.2005.04.027</a>, 2005.
</mixed-citation></ref-html>
<ref-html id="bib1.bib24"><label>Guo et al.(2016)Guo, He, Liu, Miao, Liu, and Zhai</label><mixed-citation>
Guo, J., He, J., Liu, H., Miao, Y., Liu, H., and Zhai, P.: Impact of various emission control schemes on air quality using WRF-Chem during APEC China 2014, Atmos. Environ., 140, 311–319, <a href="https://doi.org/10.1016/j.atmosenv.2016.05.046" target="_blank">https://doi.org/10.1016/j.atmosenv.2016.05.046</a>, 2016.
</mixed-citation></ref-html>
<ref-html id="bib1.bib25"><label>Guyon and Elisseeff(2003)</label><mixed-citation>
Guyon, I. and Elisseeff, A.: An introduction to variable and feature selection, J. Mach. Learn. Res., 3, 1157–1182, 2003.
</mixed-citation></ref-html>
<ref-html id="bib1.bib26"><label>Hao et al.(2021)Hao, Li, Wang, Liao, Yin, Hu, Wei, and Dang</label><mixed-citation>
Hao, X., Li, J., Wang, H., Liao, H., Yin, Z., Hu, J., Wei, Y., and Dang, R.: Long-term health impact of PM<sub>2.5</sub> under whole-year COVID-19 lockdown in China, Environ. Pollut., 290, 118118, <a href="https://doi.org/10.1016/j.envpol.2021.118118" target="_blank">https://doi.org/10.1016/j.envpol.2021.118118</a>, 2021.
</mixed-citation></ref-html>
<ref-html id="bib1.bib27"><label>Hu et al.(2014)Hu, Wang, Ying, and Zhang</label><mixed-citation>
Hu, J., Wang, Y., Ying, Q., and Zhang, H.: Spatial and temporal variability of PM<sub>2.5</sub> and PM<sub>10</sub> over the North China Plain and the Yangtze River Delta, China, Atmos. Environ., 95, 598–609, <a href="https://doi.org/10.1016/j.atmosenv.2014.07.019" target="_blank">https://doi.org/10.1016/j.atmosenv.2014.07.019</a>, 2014.
</mixed-citation></ref-html>
<ref-html id="bib1.bib28"><label>Hu et al.(2017)Hu, Li, Huang, Ying, Zhang, Zhao, Wang, and Zhang</label><mixed-citation>
Hu, J., Li, X., Huang, L., Ying, Q., Zhang, Q., Zhao, B., Wang, S., and Zhang, H.: Ensemble prediction of air quality using the WRF/CMAQ model system for health effect studies in China, Atmos. Chem. Phys., 17, 13103–13118, <a href="https://doi.org/10.5194/acp-17-13103-2017" target="_blank">https://doi.org/10.5194/acp-17-13103-2017</a>, 2017.
</mixed-citation></ref-html>
<ref-html id="bib1.bib29"><label>Huang et al.(2021)Huang, Liu, Yang, Xing, Zhang, Bian, Li, Sahu, Wang, and Liu</label><mixed-citation>
Huang, L., Liu, S., Yang, Z., Xing, J., Zhang, J., Bian, J., Li, S., Sahu, S. K., Wang, S., and Liu, T.-Y.: Exploring deep learning for air pollutant emission estimation, Geosci. Model Dev., 14, 4641–4654, <a href="https://doi.org/10.5194/gmd-14-4641-2021" target="_blank">https://doi.org/10.5194/gmd-14-4641-2021</a>, 2021.
</mixed-citation></ref-html>
<ref-html id="bib1.bib30"><label>Huang et al.(2018)Huang, Zou, He, Hu, Prévôt, and Zhang</label><mixed-citation>
Huang, X.-F., Zou, B.-B., He, L.-Y., Hu, M., Prévôt, A. S. H., and Zhang, Y.-H.: Exploration of PM<sub>2.5</sub> sources on the regional scale in the Pearl River Delta based on ME-2 modeling, Atmos. Chem. Phys., 18, 11563–11580, <a href="https://doi.org/10.5194/acp-18-11563-2018" target="_blank">https://doi.org/10.5194/acp-18-11563-2018</a>, 2018.
</mixed-citation></ref-html>
<ref-html id="bib1.bib31"><label>Hutzell and Luecken(2008)</label><mixed-citation>
Hutzell, W. T. and Luecken, D. J.: Fate and transport of emissions for several trace metals over the United States, Sci. Total Environ., 396, 164–179, <a href="https://doi.org/10.1016/j.scitotenv.2008.02.020" target="_blank">https://doi.org/10.1016/j.scitotenv.2008.02.020</a>, 2008.
</mixed-citation></ref-html>
<ref-html id="bib1.bib32"><label>Inness et al.(2019)Inness, Ades, Agustí-Panareda, Barré, Benedictow, Blechschmidt, Dominguez, Engelen, Eskes, Flemming, Huijnen, Jones, Kipling, Massart, Parrington, Peuch, Razinger, Remy, Schulz, and Suttie</label><mixed-citation>
Inness, A., Ades, M., Agustí-Panareda, A., Barré, J., Benedictow, A., Blechschmidt, A.-M., Dominguez, J. J., Engelen, R., Eskes, H., Flemming, J., Huijnen, V., Jones, L., Kipling, Z., Massart, S., Parrington, M., Peuch, V.-H., Razinger, M., Remy, S., Schulz, M., and Suttie, M.: The CAMS reanalysis of atmospheric composition, Atmos. Chem. Phys., 19, 3515–3556, <a href="https://doi.org/10.5194/acp-19-3515-2019" target="_blank">https://doi.org/10.5194/acp-19-3515-2019</a>, 2019.
</mixed-citation></ref-html>
<ref-html id="bib1.bib33"><label>Jeong and Park(2018)</label><mixed-citation>
Jeong, J. I. and Park, R. J.: Efficacy of dust aerosol forecasts for East Asia using the adjoint of GEOS-Chem with ground-based observations, Environ. Pollut., 234, 885–893, <a href="https://doi.org/10.1016/j.envpol.2017.12.025" target="_blank">https://doi.org/10.1016/j.envpol.2017.12.025</a>, 2018.
</mixed-citation></ref-html>
<ref-html id="bib1.bib34"><label>Jin et al.(2019)Jin, Lin, Segers, Xie, and Heemink</label><mixed-citation>
Jin, J., Lin, H. X., Segers, A., Xie, Y., and Heemink, A.: Machine learning for observation bias correction with application to dust storm data assimilation, Atmos. Chem. Phys., 19, 10009–10026, <a href="https://doi.org/10.5194/acp-19-10009-2019" target="_blank">https://doi.org/10.5194/acp-19-10009-2019</a>, 2019.
</mixed-citation></ref-html>
<ref-html id="bib1.bib35"><label>Jothi et al.(2021)Jothi, Husain, and Rashid</label><mixed-citation>
Jothi, N., Husain, W., and Rashid, N. A.: Predicting generalized anxiety disorder among women using Shapley value, J. Infect. Public Heal., 14, 103–108, <a href="https://doi.org/10.1016/j.jiph.2020.02.042" target="_blank">https://doi.org/10.1016/j.jiph.2020.02.042</a>, 2021.
</mixed-citation></ref-html>
<ref-html id="bib1.bib36"><label>Ke et al.(2021)Ke, Gong, He, Zhang, Cui, Wang, Mo, Zhou, and Zhang</label><mixed-citation>
Ke, H., Gong, S., He, J., Zhang, L., Cui, B., Wang, Y., Mo, J., Zhou, Y., and Zhang, H.: Development and application of an automated air quality forecasting system based on machine learning, Sci. Total Environ., 806, 151204, <a href="https://doi.org/10.1016/j.scitotenv.2021.151204" target="_blank">https://doi.org/10.1016/j.scitotenv.2021.151204</a>, 2021.
</mixed-citation></ref-html>
<ref-html id="bib1.bib37"><label>Kincaid et al.(2009)Kincaid, Kincaid, and Cheney</label><mixed-citation>
Kincaid, D., Kincaid, D. R., and Cheney, E. W.: Numerical analysis: mathematics of scientific computing, vol. 2, American Mathematical Soc., ISBN 978-0-8218-4788-6, 2009.
</mixed-citation></ref-html>
<ref-html id="bib1.bib38"><label>Kinney and Atwal(2014)</label><mixed-citation>
Kinney, J. B. and Atwal, G. S.: Equitability, mutual information, and the maximal information coefficient, P. Natl. Acad. Sci. USA, 111, 3354–3359, 2014.
</mixed-citation></ref-html>
<ref-html id="bib1.bib39"><label>Leufen et al.(2021)Leufen, Kleinert, and Schultz</label><mixed-citation>
Leufen, L. H., Kleinert, F., and Schultz, M. G.: MLAir (v1.0) – a tool to enable fast and flexible machine learning on air data time series, Geosci. Model Dev., 14, 1553–1574, <a href="https://doi.org/10.5194/gmd-14-1553-2021" target="_blank">https://doi.org/10.5194/gmd-14-1553-2021</a>, 2021.
</mixed-citation></ref-html>
<ref-html id="bib1.bib40"><label>M. Li et al.(2017)Li, Liu, Geng, Hong, Liu, Song, Tong, Zheng, Cui, Man, Zhang, and He</label><mixed-citation>
Li, M., Liu, H., Geng, G., Hong, C., Liu, F., Song, Y., Tong, D., Zheng, B., Cui, H., Man, H., Zhang, Q., and He, K.: Anthropogenic emission inventories in China: a review, Natl. Sci. Rev., 4, 834–866, <a href="https://doi.org/10.1093/nsr/nwx150" target="_blank">https://doi.org/10.1093/nsr/nwx150</a>, 2017.
</mixed-citation></ref-html>
<ref-html id="bib1.bib41"><label>X. Li et al.(2017)Li, Peng, Yao, Cui, Hu, You, and Chi</label><mixed-citation>
Li, X., Peng, L., Yao, X., Cui, S., Hu, Y., You, C., and Chi, T.: Long short-term memory neural network for air pollutant concentration predictions: Method development and evaluation, Environ. Pollut., 231, 997–1004, <a href="https://doi.org/10.1016/j.envpol.2017.08.114" target="_blank">https://doi.org/10.1016/j.envpol.2017.08.114</a>, 2017.
</mixed-citation></ref-html>
<ref-html id="bib1.bib42"><label>Li et al.(2014)Li, Ma, van der Kuijp, Yuan, and Huang</label><mixed-citation>
Li, Z., Ma, Z., van der Kuijp, T. J., Yuan, Z., and Huang, L.: A review of soil heavy metal pollution from mines in China: Pollution and health risk assessment, Sci. Total Environ., 468–469, 843–853, <a href="https://doi.org/10.1016/j.scitotenv.2013.08.090" target="_blank">https://doi.org/10.1016/j.scitotenv.2013.08.090</a>, 2014.
</mixed-citation></ref-html>
<ref-html id="bib1.bib43"><label>Liao et al.(2017)Liao, Wang, Ai, Gui, Duan, Zhao, Zhang, Jiang, and Sun</label><mixed-citation>
Liao, T., Wang, S., Ai, J., Gui, K., Duan, B., Zhao, Q., Zhang, X., Jiang, W., and Sun, Y.: Heavy pollution episodes, transport pathways and potential sources of PM<sub>2.5</sub> during the winter of 2013 in Chengdu (China), Sci. Total Environ., 584–585, 1056–1065, <a href="https://doi.org/10.1016/j.scitotenv.2017.01.160" target="_blank">https://doi.org/10.1016/j.scitotenv.2017.01.160</a>, 2017.
</mixed-citation></ref-html>
<ref-html id="bib1.bib44"><label>Liaw and Wiener(2002)Liaw, Wiener et al.</label><mixed-citation>
Liaw, A. and Wiener, M.: Classification and regression by randomForest, R news, 2, 18–22, 2002.
</mixed-citation></ref-html>
<ref-html id="bib1.bib45"><label>Liu et al.(2017)Liu, He, Guo, Miao, Yin, Wang, Xu, Liu, Yan, Li, and Zhai</label><mixed-citation>
Liu, H., He, J., Guo, J., Miao, Y., Yin, J., Wang, Y., Xu, H., Liu, H., Yan, Y., Li, Y., and Zhai, P.: The blue skies in Beijing during APEC 2014: A quantitative assessment of emission control efficiency and meteorological influence, Atmos. Environ., 167, 235–244, <a href="https://doi.org/10.1016/j.atmosenv.2017.08.032" target="_blank">https://doi.org/10.1016/j.atmosenv.2017.08.032</a>, 2017.
</mixed-citation></ref-html>
<ref-html id="bib1.bib46"><label>Liu and Diamond(2005)</label><mixed-citation>
Liu, J. and Diamond, J.: China's environment in a globalizing world, Nature, 435, 1179–1186, <a href="https://doi.org/10.1038/4351179a" target="_blank">https://doi.org/10.1038/4351179a</a>, 2005.
</mixed-citation></ref-html>
<ref-html id="bib1.bib47"><label>Liu et al.(2018)Liu, Lau, Sandbrink, and Fung</label><mixed-citation>
Liu, T., Lau, A. K. H., Sandbrink, K., and Fung, J. C. H.: Time Series Forecasting of Air Quality Based On Regional Numerical Modeling in Hong Kong, J. Geophys. Res.-Atmos., 123, 4175–4196, <a href="https://doi.org/10.1002/2017JD028052" target="_blank">https://doi.org/10.1002/2017JD028052</a>, 2018.
</mixed-citation></ref-html>
<ref-html id="bib1.bib48"><label>Lundberg and Lee(2017a)</label><mixed-citation>
Lundberg, S. M. and Lee, S.-I.: A Unified Approach to Interpreting Model Predictions, <a href="http://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions.pdf" target="_blank">http://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions.pdf</a> (last access: June 2022), 2017.
</mixed-citation></ref-html>
<ref-html id="bib1.bib49"><label>J. Ma et al.(2019)Ma, Ding, Gan, Lin, and Wan</label><mixed-citation>
Ma, J., Ding, Y., Gan, V. J. L., Lin, C., and Wan, Z.: Spatiotemporal Prediction of PM<sub>2.5</sub> Concentrations at Different Time Granularities Using IDW-BLSTM, IEEE Access, 7, 107897–107907, <a href="https://doi.org/10.1109/ACCESS.2019.2932445" target="_blank">https://doi.org/10.1109/ACCESS.2019.2932445</a>, 2019.
</mixed-citation></ref-html>
<ref-html id="bib1.bib50"><label>T. Ma et al.(2019)Ma, Duan, He, Qin, Tong, Geng, Liu, Li, Yang, Ye, Xu, Zhang, and Ma</label><mixed-citation>
Ma, T., Duan, F., He, K., Qin, Y., Tong, D., Geng, G., Liu, X., Li, H., Yang, S., Ye, S., Xu, B., Zhang, Q., and Ma, Y.: Air pollution characteristics and their relationship with emissions and meteorology in the Yangtze River Delta region during 2014–2016, J. Environ. Sci.-China, 83, 8–20, <a href="https://doi.org/10.1016/j.jes.2019.02.031" target="_blank">https://doi.org/10.1016/j.jes.2019.02.031</a>, 2019.
</mixed-citation></ref-html>
<ref-html id="bib1.bib51"><label>Masih(2019)</label><mixed-citation>
Masih, A.: Machine learning algorithms in air quality modeling, Global Journal of Environmental Science and Management, 5, 515–534, 2019.
</mixed-citation></ref-html>
<ref-html id="bib1.bib52"><label>Molnar(2020)</label><mixed-citation>
Molnar, C.: Interpretable Machine Learning, Lulu.com, 2020.
</mixed-citation></ref-html>
<ref-html id="bib1.bib53"><label>Muñoz Sabater(2021)</label><mixed-citation>
Muñoz Sabater, J.: ERA5-Land hourly data from 1950 to 1980, Copernicus Climate Change Service (C3S)
Climate Data Store (CDS) [data set], <a href="https://doi.org/10.24381/cds.e2161bac" target="_blank">https://doi.org/10.24381/cds.e2161bac</a>, 2021.
</mixed-citation></ref-html>
<ref-html id="bib1.bib54"><label>Muñoz Sabater et al.(2021)Muñoz Sabater, Dutra, Agustí-Panareda, Albergel, Arduini, Balsamo, Boussetta, Choulga, Harrigan, Hersbach, Martens, Miralles, Piles, Rodríguez-Fernández, Zsoter, Buontempo, and Thépaut</label><mixed-citation>
Muñoz-Sabater, J., Dutra, E., Agustí-Panareda, A., Albergel, C., Arduini, G., Balsamo, G., Boussetta, S., Choulga, M., Harrigan, S., Hersbach, H., Martens, B., Miralles, D. G., Piles, M., Rodríguez-Fernández, N. J., Zsoter, E., Buontempo, C., and Thépaut, J.-N.: ERA5-Land: a state-of-the-art global reanalysis dataset for land applications, Earth Syst. Sci. Data, 13, 4349–4383, <a href="https://doi.org/10.5194/essd-13-4349-2021" target="_blank">https://doi.org/10.5194/essd-13-4349-2021</a>, 2021.
</mixed-citation></ref-html>
<ref-html id="bib1.bib55"><label>Murray et al.(2020)Murray, Aravkin, Zheng, Abbafati, Abbas, Abbasi-Kangevari, Abd-Allah, Abdelalim, Abdollahi, Abdollahpour et al.</label><mixed-citation>
Murray, C. J., Aravkin, A. Y., Zheng, P., Abbafati, C., Abbas, K. M., Abbasi-Kangevari, M., Abd-Allah, F., Abdelalim, A., Abdollahi, M., Abdollahpour, I., and GBD 2019 Risk Factors Collaborators: Global burden of 87 risk factors in 204 countries and territories, 1990–2019: a systematic analysis for the Global Burden of Disease Study 2019, Lancet, 396, 1223–1249, 2020.
</mixed-citation></ref-html>
<ref-html id="bib1.bib56"><label>Osowski et al.(2004)Osowski, Siwek, and Markiewicz</label><mixed-citation>
Osowski, S., Siwek, K., and Markiewicz, T.: MLP and SVM networks – a comparative study, in: Proceedings of the 6th Nordic Signal Processing Symposium, 2004, NORSIG 2004, Espoo, Finland, 11–11 June 2004, 37–40, ISBN 951-22-7065-X, IEEE, 2004.
</mixed-citation></ref-html>
<ref-html id="bib1.bib57"><label>Park and Park(2021)</label><mixed-citation>
Park, H. and Park, D. Y.: Comparative analysis on predictability of natural ventilation rate based on machine learning algorithms, Build. Environ., 195, 107744, <a href="https://doi.org/10.1016/j.buildenv.2021.107744" target="_blank">https://doi.org/10.1016/j.buildenv.2021.107744</a>, 2021.
</mixed-citation></ref-html>
<ref-html id="bib1.bib58"><label>Pedregosa et al.(2011)Pedregosa, Varoquaux, Gramfort, Michel, Thirion, Grisel, Blondel, Prettenhofer, Weiss, Dubourg, Vanderplas, Passos, Cournapeau, Brucher, Perrot, and Duchesnay</label><mixed-citation>
Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., Thirion, B., Grisel, O., Blondel, M., Prettenhofer, P., Weiss, R., Dubourg, V., Vanderplas, J., Passos, A., Cournapeau, D., Brucher, M., Perrot, M., and Duchesnay, E.: Scikit-learn: Machine Learning in Python, J. Mach. Learn. Res., 12, 2825–2830, 2011.
</mixed-citation></ref-html>
<ref-html id="bib1.bib59"><label>Pérez et al.(2000)Pérez, Trier, and Reyes</label><mixed-citation>
Pérez, P., Trier, A., and Reyes, J.: Prediction of PM<sub>2.5</sub> concentrations several hours in advance using neural networks in Santiago, Chile, Atmos. Environ., 34, 1189–1196, <a href="https://doi.org/10.1016/S1352-2310(99)00316-7" target="_blank">https://doi.org/10.1016/S1352-2310(99)00316-7</a>, 2000.
</mixed-citation></ref-html>
<ref-html id="bib1.bib60"><label>Pui et al.(2014)Pui, Chen, and Zuo</label><mixed-citation>
Pui, D. Y., Chen, S.-C., and Zuo, Z.: PM<sub>2.5</sub> in China: Measurements, sources, visibility and health effects, and mitigation, Particuology, 13, 1–26, <a href="https://doi.org/10.1016/j.partic.2013.11.001" target="_blank">https://doi.org/10.1016/j.partic.2013.11.001</a>, 2014.
</mixed-citation></ref-html>
<ref-html id="bib1.bib61"><label>Qin et al.(2019)Qin, Cen, and Guo</label><mixed-citation>
Qin, Z., Cen, C., and Guo, X.: Prediction of Air Quality Based on KNN-LSTM, J. Phys. Conf. Ser., 1237, 042030, <a href="https://doi.org/10.1088/1742-6596/1237/4/042030" target="_blank">https://doi.org/10.1088/1742-6596/1237/4/042030</a>, 2019.
</mixed-citation></ref-html>
<ref-html id="bib1.bib62"><label>Reichstein et al.(2019)Reichstein, Camps-Valls, Stevens, Jung, Denzler, Carvalhais, and Prabhat</label><mixed-citation>
Reichstein, M., Camps-Valls, G., Stevens, B., Jung, M., Denzler, J., Carvalhais, N., and Prabhat: Deep learning and process understanding for data-driven Earth system science, Nature, 566, 195–204, 2019.
</mixed-citation></ref-html>
<ref-html id="bib1.bib63"><label>Rodriguez-Galiano et al.(2012)Rodriguez-Galiano, Chica-Olmo, Abarca-Hernandez, Atkinson, and Jeganathan</label><mixed-citation>
Rodriguez-Galiano, V., Chica-Olmo, M., Abarca-Hernandez, F., Atkinson, P., and Jeganathan, C.: Random Forest classification of Mediterranean land cover using multi-seasonal imagery and multi-seasonal texture, Remote Sens. Environ., 121, 93–107, <a href="https://doi.org/10.1016/j.rse.2011.12.003" target="_blank">https://doi.org/10.1016/j.rse.2011.12.003</a>, 2012.
</mixed-citation></ref-html>
<ref-html id="bib1.bib64"><label>Sawaragi et al.(1979)Sawaragi, Soeda, Tamura, Yoshimura, Ohe, Chujo, and Ishihara</label><mixed-citation>
Sawaragi, Y., Soeda, T., Tamura, H., Yoshimura, T., Ohe, S., Chujo, Y., and Ishihara, H.: Statistical prediction of air pollution levels using non-physical models, Automatica, 15, 441–451, <a href="https://doi.org/10.1016/0005-1098(79)90018-9" target="_blank">https://doi.org/10.1016/0005-1098(79)90018-9</a>, 1979.
</mixed-citation></ref-html>
<ref-html id="bib1.bib65"><label>Shapley(1952)</label><mixed-citation>
Shapley, L. S.: A Value for N-Person Games, RAND Corporation, Santa Monica, CA, <a href="https://doi.org/10.7249/P0295" target="_blank">https://doi.org/10.7249/P0295</a>, 1952.
</mixed-citation></ref-html>
<ref-html id="bib1.bib66"><label>Shishegaran et al.(2020)Shishegaran, Saeedi, Kumar, and Ghiasinejad</label><mixed-citation>
Shishegaran, A., Saeedi, M., Kumar, A., and Ghiasinejad, H.: Prediction of air quality in Tehran by developing the nonlinear ensemble model, J. Clean. Prod., 259, 120825, <a href="https://doi.org/10.1016/j.jclepro.2020.120825" target="_blank">https://doi.org/10.1016/j.jclepro.2020.120825</a>, 2020.
</mixed-citation></ref-html>
<ref-html id="bib1.bib67"><label>Shtein et al.(2020)Shtein, Kloog, Schwartz, Silibello, Michelozzi, Gariazzo, Viegi, Forastiere, Karnieli, and Just</label><mixed-citation>
Shtein, A., Kloog, I., Schwartz, J., Silibello, C., Michelozzi, P., Gariazzo, C., Viegi, G., Forastiere, F., Karnieli, A., and Just, A.: Estimating Daily PM<sub>2.5</sub> and PM<sub>10</sub> over Italy Using an Ensemble Model, Environ. Sci. Technol., 54, 120–128, <a href="https://doi.org/10.1021/acs.est.9b04279" target="_blank">https://doi.org/10.1021/acs.est.9b04279</a>, 2020.
</mixed-citation></ref-html>
<ref-html id="bib1.bib68"><label>Shu et al.(2021)Shu, Liu, Zhao, Xia, Wang, Cao, Wang, Zhang, Zheng, Shen, Luo, and Li</label><mixed-citation>
Shu, Z., Liu, Y., Zhao, T., Xia, J., Wang, C., Cao, L., Wang, H., Zhang, L., Zheng, Y., Shen, L., Luo, L., and Li, Y.: Elevated 3D structures of PM<sub>2.5</sub> and impact of complex terrain-forcing circulations on heavy haze pollution over Sichuan Basin, China, Atmos. Chem. Phys., 21, 9253–9268, <a href="https://doi.org/10.5194/acp-21-9253-2021" target="_blank">https://doi.org/10.5194/acp-21-9253-2021</a>, 2021.
</mixed-citation></ref-html>
<ref-html id="bib1.bib69"><label>Song et al.(2017a)Song, He, Wu, Jin, Chen, Li, Ren, Zhang, and Mao</label><mixed-citation>
Song, C., He, J., Wu, L., Jin, T., Chen, X., Li, R., Ren, P., Zhang, L., and Mao, H.: Health burden attributable to ambient PM<sub>2.5</sub> in China, Environ. Pollut., 223, 575–586, <a href="https://doi.org/10.1016/j.envpol.2017.01.060" target="_blank">https://doi.org/10.1016/j.envpol.2017.01.060</a>, 2017a.
</mixed-citation></ref-html>
<ref-html id="bib1.bib70"><label>Song et al.(2017b)Song, Wu, Xie, He, Chen, Wang, Lin, Jin, Wang, Liu, Dai, Liu, Wang, and Mao</label><mixed-citation>
Song, C., Wu, L., Xie, Y., He, J., Chen, X., Wang, T., Lin, Y., Jin, T., Wang, A., Liu, Y., Dai, Q., Liu, B., Wang, Y., and Mao, H.: Air pollution in China: Status and spatiotemporal variations, Environ. Pollut., 227, 334–347, <a href="https://doi.org/10.1016/j.envpol.2017.04.075" target="_blank">https://doi.org/10.1016/j.envpol.2017.04.075</a>, 2017b.
</mixed-citation></ref-html>
<ref-html id="bib1.bib71"><label>Sun et al.(2018)Sun, Li, Dai, Song, and Lang</label><mixed-citation>
Sun, G., Li, J., Dai, J., Song, Z., and Lang, F.: Feature selection for IoT based on maximal information coefficient, Future Gener. Comp. Sy., 89, 606–616, <a href="https://doi.org/10.1016/j.future.2018.05.060" target="_blank">https://doi.org/10.1016/j.future.2018.05.060</a>, 2018.
</mixed-citation></ref-html>
<ref-html id="bib1.bib72"><label>Sun and Li(2020a)</label><mixed-citation>
Sun, W. and Li, Z.: Hourly PM<sub>2.5</sub> concentration forecasting based on feature extraction and stacking-driven ensemble model for the winter of the Beijing-Tianjin-Hebei area, Atmos. Pollut. Res., 11, 110–121, <a href="https://doi.org/10.1016/j.apr.2020.02.022" target="_blank">https://doi.org/10.1016/j.apr.2020.02.022</a>, 2020a.
</mixed-citation></ref-html>
<ref-html id="bib1.bib73"><label>Sun and Li(2020b)</label><mixed-citation>
Sun, W. and Li, Z.: Hourly PM<sub>2.5</sub> concentration forecasting based on mode decomposition-recombination technique and ensemble learning approach in severe haze episodes of China, J. Clean. Prod., 263, 121442, <a href="https://doi.org/10.1016/j.jclepro.2020.121442" target="_blank">https://doi.org/10.1016/j.jclepro.2020.121442</a>, 2020b.
</mixed-citation></ref-html>
<ref-html id="bib1.bib74"><label>Sun et al.(2013)Sun, Zhang, Palazoglu, Singh, Zhang, and Liu</label><mixed-citation>
Sun, W., Zhang, H., Palazoglu, A., Singh, A., Zhang, W., and Liu, S.: Prediction of 24-hour-average PM<sub>2.5</sub> concentrations using a hidden Markov model with different emission distributions in Northern California, Sci. Total Environ., 443, 93–103, <a href="https://doi.org/10.1016/j.scitotenv.2012.10.070" target="_blank">https://doi.org/10.1016/j.scitotenv.2012.10.070</a>, 2013.
</mixed-citation></ref-html>
<ref-html id="bib1.bib75"><label>Taylor(2005)</label><mixed-citation>
Taylor, K. E.: Taylor diagram primer, Work. Pap., 1–4, <a href="https://www.atmos.albany.edu/daes/atmclasses/atm401/spring_2016/ppts_pdfs/Taylor_diagram_primer.pdf" target="_blank">https://www.atmos.albany.edu/daes/atmclasses/atm401/spring_2016/ppts_pdfs/Taylor_diagram_primer.pdf</a> (last access: October 2022), 2005.
</mixed-citation></ref-html>
<ref-html id="bib1.bib76"><label>Wu et al.(2020)Wu, Wang, He, and Wu</label><mixed-citation>
Wu, X., Wang, Y., He, S., and Wu, Z.: PM<sub>2.5</sub>&thinsp;/&thinsp;PM<sub>10</sub> ratio prediction based on a long short-term memory neural network in Wuhan, China, Geosci. Model Dev., 13, 1499–1511, <a href="https://doi.org/10.5194/gmd-13-1499-2020" target="_blank">https://doi.org/10.5194/gmd-13-1499-2020</a>, 2020.
</mixed-citation></ref-html>
<ref-html id="bib1.bib77"><label>Xi et al.(2015)Xi, Wei, Xiaoguang, Yijie, Xinxin, Wenjun, and Jin</label><mixed-citation>
Xi, X., Wei, Z., Xiaoguang, R., Yijie, W., Xinxin, B., Wenjun, Y., and Jin, D.: A comprehensive evaluation of air pollution prediction improvement by a machine learning method, in: 2015 IEEE International Conference on Service Operations And Logistics, And Informatics (SOLI), Yasmine Hammamet, Tunisia, 15–17 November 2015, 176–181, <a href="https://doi.org/10.1109/SOLI.2015.7367615" target="_blank">https://doi.org/10.1109/SOLI.2015.7367615</a>, 2015.
</mixed-citation></ref-html>
<ref-html id="bib1.bib78"><label>Xu et al.(2021)Xu, Jin, Wang, Segers, Deng, and Lin</label><mixed-citation>
Xu, M., Jin, J., Wang, G., Segers, A., Deng, T., and Lin, H. X.: Machine learning based bias correction for numerical chemical transport models, Atmos. Environ., 248, 118022, <a href="https://doi.org/10.1016/j.atmosenv.2020.118022" target="_blank">https://doi.org/10.1016/j.atmosenv.2020.118022</a>, 2021.
</mixed-citation></ref-html>
<ref-html id="bib1.bib79"><label>Xue et al.(2019)Xue, Zhu, Zheng, Liu, Li, and Zhang</label><mixed-citation>
Xue, T., Zhu, T., Zheng, Y., Liu, J., Li, X., and Zhang, Q.: Change in the number of PM<sub>2.5</sub>-attributed deaths in China from 2000 to 2010: Comparison between estimations from census-based epidemiology and pre-established exposure-response functions, Environ. Int., 129, 430–437, <a href="https://doi.org/10.1016/j.envint.2019.05.067" target="_blank">https://doi.org/10.1016/j.envint.2019.05.067</a>, 2019.
</mixed-citation></ref-html>
<ref-html id="bib1.bib80"><label>Yu and Ma(2021)</label><mixed-citation>
Yu, S. and Ma, J.: Deep Learning for Geophysics: Current and Future Trends, Rev. Geophys., 59, e2021RG000742, <a href="https://doi.org/10.1029/2021RG000742" target="_blank">https://doi.org/10.1029/2021RG000742</a>, 2021.

</mixed-citation></ref-html>
<ref-html id="bib1.bib81"><label>Zhai et al.(2019)Zhai, Jacob, Wang, Shen, Li, Zhang, Gui, Zhao, and Liao</label><mixed-citation>
Zhai, S., Jacob, D. J., Wang, X., Shen, L., Li, K., Zhang, Y., Gui, K., Zhao, T., and Liao, H.: Fine particulate matter (PM<sub>2.5</sub>) trends in China, 2013–2018: separating contributions from anthropogenic emissions and meteorology, Atmos. Chem. Phys., 19, 11031–11041, <a href="https://doi.org/10.5194/acp-19-11031-2019" target="_blank">https://doi.org/10.5194/acp-19-11031-2019</a>, 2019.
</mixed-citation></ref-html>
<ref-html id="bib1.bib82"><label>Zhan et al.(2019)Zhan, Xie, Fang, Wang, Wu, Lu, Li, Chen, Zhuang, Li, Zhang, Gao, Ren, and Zhao</label><mixed-citation>
Zhan, C., Xie, M., Fang, D., Wang, T., Wu, Z., Lu, H., Li, M., Chen, P., Zhuang, B., Li, S., Zhang, Z., Gao, D., Ren, J., and Zhao, M.: Synoptic weather patterns and their impacts on regional particle pollution in the city cluster of the Sichuan Basin, China, Atmos. Environ., 208, 34–47, <a href="https://doi.org/10.1016/j.atmosenv.2019.03.033" target="_blank">https://doi.org/10.1016/j.atmosenv.2019.03.033</a>, 2019.
</mixed-citation></ref-html>
<ref-html id="bib1.bib83"><label>Zhang et al.(2018)Zhang, Ma, Zhao, Liu, Wang, Jia, and Zhang</label><mixed-citation>
Zhang, Q., Ma, Q., Zhao, B., Liu, X., Wang, Y., Jia, B., and Zhang, X.: Winter haze over North China Plain from 2009 to 2016: Influence of emission and meteorology, Environ. Pollut., 242, 1308–1318, <a href="https://doi.org/10.1016/j.envpol.2018.08.019" target="_blank">https://doi.org/10.1016/j.envpol.2018.08.019</a>, 2018.
</mixed-citation></ref-html>
<ref-html id="bib1.bib84"><label>Zhang et al.(2020)Zhang, Wu, Wang, Sun, and Liu</label><mixed-citation>
Zhang, Q., Wu, S., Wang, X., Sun, B., and Liu, H.: A PM<sub>2.5</sub> concentration prediction model based on multi-task deep learning for intensive air quality monitoring stations, J. Clean. Prod., 275, 122722, <a href="https://doi.org/10.1016/j.jclepro.2020.122722" target="_blank">https://doi.org/10.1016/j.jclepro.2020.122722</a>, 2020.
</mixed-citation></ref-html>
<ref-html id="bib1.bib85"><label>Zhang(2012)</label><mixed-citation>
Zhang, S.: Nearest neighbor selection for iteratively kNN imputation, J. Syst. Software, 85, 2541–2552, <a href="https://doi.org/10.1016/j.jss.2012.05.073" target="_blank">https://doi.org/10.1016/j.jss.2012.05.073</a>, 2012.
</mixed-citation></ref-html>
<ref-html id="bib1.bib86"><label>Zhou et al.(2017)Zhou, Xu, Xie, Chang, Gao, Gu, and Zhou</label><mixed-citation>
Zhou, G., Xu, J., Xie, Y., Chang, L., Gao, W., Gu, Y., and Zhou, J.: Numerical air quality forecasting over eastern China: An operational application of WRF-Chem, Atmos. Environ., 153, 94–108, <a href="https://doi.org/10.1016/j.atmosenv.2017.01.020" target="_blank">https://doi.org/10.1016/j.atmosenv.2017.01.020</a>, 2017.
</mixed-citation></ref-html>
<ref-html id="bib1.bib87"><label>Zimmermann and Poppe(1996)</label><mixed-citation>
Zimmermann, J. and Poppe, D.: A supplement for the RADM2 chemical mechanism: The photooxidation of isoprene, Atmos. Environ., 30, 1255–1269, <a href="https://doi.org/10.1016/1352-2310(95)00417-3" target="_blank">https://doi.org/10.1016/1352-2310(95)00417-3</a>, 1996.
</mixed-citation></ref-html>
<ref-html id="bib1.bib88"><label>Ziomas et al.(1995)Ziomas, Melas, Zerefos, Bais, and Paliatsos</label><mixed-citation>
Ziomas, I. C., Melas, D., Zerefos, C. S., Bais, A. F., and Paliatsos, A. G.: Forecasting peak pollutant levels from meteorological variables, Atmos. Environ., 29, 3703–3711, <a href="https://doi.org/10.1016/1352-2310(95)00131-H" target="_blank">https://doi.org/10.1016/1352-2310(95)00131-H</a>, 1995.
</mixed-citation></ref-html>--></article>
