<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing with OASIS Tables v3.0 20080202//EN" "https://jats.nlm.nih.gov/nlm-dtd/publishing/3.0/journalpub-oasis3.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:oasis="http://docs.oasis-open.org/ns/oasis-exchange/table" xml:lang="en" dtd-version="3.0" article-type="research-article">
  <front>
    <journal-meta><journal-id journal-id-type="publisher">GMD</journal-id><journal-title-group>
    <journal-title>Geoscientific Model Development</journal-title>
    <abbrev-journal-title abbrev-type="publisher">GMD</abbrev-journal-title><abbrev-journal-title abbrev-type="nlm-ta">Geosci. Model Dev.</abbrev-journal-title>
  </journal-title-group><issn pub-type="epub">1991-9603</issn><publisher>
    <publisher-name>Copernicus Publications</publisher-name>
    <publisher-loc>Göttingen, Germany</publisher-loc>
  </publisher></journal-meta>
    <article-meta>
      <article-id pub-id-type="doi">10.5194/gmd-19-3213-2026</article-id><title-group><article-title>MeteoSaver v1.0: a machine-learning based software  for the transcription of historical weather data</article-title><alt-title>MeteoSaver 1.0</alt-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author" corresp="yes" rid="aff1">
          <name><surname>Muheki</surname><given-names>Derrick</given-names></name>
          <email>derrick.muheki@vub.be</email>
        <ext-link>https://orcid.org/0000-0001-9390-2836</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff2">
          <name><surname>Vercruysse</surname><given-names>Bas</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff2 aff10">
          <name><surname>Chandrasekar</surname><given-names>Krishna Kumar Thirukokaranam</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff2">
          <name><surname>Verbruggen</surname><given-names>Christophe</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff2 aff9">
          <name><surname>Birkholz</surname><given-names>Julie M.</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff3">
          <name><surname>Hufkens</surname><given-names>Koen</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff4">
          <name><surname>Verbeeck</surname><given-names>Hans</given-names></name>
          
        <ext-link>https://orcid.org/0000-0003-1490-0168</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff5">
          <name><surname>Boeckx</surname><given-names>Pascal</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>Lampe</surname><given-names>Seppe</given-names></name>
          
        <ext-link>https://orcid.org/0000-0002-7907-4496</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff6">
          <name><surname>Hawkins</surname><given-names>Ed</given-names></name>
          
        <ext-link>https://orcid.org/0000-0001-9477-3677</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff7">
          <name><surname>Thorne</surname><given-names>Peter</given-names></name>
          
        <ext-link>https://orcid.org/0000-0003-0485-9798</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff8">
          <name><surname>Ntumba</surname><given-names>Dominique Kankonde</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff8">
          <name><surname>Moulasa</surname><given-names>Olivier Kapalay</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>Thiery</surname><given-names>Wim</given-names></name>
          
        <ext-link>https://orcid.org/0000-0002-5183-6145</ext-link></contrib>
        <aff id="aff1"><label>1</label><institution>Vrije Universiteit Brussel, Department of Water and Climate, 1050 Brussels, Belgium</institution>
        </aff>
        <aff id="aff2"><label>2</label><institution>Ghent University, Department of History, Ghent Centre for Digital Humanities, 9000 Ghent, Belgium</institution>
        </aff>
        <aff id="aff3"><label>3</label><institution>BlueGreen Labs (bv), 9120 Melsele, Belgium</institution>
        </aff>
        <aff id="aff4"><label>4</label><institution>Ghent University, Department of Environment, 9000 Ghent, Belgium</institution>
        </aff>
        <aff id="aff5"><label>5</label><institution>Ghent University, Isotope Bioscience Laboratory – ISOFYS, 9000 Ghent, Belgium</institution>
        </aff>
        <aff id="aff6"><label>6</label><institution>University of Reading, National Centre for Atmospheric Science, Department of Meteorology, RG6 6ET Reading, UK</institution>
        </aff>
        <aff id="aff7"><label>7</label><institution>Maynooth University, ICARUS Climate Research Centre, Maynooth, Ireland</institution>
        </aff>
        <aff id="aff8"><label>8</label><institution>Institut National pour l'Etude et la Recherche Agronomiques, Direction Générale,  Kinshasa, Democratic Republic of the Congo</institution>
        </aff>
        <aff id="aff9"><label>9</label><institution>Digital Research Lab, KBR  –  Royal Library of Belgium, 1000 Brussels, Belgium</institution>
        </aff>
        <aff id="aff10"><label>10</label><institution>Royal Museums of Art and History, 1000 Brussels, Belgium</institution>
        </aff>
      </contrib-group>
      <author-notes><corresp id="corr1">Derrick Muheki (derrick.muheki@vub.be)</corresp></author-notes><pub-date><day>23</day><month>April</month><year>2026</year></pub-date>
      
      <volume>19</volume>
      <issue>8</issue>
      <fpage>3213</fpage><lpage>3255</lpage>
      <history>
        <date date-type="received"><day>2</day><month>December</month><year>2024</year></date>
           <date date-type="rev-request"><day>10</day><month>June</month><year>2025</year></date>
           <date date-type="rev-recd"><day>20</day><month>March</month><year>2026</year></date>
           <date date-type="accepted"><day>27</day><month>March</month><year>2026</year></date>
      </history>
      <permissions>
        <copyright-statement>Copyright: © 2026 Derrick Muheki et al.</copyright-statement>
        <copyright-year>2026</copyright-year>
      <license license-type="open-access"><license-p>This work is licensed under the Creative Commons Attribution 4.0 International License. To view a copy of this licence, visit <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link></license-p></license></permissions><self-uri xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026.html">This article is available from https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026.html</self-uri><self-uri xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026.pdf">The full text article is available as a PDF file from https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026.pdf</self-uri>
      <abstract><title>Abstract</title>

      <p id="d2e267">Archives of observed weather data present unique opportunities for scientists to obtain long time series of the historical climate for many regions of the world. Unfortunately, most of these observational records are to date available only on paper, and thus require digitization and transcription to facilitate analysis of climatic trends. Here we present a new open-source software, MeteoSaver, that uses machine learning (ML) algorithms to transcribe handwritten records of historical weather data. MeteoSaver version 1.0 processes images of tabular sheets alongside user-defined configuration settings, performing transcription through five sequential steps: (i) image pre-processing, (ii) table and cell detection, (iii) transcription, (iv) quality assessment and quality control, and (v) data formatting and upload. As an illustration and evaluation of the software, we apply MeteoSaver to ten pictured sheets of handwritten temperature and precipitation observations from the Democratic Republic of the Congo. 
The results show that 95 <inline-formula><mml:math id="M1" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula>–100 <inline-formula><mml:math id="M2" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula> of the daily temperature values can be transcribed, of which a median of 74.4 <inline-formula><mml:math id="M3" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula> reached the highest internal quality flag and 74 <inline-formula><mml:math id="M4" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula> match the manually transcribed record, yielding a median mean absolute error of 0.3 <inline-formula><mml:math id="M5" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula>. These results illustrate that MeteoSaver can be applied to a range of handwriting styles and varying tabular dimensions, paper sizes, and maintenance conditions, highlighting its potential for transcribing tabular meteorological observations from multiple regions, especially if the sheets have a consistent format. Overall, our open-source software can help address the challenges of limited available hydroclimatic data within many regions of the world, by helping to save millions of handwritten records of historical weather data presently stored in archives, and expedite research on the climate and environmental changes in data-scarce regions.</p>
  </abstract>
    
<funding-group>
<award-group id="gs1">
<funding-source>Fonds Wetenschappelijk Onderzoek</funding-source>
<award-id>11M8825N</award-id>
<award-id>11M8823N</award-id>
</award-group>
<award-group id="gs2">
<funding-source>HORIZON EUROPE European Research Council</funding-source>
<award-id>101076909</award-id>
</award-group>
<award-group id="gs3">
<funding-source>European Commission</funding-source>
<award-id>101081369</award-id>
</award-group>
</funding-group>
</article-meta>
  </front>
<body>
      

<sec id="Ch1.S1" sec-type="intro">
  <label>1</label><title>Introduction</title>
      <p id="d2e321">Today, access to in-situ meteorological data is more crucial than ever in order to comprehend our globe's climate variability and the effects of climate change. These observational datasets are key for: (i) gaining process understanding of components within the climate system <xref ref-type="bibr" rid="bib1.bibx23 bib1.bibx40" id="paren.1"/>, (ii) calibrating satellite data <xref ref-type="bibr" rid="bib1.bibx17 bib1.bibx22 bib1.bibx1" id="paren.2"/>, (iii) constraining reanalysis products <xref ref-type="bibr" rid="bib1.bibx21 bib1.bibx5 bib1.bibx27" id="paren.3"/>, (iv) evaluation and improvement of climate models <xref ref-type="bibr" rid="bib1.bibx15 bib1.bibx7" id="paren.4"/>, and (v) constraining uncertainties in future climate projections <xref ref-type="bibr" rid="bib1.bibx14" id="paren.5"/>, among others.</p>
      <p id="d2e339">However, the availability of in-situ meteorological data is limited in many regions, particularly in the Global South, which poses a significant challenge to these efforts <xref ref-type="bibr" rid="bib1.bibx52 bib1.bibx23 bib1.bibx43" id="paren.6"/>. As a result, gridded products for meteorological data often lack sufficient data for regions in the Global South. For example, <xref ref-type="bibr" rid="bib1.bibx44" id="text.7"/> reports that fewer than 40 stations across Central Africa were employed in the development of the University of East Anglia – Climate Research Unit (CRU) TS3.10 gridded surface temperature dataset for the period 1976–2012, for an area of approximately <inline-formula><mml:math id="M6" display="inline"><mml:mrow><mml:mn mathvariant="normal">3</mml:mn><mml:mo>×</mml:mo><mml:msup><mml:mn mathvariant="normal">10</mml:mn><mml:mn mathvariant="normal">6</mml:mn></mml:msup><mml:mspace linebreak="nobreak" width="0.125em"/><mml:mrow class="unit"><mml:msup><mml:mi mathvariant="normal">km</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msup></mml:mrow></mml:mrow></mml:math></inline-formula>. Similarly, only 353 stations across the entire South American continent provided surface temperature data to the Global Historical Climatology Network (GHCN-V3) for the period 1951–2011, which is a gridded climate product of the US National Climatic Data Center <xref ref-type="bibr" rid="bib1.bibx52" id="paren.8"/>. This absence of sufficient in-situ data often leads to inconclusive statements about how climate change is affecting these regions. For example, Central Africa and Southern South America are the only regions where the Intergovernmental Panel on Climate Change (IPCC) has not reported any observed changes in hot extremes or attributed these changes to anthropogenic activities, due to the scarcity of in-situ data <xref ref-type="bibr" rid="bib1.bibx23 bib1.bibx43" id="paren.9"/>. 
Moreover, for the same reason, observed changes in heavy precipitation since the 1950s remain unreported for much of Central and South America, as well as Central and East Africa <xref ref-type="bibr" rid="bib1.bibx23 bib1.bibx43" id="paren.10"/>. Similarly, a recent study by World Weather Attribution highlighted how insufficient historical weather data hindered the assessment of climate change's role in the catastrophic 2023 Lake Kivu floods, which resulted in over 500 deaths in the Democratic Republic of Congo (DRC) and Rwanda <xref ref-type="bibr" rid="bib1.bibx28" id="paren.11"/>. As climate change escalates, the need for reliable historical weather records becomes ever larger. These records are essential not only for scientific research but also for informing discussions on loss and damage by strengthening evidence from attribution research <xref ref-type="bibr" rid="bib1.bibx37" id="paren.12"/>. This is particularly crucial as losses and damages are projected to rise with climate change, becoming more concentrated in developing countries across the Global South <xref ref-type="bibr" rid="bib1.bibx24" id="paren.13"/>, where capacity to carry out attribution studies is currently limited by data availability <xref ref-type="bibr" rid="bib1.bibx38 bib1.bibx29" id="paren.14"/>.</p>
      <p id="d2e392">In many regions of the world, millions of daily weather records, such as precipitation and temperature records from various stations are still stored in hard copies within archives (e.g. <xref ref-type="bibr" rid="bib1.bibx19 bib1.bibx20 bib1.bibx9 bib1.bibx36" id="altparen.15"/>). For instance, until 2021, 4 million copies of meteorological records collected from 44 African countries through the African Centre of Meteorological Applications for Development (ACMAD) initiative were stored by the Royal Meteorological Institute of Belgium (RMI) solely in microfilm and microfiche formats <xref ref-type="bibr" rid="bib1.bibx36" id="paren.16"/>. Additionally, <xref ref-type="bibr" rid="bib1.bibx9" id="text.17"/> reports the existence of partially explored archives in countries such as Finland, Sweden, Denmark, France, and Portugal, among others, which still hold ship logs in hard copy. These logs contain meteorological data, such as barometric pressure, recorded along ship routes, including those in the Southern Hemisphere, where data availability remains particularly scarce <xref ref-type="bibr" rid="bib1.bibx9 bib1.bibx49 bib1.bibx26" id="paren.18"/>. These extensive records of observed weather present valuable opportunities to address the challenges of limited observations and limited understanding of our climate variability, by providing long time series of historical climate data for many regions of the world. For example, <xref ref-type="bibr" rid="bib1.bibx20" id="text.19"/> demonstrates how rescued atmospheric pressure observations from archived data by the UK Met Office and Scottish Meteorological Society were assimilated into the reanalysis of Storm Ulysses, a severe windstorm that occurred in 1903. This significantly improved the reconstruction of the event and the estimation of risks associated with similar windstorms <xref ref-type="bibr" rid="bib1.bibx20" id="paren.20"/>. 
Moreover, on a broader scale, it demonstrated the value of archived weather data in enhancing the accuracy of extreme event reconstructions and understanding present-day risks <xref ref-type="bibr" rid="bib1.bibx20 bib1.bibx9" id="paren.21"/>, further underscoring the need for data rescue projects.</p>
      <p id="d2e417">According to the World Meteorological Organization (WMO), data rescue involves all efforts to access, catalog and preserve data at risk of being lost by converting these historical hard copy records into digital and/or machine-readable formats <xref ref-type="bibr" rid="bib1.bibx50" id="paren.22"/>. These data rescue projects are typically divided into two stages: digitization and transcription. The digitization stage involves organising and imaging (scanning) the hard copy records, as well as saving their corresponding metadata <xref ref-type="bibr" rid="bib1.bibx50" id="paren.23"/>. This ensures easy identification and retrieval of the digital data copies. The transcription stage then involves converting the data from these digital copies into machine-readable formats, such as spreadsheets, ready for analysis. Currently, 139 past and ongoing data rescue projects from various parts of the world are reported on the Data Rescue Portal, operated by the WMO and the Copernicus Climate Change Service (C3S) <xref ref-type="bibr" rid="bib1.bibx11" id="paren.24"/>. A successful example of these data rescue projects is the recently completed WeatherRescue.org initiative, which digitized and transcribed over 3 million daily weather observations, including atmospheric pressure, precipitation, and temperature, recorded in the Scottish Highlands between 1883 and 1910 <xref ref-type="bibr" rid="bib1.bibx18 bib1.bibx11" id="paren.25"/>. Following this, the data was uploaded to open-access repositories through the C3S, helping to fill gaps in the region's historical climate record <xref ref-type="bibr" rid="bib1.bibx18" id="paren.26"/>. A second example is the digitization and transcription of approximately 5 million weather observations recorded between 1861 and 1919 in Southern Poland, sourced from the archives of the Institute of Meteorology and Water Management <xref ref-type="bibr" rid="bib1.bibx51" id="paren.27"/>. 
This rescued data was key in the study by <xref ref-type="bibr" rid="bib1.bibx51" id="text.28"/>, which highlighted climate variability in the Małopolska region. This project also underscored the urgent need to digitize archived in-situ data, as many weather records in these archives had deteriorated significantly, making parts of the recorded data unrecognizable and difficult to transcribe, resulting in the loss of valuable information <xref ref-type="bibr" rid="bib1.bibx51" id="paren.29"/>.</p>
      <p id="d2e446"><xref ref-type="bibr" rid="bib1.bibx9" id="text.30"/> points out that one of the major challenges in data rescue projects is the transcription process. Many projects tend to halt after the digitization phase, leaving the transcription to be completed at a later stage <xref ref-type="bibr" rid="bib1.bibx9" id="paren.31"/>. This is because traditional transcription relies on manual efforts, where observations in the images or scans from the digitization phase are keyed into a spreadsheet or standardized format by hand (e.g. <xref ref-type="bibr" rid="bib1.bibx51 bib1.bibx30" id="altparen.32"/>). This process is labor-intensive and time-consuming, as transcribing large data collections can require numerous person-years of effort <xref ref-type="bibr" rid="bib1.bibx19" id="paren.33"/>. Recently, the involvement of citizen scientists, on platforms such as Zooniverse, in transcribing historical weather data from images to digital form has proven more efficient, significantly reducing the person-years required to complete transcription projects, with some projects even mobilizing thousands of volunteers (e.g. <xref ref-type="bibr" rid="bib1.bibx18 bib1.bibx19 bib1.bibx12" id="altparen.34"/>). However, the success of these citizen science projects relies on effective mobilization factors such as media coverage, volunteer availability and willingness, ongoing engagement, and thorough preparation of the images for upload to the platforms <xref ref-type="bibr" rid="bib1.bibx19" id="paren.35"/>. More recently, several efforts have been made to integrate the transcription into classroom-based assessments (e.g. <xref ref-type="bibr" rid="bib1.bibx36" id="altparen.36"/>), but these initiatives have almost exclusively been limited to university settings and have not yet cracked transcription at speed and scale.</p>
      <p id="d2e470">An emerging approach to addressing the challenges of transcription involves using Artificial Intelligence/Machine Learning (AI/ML) algorithms. This approach harnesses the power of computer vision algorithms and Optical Character Recognition/Handwritten Text Recognition (OCR/HTR) models to transcribe large datasets more efficiently and quickly <xref ref-type="bibr" rid="bib1.bibx48 bib1.bibx35" id="paren.37"/>. For example, in the field of Digital Humanities, OCR/HTR tools have been explored to expedite and reduce the cost of transcribing vast collections of historical texts and documents in libraries and archives <xref ref-type="bibr" rid="bib1.bibx35 bib1.bibx46 bib1.bibx41" id="paren.38"/>. A notable case is the recent transcription of over 3 million pages of historical texts from the National Archives of the Netherlands using Transkribus HTR models <xref ref-type="bibr" rid="bib1.bibx47" id="paren.39"/>. However, despite being suggested by numerous studies (e.g. <xref ref-type="bibr" rid="bib1.bibx6 bib1.bibx30" id="altparen.40"/>), these AI/ML approaches for transcribing historical meteorological data have not been extensively explored. <xref ref-type="bibr" rid="bib1.bibx6" id="text.41"/> highlights that the current models used for transcribing historical texts or pages would need to be adapted to effectively transcribe historical meteorological data, which is often presented in tabular or ledger format. <xref ref-type="bibr" rid="bib1.bibx48" id="text.42"/> demonstrates that commercial transcription software like Transkribus and Amazon Textract currently offer more advanced text recognition models compared to open-source alternatives such as Tesseract and Pylaia, and they also include table structure detection. However, transcribing large volumes of data with these commercial tools would be very costly <xref ref-type="bibr" rid="bib1.bibx48" id="paren.43"/>. 
Therefore, there is a need to further train and improve open-source transcription models, not only as a cost-effective solution but also to uphold the principles of Open Science in transcribing historical meteorological data <xref ref-type="bibr" rid="bib1.bibx48" id="paren.44"/>.</p>
      <p id="d2e498">To help overcome these challenges, we here present an ML-based open-source software, MeteoSaver, used here for the transcription of the digitized data to machine-readable format. We illustrate the functionality of the software using an example temperature sheet and test its applicability by applying it to ten different temperature sheets, pictured in an archive in DRC, that span multiple handwriting styles, paper quality, and maintenance conditions. The software can be executed on both a local machine and high-performance computing (HPC) infrastructure. The promising results of the software suggest that it can be used to accelerate data rescue efforts worldwide and save numerous person-years of manual data entry from historical weather records in similar case studies.</p>

      <fig id="F1" specific-use="star"><label>Figure 1</label><caption><p id="d2e503">An example one-month observed weather data sheet for Station Binga (2°18<sup>′</sup> N, 20°30<sup>′</sup> E) in DRC available within the archives of INERA, Yangambi. The sheet contains the following information: (1) the station name, year (Année) and month (Mois) in the top right corner, (2) the data owner (Institute) in the top left corner, (3) a table (center) showing the pentad number (N<sup>o</sup> de la pentade), Date, Bellani (measure for total solar radiation), extreme temperatures in degree Celsius (Températures extrêmes) consisting of maximum, minimum and average temperatures, as well as the diurnal temperature range (Ampl.), Evaporation in cubic centimeters, Rainfall (Pluies) in millimeters per day, and Temperature and Humidity (Température et Humidité) recorded at three times of the day, namely 6 <inline-formula><mml:math id="M10" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">a</mml:mi><mml:mo>.</mml:mo><mml:mi mathvariant="normal">m</mml:mi><mml:mo>.</mml:mo></mml:mrow></mml:math></inline-formula>, 3 and 6 <inline-formula><mml:math id="M11" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">p</mml:mi><mml:mo>.</mml:mo><mml:mi mathvariant="normal">m</mml:mi><mml:mo>.</mml:mo></mml:mrow></mml:math></inline-formula>, and (4) observers' names (les observateurs) in the bottom left, authorization signature (Visa du Chef hiérarchique) in the bottom right, and extra comments written in the bottom center of the sheet. Additionally, the table also shows 5 <inline-formula><mml:math id="M12" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">d</mml:mi></mml:mrow></mml:math></inline-formula> (and/or 6 <inline-formula><mml:math id="M13" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">d</mml:mi></mml:mrow></mml:math></inline-formula>) totals (Tot.) 
and means (Moy.), as well as monthly totals and mean values. Note that the values in these sheets are all handwritten, and the handwriting styles vary throughout the dataset.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f01.jpg"/>

      </fig>

</sec>
<sec id="Ch1.S2">
  <label>2</label><title>Data</title>
      <p id="d2e592">To illustrate and test MeteoSaver, we use ten different sheets of meteorological observations across DRC, including daily records of temperature, precipitation, humidity, and other variables (see Figs. <xref ref-type="fig" rid="F1"/>, <xref ref-type="fig" rid="FA1"/>–<xref ref-type="fig" rid="FA9"/>, and Table <xref ref-type="table" rid="T2"/>). The ten sheets were selected to span a range of handwriting styles, locations, paper size, quality, color, and maintenance conditions. Each sheet records a range of variables at daily resolution across one month, their multi-day totals and averages (5 and/or 6 <inline-formula><mml:math id="M14" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">d</mml:mi></mml:mrow></mml:math></inline-formula>), as well as monthly totals and averages. Some variables are directly observed, while others are calculated diagnostics. From the available variables, we here focus on daily minimum temperature (observed), daily maximum temperature (observed), daily mean temperature (diagnosed), and diurnal temperature range (diagnosed) (see Sect. <xref ref-type="sec" rid="Ch1.S3.SS5"/>). The sheets originate from the Yangambi branch of the Institut National pour l'Etude et la Recherche Agronomiques (INERA); a detailed description of the data digitization procedure is provided in <xref ref-type="bibr" rid="bib1.bibx33" id="text.45"/>. All ten sheets were manually transcribed by the main author for use in the independent evaluation (see Sect. <xref ref-type="sec" rid="Ch1.S4"/>).</p>
</sec>
<sec id="Ch1.S3">
  <label>3</label><title>MeteoSaver v1.0 software</title>
      <p id="d2e627">We have developed an open-source software, MeteoSaver, which uses machine learning algorithms to transcribe tabular handwritten historical weather data into spreadsheets ready for analysis (<uri>https://github.com/VUB-HYDR/MeteoSaver/</uri>, last access: 20 March 2026). The software is written in Python 3.9 and is flexible enough to be applied to similar case studies. Here, we describe the setup of MeteoSaver v1.0, which is organized into six modules: (i) configuration, (ii) image pre-processing, (iii) table and cell detection, (iv) transcription, (v) quality assessment and quality control, and (vi) data formatting and upload (Fig. <xref ref-type="fig" rid="F2"/>). For demonstration purposes, we present the results of each step using one sample data sheet (shown in Fig. <xref ref-type="fig" rid="F1"/>), and discuss how these steps in MeteoSaver can be applied in similar case studies.</p>

      <fig id="F2"><label>Figure 2</label><caption><p id="d2e639">Schematic representation of the modules in MeteoSaver v1.0, illustrating the sequential transcription process from images of paper records to the final digital time series.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f02.png"/>

      </fig>

<sec id="Ch1.S3.SS1">
  <label>3.1</label><title>Module 1: configuration</title>
      <p id="d2e655">To execute MeteoSaver, the configuration module first requires user settings to ensure smooth operation across different systems, infrastructures, images, and tabular formats (see Table <xref ref-type="table" rid="T1"/>). The primary setting, listed under General in Table <xref ref-type="table" rid="T1"/>, specifies whether the software will be run on a local machine or HPC infrastructure. On a local machine, the software processes tasks from Modules 2–6 sequentially (see Fig. <xref ref-type="fig" rid="F2"/>), handling one digitized sheet (and station) at a time. In contrast, when using HPC infrastructure, the software takes advantage of increased processing power, utilizing multiple CPUs and larger dedicated memory, to perform these tasks in parallel for each station. In this case, users can also specify the number of CPUs to be utilized.</p>

<table-wrap id="T1" specific-use="star"><label>Table 1</label><caption><p id="d2e667">Configuration: User settings required for execution of MeteoSaver v1.0.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="3">
     <oasis:colspec colnum="1" colname="col1" align="justify" colwidth="70mm"/>
     <oasis:colspec colnum="2" colname="col2" align="justify" colwidth="120mm"/>
     <oasis:colspec colnum="3" colname="col3" align="justify" colwidth="30mm"/>
     <oasis:thead>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Setting/Value</oasis:entry>
         <oasis:entry colname="col2">Description</oasis:entry>
         <oasis:entry colname="col3">Default Value</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row rowsep="1">
         <oasis:entry namest="col1" nameend="col3" align="left">General </oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">run_mode</oasis:entry>
         <oasis:entry colname="col2">Defines the environment where the software will be run. Options: <monospace>local</monospace> for personal computers or <monospace>hpc</monospace> for High Performance Computing.</oasis:entry>
         <oasis:entry colname="col3">local</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">num_processors</oasis:entry>
         <oasis:entry colname="col2">Specifies the number of CPUs to use when running on an HPC infrastructure (in <monospace>hpc</monospace> mode).</oasis:entry>
         <oasis:entry colname="col3">18</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry namest="col1" nameend="col3" align="left">Directories </oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">full_datadir</oasis:entry>
         <oasis:entry colname="col2">Directory containing folders of historical weather data sheet images, organized by station number.</oasis:entry>
         <oasis:entry colname="col3">data/00_post1960…</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">pre_QA_QC_transcribed_hydroclimate_data_dir</oasis:entry>
         <oasis:entry colname="col2">Directory for storing pre-QA/QC transcribed hydroclimate data.</oasis:entry>
         <oasis:entry colname="col3">results/01_pre…</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">post_QA_QC_transcribed_hydroclimate_data_dir</oasis:entry>
         <oasis:entry colname="col2">Directory for storing post-QA/QC transcribed hydroclimate data.</oasis:entry>
         <oasis:entry colname="col3">results/02_post…</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">validation_dir</oasis:entry>
         <oasis:entry colname="col2">Directory for validation results.</oasis:entry>
         <oasis:entry colname="col3">results/03_valid…</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">final_refined_daily_hydroclimate_data_dir</oasis:entry>
         <oasis:entry colname="col2">Directory for final refined hydroclimate data after all quality checks.</oasis:entry>
         <oasis:entry colname="col3">results/04_final…</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">transient_transcription_output_dir</oasis:entry>
         <oasis:entry colname="col2">Directory to store transient transcription outputs during processing.</oasis:entry>
         <oasis:entry colname="col3">results/05_trans…</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">manually_transcribed_data_dir</oasis:entry>
         <oasis:entry colname="col2">Directory for manually transcribed data, used for validation purposes.</oasis:entry>
         <oasis:entry colname="col3">results/06_manual…</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">metadata_file_path</oasis:entry>
         <oasis:entry colname="col2">Directory for the metadata of all stations.</oasis:entry>
         <oasis:entry colname="col3">data/01_meta…</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry namest="col1" nameend="col3" align="left">Table and Cell Detection </oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">clip_up, clip_down, clip_left, clip_right</oasis:entry>
         <oasis:entry colname="col2">Clipping values (in pixels) to remove headers and row labels from detected tables.</oasis:entry>
         <oasis:entry colname="col3">430, 270, 200, 150</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">max_table_width, max_table_height</oasis:entry>
         <oasis:entry colname="col2">Maximum expected table width and height (in pixels) to ensure correct table detection.</oasis:entry>
         <oasis:entry colname="col3">3900, 3600</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">min_cell_width_threshold, max_cell_width_threshold, min_cell_height_threshold, max_cell_height_threshold</oasis:entry>
         <oasis:entry colname="col2">Minimum and maximum width and height (in pixels) allowed for detected table cells.</oasis:entry>
         <oasis:entry colname="col3">50, 200, 28, 90</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">no_of_rows,  no_of_columns, no_of_rows_including_headers</oasis:entry>
         <oasis:entry colname="col2">Expected number of rows and columns in detected tables.</oasis:entry>
         <oasis:entry colname="col3">43, 24, 46</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">space_height_threshold,  space_width_threshold, max_cell_height_per_box</oasis:entry>
         <oasis:entry colname="col2">Minimum height and width space (in pixels) between bounding boxes to detect missing cells, with height for newly added (missing) bounding boxes/cells.</oasis:entry>
         <oasis:entry colname="col3">50, 120, 50</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry namest="col1" nameend="col3" align="left">Transcription </oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">ocr_model</oasis:entry>
         <oasis:entry colname="col2">Defines which OCR/HTR model to use (e.g. Tesseract-OCR, EasyOCR, PaddleOCR).</oasis:entry>
         <oasis:entry colname="col3">Tesseract-OCR</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">tesseract_path</oasis:entry>
         <oasis:entry colname="col2">Path to the Tesseract-OCR executable file, needed for Tesseract-OCR model.</oasis:entry>
         <oasis:entry colname="col3">…/tesseract.exe</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">system_tessdata_dir</oasis:entry>
         <oasis:entry colname="col2">Directory path to trained OCR/HTR language models for transcription (e.g. Tesseract tessdata).</oasis:entry>
         <oasis:entry colname="col3">…/tessdata</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry namest="col1" nameend="col3" align="left">QA/QC </oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">daily_temperature_columns; daily_temperature_columns_and_diurnal_temperature; daily_precipitation_column</oasis:entry>
         <oasis:entry colname="col2">Columns to focus on for quality checks, here columns with Daily Maximum Temperature, Minimum Temperature, Average Temperature, the Diurnal Temperature Range and Daily Precipitation.</oasis:entry>
         <oasis:entry colname="col3">D,E,F;  D,E,F,G; K</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">max_temperature_threshold, min_temperature_threshold</oasis:entry>
         <oasis:entry colname="col2">Temperature thresholds (in <inline-formula><mml:math id="M15" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula>) to flag invalid transcribed temperature readings.</oasis:entry>
         <oasis:entry colname="col3">40, 5</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">decimal_places</oasis:entry>
         <oasis:entry colname="col2">Number of decimal places in the data sheets.</oasis:entry>
         <oasis:entry colname="col3">1</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">uncertainty_margin</oasis:entry>
         <oasis:entry colname="col2">Defines the uncertainty margin to be used in temperature calculations and quality checks.</oasis:entry>
         <oasis:entry colname="col3">0.2</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">multi_day_totals, multi_day_averages</oasis:entry>
         <oasis:entry colname="col2">Flags to specify whether multi-day totals or averages (e.g. 5 <inline-formula><mml:math id="M16" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">d</mml:mi></mml:mrow></mml:math></inline-formula> totals) are present in the sheets.</oasis:entry>
         <oasis:entry colname="col3">True, True</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">max_days_for_multi_day_total</oasis:entry>
         <oasis:entry colname="col2">Maximum number of days contained in multi-day totals (e.g. 5 or 6 <inline-formula><mml:math id="M17" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">d</mml:mi></mml:mrow></mml:math></inline-formula>).</oasis:entry>
         <oasis:entry colname="col3">6</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">multi_day_totals_rows; final_totals_rows</oasis:entry>
         <oasis:entry colname="col2">Rows where multi-day totals or final totals are located (if applicable).</oasis:entry>
         <oasis:entry colname="col3">9,16,23,30,37,45;  45</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">excluded_rows</oasis:entry>
         <oasis:entry colname="col2">Rows to exclude during QA/QC checks involving multi-day totals (e.g. headers).</oasis:entry>
         <oasis:entry colname="col3">1,2,3,9,16,23,30,37,45</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">excluded_columns</oasis:entry>
         <oasis:entry colname="col2">Columns to exclude during QA/QC checks (e.g. date).</oasis:entry>
         <oasis:entry colname="col3">1,2,3,15,20,25,26,27</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">additional_excluded_rows</oasis:entry>
         <oasis:entry colname="col2">Extra rows to exclude during QA/QC checks involving multi-day averages (if applicable).</oasis:entry>
         <oasis:entry colname="col3">10,17,24,31,38,46</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry namest="col1" nameend="col3" align="left">Data Formatting </oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">date_column</oasis:entry>
         <oasis:entry colname="col2">Specifies which column contains the date information.</oasis:entry>
         <oasis:entry colname="col3">B</oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table><table-wrap-foot><p id="d2e670">Note: (i) The default settings and values in our case study are described in the subsequent sections (see Sects. <xref ref-type="sec" rid="Ch1.S3.SS3"/>–<xref ref-type="sec" rid="Ch1.S3.SS6"/>). (ii) Paths with “…” indicate truncated directory names that are relative to the repository root; see the repository for full names.</p></table-wrap-foot></table-wrap>

      <p id="d2e1130">The second step requires specifying the directories for both input and output files, as outlined in Directories in Table <xref ref-type="table" rid="T1"/>. For inputs, this includes the folders containing digitized (scanned) historical weather data sheets. For outputs, the locations for the following must be defined: (i) pre-quality assessment and quality control (QA/QC) transcribed data, (ii) post-QA/QC transcribed data, (iii) final refined data, (iv) transient transcription outputs, and (v) manually transcribed data.</p>
      <p id="d2e1136">Next, the user must input the required settings for the subsequent subsections (Modules 3–6). For Module 3 (Table and Cell Detection), this includes thresholds for maximum table width and height (in pixels) to ensure accurate automatic table detection, as well as clipping values (in pixels) to address table headers and row labels. For Module 4 (Transcription), settings involve selecting the preferred OCR/HTR model, specifying the OCR/HTR execution file directories if applicable, and defining the expected number of rows, columns, and cell dimensions (in pixels) to ensure proper data placement in spreadsheets after transcription. For Module 5 (QA/QC), users configure quality control checks (see Sect. <xref ref-type="sec" rid="Ch1.S3.SS5"/>), including specifying the variables of interest (e.g. daily maximum temperature) and their respective columns, setting thresholds for these variables, defining uncertainty margins, and indicating the presence of multi-day totals or averages, along with their corresponding rows if applicable. Finally, in Module 6 (Data Formatting and Upload), the date column in each sheet is specified as a prerequisite for final data formatting.</p>
      <p id="d2e1141">These configuration settings ensure the software remains flexible and adaptable to similar case studies. The default settings and values for our case study are described in the following subsections (Sects. <xref ref-type="sec" rid="Ch1.S3.SS3"/>–<xref ref-type="sec" rid="Ch1.S3.SS6"/>).</p>
</sec>
<sec id="Ch1.S3.SS2">
  <label>3.2</label><title>Module 2: image pre-processing</title>
      <p id="d2e1157">Within our framework, we use the image processing module of OpenCV (Open Source Computer Vision; <uri>http://opencv.org/</uri>, last access: 20 March 2026) for pre-processing the images (digitized data sheets). This module, part of the larger OpenCV library, offers multiple computer vision algorithms tailored for image processing. The first step involves loading the images, which should be in an OpenCV-supported format such as JPEG/JPG, PNG, TIFF, BMP, or WEBP. Each image corresponds to data from a single station and month. In our framework, these images follow a specific naming convention: “STN_YYYYMM_SF” or “STN_YYYYMM_HD”, based on the data inventory. Here, <italic>STN</italic> refers to the three-digit station number, <italic>YYYY</italic> is the year, <italic>MM</italic> is the month, <italic>SF</italic> represents Standard Format (printed tabular format), and <italic>HD</italic> indicates a hand-drawn version of the standard format. In the following steps, we focus on the images in <italic>SF</italic> format.</p>
      <p id="d2e1182">The software processes one image (sheet) at a time on a local machine, or multiple sheets (one per station per allocated CPU) when running on HPC infrastructure. Each image is loaded and converted to grayscale to enhance intensity variations, which is critical for more accurate and efficient table and cell detection (see Sect. <xref ref-type="sec" rid="Ch1.S3.SS3"/>).</p>

      <fig id="F3" specific-use="star"><label>Figure 3</label><caption><p id="d2e1189">Binary image using adaptive thresholding for an example one month observed weather data sheet for Station Binga (2°18<sup>′</sup> N, 20°30<sup>′</sup> E) in DRC available from the archives of INERA, Yangambi.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f03.png"/>

        </fig>

      <p id="d2e1217">Additionally, we binarize the grayscale images using adaptive thresholding, which converts each pixel into either 0 or 1 based on localized thresholds (intensities). Instead of a global threshold, each pixel's threshold is calculated as the mean intensity from a small neighboring area. This method is particularly advantageous for handling images with uneven lighting and localized features, making it ideal for binarizing images with varying paper quality across the sheet and small or faint handwritten text <xref ref-type="bibr" rid="bib1.bibx53 bib1.bibx42" id="paren.46"/>. Pixels with intensities above the threshold are set to white (e.g. the blank areas on the sheets), while those below the threshold (e.g. the handwritten values) are set to black. The result is a binary image composed entirely of black and white pixels (see Fig. <xref ref-type="fig" rid="F3"/>), which is optimal for text recognition tasks (as in Sect. <xref ref-type="sec" rid="Ch1.S3.SS4"/>).</p>
</sec>
<sec id="Ch1.S3.SS3">
  <label>3.3</label><title>Module 3: table and cell detection</title>
      <p id="d2e1235">Similarly, we utilize OpenCV's image processing module for both table and cell detection in the pre-processed images (see Sect. <xref ref-type="sec" rid="Ch1.S3.SS2"/>). Here, we employ these ML algorithms following methodologies similar to those described by <xref ref-type="bibr" rid="bib1.bibx3" id="text.47"/>, customizing them for our case study. First, we binarize the grayscale images again using Otsu's thresholding, which is effective for table detection as it determines a global threshold that maximizes contrast between the foreground and background <xref ref-type="bibr" rid="bib1.bibx4" id="paren.48"/>. Unlike adaptive thresholding, which uses localized thresholds, Otsu's method converts the image into pixels of 0 or 1 based on a single automatically calculated threshold <xref ref-type="bibr" rid="bib1.bibx4" id="paren.49"/>. This second binary image is used solely for the table detection step in this module.</p>
      <p id="d2e1249">Next, the table in this second binary image is detected using contours. As described by <xref ref-type="bibr" rid="bib1.bibx54" id="text.50"/>, contours are here defined as curves that connect all continuous points with the same color and intensity. Thus, these contours are present for both the individual cells and the entire table in the image <xref ref-type="bibr" rid="bib1.bibx54 bib1.bibx34" id="paren.51"/>. In the MeteoSaver framework, we apply morphological operations such as dilation and erosion in OpenCV to close small gaps within the binary image (e.g. gaps in horizontal and vertical lines in the table). This allows us to identify the largest contour in the image as the table. To ensure accurate detection, we set the maximum allowable contour dimensions – table width and height – in the configuration module (see Table <xref ref-type="table" rid="T1"/>) to 3900 and 3600 <inline-formula><mml:math id="M20" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">pixels</mml:mi></mml:mrow></mml:math></inline-formula>, respectively. This precaution helps avoid instances where the entire sheet is mistakenly identified as the largest contour instead of the table. Using the coordinates of the detected table, we then crop the first binary image (from adaptive thresholding) to isolate the table for further steps (Fig. <xref ref-type="fig" rid="F4"/>). An additional, but optional, step in our pipeline allows clipping out the headers and the columns with the dates and pentad numbers, allowing us to focus only on the recorded observations (handwritten values). This step is crucial because, in the subsequent Transcription module (Sect. <xref ref-type="sec" rid="Ch1.S3.SS4"/>), we restrict the OCR/HTR model to recognize only digits (0–9). This restriction helps reduce noise from extraneous characters, such as those in the headers and some row labels, thereby enhancing the model's recognition accuracy. 
In the current version of the software, the latter functionality is tailored to the formats of the ten illustrative sheets considered here, but it can be modified through the configuration module (see Table <xref ref-type="table" rid="T1"/>). Moreover, our framework includes a de-skewing function to correct any skew in the table, if present. The function first detects horizontal lines in the image using morphological operations and calculates the average angle of these lines. It then rotates the image by the calculated angle to properly align the horizontal content. This ensures the table is horizontally aligned, a critical step for further analysis.</p>

      <fig id="F4" specific-use="star"><label>Figure 4</label><caption><p id="d2e1277">Detected table extracted using MeteoSaver for an example one month observed weather data sheet for Station Binga (2°18<sup>′</sup> N, 20°30<sup>′</sup> E) in DRC available from the archives of INERA, Yangambi.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f04.png"/>

        </fig>

      <p id="d2e1305">We then invert the clipped binary image, turning the handwritten values (previously black) to white and the background (previously white) to black. Using OpenCV's structuring element, eroding and dilating features, we identify and erase the vertical and horizontal lines in the inverted images <xref ref-type="bibr" rid="bib1.bibx54" id="paren.52"/> (see Fig. <xref ref-type="fig" rid="F5"/>). The structuring element allows us to define the pixel neighborhood over which erosion and dilation are applied successively in both the vertical and horizontal directions <xref ref-type="bibr" rid="bib1.bibx54" id="paren.53"/>. Erosion shrinks the white regions in the inverted image (including both lines and text), thinning the lines until they disappear, while dilation restores the eroded text by expanding the remaining white regions. The combined effect of these operations produces an inverted image without any horizontal or vertical lines (see Fig. <xref ref-type="fig" rid="F5"/>). This step is essential for detecting the cells in the table.</p>

      <fig id="F5" specific-use="star"><label>Figure 5</label><caption><p id="d2e1320">Inverted binary image showing the clipped detected table using MeteoSaver for an example one month observed weather data sheet. Here, we clip the detected table to exclude the headers and row labels such that only the handwritten records are considered for subsequent text detection and transcription steps. In this image, we show how we eliminate both the vertical and horizontal lines from the image using OpenCV to facilitate the detection of only values as the white blobs in close proximity in the inverted binary image (see Fig. <xref ref-type="fig" rid="F6"/>).</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f05.png"/>

        </fig>

      <p id="d2e1331">By applying dilation again on the inverted image without horizontal or vertical lines (as in Fig. <xref ref-type="fig" rid="F5"/>), we convert the recorded values (text) into white blobs against a black background (see Fig. <xref ref-type="fig" rid="F6"/>). Contours are then identified around these blobs, enabling the determination of bounding boxes (identified cells) for each detected text area. To filter out small or oversized blobs – often markings or spots on the sheet rather than actual text – we set minimum bounding box dimensions of 50 <inline-formula><mml:math id="M23" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">pixels</mml:mi></mml:mrow></mml:math></inline-formula> in width and 28 <inline-formula><mml:math id="M24" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">pixels</mml:mi></mml:mrow></mml:math></inline-formula> in height, and maximum dimensions of 200 <inline-formula><mml:math id="M25" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">pixels</mml:mi></mml:mrow></mml:math></inline-formula> in width and 90 <inline-formula><mml:math id="M26" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">pixels</mml:mi></mml:mrow></mml:math></inline-formula> in height within the configuration module.</p>

      <fig id="F6" specific-use="star"><label>Figure 6</label><caption><p id="d2e1373">Inverted binary image showing detected text/cells as white blobs in close proximity. The location of these white blobs (detected text) is then used in MeteoSaver to draw bounding boxes on the binary image (see Fig. <xref ref-type="fig" rid="F7"/>), serving as a prerequisite step for text recognition, here termed transcription.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f06.png"/>

        </fig>

      <p id="d2e1384">An additional, optional step in our framework is to verify the total number of detected bounding boxes per column and insert missing boxes where needed (see Category: Table and Cell Detection in Table <xref ref-type="table" rid="T1"/>). Based on the expected number of rows per column (in our case, 43), we identify columns with fewer detected boxes and then check for spaces that exceed the space height threshold (set to 50 <inline-formula><mml:math id="M27" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">pixels</mml:mi></mml:mrow></mml:math></inline-formula>) in between bounding boxes. Missing bounding boxes (with a set height of 50 <inline-formula><mml:math id="M28" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">pixels</mml:mi></mml:mrow></mml:math></inline-formula>) are then inserted into the largest spaces, provided there are neighboring boxes within the same row and that the space between them does not exceed the width threshold (set to 120 <inline-formula><mml:math id="M29" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">pixels</mml:mi></mml:mrow></mml:math></inline-formula>), ensuring alignment.</p>
      <p id="d2e1414">Finally, we overlay all the bounding boxes onto the previously clipped binary images (Fig. <xref ref-type="fig" rid="F7"/>). These bounding boxes highlight the detected text and table cells in each image. Their dimensions and coordinates are then passed to the Transcription module for further clipping of the table in preparation for the next handwritten text recognition step (Sect. <xref ref-type="sec" rid="Ch1.S3.SS4"/>).</p>

      <fig id="F7" specific-use="star"><label>Figure 7</label><caption><p id="d2e1423">Detected text on the image is highlighted with green bounding boxes. These boxes are then used to clip the image, and the clips are processed by the OCR model to recognize the handwritten values. The coordinates of the bounding boxes are utilized to determine the location of the recognized text for accurate placement in a two-dimensional array.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f07.png"/>

        </fig>

</sec>
<sec id="Ch1.S3.SS4">
  <label>3.4</label><title>Module 4: transcription</title>
      <p id="d2e1440">The transcription module of MeteoSaver v1.0 leverages our prior work on training an open-source Optical Character Recognition/Handwritten Text Recognition (OCR/HTR) model using the Tesseract OCR framework <xref ref-type="bibr" rid="bib1.bibx48" id="paren.54"/>. In this prior work, we evaluated both open-source OCR/HTR models, such as Tesseract OCR and Pylaia, and commercial alternatives, including Transkribus, Microsoft Azure, Google Document AI, and Amazon Textract. As demonstrated in <xref ref-type="bibr" rid="bib1.bibx48" id="text.55"/>, although commercial OCR/HTR models currently offer higher accuracy in text recognition, their use would be extremely costly for transcribing extensive archival datasets and would not align with Open Science principles. For these reasons, we trained the Tesseract OCR model further, enhancing the open-source French Tesseract model with over 35 000 text images from the INERA digitized data <xref ref-type="bibr" rid="bib1.bibx48" id="paren.56"/>. This was due to the lack of available pre-trained models for handwritten digits in Tesseract OCR, which are primarily optimized for printed text, necessitating additional training. Additionally, following the recommendations in <xref ref-type="bibr" rid="bib1.bibx48" id="text.57"/>, we provide the option to integrate multiple OCR models within our framework. Currently, MeteoSaver includes two additional open-source OCR models: PaddleOCR <xref ref-type="bibr" rid="bib1.bibx39" id="paren.58"/> and EasyOCR <xref ref-type="bibr" rid="bib1.bibx25" id="paren.59"/>.</p>
      <p id="d2e1462">In this module, we input clipped binary images, iterating over the detected cells from the previous Table and Cell detection module (bounding boxes shown with green borders in Fig. <xref ref-type="fig" rid="F7"/>) and feed each into the OCR/HTR model to recognize the handwritten values. To reduce noise from extraneous characters – such as dotted lines in the tables that might be interpreted as decimal points – and to improve accuracy, we restrict the OCR/HTR model to recognize only the digits 0–9.</p>
      <p id="d2e1467">The bounding box locations are used to identify boxes within the same row using K-means clustering (as in <xref ref-type="bibr" rid="bib1.bibx45" id="altparen.60"/>), while the image width (clipped binary table shown in Fig. <xref ref-type="fig" rid="F7"/>) determines column placement. K-means clustering groups boxes with similar vertical positions by taking the maximum expected rows as the number of clusters and calculating the average location of each cluster to group boxes into rows. Once row placement is established, column placement is determined by dividing the image into fixed-width segments based on its width, mapping each bounding box to a column using its <inline-formula><mml:math id="M30" display="inline"><mml:mi>x</mml:mi></mml:math></inline-formula> coordinate. Finally, we place the recognized handwritten values from each bounding box into a two-dimensional array, organizing them into the correct rows and columns to preserve the original structure of the data. As an additional step, we also define and include the original headers and row labels (such as the Date, Tot. and Moy. labels) in the array (see Fig. <xref ref-type="fig" rid="F8"/>).</p>

      <fig id="F8" specific-use="star"><label>Figure 8</label><caption><p id="d2e1487">Pre-quality controlled table with transcribed values using MeteoSaver.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f08.png"/>

        </fig>

</sec>
<sec id="Ch1.S3.SS5">
  <label>3.5</label><title>Module 5: quality assessment and quality control</title>
      <p id="d2e1504">Following the transcription of the data, quality assessment and quality control (QA/QC) is carried out to ensure the final output data is highly accurate with reference to the original handwritten daily temperature records (see Fig. <xref ref-type="fig" rid="F9"/>). Here, accuracy refers to the agreement between the automatically transcribed and QA/QC confirmed values and their corresponding manually transcribed values, evaluated within a set uncertainty margin to account for small numerical differences arising from rounding or other minor discrepancies. This assumes that the manually transcribed values are correct, which may not always be the case, as manual transcription is also subject to errors depending on the methods applied. This assumption means that the resulting inferred error rates are a conservative estimate.</p>

      <fig id="F9" specific-use="star"><label>Figure 9</label><caption><p id="d2e1511">Flow chart showing the quality assessment and quality control checks for the transcribed values with focus mainly on the daily temperature values.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f09.png"/>

        </fig>

      <p id="d2e1520">This module performs two complementary roles: (i) validation checks that assess whether the transcribed values are logically and physically consistent, and (ii) correction operations that adjust specific transcription errors where the correct value can be inferred from the structure of the table such as totals or averages, or from related variables.</p>
      <p id="d2e1524">As a prerequisite, we define the number of decimal places in the original sheets within the QA/QC category of the configuration file (Table <xref ref-type="table" rid="T1"/>), set to one decimal place in our case. This is because during the transcription, the OCR/HTR model is restricted to recognize only digits 0–9 (see Sect. <xref ref-type="sec" rid="Ch1.S3.SS4"/>). We also define the specific columns to assess, in our case those containing daily maximum, minimum, and average temperatures (<inline-formula><mml:math id="M31" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M32" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="M33" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, respectively), along with the diurnal temperature range (DTR). Another key user setting addresses the presence of multi-day totals or averages within the sheet, such as pentad (5 and 3, 4, or 6 <inline-formula><mml:math id="M34" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">d</mml:mi></mml:mrow></mml:math></inline-formula> for the final pentad, depending on the month), weekly, 10 <inline-formula><mml:math id="M35" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">d</mml:mi></mml:mrow></mml:math></inline-formula>, or monthly sums and means. In our case study, the sheets include pentad totals and averages (Figs. <xref ref-type="fig" rid="F1"/> and <xref ref-type="fig" rid="F8"/>).</p>
      <p id="d2e1585">Additional user settings in the QA/QC category of the configuration module include: (i) specifying an uncertainty margin (in degrees Celsius) for recorded temperature values to account for potential rounding errors during observations, particularly in average temperature readings. In our case, this margin is set to 0.2 <inline-formula><mml:math id="M36" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula>, meaning QA/QC checks will only flag transcribed values for correction if they fall outside this range when compared to calculated temperatures obtained during QA/QC. (ii) defining maximum and minimum temperature thresholds to identify unusually high or low values that may have been erroneously transcribed, with reference to regional temperature ranges from the literature. According to <xref ref-type="bibr" rid="bib1.bibx2" id="text.61"/>, the average daily maximum temperatures reported within the Congo basin during the period 1950–1959 were between 30 and 31 <inline-formula><mml:math id="M37" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula>, with an approximate increase in daily temperatures of 0.60–1.62 <inline-formula><mml:math id="M38" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula> per 30 <inline-formula><mml:math id="M39" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">year</mml:mi></mml:mrow></mml:math></inline-formula> period. 
Therefore, we use 40 <inline-formula><mml:math id="M40" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula> as the threshold for daily maximum temperature in the illustrative case study to ensure that genuine extremes are retained while transcription errors are still identified. For the daily minimum temperature threshold, we use 5 <inline-formula><mml:math id="M41" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula>. Following this, a series of QA/QC checks are conducted on the transcribed data by loading the two-dimensional array from the Transcription module into a spreadsheet format and creating a backup copy of the file (see Fig. <xref ref-type="fig" rid="F9"/>). Only daily temperature values that pass the following QA/QC checks are included in the final time series for their respective stations (see Figs. <xref ref-type="fig" rid="F13"/>–<xref ref-type="fig" rid="F14"/>).</p>
      <p id="d2e1656">The first check involves verifying that the transcribed values for <inline-formula><mml:math id="M42" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M43" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="M44" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula> contain fewer than four digits. This check is specific to daily temperature values recorded in <inline-formula><mml:math id="M45" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula> units with one decimal place, where the decimal place is deliberately not recognized by the OCR/HTR model. For example, a value of 27.8 <inline-formula><mml:math id="M46" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula> would be correctly transcribed as “278” (Fig. <xref ref-type="fig" rid="F11"/>a and b). Therefore, if more than three digits are detected (e.g. MeteoSaver reads “1278”), it is likely that a wrong transcription was made. If this condition is not met, a specific adjustment – unique to our sheets – is applied: the first digit is removed from the value (i.e. “1278” becomes “278” in our example through this data transformation step), and the cell is flagged to indicate this manipulation (see Fig. <xref ref-type="fig" rid="F11"/>a and b, with manipulated values in b shown in orange). This adjustment addresses cases where the OCR/HTR system mistakenly interprets a cell boundary line as an extra digit, such as “1” (as in Fig. <xref ref-type="fig" rid="F11"/>a and b). 
This data transformation assumes that the first digit of the wrongly transcribed value is erroneous, which may not always be true (for example, if the extra digit occurs in the middle or at the end of the value).</p>
      <p id="d2e1719">However, if the check is passed, the transcribed temperature values are then adjusted to match the required decimal places, set to one in this case (see Fig. <xref ref-type="fig" rid="F11"/>b and c, “278” becomes “27.8” in our example through this postprocessing step). This step corresponds to a scaling operation based on the number of decimal places specified in the configuration settings (Table <xref ref-type="table" rid="T1"/>). This is because the original observations were recorded to one decimal place (Figs. <xref ref-type="fig" rid="F1"/>, <xref ref-type="fig" rid="FA1"/>–<xref ref-type="fig" rid="FA9"/>), whereas the OCR/HTR model was restricted to recognize only digits (0–9) to avoid misinterpreting dotted table lines as decimal points.</p>
      <p id="d2e1732">In the second check, daily temperature values are tested to ensure they fall within the set maximum and minimum thresholds and are flagged if they do not (see Fig. <xref ref-type="fig" rid="F11"/>c and d, with flagged values in d shown in dark red). If a daily temperature value exceeds the maximum threshold and is also greater than 100, a specific adjustment is applied by dividing the value by 10. For values within the thresholds, the multi-day (here, pentad) totals and averages for <inline-formula><mml:math id="M47" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M48" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="M49" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula> are calculated and compared with the transcribed multi-day totals and averages. Similarly, the multi-day totals for daily precipitation values (<inline-formula><mml:math id="M50" display="inline"><mml:mi>P</mml:mi></mml:math></inline-formula>) are calculated and compared with the transcribed multi-day totals. Depending on whether the transcribed values match (or are within the set uncertainty margin of) the calculated totals or averages, both the multi-day values and their respective daily values are flagged as confirmed or not confirmed, respectively (see Fig. <xref ref-type="fig" rid="F11"/>c and d, with unconfirmed pentad total and average values in d shown in grey).</p>
      <p id="d2e1780">Thereafter, logical checks are performed for each day (that is to say, per row) as follows: (i) <inline-formula><mml:math id="M51" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula> must be less than <inline-formula><mml:math id="M52" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, which in turn must be less than <inline-formula><mml:math id="M53" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, with values flagged if this condition is not met (see Fig. <xref ref-type="fig" rid="F11"/>h, with flagged values in red). (ii) If two of the three daily temperature values (<inline-formula><mml:math id="M54" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M55" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="M56" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>) have already been confirmed through previous checks, the third unconfirmed value would then be calculated (using Eq. <xref ref-type="disp-formula" rid="Ch1.E1"/>) and flagged as confirmed. (iii) If the above condition is not met, but at least two digits of the calculated third value match those in the transcribed third value, we replace the transcribed value with the calculated one and flag the three values as confirmed. 
(iv) If the transcribed value for <inline-formula><mml:math id="M57" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula> is equal to (or is within the set uncertainty margin of) the average of the transcribed <inline-formula><mml:math id="M58" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="M59" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula> of that same day (as in Eq. <xref ref-type="disp-formula" rid="Ch1.E1"/>), all daily values are confirmed (see Fig. <xref ref-type="fig" rid="F11"/>d and h, with confirmed <inline-formula><mml:math id="M60" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M61" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="M62" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula> in h shown in green). (v) The relationship between <inline-formula><mml:math id="M63" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M64" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="M65" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, and the transcribed diurnal temperature range (DTR) is then used to correct unconfirmed values. 
Here, we iterate through three equations (Eqs. <xref ref-type="disp-formula" rid="Ch1.E2"/>–<xref ref-type="disp-formula" rid="Ch1.E4"/>) to calculate the unconfirmed transcribed values for <inline-formula><mml:math id="M66" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M67" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="M68" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, and to confirm those transcribed values that fall within the set uncertainty margin (see Fig. <xref ref-type="fig" rid="F11"/>g and h, with confirmed <inline-formula><mml:math id="M69" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M70" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M71" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula> and DTR in g shown in green). The following equations define the relationships between <inline-formula><mml:math id="M72" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M73" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M74" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, and DTR:</p>
      <p id="d2e2067">First, the daily average temperature is calculated as:

                <disp-formula id="Ch1.E1" content-type="numbered"><label>1</label><mml:math id="M75" display="block"><mml:mstyle class="stylechange" displaystyle="true"/><mml:mrow><mml:mstyle displaystyle="true" class="stylechange"/><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub></mml:mrow><mml:mn mathvariant="normal">2</mml:mn></mml:mfrac></mml:mstyle><mml:mo>.</mml:mo></mml:mrow></mml:math></disp-formula></p>
      <p id="d2e2101">Next, the diurnal temperature range (DTR) is defined as the difference between <inline-formula><mml:math id="M76" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="M77" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>:

                <disp-formula id="Ch1.E2" content-type="numbered"><label>2</label><mml:math id="M78" display="block"><mml:mstyle displaystyle="true" class="stylechange"/><mml:mrow><mml:mstyle class="stylechange" displaystyle="true"/><mml:mtext>DTR</mml:mtext><mml:mo>=</mml:mo><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub><mml:mo>.</mml:mo></mml:mrow></mml:math></disp-formula></p>

      <fig id="F10" specific-use="star"><label>Figure 10</label><caption><p id="d2e2154">Key for the colors (flags) showing quality assessment and quality control checks shown in Figs. <xref ref-type="fig" rid="F11"/> and <xref ref-type="fig" rid="F12"/>.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f10.png"/>

          
        </fig>

      <fig id="F11" specific-use="star"><label>Figure 11</label><caption><p id="d2e2171">Two examples of pentad transcribed temperature values (Top: Station Rwindi [0°47<sup>′</sup> S, 29°17<sup>′</sup> E], August 1972, and Bottom: Station Binga [2°18<sup>′</sup> N, 20°30<sup>′</sup> E], May 1969) illustrate the sequence of QA/QC checks performed on the initial transcribed values, leading to the final confirmed values (flagged in green). The arrows, along with their respective labels between each panel, indicate the specific QA/QC checks applied at each stage for the pentad, corresponding to the procedures and equations described in Sect. <xref ref-type="sec" rid="Ch1.S3.SS5"/> and illustrating the progression of the QA/QC workflow. Red-bordered cells, rows, or columns highlight examples where unconfirmed transcribed values were identified and corrected during QA/QC, with changes reflected in the subsequent panel. See Key for all colors (quality flags) in Fig. <xref ref-type="fig" rid="F10"/>.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f11.png"/>

          
        </fig>

      <p id="d2e2223">Then, by substituting terms from Eq. (<xref ref-type="disp-formula" rid="Ch1.E1"/>), DTR can also be expressed in terms of <inline-formula><mml:math id="M83" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="M84" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula> (in cases of unconfirmed or incorrectly transcribed <inline-formula><mml:math id="M85" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula> values) as:

                <disp-formula id="Ch1.E3" content-type="numbered"><label>3</label><mml:math id="M86" display="block"><mml:mstyle displaystyle="true" class="stylechange"/><mml:mrow><mml:mstyle class="stylechange" displaystyle="true"/><mml:mtext>DTR</mml:mtext><mml:mo>=</mml:mo><mml:mn mathvariant="normal">2</mml:mn><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub><mml:mo>-</mml:mo><mml:mn mathvariant="normal">2</mml:mn><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub><mml:mo>.</mml:mo></mml:mrow></mml:math></disp-formula></p>
      <p id="d2e2291">Or in terms of <inline-formula><mml:math id="M87" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="M88" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>  (in cases of unconfirmed or incorrectly transcribed <inline-formula><mml:math id="M89" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula> values) as:

                <disp-formula id="Ch1.E4" content-type="numbered"><label>4</label><mml:math id="M90" display="block"><mml:mstyle class="stylechange" displaystyle="true"/><mml:mrow><mml:mstyle displaystyle="true" class="stylechange"/><mml:mtext>DTR</mml:mtext><mml:mo>=</mml:mo><mml:mn mathvariant="normal">2</mml:mn><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub><mml:mo>-</mml:mo><mml:mn mathvariant="normal">2</mml:mn><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub><mml:mo>.</mml:mo></mml:mrow></mml:math></disp-formula></p>
      <p id="d2e2356">Lastly, temperature threshold checks, as well as multi-day temperature totals and averages, are re-assessed as previously described to leverage all confirmed temperature values for correcting any remaining unconfirmed <inline-formula><mml:math id="M91" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M92" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="M93" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula> values (see Fig. <xref ref-type="fig" rid="F11"/>f and g and Fig. <xref ref-type="fig" rid="F11"/>e and f, respectively, with confirmed <inline-formula><mml:math id="M94" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M95" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="M96" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula> values shown in green). We follow the sequence of QA/QC checks outlined above, starting with vertical checks (columns) and then moving to horizontal ones (rows). This approach first confirms values with more inputs (in this case, columns with 5 or 6 values using the total or average), followed by values with fewer inputs (rows with 3 or 4 values). This order minimizes the risk of incorrectly confirming values based on limited input data.</p>
      <p id="d2e2430">Each of the checks described above assigns a quality flag to the data, color-coded according to Fig. <xref ref-type="fig" rid="F10"/> as follows: (i) White cells indicate values that remain unchanged from the original transcription, except for adjustments to match the required decimal places (here, 1 decimal place). (ii) Green cells represent values confirmed as correct by the QA/QC checks, either as transcribed or calculated. (iii) Orange-highlighted cells indicate that the original transcribed temperature value had more than three digits and was adjusted to three digits. (iv) Red cells highlight cases where transcribed temperature values did not meet the condition <inline-formula><mml:math id="M97" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub><mml:mo>&lt;</mml:mo><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub><mml:mo>&lt;</mml:mo><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>. (v) Dark red cells show that the transcribed value exceeded the set thresholds for maximum or minimum temperature. (vi) Grey cells indicate that the multi-day (here, 5 <inline-formula><mml:math id="M98" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">d</mml:mi></mml:mrow></mml:math></inline-formula>) total or average did not match the sum or average of the transcribed daily values. (vii) Finally, pink cells indicate missing or untranscribed multi-day total or average values. Only the confirmed (green) daily temperature values are passed to the next module, Data Formatting and Upload (Sect. <xref ref-type="sec" rid="Ch1.S3.SS6"/>). It is recommended that values flagged in white, orange, red, dark red, grey, and pink be manually reviewed by an expert transcriber.</p>
      <p id="d2e2470">In addition to the checks described above, an optional time series consistency check can be applied once a longer temperature series is available for a given station. This step requires the consolidation of daily observations across multiple sheets and therefore cannot be applied during the initial QA/QC stage. It involves identifying outliers in the temperature time series for each station by using standard deviation to detect unusual patterns in the transcribed temperature records (as in <xref ref-type="bibr" rid="bib1.bibx10" id="altparen.62"/>). This check within our framework unfolds in two steps, following the creation of a distribution of all values across the station's time series: (i) Temperature values that deviate more than three standard deviations from the mean are identified, flagged, and removed (until confirmed by an expert). (ii) we identify abrupt transitions in daily temperature by examining the standard deviation differences of consecutive days. Specifically, we check for cases where a large deviation (e.g. less than <inline-formula><mml:math id="M99" display="inline"><mml:mo>-</mml:mo></mml:math></inline-formula>4 standard deviations from the mean) on one day is followed by a large deviation in the opposite direction (e.g. more than <inline-formula><mml:math id="M100" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mn mathvariant="normal">4</mml:mn></mml:mrow></mml:math></inline-formula> standard deviations) on the next day, and then a return to a similar deviation on the third day. When this pattern is observed, we flag the middle day as an outlier and remove it from the time series (until confirmed by an expert). For example, if a sequence of days shows a temperature that deviates significantly below the mean, followed by a sharp increase above the mean, and then returns to a lower deviation on the following day, the middle day is flagged. 
This approach allows us to capture rapid shifts in temperature that may indicate transcription errors or anomalies, even if the individual values do not exceed the fixed <inline-formula><mml:math id="M101" display="inline"><mml:mrow><mml:mo>±</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:math></inline-formula> standard deviation threshold used in the first step. Although included in our framework, the first step is not applied in this demonstration because we illustrate with a single month's data (a short series), where it could lead to mistakenly removing extreme but valid values.</p>
</sec>
<sec id="Ch1.S3.SS6">
  <label>3.6</label><title>Module 6: data formatting and upload</title>
      <p id="d2e2511">In the final module of MeteoSaver, we consolidate all confirmed daily transcribed data (flagged in green) from the previous QA/QC module across all monthly sheets for each station to create long temperature time series per station. As a prerequisite, users specify the column in the monthly sheets that contains date information in the configuration file (see Sect. <xref ref-type="sec" rid="Ch1.S3.SS1"/>). Additionally, the month and year information for each monthly sheet is automatically retrieved from the file names, following the naming convention outlined earlier (see Sect. <xref ref-type="sec" rid="Ch1.S3.SS2"/>).</p>
      <p id="d2e2518">Finally, we prepare the confirmed historical weather data time series (in this case, temperature) for upload to open-access repositories. In our framework, we convert the data into the standard format prescribed by the Copernicus Data Rescue Service, known as the Station Exchange Format (SEF) (<uri>https://datarescue.climate.copernicus.eu/station-exchange-format-sef</uri>, last access: 17 April 2026). This format standardizes digitized historical weather records, ensuring compatibility with the Copernicus observational database, facilitating integration and access for climate research and data applications. We integrate the confirmed data with relevant metadata for each station, as specified by the user under the Directories category in the configuration module (see Table <xref ref-type="table" rid="T1"/>). This metadata includes key station details such as station name, ID, latitude, longitude, altitude, data source, units, and other relevant information.</p>

      <fig id="F12" specific-use="star"><label>Figure 12</label><caption><p id="d2e2528">Post-quality controlled table using MeteoSaver, showing confirmed values of daily maximum, minimum and average temperature, diurnal temperature range, and daily precipitation (highlighted in green) for the weather sheet shown in Fig. <xref ref-type="fig" rid="F1"/>. The description of the colors in the post-quality controlled table is given in Fig. <xref ref-type="fig" rid="F10"/>.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f12.png"/>

        </fig>

</sec>
</sec>
<sec id="Ch1.S4">
  <label>4</label><title>Software evaluation and results</title>
      <p id="d2e2550">We apply MeteoSaver v1.0 on ten sample data sheets (see Appendix Figs. <xref ref-type="fig" rid="FA1"/>–<xref ref-type="fig" rid="FA9"/>) considering various handwriting styles, paper sizes and formats, and maintenance conditions (ranging from well-preserved in the archives to poor or torn). Here, we present the final results from the QA/QC checks and final data formatting of the transcribed ten sample sheets (Table <xref ref-type="table" rid="T2"/> and Figs. <xref ref-type="fig" rid="F13"/> and <xref ref-type="fig" rid="F14"/>). In addition, we provide the results for all module steps detailed in Sect. <xref ref-type="sec" rid="Ch1.S3"/> (Appendix Figs. <xref ref-type="fig" rid="FB1"/>–<xref ref-type="fig" rid="FB9"/>). We validate the accuracy of MeteoSaver's output in our case study by comparing it to data obtained from manual transcription of these sample data sheets (Table <xref ref-type="table" rid="T2"/>).</p>

      <fig id="F13" specific-use="star"><label>Figure 13</label><caption><p id="d2e2574">Time series plot of the daily maximum (red), average (orange), and minimum (blue) temperatures for the respective stations. Each variable shows automatically transcribed values as solid markers, while manually transcribed values are displayed as lighter time series bands with a 0.2 <inline-formula><mml:math id="M102" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula> uncertainty margin applied during QA/QC checks. The accuracy percentage and mean absolute error (MAE) between the automatically and manually transcribed values are noted in the upper right corner of the plot. The accuracy percentage denotes the percentage of confirmed, automatically transcribed values that fall within 0.2 <inline-formula><mml:math id="M103" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula> of the manually transcribed value. Together, these metrics quantify the agreement between the automatically transcribed values using MeteoSaver v1.0 (markers) and their corresponding manually transcribed values (bands), providing an indication of the reliability of the automatically transcribed observations (with respect to manual transcriptions) for subsequent climatological analyses. The analysis assumes that the manually transcribed values are correct; however, this may not always be the case, as manual transcription is also subject to errors depending on the methods applied.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f13.png"/>

      </fig>

      <fig id="F14" specific-use="star"><label>Figure 14</label><caption><p id="d2e2605">Time series plot of the daily maximum (red), average (orange), and minimum (blue) temperatures for the respective stations. Each variable shows automatically transcribed values as solid markers, while manually transcribed values are displayed as lighter time series bands with a 0.2 <inline-formula><mml:math id="M104" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula> uncertainty margin applied during QA/QC checks. The accuracy percentage and mean absolute error (MAE) between the automatically and manually transcribed values are noted in the upper right corner of the plot. The accuracy percentage denotes the percentage of confirmed, automatically transcribed values that fall within 0.2 <inline-formula><mml:math id="M105" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula> of the manually transcribed value. Together, these metrics quantify the agreement between the automatically transcribed values using MeteoSaver v1.0 (markers) and their corresponding manually transcribed values (bands), providing an indication of the reliability of the automatically transcribed observations (with respect to manual transcriptions) for subsequent climatological analyses.   The analysis assumes that the manually transcribed values are correct; however, this may not always be the case, as manual transcription is also subject to errors depending on the methods applied.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f14.png"/>

        
      </fig>

      <p id="d2e2637">The results indicate that between 95 <inline-formula><mml:math id="M106" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula> and 100 <inline-formula><mml:math id="M107" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula> of the handwritten temperature records (daily maximum, minimum, and average temperatures) from the 10 sheets were successfully detected using the Table and Cell Detection module, with a median of 100 <inline-formula><mml:math id="M108" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula> of cells identified across the sheets. These detected cells were then automatically transcribed by our software. Of these transcribed values, a median of 74.4 <inline-formula><mml:math id="M109" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula> across the sheets achieved the highest quality flag and were therefore confirmed by the QA/QC. This means that 25.6 <inline-formula><mml:math id="M110" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula> of the transcribed values were excluded from the final output timeseries because they could not be confirmed by the QA/QC. The confirmed temperature values showed a median match rate of 74 <inline-formula><mml:math id="M111" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula> with manually transcribed records (see Table <xref ref-type="table" rid="T2"/>). 
Here, accuracy is defined as the proportion of automatically transcribed and QA/QC confirmed values that match manually transcribed values, considering a set uncertainty margin of 0.2 <inline-formula><mml:math id="M112" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula>. Additionally, we calculate the mean absolute error (MAE) of these automatically transcribed temperature values compared to the manually transcribed ones. The MAE across these transcribed sheets ranged from 0.0–0.9 <inline-formula><mml:math id="M113" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula>, with a median of 0.3 <inline-formula><mml:math id="M114" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula> (see Table <xref ref-type="table" rid="T2"/>).</p>

<table-wrap id="T2" specific-use="star"><label>Table 2</label><caption><p id="d2e2726">Validation results of ten sample data sheets transcribed using MeteoSaver v1.0.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="9">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="left"/>
     <oasis:colspec colnum="3" colname="col3" align="center"/>
     <oasis:colspec colnum="4" colname="col4" align="left"/>
     <oasis:colspec colnum="5" colname="col5" align="right"/>
     <oasis:colspec colnum="6" colname="col6" align="center"/>
     <oasis:colspec colnum="7" colname="col7" align="center"/>
     <oasis:colspec colnum="8" colname="col8" align="right"/>
     <oasis:colspec colnum="9" colname="col9" align="center"/>
     <oasis:thead>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">No.</oasis:entry>
         <oasis:entry colname="col2">Station name</oasis:entry>
         <oasis:entry colname="col3">Station No.</oasis:entry>
         <oasis:entry colname="col4">Date</oasis:entry>
         <oasis:entry colname="col5"><inline-formula><mml:math id="M116" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula> Cells Identified</oasis:entry>
         <oasis:entry colname="col6">Transcribed Temp. values</oasis:entry>
         <oasis:entry colname="col7">Confirmed Temp. values</oasis:entry>
         <oasis:entry colname="col8"><inline-formula><mml:math id="M117" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula> Accuracy</oasis:entry>
         <oasis:entry colname="col9">MAE (<inline-formula><mml:math id="M118" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula>)</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>
         <oasis:entry colname="col1">1</oasis:entry>
         <oasis:entry colname="col2">Binga</oasis:entry>
         <oasis:entry colname="col3">203</oasis:entry>
         <oasis:entry colname="col4">May 1969</oasis:entry>
         <oasis:entry colname="col5">100.0</oasis:entry>
         <oasis:entry colname="col6">93</oasis:entry>
         <oasis:entry colname="col7">93</oasis:entry>
         <oasis:entry colname="col8">100.0</oasis:entry>
         <oasis:entry colname="col9">0.0</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">2</oasis:entry>
         <oasis:entry colname="col2">Binga</oasis:entry>
         <oasis:entry colname="col3">203</oasis:entry>
         <oasis:entry colname="col4">June 1969</oasis:entry>
         <oasis:entry colname="col5">100.0</oasis:entry>
         <oasis:entry colname="col6">91</oasis:entry>
         <oasis:entry colname="col7">86</oasis:entry>
         <oasis:entry colname="col8">87.2</oasis:entry>
         <oasis:entry colname="col9">0.1</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">3</oasis:entry>
         <oasis:entry colname="col2">Loeka-Bumba</oasis:entry>
         <oasis:entry colname="col3">209</oasis:entry>
         <oasis:entry colname="col4">October 1968</oasis:entry>
         <oasis:entry colname="col5">100.0</oasis:entry>
         <oasis:entry colname="col6">93</oasis:entry>
         <oasis:entry colname="col7">84</oasis:entry>
         <oasis:entry colname="col8">52.4</oasis:entry>
         <oasis:entry colname="col9">0.9</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">4</oasis:entry>
         <oasis:entry colname="col2">Kisanga-Plateau</oasis:entry>
         <oasis:entry colname="col3">501</oasis:entry>
         <oasis:entry colname="col4">October 1972</oasis:entry>
         <oasis:entry colname="col5">95.0</oasis:entry>
         <oasis:entry colname="col6">89</oasis:entry>
         <oasis:entry colname="col7">64</oasis:entry>
         <oasis:entry colname="col8">54.7</oasis:entry>
         <oasis:entry colname="col9">0.6</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">5</oasis:entry>
         <oasis:entry colname="col2">Kaniama-Kasese</oasis:entry>
         <oasis:entry colname="col3">508</oasis:entry>
         <oasis:entry colname="col4">February 1988</oasis:entry>
         <oasis:entry colname="col5">100.0</oasis:entry>
         <oasis:entry colname="col6">93</oasis:entry>
         <oasis:entry colname="col7">57</oasis:entry>
         <oasis:entry colname="col8">73.7</oasis:entry>
         <oasis:entry colname="col9">0.3</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">6</oasis:entry>
         <oasis:entry colname="col2">Mulungu-Mulohe</oasis:entry>
         <oasis:entry colname="col3">601</oasis:entry>
         <oasis:entry colname="col4">May 1967</oasis:entry>
         <oasis:entry colname="col5">97.8</oasis:entry>
         <oasis:entry colname="col6">91</oasis:entry>
         <oasis:entry colname="col7">70</oasis:entry>
         <oasis:entry colname="col8">84.3</oasis:entry>
         <oasis:entry colname="col9">0.2</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">7</oasis:entry>
         <oasis:entry colname="col2">Nioka-Lekwa</oasis:entry>
         <oasis:entry colname="col3">702</oasis:entry>
         <oasis:entry colname="col4">February 1964</oasis:entry>
         <oasis:entry colname="col5">100.0</oasis:entry>
         <oasis:entry colname="col6">89</oasis:entry>
         <oasis:entry colname="col7">35</oasis:entry>
         <oasis:entry colname="col8">74.3</oasis:entry>
         <oasis:entry colname="col9">0.3</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">8</oasis:entry>
         <oasis:entry colname="col2">Nioka-Lekwa</oasis:entry>
         <oasis:entry colname="col3">702</oasis:entry>
         <oasis:entry colname="col4">April 1964</oasis:entry>
         <oasis:entry colname="col5">98.9</oasis:entry>
         <oasis:entry colname="col6">89</oasis:entry>
         <oasis:entry colname="col7">40</oasis:entry>
         <oasis:entry colname="col8">70.0</oasis:entry>
         <oasis:entry colname="col9">0.4</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">9</oasis:entry>
         <oasis:entry colname="col2">Rwindi</oasis:entry>
         <oasis:entry colname="col3">904</oasis:entry>
         <oasis:entry colname="col4">August 1972</oasis:entry>
         <oasis:entry colname="col5">95.7</oasis:entry>
         <oasis:entry colname="col6">89</oasis:entry>
         <oasis:entry colname="col7">72</oasis:entry>
         <oasis:entry colname="col8">87.5</oasis:entry>
         <oasis:entry colname="col9">0.1</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">10</oasis:entry>
         <oasis:entry colname="col2">Mutsora</oasis:entry>
         <oasis:entry colname="col3">905</oasis:entry>
         <oasis:entry colname="col4">November 1973</oasis:entry>
         <oasis:entry colname="col5">100.0</oasis:entry>
         <oasis:entry colname="col6">91</oasis:entry>
         <oasis:entry colname="col7">45</oasis:entry>
         <oasis:entry colname="col8">64.4</oasis:entry>
         <oasis:entry colname="col9">0.7</oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup>

</oasis:table><table-wrap-foot><p id="d2e2729">Where: The transcribed values are the daily maximum, minimum, and average temperatures. Accuracy takes into consideration an uncertainty margin of 0.2 <inline-formula><mml:math id="M115" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula> for transcribed and calculated (confirmed) temperature values.</p></table-wrap-foot></table-wrap>

</sec>
<sec id="Ch1.S5">
  <label>5</label><title>Discussion</title>
      <p id="d2e3149">In this demonstration, we illustrate the application of MeteoSaver v1.0 on ten sample sheets, where machine learning algorithms are used both to detect tables and cells and to transcribe the data within them. The software also performs QA/QC checks to flag confirmed values and formats the data into Station Exchange Format (SEF) for upload to open-access repositories. The ten sheets, with various handwriting styles, paper sizes, and maintenance conditions, are used to evaluate its flexibility and accuracy in transcribing historical weather data.</p>
      <p id="d2e3152">Processing each sheet on a local machine equipped with an 11th Gen Intel<sup>®</sup> Core™ i7-1165G7 and 16.0 GB of RAM takes under 8 <inline-formula><mml:math id="M119" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">min</mml:mi></mml:mrow></mml:math></inline-formula>. Because MeteoSaver processes individual sheets independently, and because it can also be executed on HPC infrastructure through the configuration settings (Sect. <xref ref-type="sec" rid="Ch1.S3.SS1"/>), the transcription process can be parallelized across multiple CPU cores to allow multiple sheets to be processed simultaneously, significantly reducing the total processing time for large archives. For example, processing 1000 sheets sequentially on a local machine would require approximately 130 <inline-formula><mml:math id="M120" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">h</mml:mi></mml:mrow></mml:math></inline-formula>, whereas distributing the workload across 20 parallel CPU cores with the same specifications on HPC infrastructure would reduce the processing time to under 7 <inline-formula><mml:math id="M121" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">h</mml:mi></mml:mrow></mml:math></inline-formula>. The parallel processing of individual sheets also means that computationally intensive steps such as image pre-processing, table and cell detection and transcription can take advantage of increased processing power and larger dedicated memory on HPC infrastructure.</p>
      <p id="d2e3185">While the initial transcription results from this sample are promising, with a median accuracy of 74 <inline-formula><mml:math id="M122" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula>, there are limitations in this first version of the software. In the following subsections, we discuss the strengths and weaknesses of this version, highlight developments that were excluded from the initial release, and provide ideas for future improvements in each module.</p>
<sec id="Ch1.S5.SS1">
  <label>5.1</label><title>Table and cell detection</title>
      <p id="d2e3203">In our framework, the current table and cell detection module, which utilizes OpenCV's ML algorithms, performs well in identifying tables and cells within entire sheets, achieving a cell detection rate of 95 <inline-formula><mml:math id="M123" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula>–100 <inline-formula><mml:math id="M124" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula> across the sheets (see Table <xref ref-type="table" rid="T2"/>). However, while our additional step of adding missing bounding boxes (cells) based on the expected number of rows per column (in our case, 43) generally yields good results, it has some limitations. In some instances, a missing bounding box is added in a large gap between rows that does not actually correspond to a cell (see Fig. <xref ref-type="fig" rid="F7"/>).</p>
      <p id="d2e3226">To address this limitation, we explored an alternative approach during software development that involves template matching. In this method, horizontal and vertical guides (lines) are defined for each table layout template using image editing software and overlaid on the sheet to represent cell boundaries. However, this method proved time-consuming, as it requires manually defining guides for each specific template and paper size to ensure accurate alignment with the table lines on different images. This approach therefore becomes impractical for sheets with varying paper sizes or slight modifications in table layout (e.g. the same template but with different row or column widths). Consequently, it lacks reusability across different case studies, as each user would need to create their own template guides – an unrealistic demand for large historical datasets with diverse templates and paper sizes.</p>
      <p id="d2e3229">For these reasons, we retained the current table and cell detection module in this initial release. We recommend further refinement within this table and cell detection pipeline to achieve closer to 100 <inline-formula><mml:math id="M125" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula> cell detection accuracy across various table templates.</p>
</sec>
<sec id="Ch1.S5.SS2">
  <label>5.2</label><title>Transcription</title>
      <p id="d2e3250">Within this software release, we offer three open-source OCR/HTR models for text transcription within the identified cells (bounding boxes): Tesseract OCR, EasyOCR, and PaddleOCR. We primarily demonstrate transcription using Tesseract OCR, as it outperforms the other two models due to the availability of our custom-trained language dataset based on the off-the-shelf French Tesseract OCR model, further enhanced with thousands of images of handwritten digits (as detailed in our previous work <xref ref-type="bibr" rid="bib1.bibx48" id="altparen.63"/>). While EasyOCR and PaddleOCR are included for flexibility, they are mainly optimized for printed text and digits, making Tesseract the preferred choice for the handwritten data in our sheets. Nevertheless, we include them within our framework for easier integration with potential updates of the models or language datasets, including adaptations for typeset or printed digits.</p>
      <p id="d2e3256">To minimize noise from extraneous characters, we restrict the OCR/HTR recognized characters within the bounding boxes to digits 0–9. This prevents misreadings, such as dotted lines being read as decimal points or certain handwritten digits being mistakenly recognized as letters. This process generates a two-dimensional array of initially transcribed values, temporarily disregarding decimal places, organized into rows and columns based on bounding box coordinates (see Sect. <xref ref-type="sec" rid="Ch1.S3.SS4"/> and Fig. <xref ref-type="fig" rid="F8"/>). The original decimal places, as specified by the user (see Table <xref ref-type="table" rid="T1"/>), are reinstated in the subsequent QA/QC module (as in Fig. <xref ref-type="fig" rid="F12"/>). For additional characters like minus signs, users are advised to include the minus sign in the OCR/HTR-recognized character set.</p>
</sec>
<sec id="Ch1.S5.SS3">
  <label>5.3</label><title>Quality assessment and quality control</title>
      <p id="d2e3275">The QA/QC results in this release, as outlined in Sect. <xref ref-type="sec" rid="Ch1.S3.SS5"/>, demonstrate that our current pipeline is robust in identifying transcription errors by employing (i) user-defined data thresholds, (ii) multi-day totals and averages, (iii) logic checks, such as <inline-formula><mml:math id="M126" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub><mml:mo>&lt;</mml:mo><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub><mml:mo>&lt;</mml:mo><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, (iv) iterative comparisons between related variables, including the relationship between <inline-formula><mml:math id="M127" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M128" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M129" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, and the Diurnal Temperature Range (DTR), and (v) reiteration across the different checks (Fig. <xref ref-type="fig" rid="F9"/>).</p>
      <p id="d2e3341">It is important to note that some QA/QC checks, specifically the user-defined thresholds (here, maximum and minimum temperature thresholds), are region-specific and should be informed by expert knowledge or prior climatological studies. However, while these regional thresholds are generally effective for identifying incorrectly transcribed values, they may flag correctly transcribed observations associated with extreme events, potentially excluding these undocumented local extremes from the final output dataset.</p>
      <p id="d2e3344">Notably, the presence of pentad totals and averages in our sheets proved particularly valuable for QA/QC, as they simplified the process of recalculating and confirming transcribed values within each pentad. In contrast, the more common monthly totals and/or averages found in many international archived weather data sheets would present greater challenges, particularly when multiple daily values are transcribed incorrectly.</p>
      <p id="d2e3347">An evaluation of the values that achieved the highest quality flag revealed a median confirmation rate of 74.4 <inline-formula><mml:math id="M130" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula> of the transcribed temperature data across the sheets, either confirmed or corrected during QA/QC. This means that 25.6 <inline-formula><mml:math id="M131" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula> of the transcribed values were excluded from the final output timeseries. Among these excluded values, a substantial fraction is correctly transcribed but could not be validated by the QA/QC framework. This may occur, for example, when incorrectly transcribed values appear within the same pentad, preventing confirmation of the correctly transcribed values through the QA/QC checks, or in rare cases when extreme but valid observations fall outside the predefined threshold criteria. While the QA/QC checks are used to validate and refine the transcribed data, as demonstrated in Fig. <xref ref-type="fig" rid="F11"/>, their effectiveness heavily relies on the initial transcription quality, which depends on the OCR/HTR model, the variability in handwriting styles, and the maintenance condition of the paper sheets in the archives. For instance, when the paper condition is well-preserved (as in Fig. <xref ref-type="fig" rid="F1"/>) and the initial transcription is nearly accurate across most cells, only the first few QA/QC checks are typically sufficient to confirm all daily temperature values in a pentad, as illustrated in Fig. <xref ref-type="fig" rid="F11"/>i–j. On the other hand, in cases where the initial transcription contains multiple errors, all QA/QC checks and iterative re-evaluations in our framework are necessary to confirm the temperature values in that pentad, as seen in Fig. 
<xref ref-type="fig" rid="F11"/>a–h. The latter could, in rare cases, lead to incorrectly confirmed values if the originally transcribed data contains errors that still meet multiple QA/QC checks, or may result in values falling outside the user-defined uncertainty margin, which would therefore remain unconfirmed. Users should be aware that the current QA/QC framework may exclude some valid extreme observations and therefore additional manual verification is advised for applications focusing on rare extremes.</p>
      <p id="d2e3376">In our study, the final confirmed temperature values showed a match rate of 52.4 <inline-formula><mml:math id="M132" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula>–100 <inline-formula><mml:math id="M133" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula> with manually transcribed records, yielding a median accuracy of 74 <inline-formula><mml:math id="M134" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula> and a median mean absolute error of 0.3 <inline-formula><mml:math id="M135" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula> (see Table <xref ref-type="table" rid="T2"/>). While the accuracy indicates the proportion of automatically transcribed values that match the manually transcribed values within a predefined uncertainty margin, the MAE provides an indication of the magnitude of transcription deviations. The median MAE of 0.3 <inline-formula><mml:math id="M136" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula> observed in these sample sheets is comparable to typical uncertainties associated with historical thermometer measurements of 0.2 <inline-formula><mml:math id="M137" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula> (1<inline-formula><mml:math id="M138" display="inline"><mml:mi mathvariant="italic">σ</mml:mi></mml:math></inline-formula>) <xref ref-type="bibr" rid="bib1.bibx31 bib1.bibx8 bib1.bibx16" id="paren.64"/>. 
Because many climatological analyses rely on aggregated statistics derived from combined station data, such as spatial averages and long-term trends, transcription deviations of this magnitude are unlikely to substantially affect the resulting climatological interpretations <xref ref-type="bibr" rid="bib1.bibx8" id="paren.65"/>. However, for analyses requiring precise daily values such as extreme-event detection, additional manual verification may still be advisable.</p>
      <p id="d2e3449">These reported performance metrics are specific to this study's sample weather sheet formats, input image quality, handwriting styles on these sheets, paper maintenance conditions, and manual transcription quality. Consequently, the reported performance may differ when MeteoSaver is applied to other historical datasets with different table structures, handwriting styles or image quality. Additionally, the variability of climate variables should be considered: for example, while temperature values in the DRC exhibit relatively small annual ranges, extratropical regions often experience much larger seasonal variations (on the order of tens of degrees). In such contexts, transcription errors may be larger and more difficult to detect through the applied QA/QC procedures. Nevertheless, the modular design of MeteoSaver allows users to adapt the configuration and retrain the OCR/HTR models to accommodate different table layouts and handwriting styles.</p>
      <p id="d2e3452">To enhance transcription accuracy, we recommend further training of the OCR/HTR model on a wider range of handwriting styles, specifically for handwritten digits. This would improve transcription accuracy even prior to the QA/QC step, subsequently enhancing the accuracy of QA/QC-verified values. Moreover, in future software versions, we suggest incorporating a feedback loop where corrected and confirmed values from the QA/QC process serve as additional training data for the OCR/HTR models. This iterative approach would enable the OCR/HTR model to continuously learn from past corrections and improve its ability to transcribe specific handwriting styles over time. This “on-the-fly” learning capability would progressively increase the model's transcription accuracy with each batch of post-processed data.</p>
      <p id="d2e3455">While our demonstration focuses exclusively on QA/QC checks for daily temperature and precipitation values, the evaluation of this first version of the software primarily focuses on temperature variables (daily maximum, minimum, and average temperatures). This choice was made because temperature allows the illustration of a broader set of QA/QC procedures available in the sheets, including pentad totals and averages, logical consistency checks (e.g. <inline-formula><mml:math id="M139" display="inline"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mtext>min</mml:mtext></mml:msub><mml:mo>&lt;</mml:mo><mml:msub><mml:mi>T</mml:mi><mml:mtext>avg</mml:mtext></mml:msub><mml:mo>&lt;</mml:mo><mml:msub><mml:mi>T</mml:mi><mml:mtext>max</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>), diurnal temperature range checks, and threshold tests. Together, these checks provide a comprehensive demonstration of the QA/QC framework implemented in MeteoSaver. In contrast, precipitation values presented in the sample sheets include only one QA/QC constraint: the pentad total, which limits the ability to identify and correct erroneously transcribed daily precipitation values. As a result, fewer daily precipitation values can be confirmed through the QA/QC framework compared with daily temperature values (Figs. <xref ref-type="fig" rid="F12"/> and <xref ref-type="fig" rid="FB1"/>–<xref ref-type="fig" rid="FB9"/>).</p>
      <p id="d2e3489">Nevertheless, future software versions could expand these checks to include additional variables and diagnostics. For instance, in our sheets, the columns under <italic>Température et Humidité</italic> contain vapor pressure (<inline-formula><mml:math id="M140" display="inline"><mml:mi>e</mml:mi></mml:math></inline-formula>) and relative humidity (<inline-formula><mml:math id="M141" display="inline"><mml:mi>U</mml:mi></mml:math></inline-formula>) recorded at specific times, which are calculated using the observed dry-bulb temperature (<inline-formula><mml:math id="M142" display="inline"><mml:mi>T</mml:mi></mml:math></inline-formula>) and wet-bulb temperature (<inline-formula><mml:math id="M143" display="inline"><mml:mrow><mml:msubsup><mml:mi>T</mml:mi><mml:mi>a</mml:mi><mml:mo>′</mml:mo></mml:msubsup></mml:mrow></mml:math></inline-formula>). This would allow us to incorporate more equations within the QA/QC module to validate an even broader range of transcribed values across the sheets. Therefore, for this initial release, we provide a detailed description of the current QA/QC checks to guide software users and illustrate the framework's flexibility.</p>
</sec>
<sec id="Ch1.S5.SS4">
  <label>5.4</label><title>Potential of the software</title>
      <p id="d2e3537">Our study demonstrates the flexibility of MeteoSaver in transcribing historical tabular weather data across a range of handwriting styles, table dimensions, paper sizes, and maintenance conditions, highlighting its potential contribution to ongoing climate data rescue efforts. Throughout our model development, we focused on reusability for similar case studies, equipping MeteoSaver with numerous configurable settings within the configuration module (see Sect. <xref ref-type="sec" rid="Ch1.S3.SS1"/>) to allow users to tailor the software for specific table formats. Given this flexibility, MeteoSaver has substantial potential for transcribing millions of rescued archived weather records, such as those available on the C3S Data Rescue Service Portal. It is important to note, however, that in this initial release, users may need to make further adjustments when applying the software to data sheets with complex tabular formats or variables or data types beyond temperature and precipitation, particularly within the QA/QC checks.</p>
      <p id="d2e3542">MeteoSaver therefore complements existing efforts in historical climate data rescue. Many recent efforts rely on manual transcription workflows, including citizen science initiatives <xref ref-type="bibr" rid="bib1.bibx36 bib1.bibx18 bib1.bibx12" id="paren.66"/>, while other approaches explore open-source and commercial OCR/HTR models to directly transcribe individual values in scanned historical documents <xref ref-type="bibr" rid="bib1.bibx48 bib1.bibx35" id="paren.67"/>. MeteoSaver, on the other hand, contributes to these existing data rescue efforts by providing an open-source end-to-end workflow that integrates machine learning into image processing, table and cell detection, and transcription, as well as QA/QC and data formatting ready for upload. In addition, MeteoSaver can potentially make use of quality-controlled manually transcribed data as training data with multiple handwriting styles for the OCR models used in the transcription module. By automating these key steps of the data rescue process, following the digitization (imaging) of paper-based records, MeteoSaver aims to substantially reduce the manual effort required for climate data rescue while integrating QA/QC procedures into OCR/HTR-based transcription workflows. Furthermore, its modular and open-source framework allows for continuous improvement of the machine-learning components as additional training data becomes available.</p>
      <p id="d2e3551">While we showcase its application to tabular weather data, we also envision MeteoSaver's potential in transcribing other historical environmental records in tabular and numerical form, spanning fields like hydrology, biology, ecology, and oceanography. For instance, <xref ref-type="bibr" rid="bib1.bibx13" id="text.68"/> recently highlighted data rescue efforts for historical river flow records from Irish catchments, recorded from the early 1940s and recently transcribed manually – a process in which MeteoSaver could potentially have saved numerous hours of manual work.</p>
</sec>
</sec>
<sec id="Ch1.S6" sec-type="conclusions">
  <label>6</label><title>Conclusions</title>
      <p id="d2e3566">We introduce MeteoSaver, a new open-source software that uses ML algorithms to automate the transcription of handwritten historical weather records. MeteoSaver version 1.0 takes pictures of tabular sheets as input, along with user-defined settings in the configuration module, and transcribes the data through five iterative steps: (i) image pre-processing, (ii) table and cell detection, (iii) transcription, (iv) quality assessment and quality control, and (v) data formatting and upload.</p>
      <p id="d2e3569">MeteoSaver is applied on images of ten sample sheets with various handwriting styles, paper sizes, and maintenance conditions to assess its flexibility and accuracy in transcribing historical weather data. Each sheet is processed in under 8 <inline-formula><mml:math id="M144" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">min</mml:mi></mml:mrow></mml:math></inline-formula> on a local machine powered by an 11th Gen Intel<sup>®</sup> Core™ i7-1165G7 and 16.0 GB of RAM. The initial results are promising, with 95 <inline-formula><mml:math id="M145" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula>–100 <inline-formula><mml:math id="M146" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula> of handwritten temperature records (daily maximum, minimum, and average) detected by the Table and Cell Detection module and successfully transcribed by the Transcription module. Of these, a median of 74.4 <inline-formula><mml:math id="M147" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula> of transcribed values were confirmed by the Quality Assessment and Quality Control (QA/QC) module, with 74 <inline-formula><mml:math id="M148" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:math></inline-formula> accuracy against manually transcribed values (considering an uncertainty margin of 0.2 <inline-formula><mml:math id="M149" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula>). 
The mean absolute error across these transcribed sheets ranged from 0.0 to 0.9 <inline-formula><mml:math id="M150" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula>, with a median of 0.3 <inline-formula><mml:math id="M151" display="inline"><mml:mrow class="unit"><mml:mi mathvariant="normal">°</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow></mml:math></inline-formula>.</p>
      <p id="d2e3646">While the initial outcomes are promising, we outline recommendations for future software versions to (i) enhance the robustness of the table and cell detection module, (ii) improve transcription accuracy by further training OCR/HTR models on diverse handwriting datasets and enabling continuous learning from corrected values in the QA/QC steps, and (iii) expand QA/QC checks to accommodate additional variables and related diagnostics.</p>
      <p id="d2e3649">Nevertheless, MeteoSaver v1.0 offers a fast, reliable, and open-source framework to transcribe vast amounts of historical tabular weather data, employing machine learning algorithms and QA/QC techniques to ensure accuracy of transcribed results, thereby saving significant manual effort. Therefore, MeteoSaver addresses one of the main challenges in climate data rescue projects – the labor-intensive transcription process – and paves the way for rescuing millions of weather records globally. This framework is especially valuable for data-scarce regions, such as those in the Global South, where archived weather data can now be digitized to bridge data gaps. This will enable climate scientists to analyze long-term climate trends in these previously understudied areas and better quantify the impacts of climate change on these regions.</p>
</sec>

      
      </body>
    <back><app-group>

<app id="App1.Ch1.S1">
  <label>Appendix A</label><title>Sample observed weather data sheets for different stations in DRC available within the archives of INERA, Yangambi</title>

      <fig id="FA1"><label>Figure A1</label><caption><p id="d2e3665">Observed weather data sheet for June 1969 at Station Binga (2°18<sup>′</sup> N, 20°30<sup>′</sup> E) in DRC available within the archives of INERA, Yangambi. Refer to Fig. <xref ref-type="fig" rid="F1"/> for the table structure information.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f15.jpg"/>
        

      </fig>

<fig id="FA2"><label>Figure A2</label><caption><p id="d2e3699">Observed weather data sheet for October 1968 at Station Loeka-Bumba (2°15<sup>′</sup> N, 22°49<sup>′</sup> E) in DRC available within the archives of INERA, Yangambi. Refer to Fig. <xref ref-type="fig" rid="F1"/> for the table structure information.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f16.jpg"/>
        

      </fig>

<fig id="FA3"><label>Figure A3</label><caption><p id="d2e3734">Observed weather data sheet for October 1972 at Station Kisanga-Plateau (11°44<sup>′</sup> S, 27°25<sup>′</sup> E) in DRC available within the archives of INERA, Yangambi. Refer to Fig. <xref ref-type="fig" rid="F1"/> for the table structure information.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f17.jpg"/>
        

      </fig>

<fig id="FA4"><label>Figure A4</label><caption><p id="d2e3768">Observed weather data sheet for February 1988 at Station Kaniama-Kasese in DRC available within the archives of INERA, Yangambi. Refer to Fig. <xref ref-type="fig" rid="F1"/> for the table structure information.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f18.jpg"/>
        

      </fig>

<fig id="FA5"><label>Figure A5</label><caption><p id="d2e3784">Observed weather data sheet for May 1967 at Station Mulungu-Mulohe (2°18<sup>′</sup> S, 28°47<sup>′</sup> E) in DRC available within the archives of INERA, Yangambi. Refer to Fig. <xref ref-type="fig" rid="F1"/> for the table structure information.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f19.jpg"/>
        

      </fig>

<fig id="FA6"><label>Figure A6</label><caption><p id="d2e3819">Observed weather data sheet for February 1964 at Station Nioka-Lekwa (2°07<sup>′</sup> N, 30°38<sup>′</sup> E) in DRC available within the archives of INERA, Yangambi. Refer to Fig. <xref ref-type="fig" rid="F1"/> for the table structure information.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f20.jpg"/>
        

      </fig>

<fig id="FA7"><label>Figure A7</label><caption><p id="d2e3853">Observed weather data sheet for April 1964 at Station Nioka-Lekwa (2°07<sup>′</sup> N, 30°38<sup>′</sup> E) in DRC available within the archives of INERA, Yangambi. Refer to Fig. <xref ref-type="fig" rid="F1"/> for the table structure information.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f21.jpg"/>
        

      </fig>

<fig id="FA8"><label>Figure A8</label><caption><p id="d2e3887">Observed weather data sheet for August 1972 at Station Rwindi (0°47<sup>′</sup> S, 29°17<sup>′</sup> E) in DRC available within the archives of INERA, Yangambi. Refer to Fig. <xref ref-type="fig" rid="F1"/> for the table structure information.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f22.jpg"/>
        

      </fig>

<fig id="FA9"><label>Figure A9</label><caption><p id="d2e3922">Observed weather data sheet for November 1973 at Station Mutsora (0°19<sup>′</sup> N, 29°44<sup>′</sup> E) in DRC available within the archives of INERA, Yangambi. Refer to Fig. <xref ref-type="fig" rid="F1"/> for the table structure information.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f23.jpg"/>
        

      </fig>


</app>

<app id="App1.Ch1.S2">
  <label>Appendix B</label><title>Post QA/QC transcribed values of daily maximum, minimum and average temperature, and diurnal temperature range, using MeteoSaver v1.0</title>

      <fig id="FB1"><label>Figure B1</label><caption><p id="d2e3965">Post-quality controlled table using MeteoSaver, showing confirmed values of daily maximum, minimum and average temperature, diurnal temperature range, and daily precipitation (highlighted in green) for the Station Binga in June 1969 (Fig. <xref ref-type="fig" rid="FA1"/>). The description of the colors in the post-quality controlled table is given in Fig. <xref ref-type="fig" rid="F10"/>.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f24.png"/>
        

      </fig>

<fig id="FB2"><label>Figure B2</label><caption><p id="d2e3983">Post-quality controlled table using MeteoSaver, showing confirmed values of daily maximum, minimum and average temperature, diurnal temperature range, and daily precipitation (highlighted in green) for the Station Loeka-Bumba in October 1968 (Fig. <xref ref-type="fig" rid="FA2"/>). The description of the colors in the post-quality controlled table is given in Fig. <xref ref-type="fig" rid="F10"/>.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f25.png"/>
        

      </fig>

<fig id="FB3"><label>Figure B3</label><caption><p id="d2e4002">Post-quality controlled table using MeteoSaver, showing confirmed values of daily maximum, minimum and average temperature, diurnal temperature range, and daily precipitation (highlighted in green) for the Station Kisanga-Plateau in October 1972 (Fig. <xref ref-type="fig" rid="FA3"/>). The description of the colors in the post-quality controlled table is given in Fig. <xref ref-type="fig" rid="F10"/>.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f26.png"/>
        

      </fig>

<fig id="FB4"><label>Figure B4</label><caption><p id="d2e4020">Post-quality controlled table using MeteoSaver, showing confirmed values of daily maximum, minimum and average temperature, diurnal temperature range, and daily precipitation (highlighted in green) for the Station Kaniama-Kasese in February 1988 (Fig. <xref ref-type="fig" rid="FA4"/>). The description of the colors in the post-quality controlled table is given in Fig. <xref ref-type="fig" rid="F10"/>.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f27.png"/>
        

      </fig>

<fig id="FB5"><label>Figure B5</label><caption><p id="d2e4038">Post-quality controlled table using MeteoSaver, showing confirmed values of daily maximum, minimum and average temperature, diurnal temperature range, and daily precipitation (highlighted in green) for the Station Mulungu-Mulohe in May 1967 (Fig. <xref ref-type="fig" rid="FA5"/>). The description of the colors in the post-quality controlled table is given in Fig. <xref ref-type="fig" rid="F10"/>.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f28.png"/>
        

      </fig>

<fig id="FB6"><label>Figure B6</label><caption><p id="d2e4057">Post-quality controlled table using MeteoSaver, showing confirmed values of daily maximum, minimum and average temperature, diurnal temperature range, and daily precipitation (highlighted in green) for the Station Nioka-Lekwa in February 1964 (Fig. <xref ref-type="fig" rid="FA6"/>). The description of the colors in the post-quality controlled table is given in Fig. <xref ref-type="fig" rid="F10"/>.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f29.png"/>
        

      </fig>

<fig id="FB7"><label>Figure B7</label><caption><p id="d2e4075">Post-quality controlled table using MeteoSaver, showing confirmed values of daily maximum, minimum and average temperature, diurnal temperature range, and daily precipitation (highlighted in green) for the Station Nioka-Lekwa in April 1964 (Fig. <xref ref-type="fig" rid="FA7"/>). The description of the colors in the post-quality controlled table is given in Fig. <xref ref-type="fig" rid="F10"/>.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f30.png"/>
        

      </fig>

<fig id="FB8"><label>Figure B8</label><caption><p id="d2e4093">Post-quality controlled table using MeteoSaver, showing confirmed values of daily maximum, minimum and average temperature, diurnal temperature range, and daily precipitation (highlighted in green) for the Station Rwindi in August 1972 (Fig. <xref ref-type="fig" rid="FA8"/>). The description of the colors in the post-quality controlled table is given in Fig. <xref ref-type="fig" rid="F10"/>.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f31.png"/>
        

      </fig>

<fig id="FB9"><label>Figure B9</label><caption><p id="d2e4112">Post-quality controlled table using MeteoSaver, showing confirmed values of daily maximum, minimum and average temperature, diurnal temperature range, and daily precipitation (highlighted in green) for the Station Mutsora in November 1973 (Fig. <xref ref-type="fig" rid="FA9"/>). The description of the colors in the post-quality controlled table is given in Fig. <xref ref-type="fig" rid="F10"/>.</p></caption>
        <graphic xlink:href="https://gmd.copernicus.org/articles/19/3213/2026/gmd-19-3213-2026-f32.png"/>
        

      </fig>

</app>
  </app-group><notes notes-type="codedataavailability"><title>Code and data availability</title>

      <p id="d2e4131">All the scripts used in MeteoSaver v1.0 and the sample weather data sheets used in this paper are available on Zenodo at <ext-link xlink:href="https://doi.org/10.5281/zenodo.19123862" ext-link-type="DOI">10.5281/zenodo.19123862</ext-link> <xref ref-type="bibr" rid="bib1.bibx32" id="paren.69"/>. Additionally, they are available through the GitHub repository of the Department of Water and Climate at the Vrije Universiteit Brussel (<uri>https://github.com/VUB-HYDR/MeteoSaver/</uri>, last access: 20 March 2026).</p>
  </notes><app-group>
        <supplementary-material position="anchor"><p id="d2e4143">The supplement related to this article is available online at <inline-supplementary-material xlink:href="https://doi.org/10.5194/gmd-19-3213-2026-supplement" xlink:title="zip">https://doi.org/10.5194/gmd-19-3213-2026-supplement</inline-supplementary-material>.</p></supplementary-material>
        </app-group><notes notes-type="authorcontribution"><title>Author contributions</title>

      <p id="d2e4152">DM and WT designed the study. DM, WT, KH, and SL developed the scripts and conducted the development of MeteoSaver with assistance from BV, CV, EH, HV, JMB, KKTC, PB, and PT. DKN and OKM facilitated access to the dataset used in the software demonstration. All authors provided feedback during the software development and throughout the writing of this paper.</p>
  </notes><notes notes-type="competinginterests"><title>Competing interests</title>

      <p id="d2e4158">At least one of the (co-)authors is a member of the editorial board of <italic>Geoscientific Model Development</italic>. The peer-review process was guided by an independent editor, and the authors also have no other competing interests to declare.</p>
  </notes><notes notes-type="disclaimer"><title>Disclaimer</title>

      <p id="d2e4170">Publisher's note: Copernicus Publications remains neutral with regard to jurisdictional claims made in the text, published maps, institutional affiliations, or any other geographical representation in this paper. The authors bear the ultimate responsibility for providing appropriate place names. Views expressed in the text are those of the authors and do not necessarily reflect the views of the publisher.</p>
  </notes><ack><title>Acknowledgements</title><p id="d2e4176">We would like to thank the Institut National pour l'Etude et la Recherche Agronomiques (INERA) situated in the Democratic Republic of the Congo (DRC) for granting us access to the extensive historical weather database available in the archives in Yangambi, DRC. Derrick Muheki is a research fellow at the Research Foundation – Flanders (11M8825N). Wim Thiery received funding from the European Research Council (ERC) under the European Union's Horizon Framework research and innovation programme (grant agreement no. 101076909; “LACRIMA” project). Compute and storage resources and services used in this work were provided by the VSC (Flemish Supercomputer Center), funded by the Research Foundation – Flanders (FWO) and the Flemish Government.</p></ack><notes notes-type="financialsupport"><title>Financial support</title>

      <p id="d2e4181">This research has been supported by the Fonds Wetenschappelijk Onderzoek (grant nos. 11M8825N and 11M8823N), the HORIZON EUROPE European Research Council (grant no. 101076909), and the European Union's Horizon 2020 (grant agreement no. 101081369).</p>
  </notes><notes notes-type="reviewstatement"><title>Review statement</title>

      <p id="d2e4187">This paper was edited by Taesam Lee and reviewed by Chris Lennard and one anonymous referee.</p>
  </notes><ref-list>
    <title>References</title>

      <ref id="bib1.bibx1"><label>Adler et al.(2018)</label><mixed-citation>Adler, R. F., Sapiano, M. R. P., Huffman, G. J., Wang, J.-J., Gu, G., Bolvin, D., Chiu, L., Schneider, U., Becker, A., Nelkin, E., Xie, P., Ferraro, R., and Shin, D.-B.: The Global Precipitation Climatology Project (GPCP) monthly analysis (new version 2.3) and a review of 2017 global precipitation, Atmosphere-Basel, 9, <ext-link xlink:href="https://doi.org/10.3390/atmos9040138" ext-link-type="DOI">10.3390/atmos9040138</ext-link>, 2018.</mixed-citation></ref>
      <ref id="bib1.bibx2"><label>Alsdorf et al.(2016)</label><mixed-citation>Alsdorf, D., Beighley, E., Laraque, A., Lee, H., Tshimanga, R., O'Loughlin, F., Mahé, G., Dinga, B., Moukandi, G., and Spencer, R. G. M.: Opportunities for hydrologic research in the Congo Basin, Rev. Geophys., 54, 378–409, <ext-link xlink:href="https://doi.org/10.1002/2016RG000517" ext-link-type="DOI">10.1002/2016RG000517</ext-link>, 2016.</mixed-citation></ref>
      <ref id="bib1.bibx3"><label>Badami(2023)</label><mixed-citation>Badami, K.: How to Extract Table from Image in Python (OpenCV &amp; OCR), <uri>https://livefiredev.com/how-to-extract-table-from-image-in-python-opencv-ocr/</uri>, GitHub repository: <uri>https://github.com/livefiredev/ocr-extract-table-from-image-python</uri> (last access: 19 July 2024), 2023.</mixed-citation></ref>
      <ref id="bib1.bibx4"><label>Bangare et al.(2015)</label><mixed-citation>Bangare, S. L., Dubal, A., Bangare, P. S., and Patil, S. T.: Reviewing Otsu's method for image thresholding, International Journal of Applied Engineering Research, 10, <ext-link xlink:href="https://doi.org/10.37622/IJAER/10.9.2015.21777-21783" ext-link-type="DOI">10.37622/IJAER/10.9.2015.21777-21783</ext-link>, 2015.</mixed-citation></ref>
      <ref id="bib1.bibx5"><label>Bell et al.(2021)</label><mixed-citation>Bell, B., Hersbach, H., Simmons, A., Berrisford, P., Dahlgren, P., Horányi, A., Muñoz-Sabater, J., Nicolas, J., Radu, R., Schepers, D., Soci, C., Villaume, S., Bidlot, J.-R., Haimberger, L., Woollen, J., Buontempo, C., and Thépaut, J.-N.: The ERA5 global reanalysis: preliminary extension to 1950, Q. J. Roy. Meteor. Soc., 147, 4186–4227, <ext-link xlink:href="https://doi.org/10.1002/qj.4174" ext-link-type="DOI">10.1002/qj.4174</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bibx6"><label>Bradshaw et al.(2015)</label><mixed-citation>Bradshaw, E., Rickards, L., and Aarup, T.: Sea level data archaeology and the Global Sea Level Observing System (GLOSS), GeoResJ, 6, 9–16, <ext-link xlink:href="https://doi.org/10.1016/j.grj.2015.02.005" ext-link-type="DOI">10.1016/j.grj.2015.02.005</ext-link>, 2015.</mixed-citation></ref>
      <ref id="bib1.bibx7"><label>Braverman et al.(2017)</label><mixed-citation>Braverman, A., Chatterjee, S., Heyman, M., and Cressie, N.: Probabilistic evaluation of competing climate models, Adv. Stat. Clim. Meteorol. Oceanogr., 3, 93–105, <ext-link xlink:href="https://doi.org/10.5194/ascmo-3-93-2017" ext-link-type="DOI">10.5194/ascmo-3-93-2017</ext-link>, 2017.</mixed-citation></ref>
      <ref id="bib1.bibx8"><label>Brohan et al.(2006)</label><mixed-citation>Brohan, P., Kennedy, J., Harris, I., Tett, S., and Jones, P.: Uncertainty estimates in regional and global observed temperature changes: a new data set from 1850, J. Geophys. Res.-Atmos., 111, <ext-link xlink:href="https://doi.org/10.1029/2005JD006548" ext-link-type="DOI">10.1029/2005JD006548</ext-link>, 2006.</mixed-citation></ref>
      <ref id="bib1.bibx9"><label>Brönnimann et al.(2018)</label><mixed-citation>Brönnimann, S., Brugnara, Y., Allan, R. J., Brunet, M., Compo, G. P., Crouthamel, R. I., Jones, P. D., Jourdain, S., Luterbacher, J., Siegmund, P., Valente, M. A., and Wilkinson, C. W.: A roadmap to climate data rescue services, Geosci. Data J., 5, 28–39, <ext-link xlink:href="https://doi.org/10.1002/gdj3.56" ext-link-type="DOI">10.1002/gdj3.56</ext-link>, 2018.</mixed-citation></ref>
      <ref id="bib1.bibx10"><label>Chauhan and Parashar(2020)</label><mixed-citation>Chauhan, R. and Parashar, Y.: Outlier Pattern Detection in Time Series Sequences using Standard Deviation and Mean, Int. J. Res. Appl. Sci. Eng. Technol., 8, 858–863, <ext-link xlink:href="https://doi.org/10.22214/ijraset.2020.4000" ext-link-type="DOI">10.22214/ijraset.2020.4000</ext-link>, 2020. </mixed-citation></ref>
      <ref id="bib1.bibx11"><label>Copernicus Climate Change Service(2024)</label><mixed-citation>Copernicus Climate Change Service: Data Rescue Projects: C3S Data Rescue Portal, <uri>https://datarescue.climate.copernicus.eu/</uri>, last access: 1 October 2024.</mixed-citation></ref>
      <ref id="bib1.bibx12"><label>Craig and Hawkins(2020)</label><mixed-citation>Craig, P. M. and Hawkins, E.: Digitizing observations from the Met Office Daily Weather Reports for 1900–1910 using citizen scientist volunteers, Geosci. Data J., 7, 116–134, <ext-link xlink:href="https://doi.org/10.1002/gdj3.93" ext-link-type="DOI">10.1002/gdj3.93</ext-link>, 2020.</mixed-citation></ref>
      <ref id="bib1.bibx13"><label>de Smeth et al.(2024)</label><mixed-citation>de Smeth, K., Comer, J., and Murphy, C.: Hydrometric data rescue and extension of river flow records: method development and application to catchments modified by arterial drainage, Geosci. Data J., 11, 176–196, <ext-link xlink:href="https://doi.org/10.1002/gdj3.206" ext-link-type="DOI">10.1002/gdj3.206</ext-link>, 2024.</mixed-citation></ref>
      <ref id="bib1.bibx14"><label>Eyring et al.(2019)</label><mixed-citation>Eyring, V., Cox, P. M., Flato, G. M., Gleckler, P. J., Abramowitz, G., Caldwell, P., Collins, W. D., Gier, B. K., Hall, A. D., Hoffman, F. M., Hurtt, G. C., Jahn, A., Jones, C. D., Klein, S. A., Krasting, J. P., Kwiatkowski, L., Lorenz, R., Maloney, E., Meehl, G. A., Pendergrass, A. G., Pincus, R., Ruane, A. C., Russell, J. L., Sanderson, B. M., Santer, B. D., Sherwood, S. C., Simpson, I. R., Stouffer, R. J., and Williamson, M. S.: Taking climate model evaluation to the next level, Nat. Clim. Change, 9, 102–110, <ext-link xlink:href="https://doi.org/10.1038/s41558-018-0355-y" ext-link-type="DOI">10.1038/s41558-018-0355-y</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bibx15"><label>Flato et al.(2013)</label><mixed-citation>Flato, G., Marotzke, J., Abiodun, B., Braconnot, P., Chou, S., Collins, W., Cox, P., Driouech, F., Emori, S., Eyring, V., Forest, C., Gleckler, P., Guilyardi, E., Jakob, C., Kattsov, V., Reason, C., and Rummukainen, M.: Evaluation of climate models, in: Climate Change 2013: The Physical Science Basis, Contribution of Working Group I to the Fifth Assessment Report of the Intergovernmental Panel on Climate Change, edited by: Stocker, T., Qin, D., Plattner, G.-K., Tignor, M., Allen, S., Boschung, J., Nauels, A., Xia, Y., Bex, V., and Midgley, P., book section 9, Cambridge University Press, Cambridge, UK and New York, NY, USA, <ext-link xlink:href="https://doi.org/10.1017/CBO9781107415324.020" ext-link-type="DOI">10.1017/CBO9781107415324.020</ext-link>, 741–866, 2013.</mixed-citation></ref>
      <ref id="bib1.bibx16"><label>Folland et al.(2001)</label><mixed-citation>Folland, C., Rayner, N., Brown, S., Smith, T., Shen, S., Parker, D., Macadam, I., Jones, P., Nicholls, N., and Sexton, D.: Global temperature change and its uncertainties since 1861, Geophys. Res. Lett., 28, <ext-link xlink:href="https://doi.org/10.1029/2001GL012877" ext-link-type="DOI">10.1029/2001GL012877</ext-link>, 2001.</mixed-citation></ref>
      <ref id="bib1.bibx17"><label>Funk et al.(2015)</label><mixed-citation>Funk, C., Peterson, P., Landsfeld, M., Pedreros, D., Verdin, J., Shukla, S., Husak, G., Rowland, J., Harrison, L., Hoell, A., and Michaelsen, J.: The climate hazards infrared precipitation with stations – a new environmental record for monitoring extremes, Scientific Data, 2, <ext-link xlink:href="https://doi.org/10.1038/sdata.2015.66" ext-link-type="DOI">10.1038/sdata.2015.66</ext-link>, 2015.</mixed-citation></ref>
      <ref id="bib1.bibx18"><label>Hawkins et al.(2019)</label><mixed-citation>Hawkins, E., Burt, S., Brohan, P., Lockwood, M., Richardson, H., Roy, M., and Thomas, S.: Hourly weather observations from the Scottish Highlands (1883–1904) rescued by volunteer citizen scientists, Geosci. Data J., 6, 160–173, <ext-link xlink:href="https://doi.org/10.1002/gdj3.79" ext-link-type="DOI">10.1002/gdj3.79</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bibx19"><label>Hawkins et al.(2022)</label><mixed-citation>Hawkins, E., Burt, S., McCarthy, M., Murphy, C., Ross, C., Baldock, M., Brazier, J., Hersee, G., Huntley, J., Meats, R., O'Grady, J., Scrimgeour, I., and Silk, T.: Millions of historical monthly rainfall observations taken in the UK and Ireland rescued by citizen scientists, Geosci. Data J., 10, 246–261, <ext-link xlink:href="https://doi.org/10.1002/gdj3.157" ext-link-type="DOI">10.1002/gdj3.157</ext-link>, 2022.</mixed-citation></ref>
      <ref id="bib1.bibx20"><label>Hawkins et al.(2023)</label><mixed-citation>Hawkins, E., Brohan, P., Burgess, S. N., Burt, S., Compo, G. P., Gray, S. L., Haigh, I. D., Hersbach, H., Kuijjer, K., Martínez-Alvarado, O., McColl, C., Schurer, A. P., Slivinski, L., and Williams, J.: Rescuing historical weather observations improves quantification of severe windstorm risks, Nat. Hazards Earth Syst. Sci., 23, 1465–1482, <ext-link xlink:href="https://doi.org/10.5194/nhess-23-1465-2023" ext-link-type="DOI">10.5194/nhess-23-1465-2023</ext-link>, 2023.</mixed-citation></ref>
      <ref id="bib1.bibx21"><label>Hersbach et al.(2020)</label><mixed-citation>Hersbach, H., Bell, B., Berrisford, P., Hirahara, S., Horányi, A., Muñoz-Sabater, J., Nicolas, J., Peubey, C., Radu, R., Schepers, D., Simmons, A., Soci, C., Abdalla, S., Abellan, X., Balsamo, G., Bechtold, P., Biavati, G., Bidlot, J., Bonavita, M., De Chiara, G., Dahlgren, P., Dee, D., Diamantakis, M., Dragani, R., Flemming, J., Forbes, R., Fuentes, M., Geer, A., Haimberger, L., Healy, S., Hogan, R. J., Hólm, E., Janisková, M., Keeley, S., Laloyaux, P., Lopez, P., Lupu, C., Radnoti, G., de Rosnay, P., Rozum, I., Vamborg, F., Villaume, S., and Thépaut, J.-N.: The ERA5 global reanalysis, Q. J. Roy. Meteor. Soc., 146, 1999–2049, <ext-link xlink:href="https://doi.org/10.1002/qj.3803" ext-link-type="DOI">10.1002/qj.3803</ext-link>, 2020.</mixed-citation></ref>
      <ref id="bib1.bibx22"><label>Huffman et al.(2023)</label><mixed-citation>Huffman, G. J., Adler, R. F., Behrangi, A., Bolvin, D. T., Nelkin, E. J., Gu, G., and Ehsani, M. R.: The New Version 3.2 Global Precipitation Climatology Project (GPCP) monthly and daily precipitation products, J. Climate, 36, 7635–7655, <ext-link xlink:href="https://doi.org/10.1175/JCLI-D-23-0123.1" ext-link-type="DOI">10.1175/JCLI-D-23-0123.1</ext-link>, 2023.</mixed-citation></ref>
      <ref id="bib1.bibx23"><label>IPCC(2021)</label><mixed-citation>IPCC: Climate Change 2021: The Physical Science Basis, Contribution of Working Group I to the Sixth Assessment Report of the Intergovernmental Panel on Climate Change, Cambridge University Press, Cambridge, UK and New York, NY, USA, <ext-link xlink:href="https://doi.org/10.1017/9781009157896" ext-link-type="DOI">10.1017/9781009157896</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bibx24"><label>IPCC(2022)</label><mixed-citation>IPCC: Summary for policymakers, in: Climate Change 2022: Impacts, Adaptation and Vulnerability, Contribution of Working Group II to the Sixth Assessment Report of the Intergovernmental Panel on Climate Change, edited by: Pörtner, H. O., Roberts, D. C., Tignor, M., Poloczanska, E. S., Mintenbeck, K., Alegría, A., Craig, M., Langsdorf, S., Löschke, S., Möller, V., Okem, A., and Rama, B., Cambridge University Press, Cambridge, UK and New York, NY, USA, <ext-link xlink:href="https://doi.org/10.1017/9781009325844.001" ext-link-type="DOI">10.1017/9781009325844.001</ext-link>, 2022.</mixed-citation></ref>
      <ref id="bib1.bibx25"><label>JaidedAI(2023)</label><mixed-citation>JaidedAI: EasyOCR, <uri>https://github.com/JaidedAI/EasyOCR</uri> (last access: 19 July 2024), 2023.</mixed-citation></ref>
      <ref id="bib1.bibx26"><label>Jourdain et al.(2015)</label><mixed-citation>Jourdain, S., Roucaute, É., Dandin, P., Javelle, J.-P., Donet, I., Ménassère, S., and Cénac, N.: Le sauvetage de données climatologiques anciennes à Météo-France : De la conservation des documents à la mise à disposition des données, La Météorologie, 89, 47–55, <ext-link xlink:href="https://doi.org/10.4267/2042/56598" ext-link-type="DOI">10.4267/2042/56598</ext-link>, 2015.</mixed-citation></ref>
      <ref id="bib1.bibx27"><label>Kalnay et al.(1996)</label><mixed-citation>Kalnay, E., Kanamitsu, M., Kistler, R., Collins, W., Deaven, D., Gandin, L., Iredell, M., Saha, S., White, G., Woollen, J., Zhu, Y., Chelliah, M., Ebisuzaki, W., Higgins, W., Janowiak, J., Mo, K. C., Ropelewski, C., Wang, J., Leetmaa, A., Reynolds, R., Jenne, R., and Joseph, D.: The NCEP/NCAR 40-year reanalysis project, B. Am. Meteorol. Soc., 77, 437–472, <ext-link xlink:href="https://doi.org/10.1175/1520-0477(1996)077&lt;0437:TNYRP&gt;2.0.CO;2" ext-link-type="DOI">10.1175/1520-0477(1996)077&lt;0437:TNYRP&gt;2.0.CO;2</ext-link>, 1996.</mixed-citation></ref>
      <ref id="bib1.bibx28"><label>Kimutai et al.(2023)</label><mixed-citation>Kimutai, J., Faka, D., Ayabagabo, P., Barnes, C., Zachariah, M., Pinto, I., Vahlberg, M., Singh, R., Heinrich, D., Raju, E., Thalheimer, L., Sivanu, S., Otto, F., Philip, S., Kiswendsida, G., and Nioulé, L.: Limited data prevent assessment of role of climate change in deadly floods affecting highly vulnerable communities around Lake Kivu, Grantham Institute for Climate Change [data set], <ext-link xlink:href="https://doi.org/10.25561/105152" ext-link-type="DOI">10.25561/105152</ext-link>, 2023.</mixed-citation></ref>
      <ref id="bib1.bibx29"><label>King et al.(2023)</label><mixed-citation>King, A. D., Grose, M. R., Kimutai, J., Pinto, I., and Harrington, L. J.: Event attribution is not ready for a major role in loss and damage, Nat. Clim. Change, 13, 415–417, <ext-link xlink:href="https://doi.org/10.1038/s41558-023-01651-2" ext-link-type="DOI">10.1038/s41558-023-01651-2</ext-link>, 2023.</mixed-citation></ref>
      <ref id="bib1.bibx30"><label>Latapy et al.(2022)</label><mixed-citation>Latapy, A., Ferret, Y., Testut, L., Talke, S., Aarup, T., Pons, F., Jan, G., Bradshaw, E., and Pouvreau, N.: Data rescue process in the context of sea level reconstructions: an overview of the methodology, lessons learned, up-to-date best practices and recommendations, Geosci. Data J., 10, 396–425, <ext-link xlink:href="https://doi.org/10.1002/gdj3.179" ext-link-type="DOI">10.1002/gdj3.179</ext-link>, 2022.</mixed-citation></ref>
      <ref id="bib1.bibx31"><label>Morice et al.(2012)</label><mixed-citation>Morice, C. P., Kennedy, J. J., Rayner, N. A., and Jones, P. D.: Quantifying uncertainties in global and regional temperature change using an ensemble of observational estimates: the HadCRUT4 data set, J. Geophys. Res.-Atmos., 117, <ext-link xlink:href="https://doi.org/10.1029/2011JD017187" ext-link-type="DOI">10.1029/2011JD017187</ext-link>, 2012.</mixed-citation></ref>
      <ref id="bib1.bibx32"><label>Muheki et al.(2026a)</label><mixed-citation>Muheki, D., Vercruysse, B., Chandrasekar, K. K. T., Hufkens, K., and Thiery, W.: MeteoSaver v1.0 (v1.0-final), Zenodo [code], <ext-link xlink:href="https://doi.org/10.5281/zenodo.19123862" ext-link-type="DOI">10.5281/zenodo.19123862</ext-link>, 2026a.</mixed-citation></ref>
      <ref id="bib1.bibx33"><label>Muheki et al.(2026b)</label><mixed-citation>Muheki, D., Hufkens, K., Jacobsen, K., Verbeeck, H., Boeckx, P., Kankonde Ntumba, D., Kapalay Moulasa, O., Vercruysse, B., Birkholz, J. M., Verbruggen, C., Hawkins, E., Lampe, S., Kasongo Yakusu, E., Makanzu Imwangana, F., Mbifo, J., Besango Likwela, T., Meunier, F., Dewitte, O., Thorne, P., and Thiery, W.: From Paper to Proof: Revealing Congo Basin Warming Through Rescued Climate Archives, EGU General Assembly 2026, Vienna, Austria, 3–8 May 2026, EGU26-10837, <ext-link xlink:href="https://doi.org/10.5194/egusphere-egu26-10837" ext-link-type="DOI">10.5194/egusphere-egu26-10837</ext-link>, 2026b.</mixed-citation></ref>
      <ref id="bib1.bibx34"><label>Nidhi et al.(2021)</label><mixed-citation>Nidhi, Saluja, K., Mahajan, A., Jadhav, A., Aggarwal, N., Chaurasia, D., and Ghosh, D.: Table detection and extraction using OpenCV and novel optimization methods, in: 2021 International Conference on Computational Performance Evaluation (ComPE), <ext-link xlink:href="https://doi.org/10.1109/ComPE53109.2021.9752204" ext-link-type="DOI">10.1109/ComPE53109.2021.9752204</ext-link>, 755–760, 2021.</mixed-citation></ref>
      <ref id="bib1.bibx35"><label>Nockels et al.(2022)</label><mixed-citation>Nockels, J., Gooding, P., Ames, S., and Terras, M.: Understanding the application of handwritten text recognition technology in heritage contexts: a systematic review of Transkribus in published research, Archival Science, 22, 367–392, <ext-link xlink:href="https://doi.org/10.1007/s10502-022-09397-0" ext-link-type="DOI">10.1007/s10502-022-09397-0</ext-link>, 2022.</mixed-citation></ref>
      <ref id="bib1.bibx36"><label>Noone et al.(2024)</label><mixed-citation>Noone, S., D'Arcy, C., Donegan, S., Durkan, W., Essel, B., Healion, K., Hersbach, H., Madden, S., Marshall, J., McConnell, L., Mensah, I., Scroxton, N., Thiesen, S., and Thorne, P.: Investigating the potential for students to contribute to climate data rescue: introducing the Climate Data Rescue Africa project (CliDaR-Africa), Geosci. Data J., 1–17, <ext-link xlink:href="https://doi.org/10.1002/gdj3.248" ext-link-type="DOI">10.1002/gdj3.248</ext-link>, 2024.</mixed-citation></ref>
      <ref id="bib1.bibx37"><label>Noy et al.(2023)</label><mixed-citation>Noy, I., Wehner, M., Stone, D., Rosier, S., Frame, D., Lawal, K. A., and Newman, R.: Event attribution is ready to inform loss and damage negotiations, Nat. Clim. Change, 13, 1279–1281, <ext-link xlink:href="https://doi.org/10.1038/s41558-023-01865-4" ext-link-type="DOI">10.1038/s41558-023-01865-4</ext-link>, 2023.</mixed-citation></ref>
      <ref id="bib1.bibx38"><label>Otto et al.(2020)</label><mixed-citation>Otto, F. E. L., Harrington, L., Schmitt, K., Philip, S., Kew, S., van Oldenborgh, G. J., Singh, R., Kimutai, J., and Wolski, P.: Challenges to understanding extreme weather changes in lower income countries, B. Am. Meteorol. Soc., 101, E1851–E1860, <ext-link xlink:href="https://doi.org/10.1175/BAMS-D-19-0317.1" ext-link-type="DOI">10.1175/BAMS-D-19-0317.1</ext-link>, 2020.</mixed-citation></ref>
      <ref id="bib1.bibx39"><label>PaddleOCR(2024)</label><mixed-citation>PaddleOCR: PaddlePaddle/PaddleOCR, <uri>https://github.com/PaddlePaddle/PaddleOCR/tree/main</uri>, last access: 19 July 2024.</mixed-citation></ref>
      <ref id="bib1.bibx40"><label>Roberts et al.(2018)</label><mixed-citation>Roberts, M. J., Vidale, P. L., Senior, C., Hewitt, H. T., Bates, C., Berthou, S., Chang, P., Christensen, H. M., Danilov, S., Demory, M.-E., Griffies, S. M., Haarsma, R., Jung, T., Martin, G., Minobe, S., Ringler, T., Satoh, M., Schiemann, R., Scoccimarro, E., Stephens, G., and Wehner, M. F.: The benefits of global high resolution for climate simulation: process understanding and the enabling of stakeholder decisions at the regional scale, B. Am. Meteorol. Soc., 99, 2341–2359, <ext-link xlink:href="https://doi.org/10.1175/BAMS-D-15-00320.1" ext-link-type="DOI">10.1175/BAMS-D-15-00320.1</ext-link>, 2018.</mixed-citation></ref>
      <ref id="bib1.bibx41"><label>Sánchez et al.(2014)</label><mixed-citation>Sánchez, J. A., Bosch, V., Romero, V., Depuydt, K., and de Does, J.: Handwritten text recognition for historical documents in the transcriptorium project, in: Proceedings of the First International Conference on Digital Access to Textual Cultural Heritage, DATeCH '14, Association for Computing Machinery, New York, NY, USA, <ext-link xlink:href="https://doi.org/10.1145/2595188.2595193" ext-link-type="DOI">10.1145/2595188.2595193</ext-link>, 111–117, 2014.</mixed-citation></ref>
      <ref id="bib1.bibx42"><label>Sauvola and Pietikäinen(2000)</label><mixed-citation>Sauvola, J. and Pietikäinen, M.: Adaptive document image binarization, Pattern Recogn., 33, 225–236, <ext-link xlink:href="https://doi.org/10.1016/S0031-3203(99)00055-2" ext-link-type="DOI">10.1016/S0031-3203(99)00055-2</ext-link>, 2000.</mixed-citation></ref>
      <ref id="bib1.bibx43"><label>Seneviratne et al.(2021)</label><mixed-citation>Seneviratne, S., Zhang, X., Adnan, M., Badi, W., Dereczynski, C., Di Luca, A., Ghosh, S., Iskandar, I., Kossin, J., Lewis, S., Otto, F., Pinto, I., Satoh, M., Vicente-Serrano, S., Wehner, M., and Zhou, B.: Weather and climate extreme events in a changing climate, in: Climate Change 2021: The Physical Science Basis, Contribution of Working Group I to the Sixth Assessment Report of the Intergovernmental Panel on Climate Change, edited by: Masson-Delmotte, V., Zhai, P., Pirani, A., Connors, S. L., Péan, C., Berger, S., Caud, N., Chen, Y., Goldfarb, L., Gomis, M. I., Huang, M., Leitzell, K., Lonnoy, E., Matthews, J. B. R., Maycock, T. K., Waterfield, T., Yelekçi, O., Yu, R., and Zhou, B., book section 11, Cambridge University Press, Cambridge, UK and New York, NY, USA, <ext-link xlink:href="https://doi.org/10.1017/9781009157896.013" ext-link-type="DOI">10.1017/9781009157896.013</ext-link>, 1513–1765, 2021.</mixed-citation></ref>
      <ref id="bib1.bibx44"><label>Shaman(2014)</label><mixed-citation>Shaman, J. L.: Letter to the Editor: Caution needed when using gridded meteorological data products for analyses in Africa, Euro surveillance: bulletin Europeen sur les maladies transmissibles, European Communicable Disease Bulletin, 19, 20930, <ext-link xlink:href="https://doi.org/10.2807/1560-7917.ES2014.19.41.20930" ext-link-type="DOI">10.2807/1560-7917.ES2014.19.41.20930</ext-link>, 2014.</mixed-citation></ref>
      <ref id="bib1.bibx45"><label>Suresh et al.(2010)</label><mixed-citation>Suresh, L., Simha, J. B., and Velur, R.: Seeding Cluster centers of <inline-formula><mml:math id="M168" display="inline"><mml:mi>K</mml:mi></mml:math></inline-formula>-means Clustering through Median projection, in: International Conference on Complex, Intelligent and Software Intensive Systems, 217–222, <ext-link xlink:href="https://doi.org/10.1109/CISIS.2010.133" ext-link-type="DOI">10.1109/CISIS.2010.133</ext-link>, 2010.</mixed-citation></ref>
      <ref id="bib1.bibx46"><label>Terras(2022)</label><mixed-citation>Terras, M.: Chapter 7: Inviting AI into the Archives: The Reception of Handwritten Recognition Technology into Historical Manuscript Transcription, Digital Humanities Research – Bielefeld University Press, <ext-link xlink:href="https://doi.org/10.14361/9783839455845-008" ext-link-type="DOI">10.14361/9783839455845-008</ext-link>, 2022. </mixed-citation></ref>
      <ref id="bib1.bibx47"><label>Transkribus(2024)</label><mixed-citation>Transkribus: Transcribing 3 million scans at the National Archives of the Netherlands, <uri>https://www.transkribus.org/success-story/national-archives-of-the-netherlands</uri>, last access: 2 October 2024.</mixed-citation></ref>
      <ref id="bib1.bibx48"><label>Vercruysse et al.(2025)</label><mixed-citation>Vercruysse, B., Birkholz, J. M., Thirukokaranam Chandrasekar, K. K., Muheki, D., Thiery, W., Verbeeck, H., Hufkens, K., Jacobsen, K., and Verbruggen, C.: Human-in-the-loop tabular data extraction methods for historical climate data rescue, Int. J. Doc. Anal. Recog., <ext-link xlink:href="https://doi.org/10.1007/s10032-025-00524-y" ext-link-type="DOI">10.1007/s10032-025-00524-y</ext-link>, 2025.</mixed-citation></ref>
      <ref id="bib1.bibx49"><label>Wilkinson and Vasquez(2017)</label><mixed-citation>Wilkinson, C. and Vasquez, M.: Report on the Imaging of Sources of Historic Ice, Meteorological and Oceanographic Data in the Southern Ocean – Åland Maritime Museum, Mariehamn, Finland, <ext-link xlink:href="https://doi.org/10.13140/RG.2.2.24494.61763" ext-link-type="DOI">10.13140/RG.2.2.24494.61763</ext-link>, 2017.</mixed-citation></ref>
      <ref id="bib1.bibx50"><label>WMO(2024)</label><mixed-citation>WMO: Guidelines on Best Practices for Climate Data Rescue, World Meteorological Organization, <uri>https://library.wmo.int/idurl/4/55395</uri>, last access: 30 September 2024.</mixed-citation></ref>
      <ref id="bib1.bibx51"><label>Wypych et al.(2024)</label><mixed-citation>Wypych, A., Ustrnul, Z., Kopaczka-Lepa, D., and Walus, K.: Weather conditions in southern Poland at the turn of the 20th century – insights from archived observational records, J. Hist. Geogr., 86, 191–203, <ext-link xlink:href="https://doi.org/10.1016/j.jhg.2024.08.005" ext-link-type="DOI">10.1016/j.jhg.2024.08.005</ext-link>, 2024.</mixed-citation></ref>
      <ref id="bib1.bibx52"><label>Xu et al.(2014)</label><mixed-citation>Xu, W.-H., Li, Q.-X., Yang, S., and Xu, Y.: Overview of global monthly surface temperature data in the past century and preliminary integration, Advances in Climate Change Research, 5, 111–117, <ext-link xlink:href="https://doi.org/10.1016/j.accre.2014.11.003" ext-link-type="DOI">10.1016/j.accre.2014.11.003</ext-link>, 2014.</mixed-citation></ref>
      <ref id="bib1.bibx53"><label>Yang et al.(1994)</label><mixed-citation>Yang, J.-D., Chen, Y.-S., and Hsu, W.-H.: Adaptive thresholding algorithm and its hardware implementation, Pattern Recogn. Lett., 15, 141–150, <ext-link xlink:href="https://doi.org/10.1016/0167-8655(94)90043-4" ext-link-type="DOI">10.1016/0167-8655(94)90043-4</ext-link>, 1994.</mixed-citation></ref>
      <ref id="bib1.bibx54"><label>Yuan et al.(2020)</label><mixed-citation>Yuan, J., Li, H., Wang, M., Liu, R., Li, C., and Wang, B.: An OpenCV-based Framework for Table Information Extraction, 2020 IEEE International Conference on Knowledge Graph (ICKG), <ext-link xlink:href="https://doi.org/10.1109/ICBK50248.2020.00093" ext-link-type="DOI">10.1109/ICBK50248.2020.00093</ext-link>, 621–628, 2020.</mixed-citation></ref>

  </ref-list></back>
    <!--<article-title-html>MeteoSaver v1.0: a machine-learning based software  for the transcription of historical weather data</article-title-html>
<abstract-html/>
<ref-html id="bib1.bib1"><label>Adler et al.(2018)</label><mixed-citation>
       Adler, R. F., Sapiano, M. R. P., Huffman, G. J., Wang, J.-J., Gu, G., Bolvin, D., Chiu, L., Schneider, U., Becker, A., Nelkin, E., Xie, P., Ferraro, R., and Shin, D.-B.: The Global Precipitation Climatology Project (GPCP) monthly analysis (new version 2.3) and a review of 2017 global precipitation, Atmosphere-Basel, 9, <a href="https://doi.org/10.3390/atmos9040138" target="_blank">https://doi.org/10.3390/atmos9040138</a>, 2018.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib2"><label>Alsdorf et al.(2016)</label><mixed-citation>
       Alsdorf, D., Beighley, E., Laraque, A., Lee, H., Tshimanga, R., O'Loughlin, F., Mahé, G., Dinga, B., Moukandi, G., and Spencer, R. G. M.: Opportunities for hydrologic research in the Congo Basin, Rev. Geophys., 54, 378–409, <a href="https://doi.org/10.1002/2016RG000517" target="_blank">https://doi.org/10.1002/2016RG000517</a>, 2016.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib3"><label>Badami(2023)</label><mixed-citation>
       Badami, K.: How to Extract Table from Image in Python (OpenCV &amp; OCR), <a href="https://livefiredev.com/how-to-extract-table-from-image-in-python-opencv-ocr/" target="_blank"/>, gitHub repository: <a href="https://github.com/livefiredev/ocr-extract-table-from-image-python" target="_blank"/> (last access: 19 July 2024), 2023.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib4"><label>Bangare et al.(2015)</label><mixed-citation>
       Bangare, S. L., Dubal, A., Bangare, P. S., and Patil, S. T.: Reviewing Otsu's method for image thresholding, International Journal of Applied Engineering Research, 10, <a href="https://doi.org/10.37622/IJAER/10.9.2015.21777-21783" target="_blank">https://doi.org/10.37622/IJAER/10.9.2015.21777-21783</a>, 2015.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib5"><label>Bell et al.(2021)</label><mixed-citation>
       Bell, B., Hersbach, H., Simmons, A., Berrisford, P., Dahlgren, P., Horányi, A., Muñoz-Sabater, J., Nicolas, J., Radu, R., Schepers, D., Soci, C., Villaume, S., Bidlot, J.-R., Haimberger, L., Woollen, J., Buontempo, C., and Thépaut, J.-N.: The ERA5 global reanalysis: preliminary extension to 1950, Q. J. Roy. Meteor. Soc., 147, 4186–4227, <a href="https://doi.org/10.1002/qj.4174" target="_blank">https://doi.org/10.1002/qj.4174</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib6"><label>Bradshaw et al.(2015)</label><mixed-citation>
       Bradshaw, E., Rickards, L., and Aarup, T.: Sea level data archaeology and the Global Sea Level Observing System (GLOSS), GeoResJ, 6, 9–16, <a href="https://doi.org/10.1016/j.grj.2015.02.005" target="_blank">https://doi.org/10.1016/j.grj.2015.02.005</a>, 2015.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib7"><label>Braverman et al.(2017)</label><mixed-citation>
       Braverman, A., Chatterjee, S., Heyman, M., and Cressie, N.: Probabilistic evaluation of competing climate models, Adv. Stat. Clim. Meteorol. Oceanogr., 3, 93–105, <a href="https://doi.org/10.5194/ascmo-3-93-2017" target="_blank">https://doi.org/10.5194/ascmo-3-93-2017</a>, 2017.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib8"><label>Brohan et al.(2006)</label><mixed-citation>
       Brohan, P., Kennedy, J., Harris, I., Tett, S., and Jones, P.: Uncertainty estimates in regional and global observed temperature changes: a new data set from 1850, J. Geophys. Res.-Atmos., 111, <a href="https://doi.org/10.1029/2005JD006548" target="_blank">https://doi.org/10.1029/2005JD006548</a>, 2006.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib9"><label>Brönnimann et al.(2018)</label><mixed-citation>
       Brönnimann, S., Brugnara, Y., Allan, R. J., Brunet, M., Compo, G. P., Crouthamel, R. I., Jones, P. D., Jourdain, S., Luterbacher, J., Siegmund, P., Valente, M. A., and Wilkinson, C. W.: A roadmap to climate data rescue services, Geosci. Data J., 5, 28–39, <a href="https://doi.org/10.1002/gdj3.56" target="_blank">https://doi.org/10.1002/gdj3.56</a>, 2018.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib10"><label>Chauhan and Parashar(2020)</label><mixed-citation>
      
Chauhan, R. and Parashar, Y.: Outlier Pattern Detection in Time Series Sequences using Standard Deviation and Mean, Int. J. Res. Appl. Sci. Eng. Technol., 8, 858–863, <a href="https://doi.org/10.22214/ijraset.2020.4000" target="_blank">https://doi.org/10.22214/ijraset.2020.4000</a>, 2020.


    </mixed-citation></ref-html>
<ref-html id="bib1.bib11"><label>Copernicus Climate Change Service(2024)</label><mixed-citation>
      
Copernicus Climate Change Service: Data Rescue Projects: C3S Data Rescue
Portal, <a href="https://datarescue.climate.copernicus.eu/" target="_blank"/>, last access: 1 October 2024.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib12"><label>Craig and Hawkins(2020)</label><mixed-citation>
       Craig, P. M. and Hawkins, E.: Digitizing observations from the Met Office Daily Weather Reports for 1900–1910 using citizen scientist volunteers, Geosci. Data J., 7, 116–134, <a href="https://doi.org/10.1002/gdj3.93" target="_blank">https://doi.org/10.1002/gdj3.93</a>, 2020.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib13"><label>de Smeth et al.(2024)</label><mixed-citation>
       de Smeth, K., Comer, J., and Murphy, C.: Hydrometric data rescue and extension of river flow records: method development and application to catchments modified by arterial drainage, Geosci. Data J., 11, 176–196, <a href="https://doi.org/10.1002/gdj3.206" target="_blank">https://doi.org/10.1002/gdj3.206</a>, 2024.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib14"><label>Eyring et al.(2019)</label><mixed-citation>
       Eyring, V., Cox, P. M., Flato, G. M., Gleckler, P. J., Abramowitz, G., Caldwell, P., Collins, W. D., Gier, B. K., Hall, A. D., Hoffman, F. M., Hurtt, G. C., Jahn, A., Jones, C. D., Klein, S. A., Krasting, J. P., Kwiatkowski, L., Lorenz, R., Maloney, E., Meehl, G. A., Pendergrass, A. G., Pincus, R., Ruane, A. C., Russell, J. L., Sanderson, B. M., Santer, B. D., Sherwood, S. C., Simpson, I. R., Stouffer, R. J., and Williamson, M. S.: Taking climate model evaluation to the next level, Nat. Clim. Change, 9, 102–110, <a href="https://doi.org/10.1038/s41558-018-0355-y" target="_blank">https://doi.org/10.1038/s41558-018-0355-y</a>, 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib15"><label>Flato et al.(2013)</label><mixed-citation>
       Flato, G., Marotzke, J., Abiodun, B., Braconnot, P., Chou, S., Collins, W., Cox, P., Driouech, F., Emori, S., Eyring, V., Forest, C., Gleckler, P., Guilyardi, E., Jakob, C., Kattsov, V., Reason, C., and Rummukainen, M.: Evaluation of climate models, in: Climate Change 2013: The Physical Science Basis, Contribution of Working Group I to the Fifth Assessment Report of the Intergovernmental Panel on Climate Change, edited by: Stocker, T., Qin, D., Plattner, G.-K., Tignor, M., Allen, S., Boschung, J., Nauels, A., Xia, Y., Bex, V., and Midgley, P., book section 9, Cambridge University Press, Cambridge, UK and New York, NY, USA, <a href="https://doi.org/10.1017/CBO9781107415324.020" target="_blank">https://doi.org/10.1017/CBO9781107415324.020</a>, 741–866, 2013.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib16"><label>Folland et al.(2001)</label><mixed-citation>
       Folland, C., Rayner, N., Brown, S., Smith, T., Shen, S., Parker, D., Macadam, I., Jones, P., Nicholls, N., and Sexton, D.: Global temperature change and its uncertainties since 1861, Geophys. Res. Lett., 28, <a href="https://doi.org/10.1029/2001GL012877" target="_blank">https://doi.org/10.1029/2001GL012877</a>, 2001.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib17"><label>Funk et al.(2015)</label><mixed-citation>
       Funk, C., Peterson, P., Landsfeld, M., Pedreros, D., Verdin, J., Shukla, S., Husak, G., Rowland, J., Harrison, L., Hoell, A., and Michaelsen, J.: The climate hazards infrared precipitation with stations – a new environmental record for monitoring extremes, Scientific Data, 2, <a href="https://doi.org/10.1038/sdata.2015.66" target="_blank">https://doi.org/10.1038/sdata.2015.66</a>, 2015.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib18"><label>Hawkins et al.(2019)</label><mixed-citation>
       Hawkins, E., Burt, S., Brohan, P., Lockwood, M., Richardson, H., Roy, M., and Thomas, S.: Hourly weather observations from the Scottish Highlands (1883–1904) rescued by volunteer citizen scientists, Geosci. Data J., 6, 160–173, <a href="https://doi.org/10.1002/gdj3.79" target="_blank">https://doi.org/10.1002/gdj3.79</a>, 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib19"><label>Hawkins et al.(2022)</label><mixed-citation>
       Hawkins, E., Burt, S., McCarthy, M., Murphy, C., Ross, C., Baldock, M., Brazier, J., Hersee, G., Huntley, J., Meats, R., O'Grady, J., Scrimgeour, I., and Silk, T.: Millions of historical monthly rainfall observations taken in the UK and Ireland rescued by citizen scientists, Geosci. Data J., 10, 246–261, <a href="https://doi.org/10.1002/gdj3.157" target="_blank">https://doi.org/10.1002/gdj3.157</a>, 2022.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib20"><label>Hawkins et al.(2023)</label><mixed-citation>
       Hawkins, E., Brohan, P., Burgess, S. N., Burt, S., Compo, G. P., Gray, S. L., Haigh, I. D., Hersbach, H., Kuijjer, K., Martínez-Alvarado, O., McColl, C., Schurer, A. P., Slivinski, L., and Williams, J.: Rescuing historical weather observations improves quantification of severe windstorm risks, Nat. Hazards Earth Syst. Sci., 23, 1465–1482, <a href="https://doi.org/10.5194/nhess-23-1465-2023" target="_blank">https://doi.org/10.5194/nhess-23-1465-2023</a>, 2023.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib21"><label>Hersbach et al.(2020)</label><mixed-citation>
       Hersbach, H., Bell, B., Berrisford, P., Hirahara, S., Horányi, A., Muñoz-Sabater, J., Nicolas, J., Peubey, C., Radu, R., Schepers, D., Simmons, A., Soci, C., Abdalla, S., Abellan, X., Balsamo, G., Bechtold, P., Biavati, G., Bidlot, J., Bonavita, M., De Chiara, G., Dahlgren, P., Dee, D., Diamantakis, M., Dragani, R., Flemming, J., Forbes, R., Fuentes, M., Geer, A., Haimberger, L., Healy, S., Hogan, R. J., Hólm, E., Janisková, M., Keeley, S., Laloyaux, P., Lopez, P., Lupu, C., Radnoti, G., de Rosnay, P., Rozum, I., Vamborg, F., Villaume, S., and Thépaut, J.-N.: The ERA5 global reanalysis, Q. J. Roy. Meteor. Soc., 146, 1999–2049, <a href="https://doi.org/10.1002/qj.3803" target="_blank">https://doi.org/10.1002/qj.3803</a>, 2020.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib22"><label>Huffman et al.(2023)</label><mixed-citation>
       Huffman, G. J., Adler, R. F., Behrangi, A., Bolvin, D. T., Nelkin, E. J., Gu, G., and Ehsani, M. R.: The New Version 3.2 Global Precipitation Climatology Project (GPCP) monthly and daily precipitation products, J. Climate, 36, 7635–7655, <a href="https://doi.org/10.1175/JCLI-D-23-0123.1" target="_blank">https://doi.org/10.1175/JCLI-D-23-0123.1</a>, 2023.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib23"><label>IPCC(2021)</label><mixed-citation>
       IPCC: Climate Change 2021: The Physical Science Basis, Contribution of Working Group I to the Sixth Assessment Report of the Intergovernmental Panel on Climate Change, Cambridge University Press, Cambridge, UK and New York, NY, USA, <a href="https://doi.org/10.1017/9781009157896" target="_blank">https://doi.org/10.1017/9781009157896</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib24"><label>IPCC(2022)</label><mixed-citation>
       IPCC: Summary for policymakers, in: Climate Change 2022: Impacts, Adaptation and Vulnerability, Contribution of Working Group II to the Sixth Assessment Report of the Intergovernmental Panel on Climate Change, edited by: Pörtner, H. O., Roberts, D. C., Tignor, M., Poloczanska, E. S., Mintenbeck, K., Alegría, A., Craig, M., Langsdorf, S., Löschke, S., Möller, V., Okem, A., and Rama, B., Cambridge University Press, Cambridge, UK and New York, NY, USA, <a href="https://doi.org/10.1017/9781009325844.001" target="_blank">https://doi.org/10.1017/9781009325844.001</a>, 2022.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib25"><label>JaidedAI(2023)</label><mixed-citation>
       JaidedAI: EasyOCR, <a href="https://github.com/JaidedAI/EasyOCR" target="_blank"/> (last access: 19 July 2024), 2023.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib26"><label>Jourdain et al.(2015)</label><mixed-citation>
       Jourdain, S., Roucaute, É., Dandin, P., Javelle, J.-P., Donet, I., Ménassère, S., and Cénac, N.: Le sauvetage de données climatologiques anciennes à Météo-France : De la conservation des documents à la mise à disposition des données, La Météorologie, 89, 47–55, <a href="https://doi.org/10.4267/2042/56598" target="_blank">https://doi.org/10.4267/2042/56598</a>, 2015.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib27"><label>Kalnay et al.(1996)</label><mixed-citation>
       Kalnay, E., Kanamitsu, M., Kistler, R., Collins, W., Deaven, D., Gandin, L., Iredell, M., Saha, S., White, G., Woollen, J., Zhu, Y., Chelliah, M., Ebisuzaki, W., Higgins, W., Janowiak, J., Mo, K. C., Ropelewski, C., Wang, J., Leetmaa, A., Reynolds, R., Jenne, R., and Joseph, D.: The NCEP/NCAR 40-year reanalysis project, B. Am. Meteorol. Soc., 77, 437–472, <a href="https://doi.org/10.1175/1520-0477(1996)077&lt;0437:TNYRP&gt;2.0.CO;2" target="_blank">https://doi.org/10.1175/1520-0477(1996)077&lt;0437:TNYRP&gt;2.0.CO;2</a>, 1996.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib28"><label>Kimutai et al.(2023)</label><mixed-citation>
       Kimutai, J., Faka, D.,
Ayabagabo, P., Barnes, C., Zachariah, M., Pinto, I., Vahlberg, M., Singh, R.,
Heinrich, D., Raju, E., Thalheimer, L., Sivanu, S., Otto, F., Philip, S.,
Kiswendsida, G., and Nioulé, L.: Limited data prevent assessment of role
of climate change in deadly floods affecting highly vulnerable communities
around Lake Kivu, Grantham Institute for Climate Change [data set], <a href="https://doi.org/10.25561/105152" target="_blank">https://doi.org/10.25561/105152</a>, 2023.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib29"><label>King et al.(2023)</label><mixed-citation>
       King, A. D., Grose, M. R., Kimutai, J., Pinto, I., and Harrington, L. J.: Event attribution is not ready for a major role in loss and damage, Nat. Clim. Change, 13, 415–417, <a href="https://doi.org/10.1038/s41558-023-01651-2" target="_blank">https://doi.org/10.1038/s41558-023-01651-2</a>, 2023.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib30"><label>Latapy et al.(2022)</label><mixed-citation>
       Latapy, A., Ferret, Y., Testut, L., Talke, S., Aarup, T., Pons, F., Jan, G., Bradshaw, E., and Pouvreau, N.: Data rescue process in the context of sea level reconstructions: an overview of the methodology, lessons learned, up-to-date best practices and recommendations, Geosci. Data J., 10, 396–425, <a href="https://doi.org/10.1002/gdj3.179" target="_blank">https://doi.org/10.1002/gdj3.179</a>, 2022.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib31"><label>Morice et al.(2012)</label><mixed-citation>
       Morice, C. P., Kennedy, J. J., Rayner, N. A., and Jones, P. D.: Quantifying uncertainties in global and regional temperature change using an ensemble of observational estimates: the HadCRUT4 data set, J. Geophys. Res.-Atmos., 117, <a href="https://doi.org/10.1029/2011JD017187" target="_blank">https://doi.org/10.1029/2011JD017187</a>, 2012.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib32"><label>Muheki et al.(2026a)</label><mixed-citation>
       Muheki, D., Vercruysse, B.,
Chandrasekar, K. K. T., Hufkens, K., and Thiery, W.: MeteoSaver v1.0
(v1.0-final), Zenodo [code], <a href="https://doi.org/10.5281/zenodo.19123862" target="_blank">https://doi.org/10.5281/zenodo.19123862</a>, 2026a.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib33"><label>Muheki et al.(2026b)</label><mixed-citation>
      
Muheki, D., Hufkens, K., Jacobsen, K., Verbeeck, H., Boeckx, P., Kankonde Ntumba, D., Kapalay Moulasa, O., Vercruysse, B., Birkholz, J. M., Verbruggen, C., Hawkins, E., Lampe, S., Kasongo Yakusu, E., Makanzu Imwangana, F., Mbifo, J., Besango Likwela, T., Meunier, F., Dewitte, O., Thorne, P., and Thiery, W.: From Paper to Proof: Revealing Congo Basin Warming Through Rescued Climate Archives, EGU General Assembly 2026, Vienna, Austria, 3–8 May 2026, EGU26-10837, <a href="https://doi.org/10.5194/egusphere-egu26-10837" target="_blank">https://doi.org/10.5194/egusphere-egu26-10837</a>, 2026b.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib34"><label>Nidhi et al.(2021)</label><mixed-citation>
       Nidhi, Saluja, K., Mahajan, A., Jadhav, A., Aggarwal, N., Chaurasia, D., and Ghosh, D.: Table detection and extraction using OpenCV and novel optimization methods, in: 2021 International Conference on Computational Performance Evaluation (ComPE), <a href="https://doi.org/10.1109/ComPE53109.2021.9752204" target="_blank">https://doi.org/10.1109/ComPE53109.2021.9752204</a>, 755–760, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib35"><label>Nockels et al.(2022)</label><mixed-citation>
       Nockels, J., Gooding, P., Ames, S., and Terras, M.: Understanding the application of handwritten text recognition technology in heritage contexts: a systematic review of Transkribus in published research, Archival Science, 22, 367–392, <a href="https://doi.org/10.1007/s10502-022-09397-0" target="_blank">https://doi.org/10.1007/s10502-022-09397-0</a>, 2022.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib36"><label>Noone et al.(2024)</label><mixed-citation>
       Noone, S., D'Arcy, C., Donegan, S., Durkan, W., Essel, B., Healion, K., Hersbach, H., Madden, S., Marshall, J., McConnell, L., Mensah, I., Scroxton, N., Thiesen, S., and Thorne, P.: Investigating the potential for students to contribute to climate data rescue: introducing the Climate Data Rescue Africa project (CliDaR-Africa), Geosci. Data J., 1–17, <a href="https://doi.org/10.1002/gdj3.248" target="_blank">https://doi.org/10.1002/gdj3.248</a>, 2024.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib37"><label>Noy et al.(2023)</label><mixed-citation>
       Noy, I., Wehner, M., Stone, D., Rosier, S., Frame, D., Lawal, K. A., and Newman, R.: Event attribution is ready to inform loss and damage negotiations, Nat. Clim. Change, 13, 1279–1281, <a href="https://doi.org/10.1038/s41558-023-01865-4" target="_blank">https://doi.org/10.1038/s41558-023-01865-4</a>, 2023.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib38"><label>Otto et al.(2020)</label><mixed-citation>
       Otto, F. E. L., Harrington, L., Schmitt, K., Philip, S., Kew, S., van Oldenborgh, G. J., Singh, R., Kimutai, J., and Wolski, P.: Challenges to understanding extreme weather changes in lower income countries, B. Am. Meteorol. Soc., 101, E1851–E1860, <a href="https://doi.org/10.1175/BAMS-D-19-0317.1" target="_blank">https://doi.org/10.1175/BAMS-D-19-0317.1</a>, 2020.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib39"><label>PaddleOCR(2024)</label><mixed-citation>
       PaddleOCR: PaddlePaddle/PaddleOCR, <a href="https://github.com/PaddlePaddle/PaddleOCR/tree/main" target="_blank"/>, last access: 19 July 2024.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib40"><label>Roberts et al.(2018)</label><mixed-citation>
       Roberts, M. J., Vidale, P. L., Senior, C., Hewitt, H. T., Bates, C., Berthou, S., Chang, P., Christensen, H. M., Danilov, S., Demory, M.-E., Griffies, S. M., Haarsma, R., Jung, T., Martin, G., Minobe, S., Ringler, T., Satoh, M., Schiemann, R., Scoccimarro, E., Stephens, G., and Wehner, M. F.: The benefits of global high resolution for climate simulation: process understanding and the enabling of stakeholder decisions at the regional scale, B. Am. Meteorol. Soc., 99, 2341–2359, <a href="https://doi.org/10.1175/BAMS-D-15-00320.1" target="_blank">https://doi.org/10.1175/BAMS-D-15-00320.1</a>, 2018.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib41"><label>Sánchez et al.(2014)</label><mixed-citation>
       Sánchez, J. A., Bosch, V., Romero, V., Depuydt, K., and de Does, J.: Handwritten text recognition for historical documents in the transcriptorium project, in: Proceedings of the First International Conference on Digital Access to Textual Cultural Heritage, DATeCH '14, Association for Computing Machinery, New York, NY, USA, <a href="https://doi.org/10.1145/2595188.2595193" target="_blank">https://doi.org/10.1145/2595188.2595193</a>, 111–117, 2014.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib42"><label>Sauvola and Pietikäinen(2000)</label><mixed-citation>
       Sauvola, J. and Pietikäinen, M.: Adaptive document image binarization, Pattern Recogn., 33, 225–236, <a href="https://doi.org/10.1016/S0031-3203(99)00055-2" target="_blank">https://doi.org/10.1016/S0031-3203(99)00055-2</a>, 2000.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib43"><label>Seneviratne et al.(2021)</label><mixed-citation>
       Seneviratne, S., Zhang, X., Adnan, M., Badi, W., Dereczynski, C., Di Luca, A., Ghosh, S., Iskandar, I., Kossin, J., Lewis, S., Otto, F., Pinto, I., Satoh, M., Vicente-Serrano, S., Wehner, M., and Zhou, B.: Weather and climate extreme events in a changing climate, in: Climate Change 2021: The Physical Science Basis, Contribution of Working Group I to the Sixth Assessment Report of the Intergovernmental Panel on Climate Change, edited by: Masson-Delmotte, V., Zhai, P., Pirani, A., Connors, S. L., Péan, C., Berger, S., Caud, N., Chen, Y., Goldfarb, L., Gomis, M. I., Huang, M., Leitzell, K., Lonnoy, E., Matthews, J. B. R., Maycock, T. K., Waterfield, T., Yelekçi, O., Yu, R., and Zhou, B., book section 11, Cambridge University Press, Cambridge, UK and New York, NY, USA, <a href="https://doi.org/10.1017/9781009157896.013" target="_blank">https://doi.org/10.1017/9781009157896.013</a>, 1513–1765, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib44"><label>Shaman(2014)</label><mixed-citation>
      
Shaman, J. L.: Letter to the Editor: Caution needed when using gridded meteorological data products for analyses in Africa, Euro surveillance: bulletin Europeen sur les maladies transmissibles, European Communicable Disease Bulletin, 19, 20930, <a href="https://doi.org/10.2807/1560-7917.ES2014.19.41.20930" target="_blank">https://doi.org/10.2807/1560-7917.ES2014.19.41.20930</a>, 2014.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib45"><label>Suresh et al.(2010)</label><mixed-citation>
      
Suresh, L., Simha, J. B., and Velur, R.: Seeding Cluster centers of <i>K</i>-means Clustering through Median projection, in: International Conference on Complex, Intelligent and Software Intensive Systems, 217–222, <a href="https://doi.org/10.1109/CISIS.2010.133" target="_blank">https://doi.org/10.1109/CISIS.2010.133</a>, 2010.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib46"><label>Terras(2022)</label><mixed-citation>
       Terras, M.: Chapter 7: Inviting AI into the Archives: The Reception of Handwritten Recognition Technology into Historical Manuscript Transcription, Digital Humanities Research – Bielefeld University Press, <a href="https://doi.org/10.14361/9783839455845-008" target="_blank">https://doi.org/10.14361/9783839455845-008</a>, 2022.


    </mixed-citation></ref-html>
<ref-html id="bib1.bib47"><label>Transkribus(2024)</label><mixed-citation>
       Transkribus: Transcribing 3 million scans at the National Archives of the Netherlands, <a href="https://www.transkribus.org/success-story/national-archives-of-the-netherlands" target="_blank">https://www.transkribus.org/success-story/national-archives-of-the-netherlands</a>, last access: 2 October 2024.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib48"><label>Vercruysse et al.(2025)</label><mixed-citation>
       Vercruysse, B., Birkholz, J. M., Thirukokaranam Chandrasekar, K. K., Muheki, D., Thiery, W., Verbeeck, H., Hufkens, K., Jacobsen, K., and Verbruggen, C.: Human-in-the-loop tabular data extraction methods for historical climate data rescue, Int. J. Doc. Anal. Recog., <a href="https://doi.org/10.1007/s10032-025-00524-y" target="_blank">https://doi.org/10.1007/s10032-025-00524-y</a>, 2025.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib49"><label>Wilkinson and Vasquez(2017)</label><mixed-citation>
       Wilkinson, C. and
Vasquez, M.: Report on the Imaging of Sources of Historic Ice, Meteorological
and Oceanographic Data in the Southern Ocean – Åland Maritime Museum, Mariehamn, Finland, <a href="https://doi.org/10.13140/RG.2.2.24494.61763" target="_blank">https://doi.org/10.13140/RG.2.2.24494.61763</a>, 2017.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib50"><label>WMO(2024)</label><mixed-citation>
       WMO: Guidelines on Best Practices for Climate Data Rescue, World Meteorological Organisation, <a href="https://library.wmo.int/idurl/4/55395" target="_blank">https://library.wmo.int/idurl/4/55395</a>, last access: 30 September 2024.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib51"><label>Wypych et al.(2024)</label><mixed-citation>
       Wypych, A., Ustrnul, Z., Kopaczka-Lepa, D., and Walus, K.: Weather conditions in southern Poland at the turn of the 20th century – insights from archived observational records, J. Hist. Geogr., 86, 191–203, <a href="https://doi.org/10.1016/j.jhg.2024.08.005" target="_blank">https://doi.org/10.1016/j.jhg.2024.08.005</a>, 2024.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib52"><label>Xu et al.(2014)</label><mixed-citation>
       Xu, W.-H., Li, Q.-X., Yang, S., and Xu, Y.: Overview of global monthly surface temperature data in the past century and preliminary integration, Advances in Climate Change Research, 5, 111–117, <a href="https://doi.org/10.1016/j.accre.2014.11.003" target="_blank">https://doi.org/10.1016/j.accre.2014.11.003</a>, 2014.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib53"><label>Yang et al.(1994)</label><mixed-citation>
       Yang, J.-D., Chen, Y.-S., and Hsu, W.-H.: Adaptive thresholding algorithm and its hardware implementation, Pattern Recogn. Lett., 15, 141–150, <a href="https://doi.org/10.1016/0167-8655(94)90043-4" target="_blank">https://doi.org/10.1016/0167-8655(94)90043-4</a>, 1994.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib54"><label>Yuan et al.(2020)</label><mixed-citation>
       Yuan, J., Li, H., Wang, M., Liu, R., Li, C., and Wang, B.: An OpenCV-based Framework for Table Information Extraction, 2020 IEEE International Conference on Knowledge Graph (ICKG), <a href="https://doi.org/10.1109/ICBK50248.2020.00093" target="_blank">https://doi.org/10.1109/ICBK50248.2020.00093</a>, 621–628, 2020.

    </mixed-citation></ref-html>--></article>
