<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing with OASIS Tables v3.0 20080202//EN" "https://jats.nlm.nih.gov/nlm-dtd/publishing/3.0/journalpub-oasis3.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:oasis="http://docs.oasis-open.org/ns/oasis-exchange/table" xml:lang="en" dtd-version="3.0" article-type="research-article">
  <front>
    <journal-meta><journal-id journal-id-type="publisher">GMD</journal-id><journal-title-group>
    <journal-title>Geoscientific Model Development</journal-title>
    <abbrev-journal-title abbrev-type="publisher">GMD</abbrev-journal-title><abbrev-journal-title abbrev-type="nlm-ta">Geosci. Model Dev.</abbrev-journal-title>
  </journal-title-group><issn pub-type="epub">1991-9603</issn><publisher>
    <publisher-name>Copernicus Publications</publisher-name>
    <publisher-loc>Göttingen, Germany</publisher-loc>
  </publisher></journal-meta>
    <article-meta>
      <article-id pub-id-type="doi">10.5194/gmd-19-3569-2026</article-id><title-group><article-title>The ocean model for E3SM global applications: Omega version 0.1.0 – a new high-performance computing code for exascale architectures</article-title><alt-title>Omega: the ocean model for E3SM global applications</alt-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author" corresp="yes" rid="aff1">
          <name><surname>Petersen</surname><given-names>Mark R.</given-names></name>
          <email>mpetersen@lanl.gov</email>
        <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0001-7170-7511">https://orcid.org/0000-0001-7170-7511</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>Asay-Davis</surname><given-names>Xylar S.</given-names></name>
          
        <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0002-1990-892X">https://orcid.org/0000-0002-1990-892X</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>Barthel</surname><given-names>Alice M.</given-names></name>
          
        <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0002-2481-8646">https://orcid.org/0000-0002-2481-8646</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>Begeman</surname><given-names>Carolyn Branecky</given-names></name>
          
        <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0001-9828-1741">https://orcid.org/0000-0001-9828-1741</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff2">
          <name><surname>Bishnu</surname><given-names>Siddhartha</given-names></name>
          
        <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0002-4227-9738">https://orcid.org/0000-0002-4227-9738</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff3">
          <name><surname>Brus</surname><given-names>Steven R.</given-names></name>
          
        <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0002-0314-9201">https://orcid.org/0000-0002-0314-9201</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>Jones</surname><given-names>Philip W.</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff4">
          <name><surname>Kang</surname><given-names>Hyun-Gyu</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff4">
          <name><surname>Kim</surname><given-names>Youngsung</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff3">
          <name><surname>Mametjanov</surname><given-names>Azamat</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>O'Neill</surname><given-names>Brian J.</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff5">
          <name><surname>Overfelt</surname><given-names>James R.</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1 aff6">
          <name><surname>Ringel</surname><given-names>Kieran K.</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>Smith</surname><given-names>Katherine M.</given-names></name>
          
        <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0002-1603-7727">https://orcid.org/0000-0002-1603-7727</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff4">
          <name><surname>Sreepathi</surname><given-names>Sarat</given-names></name>
          
        <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0002-4978-9423">https://orcid.org/0000-0002-4978-9423</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>Van Roekel</surname><given-names>Luke P.</given-names></name>
          
        <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0003-1418-5686">https://orcid.org/0000-0003-1418-5686</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff5">
          <name><surname>Waruszewski</surname><given-names>Maciej</given-names></name>
          
        </contrib>
        <aff id="aff1"><label>1</label><institution>Los Alamos National Laboratory, Los Alamos, NM 87545, USA</institution>
        </aff>
        <aff id="aff2"><label>2</label><institution>Department of Earth Sciences, University of Cambridge, Cambridge, UK</institution>
        </aff>
        <aff id="aff3"><label>3</label><institution>Argonne National Laboratory, Lemont, IL 60439, USA</institution>
        </aff>
        <aff id="aff4"><label>4</label><institution>Oak Ridge National Laboratory, Oak Ridge, TN 37830, USA</institution>
        </aff>
        <aff id="aff5"><label>5</label><institution>Sandia National Laboratories, Albuquerque, NM 87123, USA</institution>
        </aff>
        <aff id="aff6"><label>6</label><institution>Center for Nonlinear Studies, Los Alamos National Laboratory, Los Alamos, NM 87545, USA</institution>
        </aff>
      </contrib-group>
      <author-notes><corresp id="corr1">Mark R. Petersen (<email>mpetersen@lanl.gov</email>)</corresp></author-notes><pub-date><day>4</day><month>May</month><year>2026</year></pub-date>
      
      <volume>19</volume>
      <issue>9</issue>
      <fpage>3569</fpage><lpage>3594</lpage>
      <history>
        <date date-type="received"><day>20</day><month>July</month><year>2025</year></date>
           <date date-type="rev-request"><day>24</day><month>October</month><year>2025</year></date>
           <date date-type="rev-recd"><day>5</day><month>March</month><year>2026</year></date>
           <date date-type="accepted"><day>9</day><month>March</month><year>2026</year></date>
      </history>
      <permissions>
        <copyright-statement>Copyright: © 2026 Mark R. Petersen et al.</copyright-statement>
        <copyright-year>2026</copyright-year>
      <license license-type="open-access"><license-p>This work is licensed under the Creative Commons Attribution 4.0 International License. To view a copy of this license, visit <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link></license-p></license></permissions><self-uri xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026.html">This article is available from https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026.html</self-uri><self-uri xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026.pdf">The full text article is available as a PDF file from https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026.pdf</self-uri>
      <abstract><title>Abstract</title>

      <p id="d2e262">This paper introduces Omega, the Ocean Model for E3SM Global Applications. Omega is a new ocean model designed to run efficiently on high performance computing (HPC) platforms, including exascale heterogeneous architectures with accelerators, such as Graphics Processing Units (GPUs). Omega is written in C<inline-formula><mml:math id="M1" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> and uses the Kokkos performance portability library. These were chosen because they are well-supported and will help future-proof Omega for upcoming HPC architectures. Omega will eventually replace the Model for Prediction Across Scales-Ocean (MPAS-Ocean) in the US Department of Energy's (DOE's) Energy Exascale Earth System Model (E3SM). Omega runs on unstructured horizontal meshes with variable-resolution capability and implements the same horizontal discretization as MPAS-Ocean. This work documents the design and performance of Omega Version 0.1.0 (Omega-V0), which solves the shallow water equations with passive tracers and is the first step towards the full primitive equation ocean model. On Central Processing Units (CPUs), Omega-V0 is 1.4 times faster than MPAS-Ocean with the same configuration. Omega-V0 is more efficient on GPUs than CPUs on a per-watt basis – by a factor of 5.3 on Frontier and 3.6 on Aurora, two of the world's fastest exascale computers.</p>
  </abstract>
    
<funding-group>
<award-group id="gs1">
<funding-source>Biological and Environmental Research</funding-source>
<award-id>E3SM</award-id>
</award-group>
</funding-group>
</article-meta>
  </front>
<body>
      

      
<sec id="Ch1.S1" sec-type="intro">
  <label>1</label><title>Introduction</title>
      <p id="d2e289">Ocean models have always required access to the fastest available computers in order to resolve fine spatial scales and simulate the long timescales inherent in ocean circulation. As a result, ocean models have continually adapted to evolving high-performance computing (HPC) architectures and programming paradigms. Early global ocean models were written in Fortran in the 1960s <xref ref-type="bibr" rid="bib1.bibx12" id="paren.1"/> and subsequently optimized in the 1970s and 1980s for vector supercomputers <xref ref-type="bibr" rid="bib1.bibx64" id="paren.2"><named-content content-type="pre">e.g.,</named-content></xref>, enabling early eddy-permitting simulations. During the transition to parallel computing in the late 1980s and 1990s, the Parallel Ocean Program <xref ref-type="bibr" rid="bib1.bibx18" id="paren.3"><named-content content-type="pre">POP;</named-content></xref> introduced a data-parallel formulation of the Bryan–Cox models along with algorithmic innovations required for scalable parallel implementations <xref ref-type="bibr" rid="bib1.bibx17" id="paren.4"/>. POP was used for the first eddy-resolving simulations of the global ocean and the North Atlantic <xref ref-type="bibr" rid="bib1.bibx39 bib1.bibx69" id="paren.5"/>. Following a decade of competing parallel programming models, the Message Passing Interface <xref ref-type="bibr" rid="bib1.bibx43" id="paren.6"/> emerged as the de facto standard, and ocean models such as POP were adapted to MPI using horizontal domain decomposition with halo regions to minimize communication overhead <xref ref-type="bibr" rid="bib1.bibx68" id="paren.7"><named-content content-type="pre">e.g.,</named-content></xref>. As multicore CPUs became standard in HPC clusters, OpenMP <xref ref-type="bibr" rid="bib1.bibx51" id="paren.8"/> directives were incorporated to enable on-node shared-memory parallelism <xref ref-type="bibr" rid="bib1.bibx79 bib1.bibx33" id="paren.9"/>.</p>
      <p id="d2e326">A new transition in HPC is currently underway, driven by power and cooling constraints that limit CPU performance scaling. Modern HPC systems are increasingly heterogeneous, most commonly pairing CPUs with Graphics Processing Units (GPUs) as accelerators. In the current TOP500 rankings, only two of the top 50 systems lack GPUs <xref ref-type="bibr" rid="bib1.bibx71" id="paren.10"/>, and the majority of computational capability in most of these systems resides in the GPU components. For example, the DOE's Frontier system provides 2.5 TFLOPs per node from CPUs compared to 192 TFLOPs from its four GPUs (98 %), while DOE's Aurora provides 27 TFLOPs from CPUs and 314 TFLOPs from GPUs per node (92 %). Achieving high performance on modern HPC platforms therefore requires computational physics models to execute efficiently on GPUs.</p>
      <p id="d2e332">The new Ocean Model for E3SM Global Applications (Omega) is designed for emerging architectures, with performance portability central to this philosophy. As in the early 1990s parallel transition, there are a number of competing programming models for these new heterogeneous architectures and few options that are portable across all the Leadership Computing Facilities of the DOE. Current options include: (1) directive-based offloading using OpenACC <xref ref-type="bibr" rid="bib1.bibx50" id="paren.11"/>; (2) low-level, vendor-specific GPU programming interfaces such as CUDA <xref ref-type="bibr" rid="bib1.bibx48" id="paren.12"/>, HIP <xref ref-type="bibr" rid="bib1.bibx1" id="paren.13"/>, or DPC<inline-formula><mml:math id="M2" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> <xref ref-type="bibr" rid="bib1.bibx31" id="paren.14"/>; (3) domain-specific languages or source-to-source tools such as <xref ref-type="bibr" rid="bib1.bibx57" id="text.15"/> and <xref ref-type="bibr" rid="bib1.bibx25" id="text.16"/>; and (4) performance-portable programming models, including Kokkos <xref ref-type="bibr" rid="bib1.bibx76" id="paren.17"/>, YAKL <xref ref-type="bibr" rid="bib1.bibx47" id="paren.18"/>, and Raja <xref ref-type="bibr" rid="bib1.bibx8" id="paren.19"/>, which provide high-level abstractions mapped to optimized, vendor-specific backends. Language standards for C<inline-formula><mml:math id="M3" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> and Fortran are also evolving to support parallelism and data placement for hybrid architectures, but implementations remain limited. Performance portability is defined here as the ability of a code to achieve high performance across multiple computing platforms without platform-specific code modifications or tuning.</p>
      <p id="d2e383">Most climate model components, including ocean models, are written in Fortran and rely on MPI for inter-node communication and OpenMP for shared-memory parallelism. The most straightforward path to GPU execution is the addition of OpenACC directives (option 1), which preserves the underlying code structure and closely resembles OpenMP. This approach was adopted for MPAS-Ocean and ICON-O <xref ref-type="bibr" rid="bib1.bibx56" id="paren.20"/>, and was successfully deployed on GPU systems. However, only approximately half of the MPAS-Ocean code could be accelerated due to complex tracer data structures, reliance on external community libraries for key tracer tendencies, and barotropic splitting algorithms that required frequent non-GPU-aware MPI communications. Consequently, performance gains were limited, ranging from modest speedups on some platforms (e.g., Frontier) to slowdowns on others (e.g., Perlmutter), as discussed in Sect. <xref ref-type="sec" rid="Ch1.S5"/>. In all cases, achieved performance fell well below expected GPU throughput because of small kernel sizes and excessive CPU–GPU data transfers. In addition, compiler support for Fortran on GPU architectures was often delayed or incomplete. These limitations motivated the exploration of alternative GPU programming models and ultimately a complete redesign of the ocean model.</p>
      <p id="d2e392">Vendor-specific GPU programming interfaces (option 2) offer fine-grained control but suffer from portability limitations and vendor lock-in. NVIDIA's CUDA framework enables highly optimized GPU implementations but targets only NVIDIA hardware. AMD's HIP provides a C<inline-formula><mml:math id="M4" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> runtime and kernel language aimed primarily at AMD GPUs, with limited support for NVIDIA devices. SYCL, an open C<inline-formula><mml:math id="M5" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> standard for heterogeneous programming, underlies Intel's Data Parallel C<inline-formula><mml:math id="M6" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> (DPC<inline-formula><mml:math id="M7" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula>) implementation and primarily targets Intel GPUs. These approaches impose a steep learning curve for domain scientists, offer limited guarantees of portability, and do not prioritize performance portability across diverse architectures. Several models have been ported to GPUs using CUDA, including the Princeton Ocean Model <xref ref-type="bibr" rid="bib1.bibx82 bib1.bibx83" id="paren.21"/>, the Finite Volume Coastal Ocean Model (FVCOM) <xref ref-type="bibr" rid="bib1.bibx85" id="paren.22"/>, and the Weather Research and Forecasting (WRF) <xref ref-type="bibr" rid="bib1.bibx41" id="paren.23"/>, but this strategy was deemed unsuitable given the diversity of DOE computing platforms.</p>
      <p id="d2e445">Domain-specific languages (option 3) applicable to ocean modeling remain limited and lack broad community adoption, presenting risks for long-term model development. <xref ref-type="bibr" rid="bib1.bibx57" id="text.24"/> is a source-to-source translation tool used to generate GPU-enabled code in the Nucleus for European Modelling of the Ocean <xref ref-type="bibr" rid="bib1.bibx45" id="paren.25"/>. Firedrake <xref ref-type="bibr" rid="bib1.bibx26" id="paren.26"/> is an example of a specialized domain specific language. It provides a high-level interface with partial differential equations (PDEs) and underlying discretizations, but is not strongly supported on GPU architectures. Julia is a high-level language that supports performance portability across new architectures. The Climate Modeling Alliance (CliMA) has adopted Julia as its primary language, and its ocean model, Oceananigans.jl <xref ref-type="bibr" rid="bib1.bibx58" id="paren.27"/>, has demonstrated strong GPU performance <xref ref-type="bibr" rid="bib1.bibx65" id="paren.28"/>. While Julia continues to evolve and gain traction in scientific computing, there is not yet support for all vendor GPUs. C<inline-formula><mml:math id="M8" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> offered a more stable and production-ready foundation for building a scalable and performant ocean model from the ground up.</p>
      <p id="d2e474">In designing Omega, the performance-portable library approach (option 4) was found to be the most promising. The existing C<inline-formula><mml:math id="M9" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> based libraries (Kokkos, Raja, YAKL) all offer similar capabilities, including data array abstractions for managing the CPU and GPU memory spaces, as well as a <preformat preformat-type="code"><![CDATA[parallel_for]]></preformat> construct for kernel launches and parallel execution on the GPU. A number of additional utilities are also provided to support a performance-portable interface across the heterogeneous nodes. Omega initially used YAKL because it was the simplest and most light-weight library, specifically developed to port existing Fortran atmosphere codes. The Kokkos library was chosen in the end due to its long-term stability, support for new architectures, and large user community.</p>
      <p id="d2e490">The Kokkos programming model is a C<inline-formula><mml:math id="M10" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> library. Codes written in Fortran must be rewritten in C<inline-formula><mml:math id="M11" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> to use Kokkos. This is a major change, as Fortran has been used by the computational physics community for many decades. The model rewrite in C<inline-formula><mml:math id="M12" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> is a worthwhile, long-term investment for its widespread support across all major HPC platforms. C<inline-formula><mml:math id="M13" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> benefits from decades of ecosystem development, robust support by compiler vendors, and a wealth of well-established libraries for MPI, parallel I/O, and performance portability frameworks like Kokkos.</p>
      <p id="d2e533">Kokkos was already being used by the E3SM atmosphere component, so some expertise had been developed within the project. The E3SM Atmosphere Model in C<inline-formula><mml:math id="M14" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> (EAMxx) was designed from the ground up using C<inline-formula><mml:math id="M15" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> and Kokkos. EAMxx and its high-resolution counterpart, the Simple Cloud-Resolving E3SM Atmosphere Model (SCREAM), won the 2023 Gordon Bell Climate Prize for Modeling award for being the first global cloud-resolving model to run efficiently on an exascale supercomputer <xref ref-type="bibr" rid="bib1.bibx15" id="paren.29"/>. SCREAM was designed to provide sufficient parallelism to keep GPUs fully utilized, and surpassed one simulated year per compute day at global 3 km resolution.</p>
      <p id="d2e559">A recent example of a Kokkos-based ocean model outside of DOE is LICOMK<inline-formula><mml:math id="M16" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> <xref ref-type="bibr" rid="bib1.bibx80" id="paren.30"/>. They showed performance portability across CPUs and HIP-based GPUs <xref ref-type="bibr" rid="bib1.bibx80" id="paren.31"/>. Like the Omega effort, this ocean model is still in the early stages of development, lacking some features found in more mature ocean models and relying on more uniform, regular meshes. Nonetheless, it will be a valuable point of comparison for Omega going forward. It should be noted that unlike the two efforts above, Omega development proceeded with a team primarily composed of domain scientists, without a dedicated computer science team (aside from the significant Kokkos development team). The Omega code base was purposefully written to be legible to domain scientists, simplifying some of the Kokkos abstractions. These simplifications took two forms. First, aliases to Kokkos features were created that impose some standard options to reduce the syntactical complexity. Second, simpler aliases were used to translate some Kokkos jargon to language more familiar to developers (e.g. Kokkos Views to more familiar Array data types). These are very thin abstractions and easily maintainable, even by domain scientists familiar enough with Kokkos. The core development team is providing extensive documentation, with templates and examples, so that domain scientists with less experience in C<inline-formula><mml:math id="M17" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> and Kokkos can more easily contribute to Omega in the future. This will be important as the model adopts physics schemes and sub-grid parameterizations, since that work relies more heavily on domain experts.</p>
      <p id="d2e589">This paper documents the first phase of Omega development. The model is described in Sect. 2, including the governing equations, variable definitions, and discrete formulation. The code design in Sect. 3 explains the details of the model framework, Kokkos interface, and code organization. Section 4 describes four verification tests of increasing complexity. Section 5 provides Omega performance results on three architectures with comparisons on CPUs and GPUs, and with the predecessor model MPAS-Ocean. Conclusions are presented in Sect. 6.</p>
</sec>
<sec id="Ch1.S2">
  <label>2</label><title>Model description</title>
      <p id="d2e600">Omega Version 0.1.0 (Omega-V0) was created as a first version of the full Omega primitive equation model. It solves the shallow water equations (SWE), as well as the advection-diffusion equation for passive tracers. This is sufficient to test performance using the Kokkos library on CPUs and GPUs, as well as the framework functions described in Sect. <xref ref-type="sec" rid="Ch1.S3.SS1"/>. Omega-V0 has redundant vertical layers in order to test performance using arrays with a vertical index, but does not include any vertical advection or diffusion terms.</p>
<sec id="Ch1.S2.SS1">
  <label>2.1</label><title>Governing equations</title>
      <p id="d2e612">The shallow water equations govern the conservation of momentum and volume for an incompressible fluid on the rotating earth. Standard formulations may be found in textbooks on Geophysical Fluid Dynamics, such as those by <xref ref-type="bibr" rid="bib1.bibx78" id="text.32"/>, <xref ref-type="bibr" rid="bib1.bibx14" id="text.33"/>, <xref ref-type="bibr" rid="bib1.bibx23" id="text.34"/> and <xref ref-type="bibr" rid="bib1.bibx53" id="text.35"/>. The presentation follows <xref ref-type="bibr" rid="bib1.bibx11" id="text.36"/> (Sect. 2.1). In continuous form, the shallow water equations are

                <disp-formula specific-use="align" content-type="numbered"><mml:math id="M18" display="block"><mml:mtable displaystyle="true"><mml:mlabeledtr id="Ch1.E1"><mml:mtd><mml:mtext>1</mml:mtext></mml:mtd><mml:mtd><mml:mstyle class="stylechange" displaystyle="true"/></mml:mtd><mml:mtd><mml:mrow><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:mo>∂</mml:mo><mml:mi mathvariant="bold-italic">u</mml:mi></mml:mrow><mml:mrow><mml:mo>∂</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:mfrac></mml:mstyle><mml:mo>+</mml:mo><mml:mo>(</mml:mo><mml:mi mathvariant="bold-italic">u</mml:mi><mml:mo>⋅</mml:mo><mml:mi mathvariant="normal">∇</mml:mi><mml:mo>)</mml:mo><mml:mi mathvariant="bold-italic">u</mml:mi><mml:mo>+</mml:mo><mml:mi>f</mml:mi><mml:mi mathvariant="bold-italic">k</mml:mi><mml:mo>×</mml:mo><mml:mi mathvariant="bold-italic">u</mml:mi><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:mi>g</mml:mi><mml:mi mathvariant="normal">∇</mml:mi><mml:mo>(</mml:mo><mml:mi>h</mml:mi><mml:mo>-</mml:mo><mml:mi>b</mml:mi><mml:mo>)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mlabeledtr><mml:mlabeledtr id="Ch1.E2"><mml:mtd><mml:mtext>2</mml:mtext></mml:mtd><mml:mtd><mml:mstyle class="stylechange" displaystyle="true"/></mml:mtd><mml:mtd><mml:mrow><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:mo>∂</mml:mo><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mo>∂</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:mfrac></mml:mstyle><mml:mo>+</mml:mo><mml:mi mathvariant="normal">∇</mml:mi><mml:mo>⋅</mml:mo><mml:mo>(</mml:mo><mml:mi>h</mml:mi><mml:mi mathvariant="bold-italic">u</mml:mi><mml:mo>)</mml:mo><mml:mo>=</mml:mo><mml:mn mathvariant="normal">0</mml:mn><mml:mo>.</mml:mo></mml:mrow></mml:mtd></mml:mlabeledtr></mml:mtable></mml:math></disp-formula></p>

<table-wrap id="T1" specific-use="star"><label>Table 1</label><caption><p id="d2e745">Definition of variables.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="5">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="left"/>
     <oasis:colspec colnum="3" colname="col3" align="left"/>
     <oasis:colspec colnum="4" colname="col4" align="left"/>
     <oasis:colspec colnum="5" colname="col5" align="left"/>
     <oasis:thead>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Symbol</oasis:entry>
         <oasis:entry colname="col2">Name</oasis:entry>
         <oasis:entry colname="col3">Units</oasis:entry>
         <oasis:entry colname="col4">Location</oasis:entry>
         <oasis:entry colname="col5">Notes</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>
         <oasis:entry colname="col1"><inline-formula><mml:math id="M19" display="inline"><mml:mi>b</mml:mi></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col2">bottom depth</oasis:entry>
         <oasis:entry colname="col3">m</oasis:entry>
         <oasis:entry colname="col4">cell</oasis:entry>
         <oasis:entry colname="col5">always positive</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"><inline-formula><mml:math id="M20" display="inline"><mml:mrow><mml:msub><mml:mi>C</mml:mi><mml:mi mathvariant="normal">D</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col2">bottom drag</oasis:entry>
         <oasis:entry colname="col3">m<sup>−1</sup></oasis:entry>
         <oasis:entry colname="col4">constant</oasis:entry>
         <oasis:entry colname="col5">typically 0.001</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"><inline-formula><mml:math id="M22" display="inline"><mml:mi>f</mml:mi></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col2">Coriolis parameter</oasis:entry>
         <oasis:entry colname="col3">s<sup>−1</sup></oasis:entry>
         <oasis:entry colname="col4">vertex</oasis:entry>
         <oasis:entry colname="col5"/>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"><inline-formula><mml:math id="M24" display="inline"><mml:mi>g</mml:mi></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col2">gravitational acceleration</oasis:entry>
         <oasis:entry colname="col3">m s<sup>−2</sup></oasis:entry>
         <oasis:entry colname="col4">constant</oasis:entry>
         <oasis:entry colname="col5"/>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"><inline-formula><mml:math id="M26" display="inline"><mml:mi>h</mml:mi></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col2">layer thickness</oasis:entry>
         <oasis:entry colname="col3">m</oasis:entry>
         <oasis:entry colname="col4">cell</oasis:entry>
         <oasis:entry colname="col5"/>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"><inline-formula><mml:math id="M27" display="inline"><mml:mi mathvariant="bold-italic">k</mml:mi></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col2">vertical unit vector</oasis:entry>
         <oasis:entry colname="col3">unitless</oasis:entry>
         <oasis:entry colname="col4">none</oasis:entry>
         <oasis:entry colname="col5"/>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"><inline-formula><mml:math id="M28" display="inline"><mml:mi>K</mml:mi></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col2">kinetic energy</oasis:entry>
         <oasis:entry colname="col3">m<sup>2</sup> s<sup>−2</sup></oasis:entry>
         <oasis:entry colname="col4">cell</oasis:entry>
         <oasis:entry colname="col5"><inline-formula><mml:math id="M31" display="inline"><mml:mrow><mml:mi>K</mml:mi><mml:mo>=</mml:mo><mml:msup><mml:mfenced open="∥" close="∥"><mml:mi mathvariant="bold-italic">u</mml:mi></mml:mfenced><mml:mn mathvariant="normal">2</mml:mn></mml:msup><mml:mo>/</mml:mo><mml:mn mathvariant="normal">2</mml:mn></mml:mrow></mml:math></inline-formula></oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"><inline-formula><mml:math id="M32" display="inline"><mml:mi>q</mml:mi></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col2">potential vorticity</oasis:entry>
         <oasis:entry colname="col3">m<sup>−1</sup> s<sup>−1</sup></oasis:entry>
         <oasis:entry colname="col4">vertex</oasis:entry>
         <oasis:entry colname="col5"><inline-formula><mml:math id="M35" display="inline"><mml:mrow><mml:mi>q</mml:mi><mml:mo>=</mml:mo><mml:mo>(</mml:mo><mml:mi mathvariant="italic">ω</mml:mi><mml:mo>+</mml:mo><mml:mi>f</mml:mi><mml:mo>)</mml:mo><mml:mo>/</mml:mo><mml:mi>h</mml:mi></mml:mrow></mml:math></inline-formula></oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"><inline-formula><mml:math id="M36" display="inline"><mml:mi>t</mml:mi></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col2">time</oasis:entry>
         <oasis:entry colname="col3">s</oasis:entry>
         <oasis:entry colname="col4">none</oasis:entry>
         <oasis:entry colname="col5"/>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"><inline-formula><mml:math id="M37" display="inline"><mml:mi mathvariant="bold-italic">u</mml:mi></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col2">velocity, vector form</oasis:entry>
         <oasis:entry colname="col3">m s<sup>−1</sup></oasis:entry>
         <oasis:entry colname="col4">edge</oasis:entry>
         <oasis:entry colname="col5"/>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"><inline-formula><mml:math id="M39" display="inline"><mml:mrow><mml:msub><mml:mi>u</mml:mi><mml:mi mathvariant="normal">e</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col2">velocity, normal to edge</oasis:entry>
         <oasis:entry colname="col3">m s<sup>−1</sup></oasis:entry>
         <oasis:entry colname="col4">edge</oasis:entry>
         <oasis:entry colname="col5"/>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"><inline-formula><mml:math id="M41" display="inline"><mml:mrow><mml:msubsup><mml:mi>u</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mo>⟂</mml:mo></mml:msubsup></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col2">velocity, tangential to edge</oasis:entry>
         <oasis:entry colname="col3">m s<sup>−1</sup></oasis:entry>
         <oasis:entry colname="col4">edge</oasis:entry>
         <oasis:entry colname="col5"/>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"><inline-formula><mml:math id="M43" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">κ</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col2">tracer diffusion</oasis:entry>
         <oasis:entry colname="col3">m<sup>2</sup> s<sup>−1</sup></oasis:entry>
         <oasis:entry colname="col4">cell</oasis:entry>
         <oasis:entry colname="col5"/>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"><inline-formula><mml:math id="M46" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">κ</mml:mi><mml:mn mathvariant="normal">4</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col2">biharmonic tracer diffusion</oasis:entry>
         <oasis:entry colname="col3">m<sup>4</sup> s<sup>−1</sup></oasis:entry>
         <oasis:entry colname="col4">cell</oasis:entry>
         <oasis:entry colname="col5"/>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"><inline-formula><mml:math id="M49" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">ν</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col2">viscosity</oasis:entry>
         <oasis:entry colname="col3">m<sup>2</sup> s<sup>−1</sup></oasis:entry>
         <oasis:entry colname="col4">edge</oasis:entry>
         <oasis:entry colname="col5"/>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"><inline-formula><mml:math id="M52" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">ν</mml:mi><mml:mn mathvariant="normal">4</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col2">biharmonic viscosity</oasis:entry>
         <oasis:entry colname="col3">m<sup>4</sup> s<sup>−1</sup></oasis:entry>
         <oasis:entry colname="col4">edge</oasis:entry>
         <oasis:entry colname="col5"/>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"><inline-formula><mml:math id="M55" display="inline"><mml:mi mathvariant="italic">φ</mml:mi></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col2">tracer</oasis:entry>
         <oasis:entry colname="col3">varies</oasis:entry>
         <oasis:entry colname="col4">cell</oasis:entry>
         <oasis:entry colname="col5">units: kg m<sup>−3</sup> or similar</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"><inline-formula><mml:math id="M57" display="inline"><mml:mi mathvariant="italic">τ</mml:mi></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col2">wind stress</oasis:entry>
         <oasis:entry colname="col3">Pa</oasis:entry>
         <oasis:entry colname="col4">edge</oasis:entry>
         <oasis:entry colname="col5"/>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"><inline-formula><mml:math id="M58" display="inline"><mml:mi mathvariant="italic">ω</mml:mi></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col2">relative vorticity</oasis:entry>
         <oasis:entry colname="col3">s<sup>−1</sup></oasis:entry>
         <oasis:entry colname="col4">vertex</oasis:entry>
         <oasis:entry colname="col5"><inline-formula><mml:math id="M60" display="inline"><mml:mrow><mml:mi mathvariant="italic">ω</mml:mi><mml:mo>=</mml:mo><mml:mi mathvariant="bold-italic">k</mml:mi><mml:mo>⋅</mml:mo><mml:mo>(</mml:mo><mml:mi mathvariant="normal">∇</mml:mi><mml:mo>×</mml:mo><mml:mi mathvariant="bold-italic">u</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula></oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table></table-wrap>

      <p id="d2e1519">All variables introduced in this section are summarized in Table <xref ref-type="table" rid="T1"/>. Using a vector calculus identity, the non-linear advection term may be represented as

                <disp-formula specific-use="align" content-type="numbered"><mml:math id="M61" display="block"><mml:mtable displaystyle="true"><mml:mlabeledtr id="Ch1.E3"><mml:mtd><mml:mtext>3</mml:mtext></mml:mtd><mml:mtd><mml:mrow><mml:mstyle class="stylechange" displaystyle="true"/><mml:mi mathvariant="bold-italic">u</mml:mi><mml:mo>⋅</mml:mo><mml:mi mathvariant="normal">∇</mml:mi><mml:mi mathvariant="bold-italic">u</mml:mi></mml:mrow></mml:mtd><mml:mtd><mml:mrow><mml:mstyle class="stylechange" displaystyle="true"/><mml:mo>=</mml:mo><mml:mo>(</mml:mo><mml:mi mathvariant="normal">∇</mml:mi><mml:mo>×</mml:mo><mml:mi mathvariant="bold-italic">u</mml:mi><mml:mo>)</mml:mo><mml:mo>×</mml:mo><mml:mi mathvariant="bold-italic">u</mml:mi><mml:mo>+</mml:mo><mml:mi mathvariant="normal">∇</mml:mi><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:mo>|</mml:mo><mml:mi mathvariant="bold-italic">u</mml:mi><mml:msup><mml:mo>|</mml:mo><mml:mn mathvariant="normal">2</mml:mn></mml:msup></mml:mrow><mml:mn mathvariant="normal">2</mml:mn></mml:mfrac></mml:mstyle></mml:mrow></mml:mtd></mml:mlabeledtr><mml:mlabeledtr id="Ch1.E4"><mml:mtd><mml:mtext>4</mml:mtext></mml:mtd><mml:mtd><mml:mstyle class="stylechange" displaystyle="true"/></mml:mtd><mml:mtd><mml:mrow><mml:mstyle displaystyle="true" class="stylechange"/><mml:mo>=</mml:mo><mml:mo mathvariant="italic">{</mml:mo><mml:mi mathvariant="bold-italic">k</mml:mi><mml:mo>⋅</mml:mo><mml:mo>(</mml:mo><mml:mi mathvariant="normal">∇</mml:mi><mml:mo>×</mml:mo><mml:mi mathvariant="bold-italic">u</mml:mi><mml:mo>)</mml:mo><mml:mo mathvariant="italic">}</mml:mo><mml:mi mathvariant="bold-italic">k</mml:mi><mml:mo>×</mml:mo><mml:mi mathvariant="bold-italic">u</mml:mi><mml:mo>+</mml:mo><mml:mi mathvariant="normal">∇</mml:mi><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:mo>|</mml:mo><mml:mi mathvariant="bold-italic">u</mml:mi><mml:msup><mml:mo>|</mml:mo><mml:mn 
mathvariant="normal">2</mml:mn></mml:msup></mml:mrow><mml:mn mathvariant="normal">2</mml:mn></mml:mfrac></mml:mstyle></mml:mrow></mml:mtd></mml:mlabeledtr><mml:mlabeledtr id="Ch1.E5"><mml:mtd><mml:mtext>5</mml:mtext></mml:mtd><mml:mtd><mml:mstyle displaystyle="true" class="stylechange"/></mml:mtd><mml:mtd><mml:mrow><mml:mstyle class="stylechange" displaystyle="true"/><mml:mo>=</mml:mo><mml:mi mathvariant="italic">ω</mml:mi><mml:msup><mml:mi mathvariant="bold-italic">u</mml:mi><mml:mo>⟂</mml:mo></mml:msup><mml:mo>+</mml:mo><mml:mi mathvariant="normal">∇</mml:mi><mml:mi>K</mml:mi><mml:mo>.</mml:mo></mml:mrow></mml:mtd></mml:mlabeledtr></mml:mtable></mml:math></disp-formula>

          Thus the advection and Coriolis terms may be combined as 

                <disp-formula specific-use="align" content-type="numbered"><mml:math id="M62" display="block"><mml:mtable displaystyle="true"><mml:mlabeledtr id="Ch1.E6"><mml:mtd><mml:mtext>6</mml:mtext></mml:mtd><mml:mtd><mml:mrow><mml:mstyle displaystyle="true" class="stylechange"/><mml:mi mathvariant="bold-italic">u</mml:mi><mml:mo>⋅</mml:mo><mml:mi mathvariant="normal">∇</mml:mi><mml:mi mathvariant="bold-italic">u</mml:mi><mml:mo>+</mml:mo><mml:mi>f</mml:mi><mml:mi mathvariant="bold-italic">k</mml:mi><mml:mo>×</mml:mo><mml:mi mathvariant="bold-italic">u</mml:mi></mml:mrow></mml:mtd><mml:mtd><mml:mrow><mml:mstyle displaystyle="true" class="stylechange"/><mml:mo>=</mml:mo><mml:mo>(</mml:mo><mml:mi mathvariant="italic">ω</mml:mi><mml:mo>+</mml:mo><mml:mi>f</mml:mi><mml:mo>)</mml:mo><mml:msup><mml:mi mathvariant="bold-italic">u</mml:mi><mml:mo>⟂</mml:mo></mml:msup><mml:mo>+</mml:mo><mml:mi mathvariant="normal">∇</mml:mi><mml:mi>K</mml:mi></mml:mrow></mml:mtd></mml:mlabeledtr><mml:mlabeledtr id="Ch1.E7"><mml:mtd><mml:mtext>7</mml:mtext></mml:mtd><mml:mtd><mml:mstyle displaystyle="true" class="stylechange"/></mml:mtd><mml:mtd><mml:mrow><mml:mstyle displaystyle="true" class="stylechange"/><mml:mo>=</mml:mo><mml:mi>q</mml:mi><mml:mfenced open="(" close=")"><mml:mrow><mml:mi>h</mml:mi><mml:msup><mml:mi mathvariant="bold-italic">u</mml:mi><mml:mo>⟂</mml:mo></mml:msup></mml:mrow></mml:mfenced><mml:mo>+</mml:mo><mml:mi mathvariant="normal">∇</mml:mi><mml:mi>K</mml:mi><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mlabeledtr></mml:mtable></mml:math></disp-formula>

          where <inline-formula><mml:math id="M63" display="inline"><mml:mi>q</mml:mi></mml:math></inline-formula> is the potential vorticity. This formulation, described in Sect. 2.1 of <xref ref-type="bibr" rid="bib1.bibx59" id="text.37"/>, is useful for the mimetic properties of potential vorticity and energy conservation in the TRiSK discretization <xref ref-type="bibr" rid="bib1.bibx74" id="paren.38"/>.</p>
      <p id="d2e1769">The governing equations for Omega-V0 in continuous form are

                <disp-formula specific-use="gather" content-type="numbered"><mml:math id="M64" display="block"><mml:mtable displaystyle="true"><mml:mlabeledtr id="Ch1.E8"><mml:mtd><mml:mtext>8</mml:mtext></mml:mtd><mml:mtd><mml:mrow><mml:mstyle displaystyle="true" class="stylechange"/><mml:mtable rowspacing="0.2ex" class="split" displaystyle="true" columnalign="right left"><mml:mtr><mml:mtd><mml:mrow><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:mo>∂</mml:mo><mml:mi mathvariant="bold-italic">u</mml:mi></mml:mrow><mml:mrow><mml:mo>∂</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:mfrac></mml:mstyle></mml:mrow></mml:mtd><mml:mtd><mml:mrow><mml:mo>+</mml:mo><mml:mi>q</mml:mi><mml:mfenced open="(" close=")"><mml:mrow><mml:mi>h</mml:mi><mml:msup><mml:mi mathvariant="bold-italic">u</mml:mi><mml:mo>⟂</mml:mo></mml:msup></mml:mrow></mml:mfenced><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:mi>g</mml:mi><mml:mi mathvariant="normal">∇</mml:mi><mml:mo>(</mml:mo><mml:mi>h</mml:mi><mml:mo>-</mml:mo><mml:mi>b</mml:mi><mml:mo>)</mml:mo><mml:mo>-</mml:mo><mml:mi mathvariant="normal">∇</mml:mi><mml:mi>K</mml:mi><mml:mo>+</mml:mo><mml:msub><mml:mi mathvariant="italic">ν</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msub><mml:msup><mml:mi mathvariant="normal">∇</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msup><mml:mi mathvariant="bold-italic">u</mml:mi></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd/><mml:mtd><mml:mrow><mml:mo>-</mml:mo><mml:msub><mml:mi mathvariant="italic">ν</mml:mi><mml:mn mathvariant="normal">4</mml:mn></mml:msub><mml:msup><mml:mi mathvariant="normal">∇</mml:mi><mml:mn mathvariant="normal">4</mml:mn></mml:msup><mml:mi mathvariant="bold-italic">u</mml:mi><mml:mo>-</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mi mathvariant="normal">D</mml:mi></mml:msub><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:mi mathvariant="bold-italic">u</mml:mi><mml:mo>|</mml:mo><mml:mi 
mathvariant="bold-italic">u</mml:mi><mml:mo>|</mml:mo></mml:mrow><mml:mi>h</mml:mi></mml:mfrac></mml:mstyle><mml:mo>+</mml:mo><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mi mathvariant="italic">τ</mml:mi><mml:mi>h</mml:mi></mml:mfrac></mml:mstyle></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:mtd></mml:mlabeledtr><mml:mlabeledtr id="Ch1.E9"><mml:mtd><mml:mtext>9</mml:mtext></mml:mtd><mml:mtd><mml:mrow><mml:mstyle class="stylechange" displaystyle="true"/><mml:mrow><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:mo>∂</mml:mo><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mo>∂</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:mfrac></mml:mstyle><mml:mo>+</mml:mo><mml:mi mathvariant="normal">∇</mml:mi><mml:mo>⋅</mml:mo><mml:mo>(</mml:mo><mml:mi>h</mml:mi><mml:mi mathvariant="bold-italic">u</mml:mi><mml:mo>)</mml:mo><mml:mo>=</mml:mo><mml:mn mathvariant="normal">0</mml:mn></mml:mrow></mml:mrow></mml:mtd></mml:mlabeledtr><mml:mlabeledtr id="Ch1.E10"><mml:mtd><mml:mtext>10</mml:mtext></mml:mtd><mml:mtd><mml:mrow><mml:mstyle class="stylechange" displaystyle="true"/><mml:mrow><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:mo>∂</mml:mo><mml:mi>h</mml:mi><mml:mi mathvariant="italic">φ</mml:mi></mml:mrow><mml:mrow><mml:mo>∂</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:mfrac></mml:mstyle><mml:mo>+</mml:mo><mml:mi mathvariant="normal">∇</mml:mi><mml:mo>⋅</mml:mo><mml:mo>(</mml:mo><mml:mi>h</mml:mi><mml:mi mathvariant="bold-italic">u</mml:mi><mml:mi mathvariant="italic">φ</mml:mi><mml:mo>)</mml:mo><mml:mo>=</mml:mo><mml:msub><mml:mi mathvariant="italic">κ</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msub><mml:mi>h</mml:mi><mml:msup><mml:mi mathvariant="normal">∇</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msup><mml:mi mathvariant="italic">φ</mml:mi><mml:mo>-</mml:mo><mml:msub><mml:mi mathvariant="italic">κ</mml:mi><mml:mn 
mathvariant="normal">4</mml:mn></mml:msub><mml:mi>h</mml:mi><mml:msup><mml:mi mathvariant="normal">∇</mml:mi><mml:mn mathvariant="normal">4</mml:mn></mml:msup><mml:mi mathvariant="italic">φ</mml:mi><mml:mo>.</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mlabeledtr></mml:mtable></mml:math></disp-formula>

          In order to bring these equations closer to the layered formulation of the upcoming full ocean model in Omega-V1, we have added Laplacian and biharmonic dissipation to the momentum equation, along with quadratic bottom drag and wind forcing. The thickness equation (Eq. <xref ref-type="disp-formula" rid="Ch1.E9"/>) is derived from conservation of mass for a fluid with constant density, which reduces to conservation of volume. The model domain uses fixed horizontal cells with horizontal areas that are constant in time, so the area drops out and only the layer thickness <inline-formula><mml:math id="M65" display="inline"><mml:mi>h</mml:mi></mml:math></inline-formula> remains as the prognostic variable. The tracer equation (Eq. <xref ref-type="disp-formula" rid="Ch1.E10"/>) is the conservation equation for a passive tracer (scalar), with only advective and diffusive terms. It is not included in the textbook shallow water equations, but is useful for us to test tracer advection in preparation for a primitive equation model in Omega-V1. In this equation set, the tracer equation does not feed back into the momentum or thickness equations. It is written in a thickness-weighted form because the conserved quantity is the tracer mass. Here (<inline-formula><mml:math id="M66" display="inline"><mml:mrow><mml:mi>h</mml:mi><mml:mi mathvariant="italic">φ</mml:mi><mml:mi>A</mml:mi></mml:mrow></mml:math></inline-formula>), where <inline-formula><mml:math id="M67" display="inline"><mml:mi>A</mml:mi></mml:math></inline-formula> is horizontal cell area, typically has units of tracer mass in kg, while <inline-formula><mml:math id="M68" display="inline"><mml:mi mathvariant="italic">φ</mml:mi></mml:math></inline-formula> has units of concentration in kg m<sup>−3</sup>. Since <inline-formula><mml:math id="M70" display="inline"><mml:mi>A</mml:mi></mml:math></inline-formula> is fixed, it is divided out, making Eq. 
(<xref ref-type="disp-formula" rid="Ch1.E10"/>) thickness-weighted, rather than volume-weighted. A derivation of the thickness-weighted tracer equation appears in Appendix A2 of <xref ref-type="bibr" rid="bib1.bibx60" id="text.39"/>. The Omega-V0 governing equations do not include any vertical advection or diffusion. Although Omega-V0 includes a vertical index for performance testing and future expansion, vertical layers are currently redundant.</p>
</sec>
<sec id="Ch1.S2.SS2">
  <label>2.2</label><title>Discretization</title>
      <p id="d2e2082">The horizontal domain is partitioned into polygonal finite-volume cells. Definitions of the mesh variables, differential operators and illustrative figures can be found in <xref ref-type="bibr" rid="bib1.bibx59" id="text.40"/>, Sect. 3, and are not reproduced here.</p>
      <p id="d2e2088">In horizontally discrete form, the governing equations are

            <disp-formula id="Ch1.E11" content-type="numbered"><label>11</label><mml:math id="M71" display="block"><mml:mrow><mml:mtable rowspacing="0.2ex" class="split" displaystyle="true" columnalign="right left"><mml:mtr><mml:mtd><mml:mrow><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:mo>∂</mml:mo><mml:msub><mml:mi>u</mml:mi><mml:mi mathvariant="normal">e</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:mo>∂</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:mfrac></mml:mstyle></mml:mrow></mml:mtd><mml:mtd><mml:mrow><mml:mo>+</mml:mo><mml:msub><mml:mfenced open="[" close="]"><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:mi mathvariant="bold-italic">k</mml:mi><mml:mo>⋅</mml:mo><mml:mi mathvariant="normal">∇</mml:mi><mml:mo>×</mml:mo><mml:msub><mml:mi>u</mml:mi><mml:mi mathvariant="normal">e</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>f</mml:mi><mml:mi mathvariant="normal">v</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mfenced close="]" open="["><mml:mrow><mml:msub><mml:mi>h</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mfenced><mml:mi mathvariant="normal">v</mml:mi></mml:msub></mml:mrow></mml:mfrac></mml:mstyle></mml:mfenced><mml:mi mathvariant="normal">e</mml:mi></mml:msub><mml:mfenced close=")" open="("><mml:mrow><mml:msub><mml:mfenced close="]" open="["><mml:mrow><mml:msub><mml:mi>h</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mfenced><mml:mi mathvariant="normal">e</mml:mi></mml:msub><mml:msubsup><mml:mi>u</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mo>⟂</mml:mo></mml:msubsup></mml:mrow></mml:mfenced><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:mi>g</mml:mi><mml:mi mathvariant="normal">∇</mml:mi><mml:mfenced close=")" 
open="("><mml:mrow><mml:msub><mml:mi>h</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mfenced></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd/><mml:mtd><mml:mrow><mml:mo>-</mml:mo><mml:mi mathvariant="normal">∇</mml:mi><mml:msub><mml:mi>K</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi mathvariant="italic">ν</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msub><mml:msup><mml:mi mathvariant="normal">∇</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msup><mml:msub><mml:mi>u</mml:mi><mml:mi mathvariant="normal">e</mml:mi></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mi mathvariant="italic">ν</mml:mi><mml:mn mathvariant="normal">4</mml:mn></mml:msub><mml:msup><mml:mi mathvariant="normal">∇</mml:mi><mml:mn mathvariant="normal">4</mml:mn></mml:msup><mml:msub><mml:mi>u</mml:mi><mml:mi mathvariant="normal">e</mml:mi></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mi mathvariant="normal">D</mml:mi></mml:msub><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:msub><mml:mi>u</mml:mi><mml:mi mathvariant="normal">e</mml:mi></mml:msub><mml:mo>|</mml:mo><mml:msub><mml:mi>u</mml:mi><mml:mi mathvariant="normal">e</mml:mi></mml:msub><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:msub><mml:mfenced open="[" close="]"><mml:mrow><mml:msub><mml:mi>h</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mfenced><mml:mi mathvariant="normal">e</mml:mi></mml:msub></mml:mrow></mml:mfrac></mml:mstyle><mml:mo>+</mml:mo><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:msub><mml:mi mathvariant="italic">τ</mml:mi><mml:mi mathvariant="normal">e</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mfenced close="]" open="["><mml:mrow><mml:msub><mml:mi>h</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mfenced><mml:mi 
mathvariant="normal">e</mml:mi></mml:msub></mml:mrow></mml:mfrac></mml:mstyle></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math></disp-formula>

          

                <disp-formula specific-use="align" content-type="numbered"><mml:math id="M72" display="block"><mml:mtable displaystyle="true"><mml:mlabeledtr id="Ch1.E12"><mml:mtd><mml:mtext>12</mml:mtext></mml:mtd><mml:mtd><mml:mstyle class="stylechange" displaystyle="true"/></mml:mtd><mml:mtd><mml:mrow><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:mo>∂</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:mo>∂</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:mfrac></mml:mstyle><mml:mo>+</mml:mo><mml:mi mathvariant="normal">∇</mml:mi><mml:mo>⋅</mml:mo><mml:mfenced open="(" close=")"><mml:mrow><mml:msub><mml:mfenced close="]" open="["><mml:mrow><mml:msub><mml:mi>h</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mfenced><mml:mi mathvariant="normal">e</mml:mi></mml:msub><mml:msub><mml:mi>u</mml:mi><mml:mi mathvariant="normal">e</mml:mi></mml:msub></mml:mrow></mml:mfenced><mml:mo>=</mml:mo><mml:mn mathvariant="normal">0</mml:mn><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mlabeledtr><mml:mlabeledtr id="Ch1.E13"><mml:mtd><mml:mtext>13</mml:mtext></mml:mtd><mml:mtd><mml:mstyle class="stylechange" displaystyle="true"/></mml:mtd><mml:mtd><mml:mrow><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:mo>∂</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:msub><mml:mi mathvariant="italic">φ</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:mo>∂</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:mfrac></mml:mstyle><mml:mo>+</mml:mo><mml:mi mathvariant="normal">∇</mml:mi><mml:mo>⋅</mml:mo><mml:mfenced close=")" open="("><mml:mrow><mml:msub><mml:mi>u</mml:mi><mml:mi mathvariant="normal">e</mml:mi></mml:msub><mml:msub><mml:mfenced open="[" close="]"><mml:mrow><mml:msub><mml:mi>h</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:msub><mml:mi mathvariant="italic">φ</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mfenced><mml:mi 
mathvariant="normal">e</mml:mi></mml:msub></mml:mrow></mml:mfenced><mml:mo>=</mml:mo><mml:msub><mml:mi mathvariant="italic">κ</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msub><mml:msub><mml:mi>h</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:msup><mml:mi mathvariant="normal">∇</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msup><mml:msub><mml:mi mathvariant="italic">φ</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mi mathvariant="italic">κ</mml:mi><mml:mn mathvariant="normal">4</mml:mn></mml:msub><mml:msub><mml:mi>h</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:msup><mml:mi mathvariant="normal">∇</mml:mi><mml:mn mathvariant="normal">4</mml:mn></mml:msup><mml:msub><mml:mi mathvariant="italic">φ</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mlabeledtr></mml:mtable></mml:math></disp-formula>

          where subscripts <inline-formula><mml:math id="M73" display="inline"><mml:mi>i</mml:mi></mml:math></inline-formula>, “e”, and “v” indicate cell, edge, and vertex locations (<inline-formula><mml:math id="M74" display="inline"><mml:mi>i</mml:mi></mml:math></inline-formula> was chosen for cell because “c” and “e” look similar). Here square brackets <inline-formula><mml:math id="M75" display="inline"><mml:mrow><mml:mo>[</mml:mo><mml:mo>⋅</mml:mo><mml:msub><mml:mo>]</mml:mo><mml:mi mathvariant="normal">e</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="M76" display="inline"><mml:mrow><mml:mo>[</mml:mo><mml:mo>⋅</mml:mo><mml:msub><mml:mo>]</mml:mo><mml:mi mathvariant="normal">v</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> represent quantities that are interpolated to edge and vertex locations. The interpolation is typically centered, but may vary by method, particularly for advection schemes. For vector quantities, <inline-formula><mml:math id="M77" display="inline"><mml:mrow><mml:msub><mml:mi>u</mml:mi><mml:mi mathvariant="normal">e</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> denotes the normal component at the center of the edge, while <inline-formula><mml:math id="M78" display="inline"><mml:mrow><mml:msubsup><mml:mi>u</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mo>⟂</mml:mo></mml:msubsup></mml:mrow></mml:math></inline-formula> denotes the tangential component.</p>
      <p id="d2e2538">Documentation of the grid convergence rates of individual operators is provided in <xref ref-type="bibr" rid="bib1.bibx10" id="text.41"/> (Sect. 4.1 and Fig. 1). All TRiSK spatial operators demonstrate second-order convergence on a uniform hexagon grid, except for the curl on vertices, which is first order. The curl interpolated from vertices to cell centers regains second-order convergence. The rates of convergence are typically less than second order on nonuniform meshes, including spherical meshes. Tracer advection uses center-weighted thickness and tracer values at each edge. The boundary conditions are no normal flow and no-slip. This is accomplished by setting the edge-normal velocity <inline-formula><mml:math id="M79" display="inline"><mml:mrow><mml:msub><mml:mi>u</mml:mi><mml:mi mathvariant="normal">e</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mn mathvariant="normal">0</mml:mn></mml:mrow></mml:math></inline-formula> on the boundary for flux and vorticity calculations.</p>
</sec>
</sec>
<sec id="Ch1.S3">
  <label>3</label><title>Code design</title>
      <p id="d2e2568">Omega-V0 has been designed to perform efficiently on modern parallel, hybrid HPC architectures. The design utilizes a domain decomposition of the unstructured mesh across parallel nodes with data communicated between the partitions using the <xref ref-type="bibr" rid="bib1.bibx43" id="text.42"/>. Within a single shared-memory node, we have adopted the Kokkos <xref ref-type="bibr" rid="bib1.bibx76" id="paren.43"/> programming model, which provides the capability to map computational work to either CPU cores (host) or GPU accelerators (device); however, in the current version of Omega, when built to utilize GPUs, compute-intensive kernels are executed exclusively on the device. This required that Omega be written in the C<inline-formula><mml:math id="M80" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> programming language <xref ref-type="bibr" rid="bib1.bibx72 bib1.bibx73" id="paren.44"/>. We have added some additional abstractions or aliases to simplify some of the Kokkos syntax and make it more accessible to Omega developers (see Sect. <xref ref-type="sec" rid="Ch1.S3.SS2"/>). Kokkos is a well-supported, portable framework  <xref ref-type="bibr" rid="bib1.bibx75 bib1.bibx76" id="paren.45"/> that has enabled us to create a performance-portable ocean model.</p>
      <p id="d2e2596">All components of Omega follow a process of design document writing and review, and then code writing, testing and review. Each feature is accompanied by a user's guide, a developer's guide, and the original design document on the Omega documentation website <xref ref-type="bibr" rid="bib1.bibx7" id="paren.46"/>. This detailed information, created by each developer during code development, will serve as a comprehensive reference for the completed model.</p>
<sec id="Ch1.S3.SS1">
  <label>3.1</label><title>Framework</title>
<sec id="Ch1.S3.SS1.SSS1">
  <label>3.1.1</label><title>Domain decomposition</title>
      <p id="d2e2617">As described above, the top level of parallelism is a domain decomposition of the horizontal mesh. The Metis library <xref ref-type="bibr" rid="bib1.bibx32" id="paren.47"/> creates the decomposition, given a mesh connectivity computed from JIGSAW <xref ref-type="bibr" rid="bib1.bibx20" id="paren.48"/>. Unlike the previous MPAS model, the decomposition is computed at startup with a call to the Metis library rather than being computed off-line. This eliminates a preprocessing step and the need to maintain partition files for different model configurations. The number of tasks is determined at run time from either the MPI environment when running a standalone ocean model or from a coupled model driver when Omega is run coupled within E3SM. The actual layout of MPI tasks across CPU cores and GPUs within a node can be set with job submission scripts. Multiple domain decompositions may be run concurrently. This will be useful when certain parts of the model, such as the barotropic mode solver, analysis tasks, or subgrid processes, would run more efficiently on a different processor layout.</p>
</sec>
<sec id="Ch1.S3.SS1.SSS2">
  <label>3.1.2</label><title>Message passing infrastructure</title>
      <p id="d2e2634">Message passing is used to communicate data between the horizontal domains. The Omega base infrastructure layer provides simple interfaces for performing communications such as broadcasting data from a single task, updating domain halos, and performing global reductions such as sums across the global domain. All communication routines can determine whether the data exists on the host or device and can utilize GPU-aware MPI capabilities wherever available. The global sum function is bit-reproducible for all data types. For single-precision (32-bit) floating point types, the sums are performed in double precision (64-bit) and converted back to single precision. For double-precision floating point data, the sums are computed using the double-double algorithms of <xref ref-type="bibr" rid="bib1.bibx37" id="text.49"/> and <xref ref-type="bibr" rid="bib1.bibx29" id="text.50"/> following the implementation of <xref ref-type="bibr" rid="bib1.bibx28" id="text.51"/>.</p>
      <p id="d2e2646">An MPI halo exchange module handles the transfer of data across interfaces between adjacent partitions in a given domain decomposition. This implementation supports exchanges of multidimensional arrays of fundamental data types residing in either CPU or GPU memory. The module is designed to minimize latency by utilizing non-blocking MPI routines (i.e., <monospace>MPI_Isend</monospace> and <monospace>MPI_Irecv</monospace>) and supports a user-configurable halo width; all test cases reported here use a halo width of three cells. In Omega-V0, three types of halo exchanges are performed during each time step, each with different message sizes: layer thickness, edge-normal velocity, and an aggregated set of five tracer fields.</p>

      <fig id="F1" specific-use="star"><label>Figure 1</label><caption><p id="d2e2657">Left panel: ratio of execution time per timestep for halo exchanges using host-staged MPI versus GPU-aware MPI on the Frontier supercomputer at Oak Ridge National Laboratory with Cray MPICH. Results are shown for three different planar mesh sizes, utilizing eight MPI tasks per node. Right panel: total halo exchange communication volume per time step. Bars represent the aggregate data sent per time step, summed over all communicating tasks and exchanges and averaged per MPI task.</p></caption>
            <graphic xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026-f01.png"/>

          </fig>

      <p id="d2e2667">To maximize performance on GPU-accelerated systems, the halo exchange module can leverage GPU-aware MPI, enabled via a compile-time build flag. When built for GPU execution, halo elements are packed into and unpacked from contiguous buffers directly on the device using parallel kernels. With GPU-aware MPI enabled, send and receive buffers in device memory are passed directly to the MPI routines; otherwise, the packed send buffers must be copied from device to host for traditional host-staged MPI, and the received buffers are copied back from host to device before unpacking. Benchmarking on Frontier at Oak Ridge National Laboratory with GPU-aware Cray MPICH demonstrates that this approach significantly reduces halo exchange overhead, yielding approximately a 4–<inline-formula><mml:math id="M81" display="inline"><mml:mrow><mml:mn mathvariant="normal">6</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> reduction in halo exchange time per time step compared to host-staged MPI at large node counts, where communication is latency dominated (Fig. <xref ref-type="fig" rid="F1"/>).</p>
</sec>
<sec id="Ch1.S3.SS1.SSS3">
  <label>3.1.3</label><title>Other utilities</title>
      <p id="d2e2690">Configuration of Omega is done through an input configuration file in YAML <xref ref-type="bibr" rid="bib1.bibx84" id="paren.52"/> format. We use the yaml-cpp library <xref ref-type="bibr" rid="bib1.bibx9" id="paren.53"/> to read and parse the configuration on initialization. Logging of both informational and error messages is part of Omega's logging and error handling capabilities that are built on the spdlog library <xref ref-type="bibr" rid="bib1.bibx40" id="paren.54"/>. This supports varying levels of error/log severity, and messages can be written from either a master task or from all tasks, depending on a build-time configuration.</p>
      <p id="d2e2702">All input and output are performed in parallel using the SCORPIO library <xref ref-type="bibr" rid="bib1.bibx38" id="paren.55"/> that writes distributed data using a runtime configuration of IO tasks. It supports both NetCDF <xref ref-type="bibr" rid="bib1.bibx77" id="paren.56"/> and ADIOS <xref ref-type="bibr" rid="bib1.bibx24" id="paren.57"/> formats. Multiple IO streams can be defined with each stream having its own frequency of input/output and its own set of fields. The details of each stream are specified by the user in the streams section of the input configuration file. Each field available for IO is defined within Omega using a field class that defines the metadata associated with the field and attaches/detaches the data array as needed. The field creation interfaces ensure that all required metadata are defined in accordance with the NetCDF CF metadata conventions <xref ref-type="bibr" rid="bib1.bibx19" id="paren.58"/>.</p>
      <p id="d2e2717">A time manager tracks model time in the context of a number of supported calendars. It uses integer arithmetic to avoid round-off in accumulated time. It is a reimplementation of the Earth System Modeling Framework <xref ref-type="bibr" rid="bib1.bibx21" id="paren.59"/> time manager, which has been simplified for clarity and streamlined by removing unnecessary functionalities, such as Fortran interfaces. It includes support for a model clock, time instants, time intervals (e.g., time step), and alarms for various model events like forcing and IO.</p>
      <p id="d2e2723">A profiling interface called Pacer is used to keep track of the computational time spent in various model processes using application level markers that designate beginning and end of each process. These timers are aggregated across multiple ranks and a summary report is generated when running in parallel. This timing infrastructure is based on our extensions to the General Purpose Timing Library (GPTL) <xref ref-type="bibr" rid="bib1.bibx62" id="paren.60"/>.</p>
</sec>
</sec>
<sec id="Ch1.S3.SS2">
  <label>3.2</label><title>Performance portability with Kokkos</title>
      <p id="d2e2739">To achieve performance portability, Omega has adopted the Kokkos Programming Model. The Kokkos Programming Model is implemented as a C<inline-formula><mml:math id="M82" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> library and provides abstractions necessary to achieve performance on the diverse set of modern computing architectures. Kokkos abstractions can be divided into abstractions for data storage (View, Memory Space, Memory Layout, and Memory Traits) and parallel execution (Execution Space, Execution Policy, and Execution Pattern). Omega builds its own abstractions on top of these fundamental components to provide a simpler interface for domain scientists.</p>
      <p id="d2e2752">For data storage, Omega uses the Kokkos <monospace>View</monospace> data structure. For convenience, type aliases are provided for commonly needed views of fundamental data types, such as <list list-type="bullet"><list-item>
      <p id="d2e2761"><monospace>Array1DI4</monospace>: device-resident one-dimensional array of four-byte integers,</p></list-item><list-item>
      <p id="d2e2767"><monospace>Array3DReal</monospace>: device-resident three-dimensional array of user-configurable floating-point type,</p></list-item><list-item>
      <p id="d2e2773"><monospace>HostArray2DI8</monospace>: host-resident two-dimensional array of eight-byte integers,</p></list-item></list> and similarly for other combinations of ranks and types.</p>
      <p id="d2e2779">For parallel execution, Omega provides a <monospace>parallelFor</monospace> function, that can express parallel iteration over a multi-dimensional index range. Listing <xref ref-type="fig" rid="Li1"/> shows how a simple Fortran loop nest is expressed in Omega. Internally, this function dispatches to the best performing (in the context of Omega) Kokkos execution policy for the chosen compute platform. Currently, we use Kokkos <monospace>MDRangePolicy</monospace> on CPU platforms, but opt to use a one-dimensional <monospace>RangePolicy</monospace> with manual index unpacking on GPUs, as this reduces GPU runtime overhead by replacing the more complex index mapping logic of <monospace>MDRangePolicy</monospace> with a simpler manual calculation performed within each thread. This simplification can lower instruction count and improve memory access patterns and cache utilization. Additionally, flattening the iteration space enables Kokkos's internal heuristics to more effectively select GPU kernel launch parameters, such as block size and grid configuration, thereby improving occupancy and load balancing. On Frontier and Perlmutter GPU nodes, this approach yielded a 10 %–20 % reduction in kernel execution time compared to <monospace>MDRangePolicy</monospace>.<fn id="Ch1.Footn1"><p id="d2e2800">The Kokkos development team is aware of <monospace>MDRangePolicy</monospace> performance issues and, at the time of writing, is actively working to address them. Future Kokkos versions might not require this type of workaround.</p></fn></p><fig id="Li1" specific-use="star"><label>Listing 1</label><caption><p id="d2e2810">Multi-dimensional iteration expressed in Fortran (left panel) and using Omega abstractions (right panel).</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026-l01.png"/>

        </fig>

      <p id="d2e2819">Individual computations in Omega (for example, tendency terms or auxiliary variables) are implemented as C<inline-formula><mml:math id="M83" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> functors, which are classes that implement the function call operator. Functors can be called similarly to normal C<inline-formula><mml:math id="M84" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> functions, but may contain an internal state. In Omega, functors are used to represent computations for a given mesh element (e.g., vertex, cell, or edge) index and over a chunk of vertical levels. Our strategy is to design functors that perform computations over contiguous chunks of vertical indices with a chunk size known at compile time, to facilitate vectorization on CPUs. For GPU execution, the chunk size is set to 1 to distribute the workload across as many GPU threads as possible. To simplify the calling interfaces, Omega functors store as member variables the static data needed to implement their operation, such as mesh connectivity or geometry information. Variable input data are passed as arguments.</p>
      <p id="d2e2842">To give a concrete example, a functor that implements the kinetic energy gradient tendency term is shown in Listing <xref ref-type="fig" rid="Li2"/>. Its constructor takes a pointer to the <monospace>HorzMesh</monospace> object so that the functor can store pointers to the <monospace>CellsOnEdge</monospace> connectivity array and the <monospace>DcEdge</monospace> geometry array. The <monospace>operator()</monospace> implements the kinetic energy gradient computation for the edge index <monospace>IEdge</monospace> and over the range <monospace>[KChunk * VecLength, KChunk * VecLength + VecLength)</monospace> of vertical levels. This functor can then be used to compute the tendency term over the whole mesh by using the <monospace>parallelFor</monospace> function, as shown in Listing <xref ref-type="fig" rid="Li3"/>.</p>

      <fig id="Li2" specific-use="star"><label>Listing 2</label><caption><p id="d2e2873">Kinetic energy gradient functor in Omega.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026-l02.png"/>

        </fig>

      <fig id="Li3" specific-use="star"><label>Listing 3</label><caption><p id="d2e2884">Computation of kinetic energy gradient over the whole mesh.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026-l03.png"/>

        </fig>

      <p id="d2e2894">Omega tendencies are composed of multiple terms. The functor approach makes it possible to easily switch between computing multiple tendency terms in one parallel loop or in separate parallel loops. For example, given another functor that computes the sea surface height (SSH) gradient term <monospace>SSHGradOnEdge</monospace>, the kinetic energy and the SSH gradients can be computed together or separately, as shown in Listing <xref ref-type="fig" rid="Li4"/>. Kernel fusion is a powerful optimization technique that often results in better performing code due to reduced overheads and data reuse. However, overuse of this optimization may result in high register usage, which can sometimes lead to worse performance. Therefore, having the flexibility to experiment with different splittings is important. Due to the combinatorial explosion of fusion candidates in large programs, manual kernel fusion works best when guided by profiling data and algorithmic domain knowledge.</p>

      <fig id="Li4" specific-use="star"><label>Listing 4</label><caption><p id="d2e2904">Split (left panel) or fused (right panel) computation of two tendency terms.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026-l04.png"/>

        </fig>

      <p id="d2e2913">The flat multi-dimensional parallelism approach with vertical chunking is expected to work well for a stacked shallow water solver like Omega-0. Future versions of Omega will incorporate vertical dynamics and advanced ocean physics parametrizations, with more complicated computational patterns involving vertical dependencies. In that case, we believe that the outlined approach and memory layout can still lead to good CPU and GPU performance, as long as most vertical operations can be parallelized on GPUs. This will likely require the use of more advanced Kokkos features such as hierarchical parallelism, batched parallel reduce and scan operations, and even writing different algorithms for CPUs and GPUs in select cases. This approach has already been successfully demonstrated by EAMXX in the context of an atmosphere model. Our experiences with applying it to the ocean component will be reported in articles presenting future Omega versions.</p>
</sec>
<sec id="Ch1.S3.SS3">
  <label>3.3</label><title>Code organization and C<inline-formula><mml:math id="M85" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> classes</title>
      <p id="d2e2935">Omega is organized into modularized classes to handle major pieces of the PDE solver such as Decomp, Halo, Mesh, State variables, Auxiliary variables, Timestepping, and Tendency terms. The decomposition of the mesh into local MPI rank subdomains is performed online in the Decomp class with the resulting local subdomain mesh represented in the Mesh class. The infrastructure necessary to perform message passing on the host and device between local subdomain halo regions is contained in the Halo class. The State class manages the prognostic variables, while the Auxiliary variable class stores and computes diagnostic quantities derived directly from the prognostic variables and used in the tendency terms, e.g., kinetic energy and potential vorticity. The constructor of each tendency functor takes in and stores static mesh information as private member variables, which simplifies the calling arguments in the PDE solution.</p>
</sec>
<sec id="Ch1.S3.SS4">
  <label>3.4</label><title>Build and internal testing</title>
      <p id="d2e2946">The Omega build system, built on the widely adopted CMake <xref ref-type="bibr" rid="bib1.bibx35" id="paren.61"/> tool, establishes a robust framework for managing the compilation process. It operates in two distinct modes: standalone and E3SM component. In standalone mode, Omega generates a generic E3SM case and derives its build configurations from it. In contrast, the E3SM component build mode leverages build configurations provided by the CIME <xref ref-type="bibr" rid="bib1.bibx3" id="paren.62"/> build system within an existing E3SM case. The build process, meticulously defined in the top-level CMakeLists.txt file, is segmented into four sequential steps: Setup, Update, Build, and Output.</p>
      <p id="d2e2955">A comprehensive testing strategy ensures Omega's quality assurance and continuous integration. All major Omega algorithms and software frameworks are rigorously validated using CTest <xref ref-type="bibr" rid="bib1.bibx36" id="paren.63"/>, CMake's integrated testing tool. This enables the execution of functional tests, activated by setting OMEGA_BUILD_TEST=ON during the CMake configuration. These tests are critical to verify the correct functionality and integrity of the codebase.</p>
      <p id="d2e2961">Furthermore, nightly tests are developed and integrated with CDash <xref ref-type="bibr" rid="bib1.bibx34" id="paren.64"/> to maintain ongoing stability and performance. This integration facilitates automated reporting of test results, providing continuous feedback on the codebase's status. This robust testing infrastructure, which includes both CTest-based functional tests and CDash-driven nightly regressions, is paramount to ensuring the high quality and reliability of the Omega ocean model.</p>
</sec>
</sec>
<sec id="Ch1.S4">
  <label>4</label><title>Verification tests</title>
      <p id="d2e2977">A series of Omega-V0 tests were conducted to verify the accuracy of the model solution, and document the computing performance across several platforms. Convergence studies against exact solutions in idealized domains were conducted with the manufactured solution, tracer transport, and barotropic gyre test cases. The global wind-driven simulation was designed to introduce coastlines, bathymetry, and wind forcing, in order to test the workflow for realistic domains.</p>
      <p id="d2e2980">A Python package <monospace>polaris</monospace> <xref ref-type="bibr" rid="bib1.bibx5" id="paren.65"/> was developed to facilitate the set-up and execution of verification and validation tests for Omega. <monospace>polaris</monospace> is responsible for creating the MPAS mesh using the JIGSAW library <xref ref-type="bibr" rid="bib1.bibx20" id="paren.66"/>, generating the initial condition, configuring the forward model run and linking the model executable, and conducting analysis including producing visualizations on the native MPAS mesh. <monospace>polaris</monospace> facilitates the creation of identical test cases for MPAS-Ocean and Omega, supporting the benchmarking of Omega implementations against MPAS-Ocean.</p>
<sec id="Ch1.S4.SS1">
  <label>4.1</label><title>Manufactured solution</title>
      <p id="d2e3005">The method of manufactured solutions is commonly used for the code verification of partial differential equation (PDE) solvers. Unlike code validation, which assesses whether a model captures the correct physics by comparing its results to experimental or observational data, code verification is a purely mathematical exercise that evaluates whether a code correctly implements the intended numerical method. The manufactured solution approach was formalized in the computational science literature by <xref ref-type="bibr" rid="bib1.bibx63" id="text.67"/> and further refined in <xref ref-type="bibr" rid="bib1.bibx61" id="text.68"/>. The key idea is to choose an exact solution, substitute it into the PDE, and include the residual terms as a source term. This enables the creation of analytic test cases for the full shallow water system, including non-linear terms. It stands out in this respect, as other shallow water test cases, such as the coastal Kelvin wave or the inertia-gravity wave test case <xref ref-type="bibr" rid="bib1.bibx11" id="paren.69"/>, only provide analytic solutions to the linear, inviscid form of the equations. Therefore, the manufactured solution represents the single best test case for the verification of all terms in the model. We manufactured our solution to match the test case described in detail in <xref ref-type="bibr" rid="bib1.bibx11" id="text.70"/> Section 2.10. However, as noted in that work, any smooth solution in space and time can be used, provided that the source terms are correctly defined. The test case verifies the time-stepping scheme along with the SSH gradient, Coriolis, and non-linear advection terms. The source term was only modified to include both Laplacian and biharmonic dissipation.</p>
      <p id="d2e3020">The <monospace>polaris</monospace> system automates the testing of the manufactured solution for both MPAS-Ocean and Omega. The expected convergence rate is second order, as shown in <xref ref-type="bibr" rid="bib1.bibx11" id="text.71"/> (Figs. 13 and 19). These results are reproduced in Figs. <xref ref-type="fig" rid="F2"/> and <xref ref-type="fig" rid="F3"/>, which are generated by <monospace>polaris</monospace> using data from regular planar hexagonal meshes with grid cells of width 200, 100, 50, and 25 km. The corresponding time steps are 300, 150, 75, and 37.5 s, and the error was measured after 10 h of simulation time. All tests use Laplacian and biharmonic viscosity coefficients of <inline-formula><mml:math id="M86" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">ν</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:mn mathvariant="normal">1.5</mml:mn><mml:mo>×</mml:mo><mml:msup><mml:mn mathvariant="normal">10</mml:mn><mml:mn mathvariant="normal">6</mml:mn></mml:msup></mml:mrow></mml:math></inline-formula> m<sup>2</sup> s<sup>−1</sup> and <inline-formula><mml:math id="M89" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">ν</mml:mi><mml:mn mathvariant="normal">4</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:mn mathvariant="normal">5</mml:mn><mml:mo>×</mml:mo><mml:msup><mml:mn mathvariant="normal">10</mml:mn><mml:mn mathvariant="normal">13</mml:mn></mml:msup></mml:mrow></mml:math></inline-formula> m<sup>4</sup> s<sup>−1</sup> respectively, classical fourth-order Runge–Kutta time-stepping (e.g. Sect. 24.2 of <xref ref-type="bibr" rid="bib1.bibx27" id="altparen.72"/>), and a center-weighted thickness advection. The tracer equation (Eq. 
<xref ref-type="disp-formula" rid="Ch1.E13"/>) is not used in this test.</p><fig id="F2"><label>Figure 2</label><caption><p id="d2e3131">Convergence plot for Omega with the Manufactured Solution Test, showing the L2 norm of the difference between the computed and analytic solution in SSH.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026-f02.png"/>

        </fig>

      <fig id="F3" specific-use="star"><label>Figure 3</label><caption><p id="d2e3143">SSH of the Manufactured Solution Test with a 25 km grid.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026-f03.png"/>

        </fig>

      <p id="d2e3152">Individual operators such as the gradient, divergence, curl, and tangential velocities were verified in the early stages of model development. These used simple analytic functions such as sine waves on a doubly-periodic domain, where the exact solution was easily computed. The test setup follows <xref ref-type="bibr" rid="bib1.bibx10" id="text.73"/> (Sect. 4.1), and was able to reproduce the second-order convergence for TRiSK operators shown in Fig. 1 of that paper. The manufactured solution test is a superset of these tests, as it includes these individual terms.</p>
</sec>
<sec id="Ch1.S4.SS2">
  <label>4.2</label><title>Tracer transport on the sphere</title>
      <p id="d2e3166">Tracer transport was verified using a fixed angular velocity field and a tracer distribution that is advected around the sphere. This is named the cosine bell test case, and is available in <monospace>polaris</monospace> under <monospace>cosine_bell</monospace>. It was first described in <xref ref-type="bibr" rid="bib1.bibx81" id="text.74"/>, but the variant from Sect. 3a of <xref ref-type="bibr" rid="bib1.bibx66" id="text.75"/> is used here. A flow field representing solid-body rotation transports a bell-shaped perturbation in a tracer <inline-formula><mml:math id="M92" display="inline"><mml:mi mathvariant="italic">ψ</mml:mi></mml:math></inline-formula> once around the sphere, and the exact solution is the original distribution after one full rotation. The standard case evaluates error convergence with resolution, where the time step varies in proportion to the cell size. Another <monospace>polaris</monospace> test performs two runs of the cosine bell at coarse resolution, once with 12 and once with 24 cores, to verify bit-for-bit identical results for tracer advection across different core counts. A final <monospace>polaris</monospace> test with the cosine bell configuration runs for two time steps at coarse resolution, then reruns the second time step as a restart run to verify the bit-for-bit restart capability for tracer advection.</p>
      <p id="d2e3195">The cosine bell domain is an aquaplanet without continents, with a uniform depth of 300 m. The initial bell is defined by a passive tracer

            <disp-formula id="Ch1.E14" content-type="numbered"><label>14</label><mml:math id="M93" display="block"><mml:mrow><mml:mi mathvariant="italic">ψ</mml:mi><mml:mo>=</mml:mo><mml:mfenced close="" open="{"><mml:mtable class="array" columnalign="left left"><mml:mtr><mml:mtd><mml:mrow><mml:mfenced close=")" open="("><mml:mrow><mml:msub><mml:mi mathvariant="italic">ψ</mml:mi><mml:mn mathvariant="normal">0</mml:mn></mml:msub><mml:mo>/</mml:mo><mml:mn mathvariant="normal">2</mml:mn></mml:mrow></mml:mfenced><mml:mo>[</mml:mo><mml:mn mathvariant="normal">1</mml:mn><mml:mo>+</mml:mo><mml:mi>cos⁡</mml:mi><mml:mo>(</mml:mo><mml:mi mathvariant="italic">π</mml:mi><mml:mi>r</mml:mi><mml:mo>/</mml:mo><mml:mi>R</mml:mi><mml:mo>)</mml:mo><mml:mo>]</mml:mo></mml:mrow></mml:mtd><mml:mtd><mml:mrow><mml:mi mathvariant="normal">if</mml:mi><mml:mspace width="0.25em" linebreak="nobreak"/><mml:mi>r</mml:mi><mml:mo>&lt;</mml:mo><mml:mi>R</mml:mi></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn mathvariant="normal">0</mml:mn></mml:mtd><mml:mtd><mml:mrow><mml:mi mathvariant="normal">if</mml:mi><mml:mspace width="0.25em" linebreak="nobreak"/><mml:mi>r</mml:mi><mml:mo>≥</mml:mo><mml:mi>R</mml:mi></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mfenced></mml:mrow></mml:math></disp-formula>

          where <inline-formula><mml:math id="M94" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">ψ</mml:mi><mml:mn mathvariant="normal">0</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow></mml:math></inline-formula> and the bell radius, <inline-formula><mml:math id="M95" display="inline"><mml:mrow><mml:mi>R</mml:mi><mml:mo>=</mml:mo><mml:mi>a</mml:mi><mml:mo>/</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:math></inline-formula>, with <inline-formula><mml:math id="M96" display="inline"><mml:mi>a</mml:mi></mml:math></inline-formula> representing the radius of the sphere, as shown in Fig. <xref ref-type="fig" rid="F4"/>. The zonal and meridional components of the fixed velocity field are

                <disp-formula specific-use="align" content-type="numbered"><mml:math id="M97" display="block"><mml:mtable displaystyle="true"><mml:mlabeledtr id="Ch1.E15"><mml:mtd><mml:mtext>15</mml:mtext></mml:mtd><mml:mtd><mml:mstyle class="stylechange" displaystyle="true"/></mml:mtd><mml:mtd><mml:mrow><mml:mstyle class="stylechange" displaystyle="true"/><mml:mi>u</mml:mi><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:mn mathvariant="normal">2</mml:mn><mml:mi mathvariant="italic">π</mml:mi><mml:mo>(</mml:mo><mml:mi>a</mml:mi><mml:mi>cos⁡</mml:mi><mml:mi mathvariant="italic">θ</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mi mathvariant="italic">τ</mml:mi></mml:mfrac></mml:mstyle><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mlabeledtr><mml:mlabeledtr id="Ch1.E16"><mml:mtd><mml:mtext>16</mml:mtext></mml:mtd><mml:mtd><mml:mstyle class="stylechange" displaystyle="true"/></mml:mtd><mml:mtd><mml:mrow><mml:mstyle displaystyle="true" class="stylechange"/><mml:mi>v</mml:mi><mml:mo>=</mml:mo><mml:mn mathvariant="normal">0</mml:mn><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mlabeledtr></mml:mtable></mml:math></disp-formula>

          where <inline-formula><mml:math id="M98" display="inline"><mml:mi mathvariant="italic">τ</mml:mi></mml:math></inline-formula> is the time it takes to complete one full rotation around the globe and <inline-formula><mml:math id="M99" display="inline"><mml:mi mathvariant="italic">θ</mml:mi></mml:math></inline-formula> is the latitude. The default value of the time period <inline-formula><mml:math id="M100" display="inline"><mml:mi mathvariant="italic">τ</mml:mi></mml:math></inline-formula> is 24 d. Momentum and thickness are not evolved in this test.</p>

      <fig id="F4"><label>Figure 4</label><caption><p id="d2e3394">Initial tracer concentration for the Cosine Bell Advection Test.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026-f04.png"/>

        </fig>

      <p id="d2e3404">The convergence test uses spherical icosahedral meshes, each with average grid cell widths of 480, 240, 120, and 60 km. These meshes are constructed by subdividing the triangular faces of an icosahedron 4, 5, 6 and 7 times, respectively, projecting the vertices onto the sphere, and then creating the dual spherical Voronoi mesh. The results of the convergence test are shown in Fig. <xref ref-type="fig" rid="F5"/>. The order of convergence is 1.364 for the centered advection scheme. MPAS-Ocean was tested using this lower-order advection scheme with the same mesh files and obtained an identical convergence rate.</p>

      <fig id="F5"><label>Figure 5</label><caption><p id="d2e3411">Convergence plot for the Cosine Bell Advection Test with Omega-V0 using first-order centered advection.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026-f05.png"/>

        </fig>

</sec>
<sec id="Ch1.S4.SS3">
  <label>4.3</label><title>Wind-driven barotropic gyre</title>
      <p id="d2e3429">The barotropic gyre test case is used to evaluate barotropic ocean dynamics with Laplacian viscosity and surface wind forcing. It is based on the Munk Model <xref ref-type="bibr" rid="bib1.bibx44" id="paren.76"/>, which is an idealized configuration of an ocean basin. Using a single layer in a rectangular domain on a <inline-formula><mml:math id="M101" display="inline"><mml:mi mathvariant="italic">β</mml:mi></mml:math></inline-formula>-plane, Munk was able to produce a basin-wide circulation with a western boundary current. The width of the jet is controlled by a single parameter <inline-formula><mml:math id="M102" display="inline"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mi>m</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mo>(</mml:mo><mml:mi mathvariant="italic">ν</mml:mi><mml:mo>/</mml:mo><mml:mi mathvariant="italic">β</mml:mi><mml:msup><mml:mo>)</mml:mo><mml:mrow><mml:mn mathvariant="normal">1</mml:mn><mml:mo>/</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, where <inline-formula><mml:math id="M103" display="inline"><mml:mrow><mml:mi mathvariant="italic">β</mml:mi><mml:mo>=</mml:mo><mml:mi mathvariant="normal">d</mml:mi><mml:mi>f</mml:mi><mml:mo>/</mml:mo><mml:mi mathvariant="normal">d</mml:mi><mml:mi>y</mml:mi></mml:mrow></mml:math></inline-formula> is the meridional gradient of the Coriolis parameter and <inline-formula><mml:math id="M104" display="inline"><mml:mi mathvariant="italic">ν</mml:mi></mml:math></inline-formula> is the kinematic viscosity. The jet becomes narrower as <inline-formula><mml:math id="M105" display="inline"><mml:mi mathvariant="italic">β</mml:mi></mml:math></inline-formula> decreases or <inline-formula><mml:math id="M106" display="inline"><mml:mi mathvariant="italic">ν</mml:mi></mml:math></inline-formula> increases <xref ref-type="bibr" rid="bib1.bibx78" id="paren.77"><named-content content-type="post">Eq. 14.43</named-content></xref>. 
Alternatively, a similar barotropic gyre can be generated through a balance between wind stress and bottom stress, rather than viscosity. This variant is known as the Stommel Model <xref ref-type="bibr" rid="bib1.bibx70 bib1.bibx52" id="paren.78"><named-content content-type="post">Appendix B</named-content></xref>, which is not considered in this study.
      <p id="d2e3525">The Munk Model serves as an excellent test case for the shallow water equations, as it is one of the few configurations with a physically meaningful circulation and an analytical solution. The wind stress field (<inline-formula><mml:math id="M107" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">τ</mml:mi><mml:mi>x</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M108" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">τ</mml:mi><mml:mi>y</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>) is given by 

                <disp-formula id="Ch1.E17" specific-use="align" content-type="subnumberedsingle"><mml:math id="M109" display="block"><mml:mtable displaystyle="true"><mml:mlabeledtr id="Ch1.E17.18"><mml:mtd><mml:mtext>17a</mml:mtext></mml:mtd><mml:mtd><mml:mstyle displaystyle="true" class="stylechange"/></mml:mtd><mml:mtd><mml:mrow><mml:mstyle class="stylechange" displaystyle="true"/><mml:msub><mml:mi mathvariant="italic">τ</mml:mi><mml:mi>x</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi mathvariant="italic">τ</mml:mi><mml:mn mathvariant="normal">0</mml:mn></mml:msub><mml:mi>cos⁡</mml:mi><mml:mfenced open="(" close=")"><mml:mrow><mml:mi mathvariant="italic">π</mml:mi><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mi>y</mml:mi><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mi>y</mml:mi></mml:msub></mml:mrow></mml:mfrac></mml:mstyle></mml:mrow></mml:mfenced><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mlabeledtr><mml:mlabeledtr id="Ch1.E17.19"><mml:mtd><mml:mtext>17b</mml:mtext></mml:mtd><mml:mtd><mml:mstyle displaystyle="true" class="stylechange"/></mml:mtd><mml:mtd><mml:mrow><mml:mstyle displaystyle="true" class="stylechange"/><mml:msub><mml:mi mathvariant="italic">τ</mml:mi><mml:mi>y</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mn mathvariant="normal">0</mml:mn><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mlabeledtr></mml:mtable></mml:math></disp-formula>

          on a domain of width <inline-formula><mml:math id="M110" display="inline"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mi>x</mml:mi></mml:msub><mml:mo>×</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mi>y</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>. MPAS-Ocean and Omega are evaluated against the analytic solution for the streamfunction <inline-formula><mml:math id="M111" display="inline"><mml:mi mathvariant="normal">Ψ</mml:mi></mml:math></inline-formula> under no slip boundary conditions <xref ref-type="bibr" rid="bib1.bibx78" id="paren.79"><named-content content-type="post">p. 743, Eq. 19.49</named-content></xref>:

            <disp-formula id="Ch1.E20" content-type="numbered"><label>18</label><mml:math id="M112" display="block"><mml:mtable class="split" rowspacing="0.2ex" displaystyle="true" columnalign="right left"><mml:mtr><mml:mtd><mml:mrow><mml:mi mathvariant="normal">Ψ</mml:mi></mml:mrow></mml:mtd><mml:mtd><mml:mrow><mml:mo>=</mml:mo><mml:mi mathvariant="italic">π</mml:mi><mml:mi>sin⁡</mml:mi><mml:mo>(</mml:mo><mml:mi mathvariant="italic">π</mml:mi><mml:mi>y</mml:mi><mml:mo>)</mml:mo><mml:mfenced open="(" close=""><mml:mrow><mml:mn mathvariant="normal">1</mml:mn><mml:mo>-</mml:mo><mml:mover accent="true"><mml:mi>x</mml:mi><mml:mo stretchy="false" mathvariant="normal">̃</mml:mo></mml:mover><mml:mo>-</mml:mo><mml:msup><mml:mi>e</mml:mi><mml:mrow><mml:mo>-</mml:mo><mml:mover accent="true"><mml:mi>x</mml:mi><mml:mo mathvariant="normal" stretchy="false">̃</mml:mo></mml:mover><mml:mo>/</mml:mo><mml:mo>(</mml:mo><mml:mn mathvariant="normal">2</mml:mn><mml:mi mathvariant="italic">ϵ</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:msup><mml:mfenced close="" open="["><mml:mrow><mml:mi>cos⁡</mml:mi><mml:mfenced close=")" open="("><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:msqrt><mml:mn mathvariant="normal">3</mml:mn></mml:msqrt><mml:mover accent="true"><mml:mi>x</mml:mi><mml:mo stretchy="false" mathvariant="normal">̃</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mn mathvariant="normal">2</mml:mn><mml:mi mathvariant="italic">ϵ</mml:mi></mml:mrow></mml:mfrac></mml:mstyle></mml:mfenced></mml:mrow></mml:mfenced></mml:mrow></mml:mfenced></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd/><mml:mtd><mml:mrow><mml:mfenced open="" close=")"><mml:mrow><mml:mfenced close="]" open=""><mml:mrow><mml:mo>+</mml:mo><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:mn mathvariant="normal">1</mml:mn><mml:mo>-</mml:mo><mml:mn mathvariant="normal">2</mml:mn><mml:mi mathvariant="italic">ϵ</mml:mi></mml:mrow><mml:msqrt><mml:mn 
mathvariant="normal">3</mml:mn></mml:msqrt></mml:mfrac></mml:mstyle><mml:mi>sin⁡</mml:mi><mml:mfenced open="(" close=")"><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:msqrt><mml:mn mathvariant="normal">3</mml:mn></mml:msqrt><mml:mover accent="true"><mml:mi>x</mml:mi><mml:mo mathvariant="normal" stretchy="false">̃</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mn mathvariant="normal">2</mml:mn><mml:mi mathvariant="italic">ϵ</mml:mi></mml:mrow></mml:mfrac></mml:mstyle></mml:mfenced></mml:mrow></mml:mfenced><mml:mo>+</mml:mo><mml:mi mathvariant="italic">ϵ</mml:mi><mml:msup><mml:mi>e</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mover accent="true"><mml:mi>x</mml:mi><mml:mo mathvariant="normal" stretchy="false">̃</mml:mo></mml:mover><mml:mo>-</mml:mo><mml:mn mathvariant="normal">1</mml:mn><mml:mo>)</mml:mo><mml:mo>/</mml:mo><mml:mi mathvariant="italic">ϵ</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:mfenced><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>

          where <inline-formula><mml:math id="M113" display="inline"><mml:mrow><mml:mover accent="true"><mml:mi>x</mml:mi><mml:mo mathvariant="normal" stretchy="false">̃</mml:mo></mml:mover><mml:mo>=</mml:mo><mml:mi>x</mml:mi><mml:mo>/</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mi>x</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="M114" display="inline"><mml:mrow><mml:mi mathvariant="italic">ϵ</mml:mi><mml:mo>=</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mi>m</mml:mi></mml:msub><mml:mo>/</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mi>y</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>. Free slip boundary conditions are not available for either model and are not evaluated.</p>
      <p id="d2e3846">The wind-driven barotropic gyre is available in the <monospace>polaris</monospace> testing environment under the name <monospace>barotropic_gyre</monospace>. It uses a dimensional version of the Munk Model in order to test Omega with realistic parameter values. The domain size is 1200 km by 1200 km, with a resolution of 20 km. The maximum zonal wind stress amplitude, <inline-formula><mml:math id="M115" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">τ</mml:mi><mml:mn mathvariant="normal">0</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:mn mathvariant="normal">0.1</mml:mn></mml:mrow></mml:math></inline-formula>; the horizontal viscosity, <inline-formula><mml:math id="M116" display="inline"><mml:mrow><mml:mi mathvariant="italic">ν</mml:mi><mml:mo>=</mml:mo><mml:mn mathvariant="normal">4</mml:mn><mml:mo>×</mml:mo><mml:msup><mml:mn mathvariant="normal">10</mml:mn><mml:mn mathvariant="normal">2</mml:mn></mml:msup></mml:mrow></mml:math></inline-formula> m<sup>2</sup> s<sup>−1</sup>; the Coriolis parameter, <inline-formula><mml:math id="M119" display="inline"><mml:mrow><mml:mi>f</mml:mi><mml:mo>=</mml:mo><mml:msub><mml:mi>f</mml:mi><mml:mn mathvariant="normal">0</mml:mn></mml:msub><mml:mo>+</mml:mo><mml:mi mathvariant="italic">β</mml:mi><mml:mi>y</mml:mi></mml:mrow></mml:math></inline-formula> with <inline-formula><mml:math id="M120" display="inline"><mml:mrow><mml:msub><mml:mi>f</mml:mi><mml:mn mathvariant="normal">0</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:msup><mml:mn mathvariant="normal">10</mml:mn><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">4</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> s<sup>−1</sup> and <inline-formula><mml:math id="M122" display="inline"><mml:mrow><mml:mi mathvariant="italic">β</mml:mi><mml:mo>=</mml:mo><mml:msup><mml:mn mathvariant="normal">10</mml:mn><mml:mrow><mml:mo>-</mml:mo><mml:mn 
mathvariant="normal">10</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> s<sup>−1</sup> m<sup>−1</sup>. The boundaries are non-periodic in both <inline-formula><mml:math id="M125" display="inline"><mml:mi>x</mml:mi></mml:math></inline-formula> and <inline-formula><mml:math id="M126" display="inline"><mml:mi>y</mml:mi></mml:math></inline-formula>, and the bottom topography is flat.</p>
      <p id="d2e4022">The case begins from rest with a uniform depth of 5000 m and zero SSH perturbations. It is spun up for three years, with a time step of 1 h 23 min, chosen to satisfy the CFL condition with a Courant number of 0.25 and an assumed maximum velocity of 1 m s<sup>−1</sup>. Upon completion, the streamfunction is computed from the native edge-normal velocity. Both MPAS-Ocean and Omega have on the order of 10 % differences in streamfunction magnitude from an approximate analytic solution based on linearizing the vorticity equation (Fig. <xref ref-type="fig" rid="F6"/>). After three years of simulation, small differences between the two models occur at the boundary. This may be due to a different order of operations or to compiler optimization.</p>

      <fig id="F6" specific-use="star"><label>Figure 6</label><caption><p id="d2e4042">Barotropic gyre test case after three years, showing the streamfunction for Omega (top panels) and MPAS-Ocean (bottom panels).</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026-f06.png"/>

        </fig>

</sec>
<sec id="Ch1.S4.SS4">
  <label>4.4</label><title>Wind-driven global simulations</title>
      <p id="d2e4059">The final test of Omega adds realistic components to the configuration: Earth's coastlines and bathymetry on the sphere, climatological wind stress, bottom drag, and the full Coriolis parameter. This results in basin-wide circulations with western boundary currents such as the Gulf Stream and the Kuroshio current, and an Antarctic Circumpolar Current. This is the most realistic configuration one can attain with the shallow water equations, as variations in temperature and salinity, and the layered baroclinic dynamics are necessarily missing. Still, the wind-driven global simulation is an important step from the idealized box of the Munk Model, and demonstrates that the infrastructure for realistic geography and wind forcing is working properly. These components are essential for the upcoming layered version of Omega, where we can make quantitative comparisons to ocean observations.</p>
      <p id="d2e4062">There is no exact solution to the wind-driven global simulation. Therefore, Omega-V0 is compared against MPAS-Ocean, solving the shallow water equations with the same configuration. Both are run with the full nonlinear advection term, with a bottom drag coefficient of <inline-formula><mml:math id="M128" display="inline"><mml:mrow><mml:msub><mml:mi>C</mml:mi><mml:mi mathvariant="normal">D</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msup><mml:mn mathvariant="normal">10</mml:mn><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, Laplacian diffusion with <inline-formula><mml:math id="M129" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">ν</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:msup><mml:mn mathvariant="normal">10</mml:mn><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:mrow></mml:math></inline-formula> m<sup>2</sup> s<sup>−1</sup> and biharmonic with <inline-formula><mml:math id="M132" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">ν</mml:mi><mml:mn mathvariant="normal">4</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:mn mathvariant="normal">1.2</mml:mn><mml:mo>×</mml:mo><mml:msup><mml:mn mathvariant="normal">10</mml:mn><mml:mn mathvariant="normal">11</mml:mn></mml:msup></mml:mrow></mml:math></inline-formula> m<sup>4</sup> s<sup>−1</sup>. The coastal boundaries and realistic bathymetry for these single-layer simulations are interpolated from the GEBCO 2023 <xref ref-type="bibr" rid="bib1.bibx22" id="paren.80"/> and BedMachine Antarctica v3 <xref ref-type="bibr" rid="bib1.bibx42" id="paren.81"/> which have been blended together between 60 and 62° S. The ocean begins at rest with a uniform SSH  of zero, and spins up for 40 d. MPAS-Ocean and Omega read in the identical initial condition file, as they both use the MPAS mesh specification in a NetCDF file format.</p>
      <p id="d2e4175">A sequence of spherical icosahedral meshes were generated using the JIGSAW software via <monospace>Compass</monospace> <xref ref-type="bibr" rid="bib1.bibx6" id="paren.82"/>, the predecessor to <monospace>polaris</monospace>. The first mesh has 8 icosahedral subdivisions resulting in an average gridcell width of 30 km, and the width halves with each progressive subdivision. The results for 10 subdivisions, with a resolution of 7.5 km and 7.44 million horizontal cells are shown in Fig. <xref ref-type="fig" rid="F7"/>. A time step of 15 s is required at this resolution, which is similar to the barotropic time step in time-split layered ocean models, in order to satisfy the CFL condition for surface gravity waves. The wind forcing is constant in time, so there is no diurnal or seasonal variation. After a spin-up period of 40 d, one can observe the structure of the global circulation in the SSH (Fig. <xref ref-type="fig" rid="F8"/>), which is a proxy for the streamfunction. In the wind-driven shallow water system, strong currents develop along western boundaries and along deep sea ridges in the Southern Ocean (Fig. <xref ref-type="fig" rid="F9"/>). Omega and MPAS-Ocean produce the same circulation patterns, with differences of less than 5 % throughout most of the domain. Visible differences along coastlines may stem from the accumulation of numerical errors in energetic regions after <inline-formula><mml:math id="M135" display="inline"><mml:mrow><mml:mn mathvariant="normal">2.3</mml:mn><mml:mo>×</mml:mo><mml:msup><mml:mn mathvariant="normal">10</mml:mn><mml:mn mathvariant="normal">5</mml:mn></mml:msup></mml:mrow></mml:math></inline-formula> time steps. The 7.5 km mesh was run on 10 nodes, with a total of 1280 processors, on Perlmutter at the National Energy Research Scientific Computing Center (NERSC).</p>

      <fig id="F7" specific-use="star"><label>Figure 7</label><caption><p id="d2e4212">Detail of the icosahedral 7.5 km mesh, showing bottom depth in the Gulf of Mexico (top panel), and a close-up view of the Mississippi River Delta Region (bottom panel).</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026-f07.jpg"/>

        </fig>

      <fig id="F8" specific-use="star"><label>Figure 8</label><caption><p id="d2e4223">Global wind-driven test case showing SSH in meters at day 40, with the 7.5 km icosahedral mesh. Results are for Omega (top panel), MPAS-Ocean (middle panel) and the difference (bottom panel).</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026-f08.jpg"/>

        </fig>

      <fig id="F9" specific-use="star"><label>Figure 9</label><caption><p id="d2e4234">Global wind-driven test case showing kinetic energy in m<sup>2</sup> s<sup>−2</sup> at day 40, with the 7.5 km icosahedral mesh.  Results are for Omega (top panel), MPAS-Ocean (middle panel) and the difference (bottom panel).</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026-f09.jpg"/>

        </fig>

</sec>
</sec>
<sec id="Ch1.S5">
  <label>5</label><title>Computational performance</title>
      <p id="d2e4273">Experiments were conducted to evaluate the computational performance of Omega-V0. The goals of this campaign are to measure: the computational throughput on both CPUs and GPUs; scaling with the number of compute nodes; and performance across a range of operational resolutions. The promise of the performance portability of Kokkos is tested in this section with three DOE platforms, which contain two types of CPUs and three different GPU designs. In order to take full advantage of DOE's computing resources, Omega must be able to achieve high throughput at large node counts with high resolution domains on all of these machines.</p>
<sec id="Ch1.S5.SS1">
  <label>5.1</label><title>Hardware and compiler specifications</title>
      <p id="d2e4283">Performance testing was carried out on three of the largest supercomputers in the world: Frontier, Aurora, and Perlmutter. These were ranked second, third, and twenty-fifth, respectively, in the most recent Top500 list <xref ref-type="bibr" rid="bib1.bibx71" id="paren.83"/>, as shown in Table <xref ref-type="table" rid="T2"/>. Currently, the DOE owns the only three exascale computers on the list – El Capitan at 1.74 EFlop per s; Frontier at 1.35 EFlop per s; and Aurora at 1.01 EFlop per s, as measured by the High-Performance Linpack benchmark implementation. While El Capitan was not available for this project, we were able to test Omega-V0's performance on other architectures relevant to DOE computing.</p>

<table-wrap id="T2" specific-use="star"><label>Table 2</label><caption><p id="d2e4294">Performance statistics from the Top500 Supercomputers list, June 2025 <xref ref-type="bibr" rid="bib1.bibx71" id="paren.84"/>. <inline-formula><mml:math id="M138" display="inline"><mml:mrow><mml:msub><mml:mi>R</mml:mi><mml:mi mathvariant="normal">max</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is the maximum performance achieved using the LINPACK benchmark suite. <inline-formula><mml:math id="M139" display="inline"><mml:mrow><mml:msub><mml:mi>R</mml:mi><mml:mi mathvariant="normal">peak</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is the theoretical peak performance. <inline-formula><mml:math id="M140" display="inline"><mml:mrow><mml:msub><mml:mi>N</mml:mi><mml:mi mathvariant="normal">max</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> refers to the size of the largest problem (specifically, the matrix size in a LINPACK benchmark) that a computer can solve. HPCG is the High-Performance Conjugate Gradient (HPCG) Benchmark results.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="4">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="left"/>
     <oasis:colspec colnum="3" colname="col3" align="left"/>
     <oasis:colspec colnum="4" colname="col4" align="left"/>
     <oasis:thead>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Top500 statistics</oasis:entry>
         <oasis:entry colname="col2">Frontier</oasis:entry>
         <oasis:entry colname="col3">Aurora</oasis:entry>
         <oasis:entry colname="col4">Perlmutter</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>
         <oasis:entry colname="col1">Rank, June 2025</oasis:entry>
         <oasis:entry colname="col2">2</oasis:entry>
         <oasis:entry colname="col3">3</oasis:entry>
         <oasis:entry colname="col4">25</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Linpack Performance (<inline-formula><mml:math id="M141" display="inline"><mml:mrow><mml:msub><mml:mi>R</mml:mi><mml:mi mathvariant="normal">max</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>)</oasis:entry>
         <oasis:entry colname="col2">1353 PFlop per s</oasis:entry>
         <oasis:entry colname="col3">1012 PFlop per s</oasis:entry>
         <oasis:entry colname="col4">79 PFlop per s</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Theoretical Peak (<inline-formula><mml:math id="M142" display="inline"><mml:mrow><mml:msub><mml:mi>R</mml:mi><mml:mi mathvariant="normal">peak</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>)</oasis:entry>
         <oasis:entry colname="col2">2056 PFlop per s</oasis:entry>
         <oasis:entry colname="col3">1980 PFlop per s</oasis:entry>
         <oasis:entry colname="col4">113 PFlop per s</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"><inline-formula><mml:math id="M143" display="inline"><mml:mrow><mml:msub><mml:mi>N</mml:mi><mml:mi mathvariant="normal">max</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col2">24 837 120</oasis:entry>
         <oasis:entry colname="col3">28 773 888</oasis:entry>
         <oasis:entry colname="col4">5 800 000</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">HPCG</oasis:entry>
         <oasis:entry colname="col2">14 054 TFlop per s</oasis:entry>
         <oasis:entry colname="col3">5613 TFlop per s</oasis:entry>
         <oasis:entry colname="col4">1905 TFlop per s</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">CPU cores for test</oasis:entry>
         <oasis:entry colname="col2">9 066 176</oasis:entry>
         <oasis:entry colname="col3">9 264 128</oasis:entry>
         <oasis:entry colname="col4">888 832</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Power Consumption</oasis:entry>
         <oasis:entry colname="col2">24 607 kW</oasis:entry>
         <oasis:entry colname="col3">38 698 kW</oasis:entry>
         <oasis:entry colname="col4">2945 kW</oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table></table-wrap>

      <p id="d2e4505">Frontier, Aurora, and Perlmutter provided a variety of chip designs to test the performance portability of the Kokkos library, as shown in Table <xref ref-type="table" rid="T3"/>. CPUs include AMD's EPYC 7763 and Intel's Xeon Max 9470. The three machines use three different GPU models: the AMD MI250X in Frontier; the Intel Data Center GPU Max in Aurora; and the NVIDIA A100 Ampere in Perlmutter. Likewise, three compilers were tested: gnu on Frontier, intel on Aurora, and cray clang on Perlmutter (see Table <xref ref-type="table" rid="T4"/>).</p>

<table-wrap id="T3" specific-use="star"><label>Table 3</label><caption><p id="d2e4517">Hardware specifications for computers in this study, collected from <xref ref-type="bibr" rid="bib1.bibx16" id="text.85"/> <xref ref-type="bibr" rid="bib1.bibx49" id="text.86"/> <xref ref-type="bibr" rid="bib1.bibx4" id="text.87"/> and <xref ref-type="bibr" rid="bib1.bibx46" id="text.88"/>.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="4">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="left"/>
     <oasis:colspec colnum="3" colname="col3" align="left"/>
     <oasis:colspec colnum="4" colname="col4" align="left"/>
     <oasis:thead>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Hardware</oasis:entry>
         <oasis:entry colname="col2">Frontier</oasis:entry>
         <oasis:entry colname="col3">Aurora</oasis:entry>
         <oasis:entry colname="col4">Perlmutter</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Manufacturer</oasis:entry>
         <oasis:entry colname="col2">HPE</oasis:entry>
         <oasis:entry colname="col3">Intel</oasis:entry>
         <oasis:entry colname="col4">HPE</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Location</oasis:entry>
         <oasis:entry colname="col2">Oak Ridge National Lab.</oasis:entry>
         <oasis:entry colname="col3">Argonne National Lab.</oasis:entry>
         <oasis:entry colname="col4">NERSC</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Installation Year</oasis:entry>
         <oasis:entry colname="col2">2021</oasis:entry>
         <oasis:entry colname="col3">2023 (available 14 Feb 2025)</oasis:entry>
         <oasis:entry colname="col4">2021</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Nodes</oasis:entry>
         <oasis:entry colname="col2">9856</oasis:entry>
         <oasis:entry colname="col3">10 624</oasis:entry>
         <oasis:entry colname="col4">4904 (1792 GPU; 3072 CPU; 40 login)</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">CPU</oasis:entry>
         <oasis:entry colname="col2">AMD EPYC 7713 2 GHz</oasis:entry>
         <oasis:entry colname="col3">2x Xeon Max 9470 2.4 GHz</oasis:entry>
         <oasis:entry colname="col4">AMD EPYC 7763 2.45 GHz</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Cores per CPU</oasis:entry>
         <oasis:entry colname="col2">64</oasis:entry>
         <oasis:entry colname="col3">51</oasis:entry>
         <oasis:entry colname="col4">64 (GPU node); 128 (CPU node)</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">GPU</oasis:entry>
         <oasis:entry colname="col2"><inline-formula><mml:math id="M144" display="inline"><mml:mrow><mml:mn mathvariant="normal">4</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> AMD MI250Xs w/2 GCD</oasis:entry>
         <oasis:entry colname="col3"><inline-formula><mml:math id="M145" display="inline"><mml:mrow><mml:mn mathvariant="normal">6</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> Intel Data Center GPU Max</oasis:entry>
         <oasis:entry colname="col4"><inline-formula><mml:math id="M146" display="inline"><mml:mrow><mml:mn mathvariant="normal">4</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> NVIDIA A100 Ampere</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">GPU performance</oasis:entry>
         <oasis:entry colname="col2">47.9 TFlops (FP64)/GPU</oasis:entry>
         <oasis:entry colname="col3">52.4 TFlops (FP64)/GPU</oasis:entry>
         <oasis:entry colname="col4">9.7 TFlops (FP64)/GPU</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Memory per node</oasis:entry>
         <oasis:entry colname="col2">512 GB of DDR4</oasis:entry>
         <oasis:entry colname="col3">1024 GB of DDR5, 128 GB HBM</oasis:entry>
         <oasis:entry colname="col4">256 GB of DDR4 DRAM</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">64 GB HBM2E/GCD</oasis:entry>
         <oasis:entry colname="col3">768 GB HBM for GPU</oasis:entry>
         <oasis:entry colname="col4"/>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Memory bandwidth</oasis:entry>
         <oasis:entry colname="col2">204.8 GB s<sup>−1</sup> CPU</oasis:entry>
          <oasis:entry colname="col3">2870 GB s<sup>−1</sup> peak CPU HBM</oasis:entry>
         <oasis:entry colname="col4">204.8 GB s<sup>−1</sup> CPU</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">1600 GB s<sup>−1</sup> GPU</oasis:entry>
          <oasis:entry colname="col3">19 660 GB s<sup>−1</sup> peak GPU</oasis:entry>
         <oasis:entry colname="col4">1555.2 GB s<sup>−1</sup> GPU</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Interconnect</oasis:entry>
         <oasis:entry colname="col2">Slingshot-11</oasis:entry>
         <oasis:entry colname="col3">Slingshot-11</oasis:entry>
         <oasis:entry colname="col4">Slingshot-11</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">Infinity Fabric</oasis:entry>
         <oasis:entry colname="col3">PCIe 5.0 NIC-CPU connection</oasis:entry>
         <oasis:entry colname="col4">PCIe 4.0 NIC-CPU connection</oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table></table-wrap>

<table-wrap id="T4" specific-use="star"><label>Table 4</label><caption><p id="d2e4853">Software for performance tests presented in this section.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="4">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="left"/>
     <oasis:colspec colnum="3" colname="col3" align="left"/>
     <oasis:colspec colnum="4" colname="col4" align="left"/>
     <oasis:thead>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Software for Omega-V0 tests</oasis:entry>
         <oasis:entry colname="col2">Frontier</oasis:entry>
         <oasis:entry colname="col3">Aurora</oasis:entry>
         <oasis:entry colname="col4">Perlmutter</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>
         <oasis:entry colname="col1">Operating System</oasis:entry>
         <oasis:entry colname="col2">HPE Cray OS</oasis:entry>
         <oasis:entry colname="col3">SUSE Linux Enterprise Server 15 SP4</oasis:entry>
         <oasis:entry colname="col4">HPE Cray OS</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Compiler</oasis:entry>
         <oasis:entry colname="col2">gcc (SUSE Linux) 13.2.1</oasis:entry>
         <oasis:entry colname="col3">Intel OneAPI DPC<inline-formula><mml:math id="M151" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> 2025.0.4</oasis:entry>
         <oasis:entry colname="col4">Cray clang 18.0.1</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">MPI</oasis:entry>
         <oasis:entry colname="col2">cray-mpich/8.1.30</oasis:entry>
         <oasis:entry colname="col3">mpich v5.0.0a1</oasis:entry>
         <oasis:entry colname="col4">cray-mpich/8.1.31</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Programming environment</oasis:entry>
         <oasis:entry colname="col2">PrgEnv-gnu/8.5.0</oasis:entry>
         <oasis:entry colname="col3">oneapi/release/2025.0.5</oasis:entry>
         <oasis:entry colname="col4">PrgEnv-cray/8.6.0</oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table></table-wrap>

</sec>
<sec id="Ch1.S5.SS2">
  <label>5.2</label><title>Strong scaling tests</title>
      <p id="d2e4969">Performance tests were conducted using the inertial-gravity wave shallow water test, available in the <monospace>polaris</monospace> suite under <monospace>inertial_gravity_wave</monospace>, and described in Sect. 2.6 of <xref ref-type="bibr" rid="bib1.bibx11" id="text.89"/>. In order to mimic the performance requirements of a primitive equation ocean model, the Omega-V0 shallow water model was run with 96 identical vertical layers, five active tracers, the full non-linear advection terms, and the Laplacian and biharmonic terms active in both the momentum equation (Eq. <xref ref-type="disp-formula" rid="Ch1.E11"/>) and tracer equation (Eq. <xref ref-type="disp-formula" rid="Ch1.E13"/>). The choice of 96 layers was made as it is a multiple of 8, allowing for better vectorization. Wind forcing and bottom stress were not applied in these steps. The time-stepping scheme was chosen to be classical fourth-order Runge-Kutta.</p>
      <p id="d2e4985">The domain is doubly periodic on a Cartesian, regular hexagonal grid.  Configurations of <inline-formula><mml:math id="M152" display="inline"><mml:mrow><mml:mn mathvariant="normal">1024</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">1024</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">96</mml:mn></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="M153" display="inline"><mml:mrow><mml:mn mathvariant="normal">2048</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">2048</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">96</mml:mn></mml:mrow></mml:math></inline-formula> grid cells (<inline-formula><mml:math id="M154" display="inline"><mml:mi>x</mml:mi></mml:math></inline-formula> cells by <inline-formula><mml:math id="M155" display="inline"><mml:mi>y</mml:mi></mml:math></inline-formula> cells by <inline-formula><mml:math id="M156" display="inline"><mml:mi>z</mml:mi></mml:math></inline-formula> cells) are presented here, both with 1 km grid cell width. On a regular hexagonal grid the total domain length in the <inline-formula><mml:math id="M157" display="inline"><mml:mi>x</mml:mi></mml:math></inline-formula> direction is the number of cells times the grid cell width, just like on a quadrilateral grid. In the <inline-formula><mml:math id="M158" display="inline"><mml:mi>y</mml:mi></mml:math></inline-formula> direction the total domain length is the number of cells times the grid cell width times <inline-formula><mml:math id="M159" display="inline"><mml:mrow><mml:msqrt><mml:mn mathvariant="normal">3</mml:mn></mml:msqrt><mml:mo>/</mml:mo><mml:mn mathvariant="normal">2</mml:mn><mml:mo>∼</mml:mo><mml:mn mathvariant="normal">0.866</mml:mn></mml:mrow></mml:math></inline-formula> due to the stacking of the hexagonal cells. Performance times are equivalent for regular cartesian and unstructured spherical meshes because the regular hexagon grid is still treated as unstructured data with indirect addressing. 
The regular hexagon grids are used here for convenience, as their horizontal grid cell count can easily be incremented by factors of two to produce a sequence of grid resolutions. The number of horizontal grid cells is approximately one million for the <inline-formula><mml:math id="M160" display="inline"><mml:mrow><mml:mn mathvariant="normal">1024</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">1024</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">96</mml:mn></mml:mrow></mml:math></inline-formula> domain and four million for the <inline-formula><mml:math id="M161" display="inline"><mml:mrow><mml:mn mathvariant="normal">2048</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">2048</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">96</mml:mn></mml:mrow></mml:math></inline-formula> domain. This compares to recent publications of 235 thousand horizontal cells by 64 vertical layers for the low-resolution global MPAS-Ocean E3SM domain <xref ref-type="bibr" rid="bib1.bibx67" id="paren.90"/>, and 3.7 million by 80 vertical layers for the high-resolution 6 to 18 km MPAS-Ocean domain <xref ref-type="bibr" rid="bib1.bibx13" id="paren.91"/>. Omega-V1 will be a full ocean model with additional computations such as vertical advection and mixing, equation of state, pressure computation, and physics parameterizations. Despite this, the current shallow water configurations provide a good preliminary representation of the performance comparison between Omega and MPAS-Ocean, between CPU and GPUs, and scaling to large node counts. For these tests, MPAS-Ocean has some of its primitive equation terms disabled so that it is solving the identical equations as Omega-V0. MPAS-Ocean was not tested on Aurora, the newest machine, because the purpose of three machines was to demonstrate the versatility of Omega on different hardware, and Frontier and Perlmutter were considered sufficient for the Omega versus MPAS-Ocean comparison.</p>
      <p id="d2e5112">Performance results for Omega-V0 are shown in Fig. <xref ref-type="fig" rid="F10"/> for the <inline-formula><mml:math id="M162" display="inline"><mml:mrow><mml:mn mathvariant="normal">1024</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">1024</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">96</mml:mn></mml:mrow></mml:math></inline-formula> mesh, and in Fig. <xref ref-type="fig" rid="F11"/> for the <inline-formula><mml:math id="M163" display="inline"><mml:mrow><mml:mn mathvariant="normal">2048</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">2048</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">96</mml:mn></mml:mrow></mml:math></inline-formula> mesh. Corresponding results for MPAS-Ocean are shown in Figs. <xref ref-type="fig" rid="F12"/> and <xref ref-type="fig" rid="F13"/>. In all cases, computation (blue lines) scales better than halo communication (green line), which is expected <xref ref-type="bibr" rid="bib1.bibx10" id="paren.92"/>. Inter-node communication can be highly variable, depending on the competing traffic on the interconnect. Each point on these plots represents the time per timestep averaged over 5 simulations of 12 timesteps each, excluding start-up and I/O time. Since communication does not scale well with increasing node counts, low resolution configurations exhibit poor scaling due to insufficient computational intensity. This effect is more pronounced on GPUs (right column) than on CPUs (left column). In the “GPU” simulations, some CPUs were used for the timing test, as specified in Table <xref ref-type="table" rid="T5"/>. However, the vast majority of computational work is executed on GPUs, while CPUs are primarily used for tasks such as flow control, kernel launches, synchronization, and I/O. As expected, the problem of poor scaling at a particular node count can be alleviated by running the model with higher resolution. 
MPAS-Ocean was not fully ported to OpenACC, so additional speed-up on GPUs is possible with further porting and tuning.</p>

      <fig id="F10" specific-use="star"><label>Figure 10</label><caption><p id="d2e5164">Strong scaling of Omega-V0 for the <inline-formula><mml:math id="M164" display="inline"><mml:mrow><mml:mn mathvariant="normal">1024</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">1024</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">96</mml:mn></mml:mrow></mml:math></inline-formula> resolution on Frontier (top panels), Aurora (middle panels) and Perlmutter (bottom panels), showing CPU-only simulations (left column panels), and GPUs with CPUs (right column panels). The colors separate the total (red) between the inter-node halo communication (green) and the on-node computation (blue). Start-up time and I/O are not included.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026-f10.png"/>

        </fig>

      <fig id="F11" specific-use="star"><label>Figure 11</label><caption><p id="d2e5191">Same as Fig. <xref ref-type="fig" rid="F10"/> but for the <inline-formula><mml:math id="M165" display="inline"><mml:mrow><mml:mn mathvariant="normal">2048</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">2048</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">96</mml:mn></mml:mrow></mml:math></inline-formula> resolution.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026-f11.png"/>

        </fig>

      <fig id="F12" specific-use="star"><label>Figure 12</label><caption><p id="d2e5220">Strong scaling of MPAS-Ocean for the <inline-formula><mml:math id="M166" display="inline"><mml:mrow><mml:mn mathvariant="normal">1024</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">1024</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">96</mml:mn></mml:mrow></mml:math></inline-formula> resolution on Frontier (top panels) and Perlmutter (bottom panels), showing CPU-only simulations (left column panels), and GPUs with CPUs (right column panels).</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026-f12.png"/>

          
        </fig>

      <fig id="F13" specific-use="star"><label>Figure 13</label><caption><p id="d2e5249">Same as Fig. <xref ref-type="fig" rid="F12"/> but for the <inline-formula><mml:math id="M167" display="inline"><mml:mrow><mml:mn mathvariant="normal">2048</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">2048</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">96</mml:mn></mml:mrow></mml:math></inline-formula> resolution.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026-f13.png"/>

        </fig>

<table-wrap id="T5" specific-use="star"><label>Table 5</label><caption><p id="d2e5280">Timing for the <inline-formula><mml:math id="M168" display="inline"><mml:mrow><mml:mn mathvariant="normal">2048</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">2048</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">96</mml:mn></mml:mrow></mml:math></inline-formula> on four nodes, with thermal design power and throughput per watt power consumption.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="8">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="left"/>
     <oasis:colspec colnum="3" colname="col3" align="right"/>
     <oasis:colspec colnum="4" colname="col4" align="right"/>
     <oasis:colspec colnum="5" colname="col5" align="left"/>
     <oasis:colspec colnum="6" colname="col6" align="right"/>
     <oasis:colspec colnum="7" colname="col7" align="right"/>
     <oasis:colspec colnum="8" colname="col8" align="right"/>
     <oasis:thead>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2"/>
         <oasis:entry colname="col3">Frontier</oasis:entry>
         <oasis:entry colname="col4">Frontier</oasis:entry>
         <oasis:entry colname="col5">Aurora</oasis:entry>
         <oasis:entry colname="col6">Aurora</oasis:entry>
         <oasis:entry colname="col7">Perlmutter</oasis:entry>
         <oasis:entry colname="col8">Perlmutter</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2"/>
         <oasis:entry colname="col3">CPU</oasis:entry>
         <oasis:entry colname="col4">GPU <inline-formula><mml:math id="M169" display="inline"><mml:mo>+</mml:mo></mml:math></inline-formula> CPU</oasis:entry>
         <oasis:entry colname="col5">CPU</oasis:entry>
         <oasis:entry colname="col6">GPU <inline-formula><mml:math id="M170" display="inline"><mml:mo>+</mml:mo></mml:math></inline-formula> CPU</oasis:entry>
         <oasis:entry colname="col7">CPU</oasis:entry>
         <oasis:entry colname="col8">GPU <inline-formula><mml:math id="M171" display="inline"><mml:mo>+</mml:mo></mml:math></inline-formula> CPU</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>
         <oasis:entry colname="col1">WC time/timestep s, full node</oasis:entry>
         <oasis:entry colname="col2">Omega</oasis:entry>
         <oasis:entry colname="col3">8.7</oasis:entry>
         <oasis:entry colname="col4">0.18</oasis:entry>
         <oasis:entry colname="col5">4.3</oasis:entry>
         <oasis:entry colname="col6">0.18</oasis:entry>
         <oasis:entry colname="col7">8.5</oasis:entry>
         <oasis:entry colname="col8">1.4</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">MPAS-Ocean</oasis:entry>
         <oasis:entry colname="col3">12.1</oasis:entry>
         <oasis:entry colname="col4">8.5</oasis:entry>
         <oasis:entry colname="col5">NA</oasis:entry>
         <oasis:entry colname="col6">NA</oasis:entry>
         <oasis:entry colname="col7">12.0</oasis:entry>
         <oasis:entry colname="col8">4.4</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Number of CPUs/node used for test</oasis:entry>
         <oasis:entry colname="col2"/>
         <oasis:entry colname="col3">56</oasis:entry>
         <oasis:entry colname="col4">32</oasis:entry>
         <oasis:entry colname="col5">104</oasis:entry>
         <oasis:entry colname="col6">6</oasis:entry>
         <oasis:entry colname="col7">64</oasis:entry>
         <oasis:entry colname="col8">64</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Number of GPUs</oasis:entry>
         <oasis:entry colname="col2"/>
         <oasis:entry colname="col3">0</oasis:entry>
         <oasis:entry colname="col4">4</oasis:entry>
         <oasis:entry colname="col5">0</oasis:entry>
         <oasis:entry colname="col6">6</oasis:entry>
         <oasis:entry colname="col7">0</oasis:entry>
         <oasis:entry colname="col8">4</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Power, TDP watts per CPU or GPU</oasis:entry>
         <oasis:entry colname="col2"/>
         <oasis:entry colname="col3">280</oasis:entry>
         <oasis:entry colname="col4">560</oasis:entry>
         <oasis:entry colname="col5">350/socket</oasis:entry>
         <oasis:entry colname="col6">600</oasis:entry>
         <oasis:entry colname="col7">280</oasis:entry>
         <oasis:entry colname="col8">300</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Power, TDP watts per node</oasis:entry>
         <oasis:entry colname="col2"/>
         <oasis:entry colname="col3">280</oasis:entry>
         <oasis:entry colname="col4">2520</oasis:entry>
         <oasis:entry colname="col5">700</oasis:entry>
         <oasis:entry colname="col6">4300</oasis:entry>
         <oasis:entry colname="col7">280</oasis:entry>
         <oasis:entry colname="col8">1480</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Throughput (model step/WC time)</oasis:entry>
         <oasis:entry colname="col2">Omega</oasis:entry>
         <oasis:entry colname="col3">0.115</oasis:entry>
         <oasis:entry colname="col4">5.556</oasis:entry>
         <oasis:entry colname="col5">0.233</oasis:entry>
         <oasis:entry colname="col6">5.556</oasis:entry>
         <oasis:entry colname="col7">0.118</oasis:entry>
         <oasis:entry colname="col8">0.714</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">MPAS-Ocean</oasis:entry>
         <oasis:entry colname="col3">0.083</oasis:entry>
         <oasis:entry colname="col4">0.118</oasis:entry>
         <oasis:entry colname="col5">NA</oasis:entry>
         <oasis:entry colname="col6">NA</oasis:entry>
         <oasis:entry colname="col7">0.083</oasis:entry>
         <oasis:entry colname="col8">0.227</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Throughput per watt for 1000 steps</oasis:entry>
         <oasis:entry colname="col2">Omega</oasis:entry>
         <oasis:entry colname="col3">0.411</oasis:entry>
         <oasis:entry colname="col4">2.205</oasis:entry>
         <oasis:entry colname="col5">0.332</oasis:entry>
         <oasis:entry colname="col6">1.292</oasis:entry>
         <oasis:entry colname="col7">0.420</oasis:entry>
         <oasis:entry colname="col8">0.483</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">MPAS-Ocean</oasis:entry>
         <oasis:entry colname="col3">0.295</oasis:entry>
         <oasis:entry colname="col4">0.047</oasis:entry>
         <oasis:entry colname="col5">NA</oasis:entry>
         <oasis:entry colname="col6">NA</oasis:entry>
         <oasis:entry colname="col7">0.298</oasis:entry>
         <oasis:entry colname="col8">0.154</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Throughput per watt</oasis:entry>
         <oasis:entry colname="col2">Omega</oasis:entry>
         <oasis:entry colname="col3">1.00</oasis:entry>
         <oasis:entry colname="col4">5.25</oasis:entry>
         <oasis:entry colname="col5">1.00</oasis:entry>
         <oasis:entry colname="col6">3.89</oasis:entry>
         <oasis:entry colname="col7">1.00</oasis:entry>
         <oasis:entry colname="col8">1.15</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Relative to Omega CPU</oasis:entry>
         <oasis:entry colname="col2">MPAS-Ocean</oasis:entry>
         <oasis:entry colname="col3">0.72</oasis:entry>
         <oasis:entry colname="col4">0.11</oasis:entry>
         <oasis:entry colname="col5">NA</oasis:entry>
         <oasis:entry colname="col6">NA</oasis:entry>
         <oasis:entry colname="col7">0.71</oasis:entry>
         <oasis:entry colname="col8">0.37</oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup>

</oasis:table><table-wrap-foot><p id="d2e5299">NA means not available.</p></table-wrap-foot></table-wrap>

      <p id="d2e5736">Next, we compare throughput on CPU-only nodes versus when GPUs are added, and between Omega and MPAS-Ocean. To do this, the comparison is fixed at four nodes, all within the “perfect scaling” regime, using the <inline-formula><mml:math id="M172" display="inline"><mml:mrow><mml:mn mathvariant="normal">2048</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">2048</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">96</mml:mn></mml:mrow></mml:math></inline-formula> mesh. These comparisons are not sensitive to the choice of resolution, as for each case, the <inline-formula><mml:math id="M173" display="inline"><mml:mrow><mml:mn mathvariant="normal">2048</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">2048</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">96</mml:mn></mml:mrow></mml:math></inline-formula> timing is almost exactly four times that of <inline-formula><mml:math id="M174" display="inline"><mml:mrow><mml:mn mathvariant="normal">1024</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">1024</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">96</mml:mn></mml:mrow></mml:math></inline-formula>, demonstrating ideal weak scaling. The average wallclock time per time step is provided in the first two rows of Table <xref ref-type="table" rid="T5"/>. Note that Frontier and Perlmutter both have AMD EPYC CPUs but different clock speeds and compilers (gcc versus Cray clang). Timing results are very similar but not identical in Figs. <xref ref-type="fig" rid="F10"/>–<xref ref-type="fig" rid="F13"/>.</p>
      <p id="d2e5794">There are several ways to measure the speed-up when transitioning from CPU-only nodes to nodes with both CPUs and GPUs. The simplest method is to take the ratio of the compute times when the full resources of each node are utilized. For Omega-V0, this yields speed-ups of <inline-formula><mml:math id="M175" display="inline"><mml:mrow><mml:mn mathvariant="normal">47</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> on Frontier, <inline-formula><mml:math id="M176" display="inline"><mml:mrow><mml:mn mathvariant="normal">22</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> on Aurora, and <inline-formula><mml:math id="M177" display="inline"><mml:mrow><mml:mn mathvariant="normal">6.1</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> on Perlmutter, as shown in the top row of each arrow on Fig. <xref ref-type="fig" rid="F14"/>. Another comparison involves using the full CPU set versus a single GPU, which results in speed-ups of <inline-formula><mml:math id="M178" display="inline"><mml:mrow><mml:mn mathvariant="normal">12</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M179" display="inline"><mml:mrow><mml:mn mathvariant="normal">3.7</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="M180" display="inline"><mml:mrow><mml:mn mathvariant="normal">1.5</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> for Omega-V0 on these machines. However, one could argue that modern supercomputers are designed to deliver high GPU throughput, and the CPUs are simply helpers to coordinate the GPU computations. Accordingly, the ideal configuration for Omega maps one MPI task to one CPU and one GPU, with the CPU primarily responsible for orchestrating GPU execution. 
A major hardware design consideration is the reduced power usage per flop for GPUs, as the full supercomputer must aim to maximize computational throughput while minimizing total power consumption. The configurations used in Table <xref ref-type="table" rid="T5"/> are chosen to enable a fair comparison between Omega and MPAS-Ocean for throughput per watt evaluation. To this end, we estimate the computational efficiency of our models with the thermal design power (TDP) of each chip (row 4 of Table <xref ref-type="table" rid="T5"/>). For example, on Frontier, the AMD EPYC 2GHz CPU is rated at 2.5 TFLOPs of double-precision performance and 225–280 W TDP <xref ref-type="bibr" rid="bib1.bibx30" id="paren.93"/>. In contrast, Frontier's AMD MI250X GPU specifications state 47.9 TFLOPs for 500–560 W TDP <xref ref-type="bibr" rid="bib1.bibx2" id="paren.94"/>, for a total of 191.6 TFLOPs and 2000–2240 W TDP for the four GPUs on a single node. This means that the lion's share of computing and power consumption on Frontier takes place on the GPUs. Thus, the most meaningful comparison of code performance between CPUs and GPUs for a new model is based on the computational throughput per watt of power consumption. For Omega-V0, this metric shows performance improvements of <inline-formula><mml:math id="M181" display="inline"><mml:mrow><mml:mn mathvariant="normal">5.3</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> on Frontier, <inline-formula><mml:math id="M182" display="inline"><mml:mrow><mml:mn mathvariant="normal">3.6</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> on Aurora, and <inline-formula><mml:math id="M183" display="inline"><mml:mrow><mml:mn mathvariant="normal">1.2</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> on Perlmutter. 
Using this same method, these numbers for MPAS-Ocean are <inline-formula><mml:math id="M184" display="inline"><mml:mrow><mml:mn mathvariant="normal">0.16</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> on Frontier and <inline-formula><mml:math id="M185" display="inline"><mml:mrow><mml:mn mathvariant="normal">0.5</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> on Perlmutter, indicating a reduction in computational throughput per watt. Omega's relative performance is further highlighted in head-to-head comparisons on each chip: Omega-V0 is <inline-formula><mml:math id="M186" display="inline"><mml:mrow><mml:mn mathvariant="normal">1.4</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> faster than MPAS-Ocean on the AMD EPYC CPU, <inline-formula><mml:math id="M187" display="inline"><mml:mrow><mml:mn mathvariant="normal">3.1</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> faster with Perlmutter's NVIDIA A100 Ampere GPU, and <inline-formula><mml:math id="M188" display="inline"><mml:mrow><mml:mn mathvariant="normal">47</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> faster on Frontier's AMD MI250Xs. These results underscore the effectiveness of Omega's performance-portable design based on C<inline-formula><mml:math id="M189" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> and the Kokkos library.</p>

      <fig id="F14"><label>Figure 14</label><caption><p id="d2e5966">A diagram of the speed-up factors when including the GPUs on each machine, for Omega-V0 and MPAS-Ocean. Times are based on four-node results on the <inline-formula><mml:math id="M190" display="inline"><mml:mrow><mml:mn mathvariant="normal">2048</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">2048</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">96</mml:mn></mml:mrow></mml:math></inline-formula> resolution shown in Table <xref ref-type="table" rid="T5"/>. The speed-up per watt uses the thermal design power of each CPU and GPU.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026-f14.png"/>

        </fig>

</sec>
<sec id="Ch1.S5.SS3">
  <label>5.3</label><title>Absolute performance metrics</title>
      <p id="d2e6001">To address the need for absolute performance metrics, we conducted a detailed kernel-level profiling of Omega using NVIDIA Nsight Systems and Nsight Compute on an NVIDIA A100 GPU at NERSC Perlmutter. Figure <xref ref-type="fig" rid="F15"/> presents the accumulated execution time of the GPU kernel per iteration for two mesh resolutions (289k and 1154k cells). The computeTracerTendenciesOnly kernel dominates the overall GPU execution time, accounting for 32.66 ms (approximately 40 % of total kernel time) on the finer mesh, making it the natural target for detailed absolute performance analysis.</p>

      <fig id="F15" specific-use="star"><label>Figure 15</label><caption><p id="d2e6008">Accumulated GPU kernel duration per iteration for 289k and 1154k mesh resolutions on NVIDIA A100. The computeTracerTendenciesOnly kernel dominates execution time, accounting for approximately 40 % of total kernel runtime on the finer mesh.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026-f15.png"/>

        </fig>

      <p id="d2e6017">Table <xref ref-type="table" rid="T6"/> summarizes key hardware utilization metrics for the computeTracerTendenciesOnly kernel. The kernel achieves approximately 44 % compute (SM) throughput and 52 %–53 % of peak memory bandwidth (817–818 GB s<sup>−1</sup> out of 1.56 TB s<sup>−1</sup> theoretical), with an achieved occupancy of 58 % relative to a theoretical maximum of 62.5 %. The roofline analysis (Fig. <xref ref-type="fig" rid="F16"/>) reveals that the kernel operates in the memory-bound regime with an arithmetic intensity of approximately 2 FLOP/byte, achieving 1.60 TFLOPS – roughly 21 % of the A100's peak FP64 performance (7.57 TFLOPS).</p>

<table-wrap id="T6" specific-use="star"><label>Table 6</label><caption><p id="d2e6053">Key GPU kernel metrics for <monospace>computeTracerTendenciesOnly</monospace> on NVIDIA A100 at NERSC Perlmutter for two mesh resolutions.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="3">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="left"/>
     <oasis:colspec colnum="3" colname="col3" align="left"/>
     <oasis:thead>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Metric</oasis:entry>
         <oasis:entry colname="col2">289k mesh</oasis:entry>
         <oasis:entry colname="col3">1154k mesh</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>
         <oasis:entry colname="col1">Grid/block</oasis:entry>
         <oasis:entry colname="col2">(276 829, 1, 1) <inline-formula><mml:math id="M193" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> (1, 128, 1)</oasis:entry>
         <oasis:entry colname="col3">(1 094 888, 1, 1) <inline-formula><mml:math id="M194" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> (1, 128, 1)</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Compute (SM) Throughput</oasis:entry>
         <oasis:entry colname="col2">44.24 %</oasis:entry>
         <oasis:entry colname="col3">44.29 %</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Memory Throughput (GB s<sup>−1</sup>, Max %)</oasis:entry>
         <oasis:entry colname="col2">818 GB s<sup>−1</sup>, 52.6 %</oasis:entry>
         <oasis:entry colname="col3">817.28 GB s<sup>−1</sup>, 52.55 %</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Occupancy (Achieved/Theoretical)</oasis:entry>
         <oasis:entry colname="col2">58.41 %/62.5 %</oasis:entry>
         <oasis:entry colname="col3">58.52 %/62.5 %</oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table></table-wrap>

      <fig id="F16" specific-use="star"><label>Figure 16</label><caption><p id="d2e6188">Roofline analysis of the computeTracerTendenciesOnly kernel on NVIDIA A100 GPU. The kernel achieves approximately 21 % of peak FP64 performance (1.60 TFLOPS) and operates in the memory-bound regime with an arithmetic intensity of approximately 2 FLOP/byte.</p></caption>
          <graphic xlink:href="https://gmd.copernicus.org/articles/19/3569/2026/gmd-19-3569-2026-f16.png"/>

        </fig>

      <p id="d2e6197">The next phase of Omega development will add higher-order advection, the equation of state, and physics parameterizations. These are expected to run efficiently on GPUs as they are compute-intensive relative to communication with neighboring cells. The design choices, such as vertical chunking and Kokkos parallel execution policies, are expected to hold but will be retested and altered to suit the full ocean model. Two concerns for future additions are variable bathymetry, which can disrupt the vertical chunking strategy, and inefficiencies introduced by branching that are inherent to parameterizations.  Preliminary work shows that computing on the full column with masking arrays is a useful option to avoid these issues.</p>
</sec>
</sec>
<sec id="Ch1.S6" sec-type="conclusions">
  <label>6</label><title>Conclusions</title>
      <p id="d2e6209">This paper documents the governing equations, design philosophy, coding implementation, verification, and performance of Version 0.1.0 of the Ocean Model for E3SM Global Applications (Omega-V0). Version 0 is the first step towards a layered non-Boussinesq ocean model that can be used for realistic global applications as a component within E3SM. The motivation for rewriting the ocean model is to create a code base that is resilient to changing supercomputer architectures. We found that our previous framework of Fortran code with MPI, OpenMP, and more recently OpenACC was not suitable for the new exascale computing landscape within DOE.</p>
      <p id="d2e6212">The key to the new Omega design is <italic>performance portability</italic>. The investment into developing a code base from scratch will pay off as new architectures are introduced, because the underlying Kokkos library will be updated and optimized for new machines while the Omega code may remain unchanged. Moving from Fortran to C<inline-formula><mml:math id="M198" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> offers the additional advantages of more standard libraries, modern code abstractions, and a language familiar to the next generation of developers.</p>
      <p id="d2e6228">The verification of Omega-V0 included convergence against exact solutions for the nonlinear shallow water equations using a manufactured solution test case, and for tracer advection on the sphere using a cosine bell test case. The barotropic gyre test case adds wind forcing, solid boundaries, and viscosity in an idealized domain, while the wind-driven global simulations validate our workflow with coastlines and bathymetry on a rotating earth. These tests are all automated and available in our <monospace>polaris</monospace> package, including the generation of initial conditions, statistical analysis, and visualization. Comparisons with exact solutions and MPAS-Ocean simulations provide confidence that Omega-V0 is working as expected.</p>
      <p id="d2e6234">Performance results on GPUs are of particular importance for this study, as that is the driving purpose of Omega. Omega-V0 is significantly faster on GPUs than on CPUs, as measured on a per-node, per-GPU, or per-watt basis. Performance measurements on Frontier and Aurora, two of the world's fastest exascale computers, were quite promising. The speed-up from full-node CPU-only to full-node with GPUs was <inline-formula><mml:math id="M199" display="inline"><mml:mrow><mml:mn mathvariant="normal">47</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> on Frontier, <inline-formula><mml:math id="M200" display="inline"><mml:mrow><mml:mn mathvariant="normal">22</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> on Aurora, and <inline-formula><mml:math id="M201" display="inline"><mml:mrow><mml:mn mathvariant="normal">6.1</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> on Perlmutter (this is <inline-formula><mml:math id="M202" display="inline"><mml:mrow><mml:mn mathvariant="normal">12</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M203" display="inline"><mml:mrow><mml:mn mathvariant="normal">3.7</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="M204" display="inline"><mml:mrow><mml:mn mathvariant="normal">1.5</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula>, respectively, on a per-GPU basis). 
Regarding energy consumption, the improvement in throughput from CPUs to GPUs on a per watt basis was <inline-formula><mml:math id="M205" display="inline"><mml:mrow><mml:mn mathvariant="normal">5.3</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> on Frontier, <inline-formula><mml:math id="M206" display="inline"><mml:mrow><mml:mn mathvariant="normal">3.6</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> on Aurora, and <inline-formula><mml:math id="M207" display="inline"><mml:mrow><mml:mn mathvariant="normal">1.2</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> on Perlmutter. This means that Omega's central design principle of performance portability was demonstrated on the exascale architectures that are most relevant to the DOE. In addition, performance tests were conducted to 128 nodes with high-resolution domains of 4 million horizontal cells and 96 layers. Compute times scale nearly perfectly up to 128 CPU nodes and 32 GPU nodes. Good scaling to more nodes can be achieved with higher resolution configurations. Direct GPU-to-GPU communication was an important factor for successful Omega-V0 simulations on GPUs.</p>
      <p id="d2e6331">MPAS-Ocean is an important standard of comparison because it is the current ocean model in E3SM, and Omega is the candidate replacement. Omega-V0 is 1.4 times faster than MPAS-Ocean on CPUs. They use the same mesh specification, array structure, and indirect addressing of horizontal neighbors. The performance gains on CPUs can be attributed to both inefficiencies in the MPAS infrastructure related to frequent pointer retrievals from complex data structures and to a focus in Omega on improved optimization and memory layout for vectors. The speed-ups from MPAS-Ocean to Omega are particularly notable on GPUs, with a <inline-formula><mml:math id="M208" display="inline"><mml:mrow><mml:mn mathvariant="normal">4.7</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> speedup on Frontier and a <inline-formula><mml:math id="M209" display="inline"><mml:mrow><mml:mn mathvariant="normal">3.1</mml:mn><mml:mo>×</mml:mo></mml:mrow></mml:math></inline-formula> speedup on Perlmutter. In these tests, Omega-V0 and MPAS-Ocean had identical configurations and computed the same shallow water terms. The performance results confirm that MPAS-Ocean was constrained by the partial OpenACC implementation that required both host and device copies and related data motion, as well as MPI communications that were not device-aware, whereas Omega is a device-focused implementation with the potential to deliver faster simulations on GPU-based exascale computers.</p>
      <p id="d2e6354">Omega Version 1 will be a layered non-Boussinesq ocean model intended for real-world simulations. The underlying Kokkos framework will remain the same, but with additional terms for vertical advection and diffusion, an equation of state, hydrostatic pressure, and higher-order tracer advection. Version 1 will have similar capabilities as MPAS-Ocean in <xref ref-type="bibr" rid="bib1.bibx60" id="text.95"/>, and will be compared to realistic climatology. Version 2 will add coupling capability for surface fluxes within E3SM as in <xref ref-type="bibr" rid="bib1.bibx54" id="text.96"/>, and more advanced parameterizations. The improved performance of Omega on GPUs, along with the atmospheric component EAMxx <xref ref-type="bibr" rid="bib1.bibx15" id="paren.97"/>, will allow E3SM to pursue state-of-the-art science on the world's newest and largest exascale supercomputers.</p>
</sec>

      
      </body>
    <back><notes notes-type="codedataavailability"><title>Code and data availability</title>

      <p id="d2e6370">Omega Version 0.1.0 is available at <ext-link xlink:href="https://doi.org/10.11578/dc.20250723.1" ext-link-type="DOI">10.11578/dc.20250723.1</ext-link> <xref ref-type="bibr" rid="bib1.bibx55" id="paren.98"/>. Within the E3SM repository, Omega may be compiled as a standalone application by running CMake in the <monospace>components/omega</monospace> subdirectory. The Omega User's Guide may be found at <uri>https://docs.e3sm.org/Omega</uri> (last access: 13 April 2026). The testing framework is <monospace>polaris</monospace> version 0.7.0 <xref ref-type="bibr" rid="bib1.bibx5" id="paren.99"/>, which is available at <ext-link xlink:href="https://doi.org/10.5281/zenodo.15470123" ext-link-type="DOI">10.5281/zenodo.15470123</ext-link>.</p>
  </notes><notes notes-type="authorcontribution"><title>Author contributions</title>

      <p id="d2e6398">Code development, testing, and timing were conducted by all authors. Omega framework development was led by PJ, with team members SRB, YK, BO, MW. Shallow water model code developers included SRB, HK, AM, BO, MW. Testing, including the <monospace>polaris</monospace> development, was led by XSAD and CB with contributions by SB, SRB, MP, AB, KS. Performance measurement and improvements on three DOE computers were by MP, YK, AM, KR, SaS, MW. Project management was by LVR, MP, SRB. The manuscript writing was led by MP, with contributions by all authors.</p>
  </notes><notes notes-type="competinginterests"><title>Competing interests</title>

      <p id="d2e6407">The contact author has declared that none of the authors has any competing interests.</p>
  </notes><notes notes-type="disclaimer"><title>Disclaimer</title>

      <p id="d2e6413">Publisher's note: Copernicus Publications remains neutral with regard to jurisdictional claims made in the text, published maps, institutional affiliations, or any other geographical representation in this paper. The authors bear the ultimate responsibility for providing appropriate place names. Views expressed in the text are those of the authors and do not necessarily reflect the views of the publisher.</p>
  </notes><ack><title>Acknowledgements</title><p id="d2e6419">This research used computational resources provided by: the National Energy Research Scientific Computing Center (NERSC), a DOE Office of Science User Facility supported by the Office of Science of the DOE under Contract No. DE-AC02-05CH11231; Oak Ridge Leadership Computing Facility at the Oak Ridge National Laboratory, which is supported by the Office of Science of the US DOE under Contract No. DE-AC05-00OR22725; Argonne Leadership Computing Facility, a US DOE Office of Science user facility at Argonne National Laboratory, is based on research supported by the US DOE Office of Science-Advanced Scientific Computing Research Program, under Contract No. DE-AC02-06CH11357.</p></ack><notes notes-type="financialsupport"><title>Financial support</title>

      <p id="d2e6425">This research has been supported by the Energy Exascale Earth System Model (E3SM) project within the US Department of Energy (DOE) Office of Science, Office of Biological and Environmental Research (BER). Kieran K. Ringel was additionally supported by the DOE's Los Alamos National Laboratory (LANL) LDRD Program and the Center for Nonlinear Studies.</p>
  </notes><notes notes-type="reviewstatement"><title>Review statement</title>

      <p id="d2e6431">This paper was edited by Chia-Te Chien and reviewed by Seiya Nishizawa and one anonymous referee.</p>
  </notes><ref-list>
    <title>References</title>

      <ref id="bib1.bibx1"><label>Advanced Micro Devices, Inc.(2023)</label><mixed-citation>Advanced Micro Devices, Inc.: HIP Programming Guide, <uri>https://rocmdocs.amd.com/en/latest/Programming_Guides/HIP-GUIDE.html</uri> (last access: 14 July 2025), 2023.</mixed-citation></ref>
      <ref id="bib1.bibx2"><label>AMD(2025)</label><mixed-citation>AMD: AMD Instinct MI250X Accelerators, <uri>https://www.amd.com/en/products/accelerators/instinct/mi200/mi250x.html</uri> (last access: 30 June 2025), 2025.</mixed-citation></ref>
      <ref id="bib1.bibx3"><label>Anderson et al.(2015)Anderson, Craig, Dennis, Edwards, Evans, Fischer, Jacob, Mickelson, Taylor, and Worley</label><mixed-citation>Anderson, J., Craig, A., Dennis, J., Edwards, J., Evans, K., Fischer, C., Jacob, R., Mickelson, S., Taylor, M., and Worley, P.: The Common Infrastructure for Modeling the Earth (CIME), <uri>https://esmci.github.io/cime</uri> (last access: 15 July 2025), 2015.</mixed-citation></ref>
      <ref id="bib1.bibx4"><label>Argonne National Laboratory(2025)</label><mixed-citation>Argonne National Laboratory: Aurora Factsheet, <uri>https://www.alcf.anl.gov/sites/default/files/2024-07/Aurora_FactSheet_2024.pdf</uri> (last access: 30 June 2025), 2025.</mixed-citation></ref>
      <ref id="bib1.bibx5"><label>Asay-Davis et al.(2025a)Asay-Davis, Begeman, Denlinger, Brus, Smith, Nolan, Comeau, Kennedy, Conlon, Barthel, and Jacob</label><mixed-citation>Asay-Davis, X., Begeman, C., Denlinger, A., Brus, S., Smith, K., Nolan, A., Comeau, D., Kennedy, J. H., Conlon, L., Barthel, A., and Jacob, R.: E3SM-Project/polaris: v0.7.0, Zenodo [code], <ext-link xlink:href="https://doi.org/10.5281/zenodo.15470123" ext-link-type="DOI">10.5281/zenodo.15470123</ext-link>, 2025a.</mixed-citation></ref>
      <ref id="bib1.bibx6"><label>Asay-Davis et al.(2025b)Asay-Davis, Hoffman, Begeman, Petersen, Hillebrand, Han, Nolan, Brus, Wolfram, barthel, Capodaglio, Calandrini, Denlinger, Vankova, Roekel, yariseidenbenz, pbosler, Brady, mperego, Smith, Moore-Maley, Takano, Cao, Zhang, Lilly, Carlson, Turner, and Engwirda</label><mixed-citation>Asay-Davis, X., Hoffman, M., Begeman, C., Petersen, M., Hillebrand, T., Han, H., Nolan, A., Brus, S., Wolfram, P. J., barthel, a., Capodaglio, G., Calandrini, S., Denlinger, A., Vankova, I., Roekel, L. V., yariseidenbenz, pbosler, Brady, R., mperego, Smith, C., Moore-Maley, B., Takano, Y., Cao, Z., Zhang, T., Lilly, J., Carlson, M., Turner, M., and Engwirda, D.: MPAS-Dev/compass: v1.7.0, Zenodo [code], <ext-link xlink:href="https://doi.org/10.5281/zenodo.15857467" ext-link-type="DOI">10.5281/zenodo.15857467</ext-link>, 2025b.</mixed-citation></ref>
      <ref id="bib1.bibx7"><label>Asay-Davis et al.(2025c)Asay-Davis, Begeman, Barthel, Brus, Jones, Kang, Kim, Mametjanov, ONeill, Petersen, Smith, Sreepathi, Van Roekel, and Waruszewski</label><mixed-citation>Asay-Davis, X. S., Begeman, C. B., Barthel, A. M., Brus, S. R., Jones, P. W., Kang, H.-G., Kim, Y., Mametjanov, A., O'Neill, B. J., Petersen, M. R., Smith, K. M., Sreepathi, S., Van Roekel, L. P., and Waruszewski, M.: Omega Documentation, <uri>https://docs.e3sm.org/Omega</uri> (last access: 13 April 2026), 2025c.</mixed-citation></ref>
      <ref id="bib1.bibx8"><label>Beckingsale et al.(2019)Beckingsale, Burmark, Hornung, Jones, Killian, Kunen, Pearce, Robinson, Ryujin, and Scogland</label><mixed-citation>Beckingsale, D. A., Burmark, J., Hornung, R., Jones, H., Killian, W., Kunen, A. J., Pearce, O., Robinson, P., Ryujin, B. S., and Scogland, T. R. W.: RAJA: Portable Performance for Large-Scale Scientific Applications, in: IEEE/ACM International Workshop on Performance, Portability and Productivity in HPC (P3HPC), <uri>https://www.osti.gov/biblio/1488819</uri> (last access: 13 April 2026), 2019.</mixed-citation></ref>
      <ref id="bib1.bibx9"><label>Beder(2023)</label><mixed-citation>Beder, J.: A YAML parser and emitter in C<inline-formula><mml:math id="M210" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula>, <uri>https://github.com/jbeder/yaml-cpp</uri> (last access: 13 April 2026), 2023.</mixed-citation></ref>
      <ref id="bib1.bibx10"><label>Bishnu et al.(2023)Bishnu, Strauss, and Petersen</label><mixed-citation>Bishnu, S., Strauss, R. R., and Petersen, M. R.: Comparing the Performance of Julia on CPUs versus GPUs and Julia-MPI versus Fortran-MPI: a case study with MPAS-Ocean (Version 7.1), Geosci. Model Dev., 16, 5539–5559, <ext-link xlink:href="https://doi.org/10.5194/gmd-16-5539-2023" ext-link-type="DOI">10.5194/gmd-16-5539-2023</ext-link>, 2023.</mixed-citation></ref>
      <ref id="bib1.bibx11"><label>Bishnu et al.(2024)Bishnu, Petersen, Quaife, and Schoonover</label><mixed-citation>Bishnu, S., Petersen, M. R., Quaife, B., and Schoonover, J.: A Verification Suite of Test Cases for the Barotropic Solver of Ocean Models, J. Adv. Model. Earth Syst., 16, e2022MS003545, <ext-link xlink:href="https://doi.org/10.1029/2022MS003545" ext-link-type="DOI">10.1029/2022MS003545</ext-link>, 2024. </mixed-citation></ref>
      <ref id="bib1.bibx12"><label>Bryan and Cox(1968)</label><mixed-citation> Bryan, K. and Cox, M. D.: A Nonlinear Model of an Ocean Driven by Wind and Differential Heating: Part I. Description of the Three-Dimensional Velocity and Density Fields, J. Atmos. Sci., 25, 945–967, 1968.</mixed-citation></ref>
      <ref id="bib1.bibx13"><label>Caldwell et al.(2019)Caldwell, Mametjanov, Tang, Van Roekel, Golaz et al.</label><mixed-citation>Caldwell, P. M., Mametjanov, A., Tang, Q., Van Roekel, L. P., Golaz, J., Lin, W., Bader, D. C., Keen, N. D., Feng, Y., Jacob, R., Maltrud, M. E., Roberts, A. F., Taylor, M. A., Veneziani, M., Wang, H., Wolfe, J. D., Balaguru, K., Cameron-Smith, P., Dong, L., Klein, S. A., Leung, L. R., Li, H., Li, Q., Liu, X., Neale, R. B., Pinheiro, M., Qian, Y., Ullrich, P. A., Xie, S., Yang, Y., Zhang, Y., Zhang, K., and Zhou, T.: The DOE E3SM Coupled Model Version 1: Description and Results at High Resolution, J. Adv. Model. Earth Syst., 11, 4095–4146, <ext-link xlink:href="https://doi.org/10.1029/2019MS001870" ext-link-type="DOI">10.1029/2019MS001870</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bibx14"><label>Cushman-Roisin and Beckers(2011)</label><mixed-citation> Cushman-Roisin, B. and Beckers, J.-M.: Introduction to geophysical fluid dynamics: physical and numerical aspects, Academic Press, ISBN 13:978-0120887590, 2011.</mixed-citation></ref>
      <ref id="bib1.bibx15"><label>Donahue et al.(2024)Donahue, Caldwell, Bertagna, Beydoun, Bogenschutz, Bradley, Clevenger, Foucar, Golaz, Guba, Hannah, Hillman, Johnson, Keen, Lin, Singh, Sreepathi, Taylor, Tian, Terai, Ullrich, Yuan, and Zhang</label><mixed-citation>Donahue, A. S., Caldwell, P. M., Bertagna, L., Beydoun, H., Bogenschutz, P. A., Bradley, A. M., Clevenger, T. C., Foucar, J., Golaz, C., Guba, O., Hannah, W., Hillman, B. R., Johnson, J. N., Keen, N., Lin, W., Singh, B., Sreepathi, S., Taylor, M. A., Tian, J., Terai, C. R., Ullrich, P. A., Yuan, X., and Zhang, Y.: To Exascale and Beyond – The Simple Cloud-Resolving E3SM Atmosphere Model (SCREAM), a Performance Portable Global Atmosphere Model for Cloud-Resolving Scales, J. Adv. Model. Earth Syst., 16, e2024MS004314, <ext-link xlink:href="https://doi.org/10.1029/2024MS004314" ext-link-type="DOI">10.1029/2024MS004314</ext-link>, 2024.</mixed-citation></ref>
      <ref id="bib1.bibx16"><label>Dongarra and Geist(2022)</label><mixed-citation>Dongarra, J. and Geist, A.: Report On The Oak Ridge National Laboratory's Frontier System, Tech. rep., University of Tennessee, <uri>https://icl.utk.edu/files/publications/2022/icl-utk-1570-2022.pdf</uri> (last access: 13 April 2026), 2022.</mixed-citation></ref>
      <ref id="bib1.bibx17"><label>Dukowicz and Smith(1994)</label><mixed-citation> Dukowicz, J. and Smith, R.: Implicit free-surface formulation of the Bryan-Cox-Semtner ocean model, J. Geophys. Res., 99, 7991–8014, 1994.</mixed-citation></ref>
      <ref id="bib1.bibx18"><label>Dukowicz et al.(1993)Dukowicz, Smith, and Malone</label><mixed-citation>Dukowicz, J. K., Smith, R. D., and Malone, R. C.: A Reformulation and Implementation of the Bryan-Cox-Semtner Ocean Model on the Connection Machine, J. Atmos. Ocean. Tech., 10, 195–208, <ext-link xlink:href="https://doi.org/10.1175/1520-0426(1993)010&lt;0195:ARAIOT&gt;2.0.CO;2" ext-link-type="DOI">10.1175/1520-0426(1993)010&lt;0195:ARAIOT&gt;2.0.CO;2</ext-link>, 1993.</mixed-citation></ref>
      <ref id="bib1.bibx19"><label>Eaton et al.(2024)Eaton, Gregory, Drach, Taylor, Hankin et al.</label><mixed-citation>Eaton, B., Gregory, J., Drach, B., Taylor, K., Hankin, S., Caron, J., Signell, R., Bentley, P., Rappa, G., Höck, H., Pamment, A., Juckes, M., Raspaud, M., Blower, J., Horne, R., Whiteaker, T., Blodgett, D., Zender, C., Lee, D., Hassel, D., Snow, A., Kölling, T, Allured, D., Jelenak, A., Soerensen, A. M., Gaultier, L., Herlédan, S., Manzano, F., Bärring, L., Barker, C., and Bartholomew, S. L.: NetCDF Climate and Forecast (CF) Metadata Conventions (1.12), Tech. rep., CF Community, Zenodo [data set], <ext-link xlink:href="https://doi.org/10.5281/zenodo.14275599" ext-link-type="DOI">10.5281/zenodo.14275599</ext-link>, 2024.</mixed-citation></ref>
      <ref id="bib1.bibx20"><label>Engwirda(2018)</label><mixed-citation>Engwirda, D.: Generalised primal-dual grids for unstructured co-volume schemes, J. Comput. Phys., 375, 155–176, <ext-link xlink:href="https://doi.org/10.1016/j.jcp.2018.07.025" ext-link-type="DOI">10.1016/j.jcp.2018.07.025</ext-link>, 2018.</mixed-citation></ref>
      <ref id="bib1.bibx21"><label>ESMF(2020)</label><mixed-citation>ESMF: Earth System Modeling Framework, <uri>http://earthsystemmodeling.org/</uri> (last access: 13 April 2026), 2020.</mixed-citation></ref>
      <ref id="bib1.bibx22"><label>GEBCO Bathymetric Compilation Group(2023)</label><mixed-citation>GEBCO Bathymetric Compilation Group: The GEBCO_2023 Grid – a continuous terrain model of the global oceans and land, NERC EDS British Oceanographic Data Centre NOC, <ext-link xlink:href="https://doi.org/10.5285/f98b053b-0cbc-6c23-e053-6c86abc0af7b" ext-link-type="DOI">10.5285/f98b053b-0cbc-6c23-e053-6c86abc0af7b</ext-link>, 2023.</mixed-citation></ref>
      <ref id="bib1.bibx23"><label>Gill(1982)</label><mixed-citation> Gill, A. E.: Atmosphere-Ocean Dynamics, in: vol. 30 of International Geophysics Series, Academic Press, San Diego, California, ISBN 10:0122835220, 1982.</mixed-citation></ref>
      <ref id="bib1.bibx24"><label>Godoy et al.(2020)Godoy, Podhorszki, Wang, Atkins, Eisenhauer, Gu, Davis, Choi, Germaschewski, Huck, Huebl, Kim, Kress, Kurc, Liu, Logan, Mehta, Ostrouchov, Parashar, Poeschel, Pugmire, Suchyta, Takahashi, Thompson, Tsutsumi, Wan, Wolf, Wu, and Klasky</label><mixed-citation>Godoy, W. F., Podhorszki, N., Wang, R., Atkins, C., Eisenhauer, G., Gu, J., Davis, P., Choi, J., Germaschewski, K., Huck, K., Huebl, A., Kim, M., Kress, J., Kurc, T., Liu, Q., Logan, J., Mehta, K., Ostrouchov, G., Parashar, M., Poeschel, F., Pugmire, D., Suchyta, E., Takahashi, K., Thompson, N., Tsutsumi, S., Wan, L., Wolf, M., Wu, K., and Klasky, S.: ADIOS 2: The Adaptable Input Output System. A framework for high-performance data management, SoftwareX, 12, 100561, <ext-link xlink:href="https://doi.org/10.1016/j.softx.2020.100561" ext-link-type="DOI">10.1016/j.softx.2020.100561</ext-link>, 2020.</mixed-citation></ref>
      <ref id="bib1.bibx25"><label>GridTools(2019)</label><mixed-citation>GridTools: GridTools, <uri>https://gridtools.github.io/gridtools/latest/index.html</uri> (last access: 13 April 2026), 2019.</mixed-citation></ref>
      <ref id="bib1.bibx26"><label>Ham et al.(2023)Ham, Kelly, Mitchell, Cotter, Kirby, Sagiyama, Bouziani, Vorderwuelbecke, Gregory, Betteridge, Shapero, Nixon-Hill, Ward, Farrell, Brubeck, Marsden, Gibson, Homolya, Sun, McRae, Luporini, Gregory, Lange, Funke, Rathgeber, Bercea, and Markall</label><mixed-citation>Ham, D. A., Kelly, P. H. J., Mitchell, L., Cotter, C. J., Kirby, R. C., Sagiyama, K., Bouziani, N., Vorderwuelbecke, S., Gregory, T. J., Betteridge, J., Shapero, D. R., Nixon-Hill, R. W., Ward, C. J., Farrell, P. E., Brubeck, P. D., Marsden, I., Gibson, T. H., Homolya, M., Sun, T., McRae, A. T. T., Luporini, F., Gregory, A., Lange, M., Funke, S. W., Rathgeber, F., Bercea, G.-T., and Markall, G. R.: Firedrake User Manual, in: 1st Edn., Imperial College London and University of Oxford and Baylor University and University of Washington, <ext-link xlink:href="https://doi.org/10.25561/104839" ext-link-type="DOI">10.25561/104839</ext-link>, 2023.</mixed-citation></ref>
      <ref id="bib1.bibx27"><label>Hamming(1987)</label><mixed-citation> Hamming, R. W.: Numerical Methods for Scientists and Engineers, in: 2nd Edn., Dover Publications, ISBN 10:9780486652412, 1987.</mixed-citation></ref>
      <ref id="bib1.bibx28"><label>He and Ding(2001)</label><mixed-citation> He, Y. and Ding, C.: Using Accurate Arithmetics to Improve Numerical Reproducibility and Stability in Parallel Applications, J. Supercomput., 18, 259–277, 2001.</mixed-citation></ref>
      <ref id="bib1.bibx29"><label>Hida et al.(2008)Hida, Xiaoye, and Bailey</label><mixed-citation>Hida, Y., Xiaoye, S., and Bailey, D. H.: Library for double-double and quad-double arithmetic, <uri>https://www.davidhbailey.com/dhbpapers/qd.pdf</uri> (last access: 13 April 2026), 2008.</mixed-citation></ref>
      <ref id="bib1.bibx30"><label>HPCwire(2021)</label><mixed-citation>HPCwire: AMD Launches Epyc Milan with 19 SKUs for HPC, Enterprise and Hyperscale, <ext-link xlink:href="https://www.hpcwire.com/2021/03/15/amd-launches-epyc-milan-with-19-skus-for-hpc-enterprise-and-hyperscale/">https://www.hpcwire.com/2021/03/15/amd-launches-epyc-milan-with-19-skus-for-hpc-enterprise-and-hyperscale/</ext-link> (last access: 30 June 2025), 2021.</mixed-citation></ref>
      <ref id="bib1.bibx31"><label>John et al.(2021)John, Jeffers, and Sodani</label><mixed-citation>John, J. R., Jeffers, T., and Sodani, P.: Data Parallel C<inline-formula><mml:math id="M211" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula>: Mastering DPC<inline-formula><mml:math id="M212" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> for Programming of Heterogeneous Systems using C<inline-formula><mml:math id="M213" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> and SYCL, Apress, ISBN 978-1484255735, 2021.</mixed-citation></ref>
      <ref id="bib1.bibx32"><label>Karypis(2013)</label><mixed-citation>Karypis, G.: METIS – Serial Graph Partitioning and Fill-reducing Matrix Ordering, GitHub [code], <uri>https://github.com/KarypisLab/METIS</uri> (last access: 13 April 2026), 2013.</mixed-citation></ref>
      <ref id="bib1.bibx33"><label>Kerbyson and Jones(2005)</label><mixed-citation>Kerbyson, D. and Jones, P.: A Performance Model of the Parallel Ocean Program, IJHPCA, 19, 261–276, <ext-link xlink:href="https://doi.org/10.1177/1094342005056114" ext-link-type="DOI">10.1177/1094342005056114</ext-link>, 2005.</mixed-citation></ref>
      <ref id="bib1.bibx34"><label>Kitware(2023a)</label><mixed-citation>Kitware: CDash: Continuous Integration Dashboard, <uri>https://www.cdash.org</uri> (last access: 15 July 2025), 2023a.</mixed-citation></ref>
      <ref id="bib1.bibx35"><label>Kitware(2023b)</label><mixed-citation>Kitware: CMake: Cross-Platform Make, <uri>https://cmake.org</uri> (last access: 15 July 2025), 2023b.</mixed-citation></ref>
      <ref id="bib1.bibx36"><label>Kitware(2023c)</label><mixed-citation>Kitware: CTest: Testing Tool for CMake Projects, <uri>https://cmake.org/cmake/help/latest/manual/ctest.1.html</uri> (last access: 15 July 2025), 2023c.</mixed-citation></ref>
      <ref id="bib1.bibx37"><label>Knuth(2005)</label><mixed-citation> Knuth, D.: The Art of Computer Programming, in: vol. 2, chap. 4, Addison-Wesley Press, ISBN 10:0201853930, 2005.</mixed-citation></ref>
      <ref id="bib1.bibx38"><label>Krishna et al.(2024)Krishna, Wu, Edwards, Hartnett, Dennis, and Vertenstein</label><mixed-citation>Krishna, J., Wu, D., Edwards, J., Hartnett, E., Dennis, J. M., and Vertenstein, M.: Software for Caching Output and Reads for Parallel I/O, v1.6, GitHub [code], <uri>https://github.com/E3SM-Project/scorpio</uri> (last access: 13 April 2026), 2024.</mixed-citation></ref>
      <ref id="bib1.bibx39"><label>Maltrud and McClean(2005)</label><mixed-citation> Maltrud, M. and McClean, J. L.: An eddy resolving global 1/10 degree ocean simulation, Ocean Model., 8, 31–54, 2005.</mixed-citation></ref>
      <ref id="bib1.bibx40"><label>Melman(2023)</label><mixed-citation>Melman, G.: Fast C<inline-formula><mml:math id="M214" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> logging library, GitHub [code], <uri>https://github.com/gabime/spdlog</uri> (last access: 13 April 2026), 2023.</mixed-citation></ref>
      <ref id="bib1.bibx41"><label>Mielikainen et al.(2012)Mielikainen, Huang, Huang, and Goldberg</label><mixed-citation>Mielikainen, J., Huang, B., Huang, H.-L. A., and Goldberg, M. D.: Improved GPU/CUDA Based Parallel Weather and Research Forecast (WRF) Single Moment 5-Class (WSM5) Cloud Microphysics, IEEE J. Select. Top. Appl., 5, 1256–1265, <ext-link xlink:href="https://doi.org/10.1109/JSTARS.2012.2188780" ext-link-type="DOI">10.1109/JSTARS.2012.2188780</ext-link>, 2012.</mixed-citation></ref>
      <ref id="bib1.bibx42"><label>Morlighem(2022)</label><mixed-citation>Morlighem, M.: MEaSUREs BedMachine Antarctica, Version 3, NSIDC, <ext-link xlink:href="https://doi.org/10.5067/FPSU0V1MWUB6" ext-link-type="DOI">10.5067/FPSU0V1MWUB6</ext-link>, 2022.</mixed-citation></ref>
      <ref id="bib1.bibx43"><label>MPI(2025)</label><mixed-citation>MPI: MPI: A Message-Passing Interface Standard Version 5.0, <uri>https://www.mpi-forum.org/docs/mpi-5.0/mpi50-report.pdf</uri> (last access: 13 April 2026), 2025.</mixed-citation></ref>
      <ref id="bib1.bibx44"><label>Munk and Carrier(1950)</label><mixed-citation>Munk, W. H. and Carrier, G. F.: The Wind-driven Circulation in Ocean Basins of Various Shapes, Tellus, 2, 160–167, <ext-link xlink:href="https://doi.org/10.1111/j.2153-3490.1950.tb00327.x" ext-link-type="DOI">10.1111/j.2153-3490.1950.tb00327.x</ext-link>, 1950.</mixed-citation></ref>
      <ref id="bib1.bibx45"><label>NEMO(2025)</label><mixed-citation>NEMO: NEMO 5.0: NEMO User Guide, <uri>https://sites.nemo-ocean.io/user-guide/</uri> (last access: 13 April 2026), 2025.</mixed-citation></ref>
      <ref id="bib1.bibx46"><label>NERSC(2025)</label><mixed-citation>NERSC: Perlmutter architecture specification, <uri>https://docs.nersc.gov/systems/perlmutter/architecture/</uri> (last access: 30 June 2025), 2025.</mixed-citation></ref>
      <ref id="bib1.bibx47"><label>Norman et al.(2022)Norman, Lyngaas, Bagusetty, and Berrill</label><mixed-citation>Norman, M., Lyngaas, I., Bagusetty, A., and Berrill, M.: Portable C<inline-formula><mml:math id="M215" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> Code that can Look and Feel Like Fortran Code with Yet Another Kernel Launcher (YAKL), International Journal of Parallel Programming, <ext-link xlink:href="https://doi.org/10.1007/s10766-022-00739-0" ext-link-type="DOI">10.1007/s10766-022-00739-0</ext-link>, 2022.</mixed-citation></ref>
      <ref id="bib1.bibx48"><label>NVIDIA Corporation(2023)</label><mixed-citation>NVIDIA Corporation: CUDA C Programming Guide, <uri>https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html</uri> (last access: 14 July 2025), 2023.</mixed-citation></ref>
      <ref id="bib1.bibx49"><label>Oak Ridge National Laboratory(2025)</label><mixed-citation>Oak Ridge National Laboratory: Frontier System Specifications, <uri>https://www.olcf.ornl.gov/olcf-resources/compute-systems/frontier/</uri> (last access: 30 June 2025), 2025.</mixed-citation></ref>
      <ref id="bib1.bibx50"><label>OpenACC(2022)</label><mixed-citation>OpenACC: The OpenACC Application Programming Interface Version 3.3, <uri>https://www.openacc.org/sites/default/files/inline-images/Specification/OpenACC-3.3-final.pdf</uri> (last access: 13 April 2026), 2022.</mixed-citation></ref>
      <ref id="bib1.bibx51"><label>OpenMP(2024)</label><mixed-citation>OpenMP: OpenMP Application Programming Interface, <uri>https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-6-0.pdf</uri> (last access: 13 April 2026), 2024.</mixed-citation></ref>
      <ref id="bib1.bibx52"><label>Pal et al.(2023)Pal, Barton, Petersen, Brus, Engwirda, Arbic, Roberts, Westerink, and Wirasaet</label><mixed-citation>Pal, N., Barton, K. N., Petersen, M. R., Brus, S. R., Engwirda, D., Arbic, B. K., Roberts, A. F., Westerink, J. J., and Wirasaet, D.: Barotropic tides in MPAS-Ocean (E3SM V2): impact of ice shelf cavities, Geosci. Model Dev., 16, 1297–1314, <ext-link xlink:href="https://doi.org/10.5194/gmd-16-1297-2023" ext-link-type="DOI">10.5194/gmd-16-1297-2023</ext-link>, 2023.</mixed-citation></ref>
      <ref id="bib1.bibx53"><label>Pedlosky(1990)</label><mixed-citation> Pedlosky, J. (Ed.): Geophysical Fluid Dynamics, in: 2nd Edn., Springer-Verlag, New York, ISBN 10:0387963871, 1990.</mixed-citation></ref>
      <ref id="bib1.bibx54"><label>Petersen et al.(2019)Petersen, Asay-Davis, Berres, Chen, Feige, Hoffman, Jacobsen, Jones, Maltrud, Price, Ringler, Streletz, Turner, Van Roekel, Veneziani, Wolfe, Wolfram, and Woodring</label><mixed-citation>Petersen, M. R., Asay-Davis, X. S., Berres, A. S., Chen, Q., Feige, N., Hoffman, M. J., Jacobsen, D. W., Jones, P. W., Maltrud, M. E., Price, S. F., Ringler, T. D., Streletz, G. J., Turner, A. K., Van Roekel, L. P., Veneziani, M., Wolfe, J. D., Wolfram, P. J., and Woodring, J. L.: An Evaluation of the Ocean and Sea Ice Climate of E3SM Using MPAS and Interannual CORE-II Forcing, J. Adv. Model. Earth Syst., 11, 1438–1458, <ext-link xlink:href="https://doi.org/10.1029/2018MS001373" ext-link-type="DOI">10.1029/2018MS001373</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bibx55"><label>Petersen et al.(2025)Petersen, Asay-Davis, Barthel, Begeman, Brus, Jones, Kang, Kim, Mametjanov, O'Neill, Smith, Sreepathi, Van Roekel, and Waruszewski</label><mixed-citation>Petersen, M. R., Asay-Davis, X. S., Barthel, A. M., Begeman, C. B., Brus, S. R., Jones, P. W., Kang, H.-G., Kim, Y., Mametjanov, A., O'Neill, B. J., Smith, K. M., Sreepathi, S., Van Roekel, L. P., and Waruszewski, M.: E3SM-Project/Omega, US Department of Energy Office of Scientific and Technical Information [code], <ext-link xlink:href="https://doi.org/10.11578/dc.20250723.1" ext-link-type="DOI">10.11578/dc.20250723.1</ext-link>, 2025.</mixed-citation></ref>
      <ref id="bib1.bibx56"><label>Porter and Heimbach(2025)</label><mixed-citation>Porter, A. R. and Heimbach, P.: Unlocking the power of parallel computing: GPU technologies for ocean forecasting, State of the Planet, 5-opsr, 23, <ext-link xlink:href="https://doi.org/10.5194/sp-5-opsr-23-2025" ext-link-type="DOI">10.5194/sp-5-opsr-23-2025</ext-link>, 2025.</mixed-citation></ref>
      <ref id="bib1.bibx57"><label>PSyclone(2019)</label><mixed-citation>PSyclone: PSyclone User Guide, <uri>https://psyclone.readthedocs.io/en/stable/</uri> (last access: 13 April 2026), 2019.</mixed-citation></ref>
      <ref id="bib1.bibx58"><label>Ramadhan et al.(2020)Ramadhan, Wagner, Hill, Campin, Churavy, Besard, Souza, Edelman, Ferrari, and Marshall</label><mixed-citation>Ramadhan, A., Wagner, G. L., Hill, C., Campin, J.-M., Churavy, V., Besard, T., Souza, A., Edelman, A., Ferrari, R., and Marshall, J.: Oceananigans.jl: Fast and friendly geophysical fluid dynamics on GPUs, J. Open Sour. Softw., 5, 2018, <ext-link xlink:href="https://doi.org/10.21105/joss.02018" ext-link-type="DOI">10.21105/joss.02018</ext-link>, 2020.</mixed-citation></ref>
      <ref id="bib1.bibx59"><label>Ringler et al.(2010)Ringler, Thuburn, Klemp, and Skamarock</label><mixed-citation> Ringler, T. D., Thuburn, J., Klemp, J. B., and Skamarock, W. C.: A unified approach to energy conservation and potential vorticity dynamics for arbitrarily-structured C-grids, J. Comput. Phys., 229, 3065–3090, 2010.</mixed-citation></ref>
      <ref id="bib1.bibx60"><label>Ringler et al.(2013)Ringler, Petersen, Higdon, Jacobsen, Jones, and Maltrud</label><mixed-citation> Ringler, T. D., Petersen, M. R., Higdon, R. L., Jacobsen, D., Jones, P. W., and Maltrud, M.: A multi-resolution approach to global ocean modeling, OCEAN Model., 69, 211–232, 2013.</mixed-citation></ref>
      <ref id="bib1.bibx61"><label>Roache(2002)</label><mixed-citation> Roache, P. J.: Code verification by the method of manufactured solutions, J. Fluids Eng., 124, 4–10, 2002.</mixed-citation></ref>
      <ref id="bib1.bibx62"><label>Rosinski(2018)</label><mixed-citation>Rosinski, J.: GPTL – General Purpose Timing Library, GitHub [code], <uri>https://jmrosinski.github.io/GPTL/</uri> (last access: 13 April 2026), 2018.</mixed-citation></ref>
      <ref id="bib1.bibx63"><label>Salari and Knupp(2000)</label><mixed-citation>Salari, K. and Knupp, P.: Code verification by the method of manufactured solutions, Tech. rep., Sandia National Labs., Albuquerque, NM, USA, <ext-link xlink:href="https://doi.org/10.2172/759450" ext-link-type="DOI">10.2172/759450</ext-link>, 2000.</mixed-citation></ref>
      <ref id="bib1.bibx64"><label>Semtner and Chervin(1988)</label><mixed-citation> Semtner, A. J. and Chervin, R. M.: A simulation of the global ocean circulation with resolved eddies, J. Geophys. Res., 93, 15502–15522, 1988.</mixed-citation></ref>
      <ref id="bib1.bibx65"><label>Silvestri et al.(2025)Silvestri, Wagner, Constantinou, Hill, Campin, Souza, Bishnu, Churavy, Marshall, and Ferrari</label><mixed-citation>Silvestri, S., Wagner, G. L., Constantinou, N. C., Hill, C. N., Campin, J.-M., Souza, A. N., Bishnu, S., Churavy, V., Marshall, J., and Ferrari, R.: A GPU-Based Ocean Dynamical Core for Routine Mesoscale-Resolving Climate Simulations, J. Adv. Model. Earth Syst., 17, e2024MS004465, <ext-link xlink:href="https://doi.org/10.1029/2024MS004465" ext-link-type="DOI">10.1029/2024MS004465</ext-link>, 2025.</mixed-citation></ref>
      <ref id="bib1.bibx66"><label>Skamarock and Gassmann(2011)</label><mixed-citation>Skamarock, W. C. and Gassmann, A.: Conservative Transport Schemes for Spherical Geodesic Grids: High-Order Flux Operators for ODE-Based Time Integration, Mon. Weather Rev., 139, 2962–2975, <ext-link xlink:href="https://doi.org/10.1175/MWR-D-10-05056.1" ext-link-type="DOI">10.1175/MWR-D-10-05056.1</ext-link>, 2011.</mixed-citation></ref>
      <ref id="bib1.bibx67"><label>Smith et al.(2025)Smith, Barthel, Conlon, Van Roekel, Bartoletti, Golaz, Zhang, Begeman, Benedict, Bisht, Feng, Hannah, Harrop, Jeffery, Lin, Ma, Maltrud, Petersen, Singh, Tang, Tesfa, Wolfe, Xie, Zheng, Balaguru, Garuba, Gleckler, Hu, Lee, Moore-Maley, and Ordoñez</label><mixed-citation>Smith, K. M., Barthel, A. M., Conlon, L. M., Van Roekel, L. P., Bartoletti, A., Golaz, J.-C., Zhang, C., Begeman, C. B., Benedict, J. J., Bisht, G., Feng, Y., Hannah, W., Harrop, B. E., Jeffery, N., Lin, W., Ma, P.-L., Maltrud, M. E., Petersen, M. R., Singh, B., Tang, Q., Tesfa, T., Wolfe, J. D., Xie, S., Zheng, X., Balaguru, K., Garuba, O., Gleckler, P., Hu, A., Lee, J., Moore-Maley, B., and Ordoñez, A. C.: The DOE E3SM version 2.1: overview and assessment of the impacts of parameterized ocean submesoscales, Geosci. Model Dev., 18, 1613–1633, <ext-link xlink:href="https://doi.org/10.5194/gmd-18-1613-2025" ext-link-type="DOI">10.5194/gmd-18-1613-2025</ext-link>, 2025.</mixed-citation></ref>
      <ref id="bib1.bibx68"><label>Smith et al.(2010)Smith, Jones, Briegleb, Bryan, Danabasoglu, Dennis, Dukowicz, Eden, Fox-Kemper, Gent et al.</label><mixed-citation>Smith, R., Jones, P., Briegleb, B., Bryan, F., Danabasoglu, G., Dennis, J., Dukowicz, J., Eden, C., Fox-Kemper, B., Gent, P., Hecht, M., Kauffman, J., Large, W., Levine, M., Meares, L., Peacock, S., and Rosenbloom, N.: The Parallel Ocean Program (POP) Reference Manual: Ocean Component of the Community Climate System Model (CCSM) and Community Earth System Model (CESM), Tech. Rep. LAUR-01853, Los Alamos National Laboratory, Los Alamos, NM, <uri>https://opensky.ucar.edu/system/files/2024-09/manuscripts_825.pdf</uri> (last access: 13 April 2026), 2010.</mixed-citation></ref>
      <ref id="bib1.bibx69"><label>Smith et al.(2000)Smith, Maltrud, Bryan, and Hecht</label><mixed-citation> Smith, R. D., Maltrud, M. E., Bryan, F. O., and Hecht, M. W.: Numerical simulation of the North Atlantic Ocean at 1/10°, J. Phys. Oceanogr., 30, 1532–1561, 2000.</mixed-citation></ref>
      <ref id="bib1.bibx70"><label>Stommel(1948)</label><mixed-citation> Stommel, H.: The westward intensification of wind-driven ocean currents, Eos Trans. Am. Geophys. Union, 29, 202–206, 1948.</mixed-citation></ref>
      <ref id="bib1.bibx71"><label>Strohmaier et al.(2025)Strohmaier, Dongarra, Simon, and Meuer</label><mixed-citation>Strohmaier, E., Dongarra, J., Simon, H., and Meuer, M.: TOP500 List – June 2025, <uri>https://top500.org/lists/top500/list/2025/06/</uri> (last access: 30 June 2025), 2025.</mixed-citation></ref>
      <ref id="bib1.bibx72"><label>Stroustrup(1986)</label><mixed-citation>Stroustrup, B.: The C<inline-formula><mml:math id="M216" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> Programming Language, Addison-Wesley, Reading, MA, ISBN 9780201114991, 1986.</mixed-citation></ref>
      <ref id="bib1.bibx73"><label>Stroustrup(2013)</label><mixed-citation>Stroustrup, B.: The C<inline-formula><mml:math id="M217" display="inline"><mml:mrow><mml:mo>+</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:math></inline-formula> Programming Language, in: 4th Edn., Addison-Wesley, ISBN 9780321563842, 2013.</mixed-citation></ref>
      <ref id="bib1.bibx74"><label>Thuburn et al.(2009)Thuburn, Ringler, Skamarock, and Klemp</label><mixed-citation> Thuburn, J., Ringler, T. D., Skamarock, W. C., and Klemp, J. B.: Numerical representation of geostrophic modes on arbitrarily structured C-grids, J. Comput. Phys., 228, 8321–8335, 2009.</mixed-citation></ref>
      <ref id="bib1.bibx75"><label>Trott et al.(2021)Trott, Berger-Vergiat, Poliakoff, Rajamanickam, Lebrun-Grandie, Madsen, Al Awar, Gligoric, Shipman, and Womeldorff</label><mixed-citation>Trott, C., Berger-Vergiat, L., Poliakoff, D., Rajamanickam, S., Lebrun-Grandie, D., Madsen, J., Al Awar, N., Gligoric, M., Shipman, G., and Womeldorff, G.: The Kokkos Ecosystem: Comprehensive Performance Portability for High Performance Computing, Comput. Sci. Eng., 23, 10–18, <ext-link xlink:href="https://doi.org/10.1109/MCSE.2021.3098509" ext-link-type="DOI">10.1109/MCSE.2021.3098509</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bibx76"><label>Trott et al.(2022)Trott, Lebrun-Grandié et al.</label><mixed-citation>Trott, C. R., Lebrun-Grandié, D., et al.: Kokkos 3: Programming Model Extensions for the Exascale Era, IEEE T. Parallel Distrib. Syst., 33, 805–817, <ext-link xlink:href="https://doi.org/10.1109/TPDS.2021.3097283" ext-link-type="DOI">10.1109/TPDS.2021.3097283</ext-link>, 2022.</mixed-citation></ref>
      <ref id="bib1.bibx77"><label>Unidata(2023)</label><mixed-citation>Unidata: Network Common Data Form, <uri>https://www.unidata.ucar.edu/software/netcdf/</uri> (last access: 13 April 2026), 2023.</mixed-citation></ref>
      <ref id="bib1.bibx78"><label>Vallis(2017)</label><mixed-citation>Vallis, G. K.: Atmospheric and oceanic fluid dynamics: fundamentals and large-scale circulation, in: 2nd Edn., Cambridge University Press, Cambridge, ISBN 978-0-521-84969-2, <ext-link xlink:href="https://doi.org/10.1017/9781107588417" ext-link-type="DOI">10.1017/9781107588417</ext-link>, 2017. </mixed-citation></ref>
      <ref id="bib1.bibx79"><label>Wallcraft(2000)</label><mixed-citation>Wallcraft, A. J.: SPMD OpenMP versus MPI for ocean models, Concurrency, 12, 1155–1164, <ext-link xlink:href="https://doi.org/10.1002/1096-9128(200010)12:12&lt;1155::AID-CPE532&gt;3.0.CO;2-5" ext-link-type="DOI">10.1002/1096-9128(200010)12:12&lt;1155::AID-CPE532&gt;3.0.CO;2-5</ext-link>, 2000.</mixed-citation></ref>
      <ref id="bib1.bibx80"><label>Wei et al.(2024)Wei, Han, Yu, Jiang, Liu, Lin, Yu, Xu, Zhao, Wang, Zheng, Xie, Zhou, Zhang, Zhang, Zhang, Yu, Wang, Bai, Li, Yu, Deng, Li, and Chi</label><mixed-citation>Wei, J., Han, X., Yu, J., Jiang, J., Liu, H., Lin, P., Yu, M., Xu, K., Zhao, L., Wang, P., Zheng, W., Xie, J., Zhou, Y., Zhang, T., Zhang, F., Zhang, Y., Yu, Y., Wang, Y., Bai, Y., Li, C., Yu, Z., Deng, H., Li, Y., and Chi, X.: A Performance-Portable Kilometer-Scale Global Ocean Model on ORISE and New Sunway Heterogeneous Supercomputers, in: SC24: International Conference for High Performance Computing, Networking, Storage and Analysis, 1–12, <ext-link xlink:href="https://doi.org/10.1109/SC41406.2024.00009" ext-link-type="DOI">10.1109/SC41406.2024.00009</ext-link>, 2024.</mixed-citation></ref>
      <ref id="bib1.bibx81"><label>Williamson et al.(1992)Williamson, Drake, Hack, Jakob, and Swarztrauber</label><mixed-citation> Williamson, D. L., Drake, J. B., Hack, J. J., Jakob, R., and Swarztrauber, P. N.: A standard test set for numerical approximations to the shallow water equations in spherical geometry, J. Comput. Phys., 102, 211–224, 1992.</mixed-citation></ref>
      <ref id="bib1.bibx82"><label>Xu et al.(2014)Xu, Huang, Zhang, Hu, Fu, and Yang</label><mixed-citation>Xu, S., Huang, X., Zhang, Y., Hu, Y., Fu, H., and Yang, G.: Porting the Princeton Ocean Model to GPUs, in: Algorithms and Architectures for Parallel Processing, Lecture Notes in Computer Science, edited by: Sun, X.-H., Qu, W., Stojmenovic, I., Zhou, W., Li, Z., Guo, H., Min, G., Yang, T., Wu, Y., and Liu, L., Springer International Publishing, Cham, 1–14, ISBN 978-3-319-11197-1, <ext-link xlink:href="https://doi.org/10.1007/978-3-319-11197-1_1" ext-link-type="DOI">10.1007/978-3-319-11197-1_1</ext-link>, 2014.</mixed-citation></ref>
      <ref id="bib1.bibx83"><label>Xu et al.(2015)Xu, Huang, Oey, Xu, Fu, Zhang, and Yang</label><mixed-citation>Xu, S., Huang, X., Oey, L.-Y., Xu, F., Fu, H., Zhang, Y., and Yang, G.: POM.gpu-v1.0: a GPU-based Princeton Ocean Model, Geosci. Model Dev., 8, 2815–2827, <ext-link xlink:href="https://doi.org/10.5194/gmd-8-2815-2015" ext-link-type="DOI">10.5194/gmd-8-2815-2015</ext-link>, 2015.</mixed-citation></ref>
      <ref id="bib1.bibx84"><label>YAML(2009)</label><mixed-citation>YAML: YAML Ain't Markup Language v1.2, <uri>https://yaml.org/</uri> (last access: 13 April 2026), 2009.</mixed-citation></ref>
      <ref id="bib1.bibx85"><label>Zhao et al.(2017)Zhao, Liang, Sun, Zhao, Sun, and Liu</label><mixed-citation>Zhao, X.-D., Liang, S.-X., Sun, Z.-C., Zhao, X.-Z., Sun, J.-W., and Liu, Z.-B.: A GPU accelerated finite volume coastal ocean model, J. Hydrodynam., 29, 679–690, <ext-link xlink:href="https://doi.org/10.1016/S1001-6058(16)60780-1" ext-link-type="DOI">10.1016/S1001-6058(16)60780-1</ext-link>, 2017.</mixed-citation></ref>

  </ref-list></back>
    <!--<article-title-html>The ocean model for E3SM global applications: Omega version 0.1.0 – a new high-performance computing code for exascale architectures</article-title-html>
<abstract-html/>
<ref-html id="bib1.bib1"><label>Advanced Micro Devices, Inc.(2023)</label><mixed-citation>
      
Advanced Micro Devices, Inc.: HIP Programming Guide,
<a href="https://rocmdocs.amd.com/en/latest/Programming_Guides/HIP-GUIDE.html" target="_blank"/> (last access: 14 July 2025), 2023.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib2"><label>AMD(2025)</label><mixed-citation>
      
AMD: AMD Instinct MI250X Accelerators,
<a href="https://www.amd.com/en/products/accelerators/instinct/mi200/mi250x.html" target="_blank"/>
(last access: 30 June 2025), 2025.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib3"><label>Anderson et al.(2015)Anderson, Craig, Dennis, Edwards, Evans,
Fischer, Jacob, Mickelson, Taylor, and Worley</label><mixed-citation>
      
Anderson, J., Craig, A., Dennis, J., Edwards, J., Evans, K., Fischer, C.,
Jacob, R., Mickelson, S., Taylor, M., and Worley, P.: The Common
Infrastructure for Modeling the Earth (CIME),
<a href="https://esmci.github.io/cime" target="_blank"/> (last access: 15 July 2025), 2015.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib4"><label>Argonne National Laboratory(2025)</label><mixed-citation>
      
Argonne National Laboratory: Aurora Factsheet,
<a href="https://www.alcf.anl.gov/sites/default/files/2024-07/Aurora_FactSheet_2024.pdf" target="_blank"/>
(last access: 30 June 2025), 2025.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib5"><label>Asay-Davis et al.(2025a)Asay-Davis, Begeman, Denlinger,
Brus, Smith, Nolan, Comeau, Kennedy, Conlon, Barthel, and
Jacob</label><mixed-citation>
      
Asay-Davis, X., Begeman, C., Denlinger, A., Brus, S., Smith, K., Nolan, A.,
Comeau, D., Kennedy, J. H., Conlon, L., Barthel, A., and Jacob, R.: E3SM-Project/polaris: v0.7.0, Zenodo [code], <a href="https://doi.org/10.5281/zenodo.15470123" target="_blank">https://doi.org/10.5281/zenodo.15470123</a>, 2025a.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib6"><label>Asay-Davis et al.(2025b)Asay-Davis, Hoffman, Begeman,
Petersen, Hillebrand, Han, Nolan, Brus, Wolfram, barthel, Capodaglio,
Calandrini, Denlinger, Vankova, Roekel, yariseidenbenz, pbosler, Brady,
mperego, Smith, Moore-Maley, Takano, Cao, Zhang, Lilly, Carlson, Turner, and Engwirda</label><mixed-citation>
      
Asay-Davis, X., Hoffman, M., Begeman, C., Petersen, M., Hillebrand, T., Han,
H., Nolan, A., Brus, S., Wolfram, P. J., barthel, a., Capodaglio, G.,
Calandrini, S., Denlinger, A., Vankova, I., Roekel, L. V., yariseidenbenz,
pbosler, Brady, R., mperego, Smith, C., Moore-Maley, B., Takano, Y., Cao, Z.,
Zhang, T., Lilly, J., Carlson, M., Turner, M., and Engwirda, D.: MPAS-Dev/compass: v1.7.0, Zenodo [code], <a href="https://doi.org/10.5281/zenodo.15857467" target="_blank">https://doi.org/10.5281/zenodo.15857467</a>, 2025b.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib7"><label>Asay-Davis et al.(2025c)Asay-Davis, Begeman, Barthel,
Brus, Jones, Kang, Kim, Mametjanov, ONeill, Petersen, Smith, Sreepathi,
Van Roekel, and Waruszewski</label><mixed-citation>
      
Asay-Davis, X. S., Begeman, C. B., Barthel, A. M., Brus, S. R., Jones, P. W.,
Kang, H.-G., Kim, Y., Mametjanov, A., O'Neill, B. J., Petersen, M. R.,
Smith, K. M., Sreepathi, S., Van Roekel, L. P., and Waruszewski, M.: Omega
Documentation, <a href="https://docs.e3sm.org/Omega" target="_blank"/> (last access:
13 April 2026), 2025c.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib8"><label>Beckingsale et al.(2019)Beckingsale, Burmark, Hornung, Jones,
Killian, Kunen, Pearce, Robinson, Ryujin, and Scogland</label><mixed-citation>
      
Beckingsale, D. A., Burmark, J., Hornung, R., Jones, H., Killian, W., Kunen,
A. J., Pearce, O., Robinson, P., Ryujin, B. S., and Scogland, T. R. W.:
RAJA: Portable Performance for Large-Scale Scientific Applications, in:
IEEE/ACM International Workshop on Performance, Portability and Productivity
in HPC (P3HPC), <a href="https://www.osti.gov/biblio/1488819" target="_blank"/> (last access: 13 April 2026), 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib9"><label>Beder(2023)</label><mixed-citation>
      
Beder, J.: A YAML parser and emitter in C++, <a href="https://github.com/jbeder/yaml-cpp" target="_blank"/> (last access: 13 April 2026), 2023.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib10"><label>Bishnu et al.(2023)Bishnu, Strauss, and Petersen</label><mixed-citation>
      
Bishnu, S., Strauss, R. R., and Petersen, M. R.: Comparing the Performance of
Julia on CPUs versus GPUs and Julia-MPI versus Fortran-MPI: a case study with
MPAS-Ocean (Version 7.1), Geosci. Model Dev., 16, 5539–5559,
<a href="https://doi.org/10.5194/gmd-16-5539-2023" target="_blank">https://doi.org/10.5194/gmd-16-5539-2023</a>, 2023.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib11"><label>Bishnu et al.(2024)Bishnu, Petersen, Quaife, and
Schoonover</label><mixed-citation>
      
Bishnu, S., Petersen, M. R., Quaife, B., and Schoonover, J.: A Verification
Suite of Test Cases for the Barotropic Solver of Ocean Models, J. Adv. Model. Earth Syst., 16, e2022MS003545, <a href="https://doi.org/10.1029/2022MS003545" target="_blank">https://doi.org/10.1029/2022MS003545</a>, 2024.


    </mixed-citation></ref-html>
<ref-html id="bib1.bib12"><label>Bryan and Cox(1968)</label><mixed-citation>
      
Bryan, K. and Cox, M. D.: A Nonlinear Model of an Ocean Driven by Wind and
Differential Heating: Part I. Description of the Three-Dimensional Velocity
and Density Fields, J. Atmos. Sci., 25, 945–967, 1968.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib13"><label>Caldwell et al.(2019)Caldwell, Mametjanov, Tang, Van Roekel, Golaz et al.</label><mixed-citation>
      
Caldwell, P. M., Mametjanov, A., Tang, Q., Van Roekel, L. P., Golaz, J., Lin, W., Bader, D. C., Keen, N. D., Feng, Y., Jacob, R., Maltrud, M. E., Roberts, A. F., Taylor, M. A., Veneziani, M., Wang, H., Wolfe, J. D., Balaguru, K., Cameron-Smith, P., Dong, L., Klein, S. A., Leung, L. R., Li, H., Li, Q., Liu, X., Neale, R. B., Pinheiro, M., Qian, Y., Ullrich, P. A., Xie, S., Yang, Y., Zhang, Y., Zhang, K., and Zhou, T.: The DOE E3SM Coupled Model Version 1: Description and Results at High Resolution, J. Adv. Model. Earth Syst., 11, 4095–4146, <a href="https://doi.org/10.1029/2019MS001870" target="_blank">https://doi.org/10.1029/2019MS001870</a>, 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib14"><label>Cushman-Roisin and Beckers(2011)</label><mixed-citation>
      
Cushman-Roisin, B. and Beckers, J.-M.: Introduction to geophysical fluid
dynamics: physical and numerical aspects, Academic Press, ISBN 13:978-0120887590, 2011.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib15"><label>Donahue et al.(2024)Donahue, Caldwell, Bertagna, Beydoun,
Bogenschutz, Bradley, Clevenger, Foucar, Golaz, Guba, Hannah, Hillman,
Johnson, Keen, Lin, Singh, Sreepathi, Taylor, Tian, Terai, Ullrich, Yuan, and Zhang</label><mixed-citation>
      
Donahue, A. S., Caldwell, P. M., Bertagna, L., Beydoun, H., Bogenschutz, P. A., Bradley, A. M., Clevenger, T. C., Foucar, J., Golaz, C., Guba, O., Hannah, W., Hillman, B. R., Johnson, J. N., Keen, N., Lin, W., Singh, B., Sreepathi, S., Taylor, M. A., Tian, J., Terai, C. R., Ullrich, P. A., Yuan, X., and Zhang, Y.: To Exascale and Beyond – The Simple Cloud-Resolving E3SM
Atmosphere Model (SCREAM), a Performance Portable Global Atmosphere Model for
Cloud-Resolving Scales, J. Adv. Model. Earth Syst., 16, e2024MS004314, <a href="https://doi.org/10.1029/2024MS004314" target="_blank">https://doi.org/10.1029/2024MS004314</a>, 2024.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib16"><label>Dongarra and Geist(2022)</label><mixed-citation>
      
Dongarra, J. and Geist, A.: Report On The Oak Ridge National Laboratory's
Frontier System, Tech. rep., University of Tennessee,
<a href="https://icl.utk.edu/files/publications/2022/icl-utk-1570-2022.pdf" target="_blank"/> (last access: 13 April 2026), 2022.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib17"><label>Dukowicz and Smith(1994)</label><mixed-citation>
      
Dukowicz, J. and Smith, R.: Implicit free-surface formulation of the
Bryan-Cox-Semtner ocean model, J. Geophys. Res., 99, 7991–8014, 1994.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib18"><label>Dukowicz et al.(1993)Dukowicz, Smith, and Malone</label><mixed-citation>
      
Dukowicz, J. K., Smith, R. D., and Malone, R. C.: A Reformulation and
Implementation of the Bryan-Cox-Semtner Ocean Model on the Connection Machine, J. Atmos. Ocean. Tech., 10, 195–208,
<a href="https://doi.org/10.1175/1520-0426(1993)010&lt;0195:ARAIOT&gt;2.0.CO;2" target="_blank">https://doi.org/10.1175/1520-0426(1993)010&lt;0195:ARAIOT&gt;2.0.CO;2</a>, 1993.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib19"><label>Eaton et al.(2024)Eaton, Gregory, Drach, Taylor, Hankin
et al.</label><mixed-citation>
      
Eaton, B., Gregory, J., Drach, B., Taylor, K., Hankin, S., Caron, J., Signell, R., Bentley, P., Rappa, G., Höck, H., Pamment, A., Juckes, M., Raspaud, M., Blower, J., Horne, R., Whiteaker, T., Blodgett, D., Zender, C., Lee, D., Hassel, D., Snow, A., Kölling, T, Allured, D., Jelenak, A., Soerensen, A. M., Gaultier, L., Herlédan, S., Manzano, F., Bärring, L., Barker, C., and Bartholomew, S. L.: NetCDF Climate and Forecast (CF) Metadata Conventions (1.12), Tech. rep., CF Community, Zenodo [data set], <a href="https://doi.org/10.5281/zenodo.14275599" target="_blank">https://doi.org/10.5281/zenodo.14275599</a>, 2024.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib20"><label>Engwirda(2018)</label><mixed-citation>
      
Engwirda, D.: Generalised primal-dual grids for unstructured co-volume schemes, J. Comput. Phys., 375, 155–176, <a href="https://doi.org/10.1016/j.jcp.2018.07.025" target="_blank">https://doi.org/10.1016/j.jcp.2018.07.025</a>, 2018.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib21"><label>ESMF(2020)</label><mixed-citation>
      
ESMF: Earth System Modeling Framework, <a href="http://earthsystemmodeling.org/" target="_blank"/> (last access: 13 April 2026), 2020.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib22"><label>GEBCO Bathymetric Compilation Group(2023)</label><mixed-citation>
      
GEBCO Bathymetric Compilation Group: The GEBCO_2023 Grid – a continuous
terrain model of the global oceans and land, NERC EDS British Oceanographic Data Centre NOC, <a href="https://doi.org/10.5285/f98b053b-0cbc-6c23-e053-6c86abc0af7b" target="_blank">https://doi.org/10.5285/f98b053b-0cbc-6c23-e053-6c86abc0af7b</a>, 2023.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib23"><label>Gill(1982)</label><mixed-citation>
      
Gill, A. E.: Atmosphere-Ocean Dynamics, in: vol. 30 of International
Geophysics Series, Academic Press, San Diego, California, ISBN 10:0122835220, 1982.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib24"><label>Godoy et al.(2020)Godoy, Podhorszki, Wang, Atkins, Eisenhauer, Gu,
Davis, Choi, Germaschewski, Huck, Huebl, Kim, Kress, Kurc, Liu, Logan, Mehta, Ostrouchov, Parashar, Poeschel, Pugmire, Suchyta, Takahashi, Thompson, Tsutsumi, Wan, Wolf, Wu, and Klasky</label><mixed-citation>
      
Godoy, W. F., Podhorszki, N., Wang, R., Atkins, C., Eisenhauer, G., Gu, J.,
Davis, P., Choi, J., Germaschewski, K., Huck, K., Huebl, A., Kim, M., Kress,
J., Kurc, T., Liu, Q., Logan, J., Mehta, K., Ostrouchov, G., Parashar, M.,
Poeschel, F., Pugmire, D., Suchyta, E., Takahashi, K., Thompson, N.,
Tsutsumi, S., Wan, L., Wolf, M., Wu, K., and Klasky, S.: ADIOS 2: The
Adaptable Input Output System. A framework for high-performance data
management, SoftwareX, 12, 100561, <a href="https://doi.org/10.1016/j.softx.2020.100561" target="_blank">https://doi.org/10.1016/j.softx.2020.100561</a>, 2020.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib25"><label>GridTools(2019)</label><mixed-citation>
      
GridTools: GridTools,
<a href="https://gridtools.github.io/gridtools/latest/index.html" target="_blank"/> (last access: 13 April 2026), 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib26"><label>Ham et al.(2023)Ham, Kelly, Mitchell, Cotter, Kirby, Sagiyama,
Bouziani, Vorderwuelbecke, Gregory, Betteridge, Shapero, Nixon-Hill, Ward,
Farrell, Brubeck, Marsden, Gibson, Homolya, Sun, McRae, Luporini, Gregory,
Lange, Funke, Rathgeber, Bercea, and Markall</label><mixed-citation>
      
Ham, D. A., Kelly, P. H. J., Mitchell, L., Cotter, C. J., Kirby, R. C.,
Sagiyama, K., Bouziani, N., Vorderwuelbecke, S., Gregory, T. J., Betteridge,
J., Shapero, D. R., Nixon-Hill, R. W., Ward, C. J., Farrell, P. E., Brubeck,
P. D., Marsden, I., Gibson, T. H., Homolya, M., Sun, T., McRae, A. T. T.,
Luporini, F., Gregory, A., Lange, M., Funke, S. W., Rathgeber, F., Bercea,
G.-T., and Markall, G. R.: Firedrake User Manual, in:
1st Edn., Imperial College London and University of Oxford and Baylor University and University of Washington, <a href="https://doi.org/10.25561/104839" target="_blank">https://doi.org/10.25561/104839</a>, 2023.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib27"><label>Hamming(1987)</label><mixed-citation>
      
Hamming, R. W.: Numerical Methods for Scientists and Engineers, in: 2nd Edn., Dover Publications, ISBN 10:9780486652412, 1987.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib28"><label>He and Ding(2001)</label><mixed-citation>
      
He, Y. and Ding, C.: Using Accurate Arithmetics to Improve Numerical
Reproducibility and Stability in Parallel Applications, J. Supercomput., 18, 259–277, 2001.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib29"><label>Hida et al.(2008)Hida, Xiaoye, and Bailey</label><mixed-citation>
      
Hida, Y., Xiaoye, S., and Bailey, D. H.: Library for double-double and
quad-double arithmetic, <a href="https://www.davidhbailey.com/dhbpapers/qd.pdf" target="_blank"/> (last access: 13 April 2026), 2008.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib30"><label>HPCwire(2021)</label><mixed-citation>
      
HPCwire: AMD Launches Epyc Milan with 19 SKUs for HPC, Enterprise and
Hyperscale,
<a href="https://www.hpcwire.com/2021/03/15/amd-launches-epyc-milan-with-19-skus-for-hpc-enterprise-and-hyperscale/" target="_blank">https://www.hpcwire.com/2021/03/15/amd-launches-epyc-milan-with-19-skus-for-hpc-enterprise-and-hyperscale/</a>
(last access: 30 June 2025), 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib31"><label>John et al.(2021)John, Jeffers, and Sodani</label><mixed-citation>
      
John, J. R., Jeffers, T., and Sodani, P.: Data Parallel C++: Mastering DPC++ for Programming of Heterogeneous Systems using C++ and SYCL, Apress, ISBN 978-1484255735, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib32"><label>Karypis(2013)</label><mixed-citation>
      
Karypis, G.: METIS – Serial Graph Partitioning and Fill-reducing Matrix Ordering, GitHub [code], <a href="https://github.com/KarypisLab/METIS" target="_blank"/> (last access: 13 April 2026), 2013.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib33"><label>Kerbyson and Jones(2005)</label><mixed-citation>
      
Kerbyson, D. and Jones, P.: A Performance Model of the Parallel Ocean Program, IJHPCA, 19, 261–276, <a href="https://doi.org/10.1177/1094342005056114" target="_blank">https://doi.org/10.1177/1094342005056114</a>, 2005.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib34"><label>Kitware(2023a)</label><mixed-citation>
      
Kitware: CDash: Continuous Integration Dashboard,
<a href="https://www.cdash.org" target="_blank"/> (last access: 15 July 2025), 2023a.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib35"><label>Kitware(2023b)</label><mixed-citation>
      
Kitware: CMake: Cross-Platform Make, <a href="https://cmake.org" target="_blank"/> (last access: 15 July 2025), 2023b.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib36"><label>Kitware(2023c)</label><mixed-citation>
      
Kitware: CTest: Testing Tool for CMake Projects,
<a href="https://cmake.org/cmake/help/latest/manual/ctest.1.html" target="_blank"/> (last access:
15 July 2025), 2023c.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib37"><label>Knuth(2005)</label><mixed-citation>
      
Knuth, D.: The Art of Computer Programming, in: vol. 2, chap. 4, Addison-Wesley Press, ISBN 10:0201853930, 2005.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib38"><label>Krishna et al.(2024)Krishna, Wu, Edwards, Hartnett, Dennis, and
Vertenstein</label><mixed-citation>
      
Krishna, J., Wu, D., Edwards, J., Hartnett, E., Dennis, J. M., and Vertenstein, M.: Software for Caching Output and Reads for Parallel I/O, v1.6, GitHub [code], <a href="https://github.com/E3SM-Project/scorpio" target="_blank"/> (last access: 13 April 2026), 2024.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib39"><label>Maltrud and McClean(2005)</label><mixed-citation>
      
Maltrud, M. and McClean, J. L.: An eddy resolving global 1/10 degree ocean
simulation, Ocean Model., 8, 31–54, 2005.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib40"><label>Melman(2023)</label><mixed-citation>
      
Melman, G.: Fast C++ logging library, GitHub [code], <a href="https://github.com/gabime/spdlog" target="_blank"/> (last access: 13 April 2026), 2023.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib41"><label>Mielikainen et al.(2012)Mielikainen, Huang, Huang, and
Goldberg</label><mixed-citation>
      
Mielikainen, J., Huang, B., Huang, H.-L. A., and Goldberg, M. D.: Improved
GPU/CUDA Based Parallel Weather and Research Forecast (WRF)
Single Moment 5-Class (WSM5) Cloud Microphysics, IEEE J. Select. Top. Appl., 5, 1256–1265, <a href="https://doi.org/10.1109/JSTARS.2012.2188780" target="_blank">https://doi.org/10.1109/JSTARS.2012.2188780</a>, 2012.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib42"><label>Morlighem(2022)</label><mixed-citation>
      
Morlighem, M.: MEaSUREs BedMachine Antarctica, Version 3, NSIDC,
<a href="https://doi.org/10.5067/FPSU0V1MWUB6" target="_blank">https://doi.org/10.5067/FPSU0V1MWUB6</a>, 2022.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib43"><label>MPI(2025)</label><mixed-citation>
      
MPI: MPI: A Message-Passing Interface Standard Version 5.0,
<a href="https://www.mpi-forum.org/docs/mpi-5.0/mpi50-report.pdf" target="_blank"/> (last access: 13 April 2026), 2025.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib44"><label>Munk and Carrier(1950)</label><mixed-citation>
      
Munk, W. H. and Carrier, G. F.: The Wind-driven Circulation in Ocean
Basins of Various Shapes, Tellus, 2, 160–167,
<a href="https://doi.org/10.1111/j.2153-3490.1950.tb00327.x" target="_blank">https://doi.org/10.1111/j.2153-3490.1950.tb00327.x</a>, 1950.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib45"><label>NEMO(2025)</label><mixed-citation>
      
NEMO: NEMO 5.0: NEMO User Guide, <a href="https://sites.nemo-ocean.io/user-guide/" target="_blank"/> (last access: 13 April 2026), 2025.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib46"><label>NERSC(2025)</label><mixed-citation>
      
NERSC: Perlmutter architecture specification,
<a href="https://docs.nersc.gov/systems/perlmutter/architecture/" target="_blank"/> (last access:
30 June 2025), 2025.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib47"><label>Norman et al.(2022)Norman, Lyngaas, Bagusetty, and
Berrill</label><mixed-citation>
      
Norman, M., Lyngaas, I., Bagusetty, A., and Berrill, M.: Portable C++ Code that can Look and Feel Like Fortran Code with Yet Another Kernel Launcher (YAKL), International Journal of Parallel Programming, <a href="https://doi.org/10.1007/s10766-022-00739-0" target="_blank">https://doi.org/10.1007/s10766-022-00739-0</a>, 2022.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib48"><label>NVIDIA Corporation(2023)</label><mixed-citation>
      
NVIDIA Corporation: CUDA C Programming Guide,
<a href="https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html" target="_blank"/> (last
access: 14 July 2025), 2023.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib49"><label>Oak Ridge National Laboratory(2025)</label><mixed-citation>
      
Oak Ridge National Laboratory: Frontier System Specifications,
<a href="https://www.olcf.ornl.gov/olcf-resources/compute-systems/frontier/" target="_blank"/> (last access: 30 June 2025), 2025.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib50"><label>OpenACC(2022)</label><mixed-citation>
      
OpenACC: The OpenACC Application Programming Interface Version 3.3,
<a href="https://www.openacc.org/sites/default/files/inline-images/Specification/OpenACC-3.3-final.pdf" target="_blank"/> (last access: 13 April 2026), 2022.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib51"><label>OpenMP(2024)</label><mixed-citation>
      
OpenMP: OpenMP Application Programming Interface,
<a href="https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-6-0.pdf" target="_blank"/>
(last access: 13 April 2026), 2024.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib52"><label>Pal et al.(2023)Pal, Barton, Petersen, Brus, Engwirda, Arbic,
Roberts, Westerink, and Wirasaet</label><mixed-citation>
      
Pal, N., Barton, K. N., Petersen, M. R., Brus, S. R., Engwirda, D., Arbic, B. K., Roberts, A. F., Westerink, J. J., and Wirasaet, D.: Barotropic tides in MPAS-Ocean (E3SM V2): impact of ice shelf cavities, Geosci. Model Dev., 16, 1297–1314, <a href="https://doi.org/10.5194/gmd-16-1297-2023" target="_blank">https://doi.org/10.5194/gmd-16-1297-2023</a>, 2023.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib53"><label>Pedlosky(1990)</label><mixed-citation>
      
Pedlosky, J. (Ed.): Geophysical Fluid Dynamics, in: 2nd Edn., Springer-Verlag, New York, ISBN 10:0387963871, 1990.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib54"><label>Petersen et al.(2019)Petersen, Asay-Davis, Berres, Chen, Feige,
Hoffman, Jacobsen, Jones, Maltrud, Price, Ringler, Streletz, Turner, Van
Roekel, Veneziani, Wolfe, Wolfram, and Woodring</label><mixed-citation>
      
Petersen, M. R., Asay-Davis, X. S., Berres, A. S., Chen, Q., Feige, N.,
Hoffman, M. J., Jacobsen, D. W., Jones, P. W., Maltrud, M. E., Price, S. F.,
Ringler, T. D., Streletz, G. J., Turner, A. K., Van Roekel, L. P.,
Veneziani, M., Wolfe, J. D., Wolfram, P. J., and Woodring, J. L.: An
Evaluation of the Ocean and Sea Ice Climate of E3SM Using
MPAS and Interannual CORE-II Forcing, J. Adv. Model. Earth Syst., 11,
1438–1458, <a href="https://doi.org/10.1029/2018MS001373" target="_blank">https://doi.org/10.1029/2018MS001373</a>, 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib55"><label>Petersen et al.(2025)Petersen, Asay-Davis, Barthel,
Begeman, Brus, Jones, Kang, Kim, Mametjanov, O'Neill, Smith, Sreepathi,
Van Roekel, and Waruszewski</label><mixed-citation>
      
Petersen, M. R., Asay-Davis, X. S., Barthel, A. M., Begeman, C. B., Brus,
S. R., Jones, P. W., Kang, H.-G., Kim, Y., Mametjanov, A., O'Neill, B. J.,
Smith, K. M., Sreepathi, S., Van Roekel, L. P., and Waruszewski, M.:
E3SM-Project/Omega, US Department of Energy Office of Scientific and Technical Information [code], <a href="https://doi.org/10.11578/dc.20250723.1" target="_blank">https://doi.org/10.11578/dc.20250723.1</a>, 2025.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib56"><label>Porter and Heimbach(2025)</label><mixed-citation>
      
Porter, A. R. and Heimbach, P.: Unlocking the power of parallel computing: GPU technologies for ocean forecasting, State of the Planet, 5-opsr, 23,
<a href="https://doi.org/10.5194/sp-5-opsr-23-2025" target="_blank">https://doi.org/10.5194/sp-5-opsr-23-2025</a>, 2025.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib57"><label>PSyclone(2019)</label><mixed-citation>
      
PSyclone: PSyclone User Guide, <a href="https://psyclone.readthedocs.io/en/stable/" target="_blank"/> (last access: 13 April 2026), 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib58"><label>Ramadhan et al.(2020)Ramadhan, Wagner, Hill, Campin, Churavy, Besard, Souza, Edelman, Ferrari, and Marshall</label><mixed-citation>
      
Ramadhan, A., Wagner, G. L., Hill, C., Campin, J.-M., Churavy, V., Besard, T., Souza, A., Edelman, A., Ferrari, R., and Marshall, J.: Oceananigans.jl: Fast and friendly geophysical fluid dynamics on GPUs, J. Open Source Softw., 5, 2018, <a href="https://doi.org/10.21105/joss.02018" target="_blank">https://doi.org/10.21105/joss.02018</a>, 2020.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib59"><label>Ringler et al.(2010)Ringler, Thuburn, Klemp, and
Skamarock</label><mixed-citation>
      
Ringler, T. D., Thuburn, J., Klemp, J. B., and Skamarock, W. C.: A unified
approach to energy conservation and potential vorticity dynamics for
arbitrarily-structured C-grids, J. Comput. Phys., 229, 3065–3090, 2010.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib60"><label>Ringler et al.(2013)Ringler, Petersen, Higdon, Jacobsen, Jones, and
Maltrud</label><mixed-citation>
      
Ringler, T. D., Petersen, M. R., Higdon, R. L., Jacobsen, D., Jones, P. W., and Maltrud, M.: A multi-resolution approach to global ocean modeling, Ocean
Model., 69, 211–232, 2013.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib61"><label>Roache(2002)</label><mixed-citation>
      
Roache, P. J.: Code verification by the method of manufactured solutions, J. Fluids Eng., 124, 4–10, 2002.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib62"><label>Rosinski(2018)</label><mixed-citation>
      
Rosinski, J.: GPTL – General Purpose Timing Library, GitHub [code], <a href="https://jmrosinski.github.io/GPTL/" target="_blank"/> (last access: 13 April 2026), 2018.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib63"><label>Salari and Knupp(2000)</label><mixed-citation>
      
Salari, K. and Knupp, P.: Code verification by the method of manufactured
solutions, Tech. rep., Sandia National Labs., Albuquerque, NM, USA, <a href="https://doi.org/10.2172/759450" target="_blank">https://doi.org/10.2172/759450</a>, 2000.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib64"><label>Semtner and Chervin(1988)</label><mixed-citation>
      
Semtner, A. J. and Chervin, R. M.: A simulation of the global ocean circulation with resolved eddies, J. Geophys. Res., 93, 15502–15522, 1988.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib65"><label>Silvestri et al.(2025)Silvestri, Wagner, Constantinou, Hill, Campin, Souza, Bishnu, Churavy, Marshall, and Ferrari</label><mixed-citation>
      
Silvestri, S., Wagner, G. L., Constantinou, N. C., Hill, C. N., Campin, J.-M., Souza, A. N., Bishnu, S., Churavy, V., Marshall, J., and Ferrari, R.: A GPU-Based Ocean Dynamical Core for Routine Mesoscale-Resolving Climate
Simulations, J. Adv. Model. Earth Syst., 17, e2024MS004465, <a href="https://doi.org/10.1029/2024MS004465" target="_blank">https://doi.org/10.1029/2024MS004465</a>, 2025.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib66"><label>Skamarock and Gassmann(2011)</label><mixed-citation>
      
Skamarock, W. C. and Gassmann, A.: Conservative Transport Schemes for
Spherical Geodesic Grids: High-Order Flux Operators for
ODE-Based Time Integration, Mon. Weather Rev., 139, 2962–2975,
<a href="https://doi.org/10.1175/MWR-D-10-05056.1" target="_blank">https://doi.org/10.1175/MWR-D-10-05056.1</a>, 2011.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib67"><label>Smith et al.(2025)Smith, Barthel, Conlon, Van Roekel, Bartoletti,
Golaz, Zhang, Begeman, Benedict, Bisht, Feng, Hannah, Harrop, Jeffery, Lin,
Ma, Maltrud, Petersen, Singh, Tang, Tesfa, Wolfe, Xie, Zheng, Balaguru,
Garuba, Gleckler, Hu, Lee, Moore-Maley, and Ordoñez</label><mixed-citation>
      
Smith, K. M., Barthel, A. M., Conlon, L. M., Van Roekel, L. P., Bartoletti, A., Golaz, J.-C., Zhang, C., Begeman, C. B., Benedict, J. J., Bisht, G., Feng, Y., Hannah, W., Harrop, B. E., Jeffery, N., Lin, W., Ma, P.-L.,
Maltrud, M. E., Petersen, M. R., Singh, B., Tang, Q., Tesfa, T., Wolfe, J. D., Xie, S., Zheng, X., Balaguru, K., Garuba, O., Gleckler, P., Hu, A., Lee, J., Moore-Maley, B., and Ordoñez, A. C.: The DOE E3SM version 2.1:
overview and assessment of the impacts of parameterized ocean submesoscales,
Geosci. Model Dev., 18, 1613–1633, <a href="https://doi.org/10.5194/gmd-18-1613-2025" target="_blank">https://doi.org/10.5194/gmd-18-1613-2025</a>, 2025.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib68"><label>Smith et al.(2010)Smith, Jones, Briegleb, Bryan, Danabasoglu, Dennis, Dukowicz, Eden, Fox-Kemper, Gent et al.</label><mixed-citation>
      
Smith, R., Jones, P., Briegleb, B., Bryan, F., Danabasoglu, G., Dennis, J.,
Dukowicz, J., Eden, C., Fox-Kemper, B., Gent, P., Hecht, M., Kauffman, J., Large, W., Levine, M., Meares, L., Peacock, S., and Rosenbloom, N.: The Parallel Ocean Program (POP) Reference Manual: Ocean Component of the Community Climate System Model (CCSM) and Community Earth System Model (CESM), Tech. Rep. LAUR-01853, Los Alamos National Laboratory, Los Alamos, NM, <a href="https://opensky.ucar.edu/system/files/2024-09/manuscripts_825.pdf" target="_blank"/>
(last access: 13 April 2026), 2010.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib69"><label>Smith et al.(2000)Smith, Maltrud, Bryan, and Hecht</label><mixed-citation>
      
Smith, R. D., Maltrud, M. E., Bryan, F. O., and Hecht, M. W.: Numerical
simulation of the North Atlantic Ocean at 1/10°, J. Phys. Oceanogr., 30, 1532–1561, 2000.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib70"><label>Stommel(1948)</label><mixed-citation>
      
Stommel, H.: The westward intensification of wind-driven ocean currents, Eos
Trans. Am. Geophys. Union, 29, 202–206, 1948.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib71"><label>Strohmaier et al.(2025)Strohmaier, Dongarra, Simon, and
Meuer</label><mixed-citation>
      
Strohmaier, E., Dongarra, J., Simon, H., and Meuer, M.: TOP500 List – June 2025, <a href="https://top500.org/lists/top500/list/2025/06/" target="_blank"/> (last access:
30 June 2025), 2025.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib72"><label>Stroustrup(1986)</label><mixed-citation>
      
Stroustrup, B.: The C++ Programming Language, Addison-Wesley, Reading, MA, ISBN 9780201114991, 1986.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib73"><label>Stroustrup(2013)</label><mixed-citation>
      
Stroustrup, B.: The C++ Programming Language, in: 4th Edn., Addison-Wesley, ISBN 9780321563842, 2013.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib74"><label>Thuburn et al.(2009)Thuburn, Ringler, Skamarock, and
Klemp</label><mixed-citation>
      
Thuburn, J., Ringler, T. D., Skamarock, W. C., and Klemp, J. B.: Numerical
representation of geostrophic modes on arbitrarily structured C-grids, J.
Comput. Phys., 228, 8321–8335, 2009.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib75"><label>Trott et al.(2021)Trott, Berger-Vergiat, Poliakoff, Rajamanickam,
Lebrun-Grandie, Madsen, Al Awar, Gligoric, Shipman, and
Womeldorff</label><mixed-citation>
      
Trott, C., Berger-Vergiat, L., Poliakoff, D., Rajamanickam, S., Lebrun-Grandie, D., Madsen, J., Al Awar, N., Gligoric, M., Shipman, G., and Womeldorff, G.: The Kokkos Ecosystem: Comprehensive Performance Portability for High Performance Computing, Comput. Sci. Eng., 23, 10–18,
<a href="https://doi.org/10.1109/MCSE.2021.3098509" target="_blank">https://doi.org/10.1109/MCSE.2021.3098509</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib76"><label>Trott et al.(2022)Trott, Lebrun-Grandié et al.</label><mixed-citation>
      
Trott, C. R., Lebrun-Grandié, D., et al.: Kokkos 3: Programming Model
Extensions for the Exascale Era, IEEE T. Parallel Distrib. Syst., 33, 805–817, <a href="https://doi.org/10.1109/TPDS.2021.3097283" target="_blank">https://doi.org/10.1109/TPDS.2021.3097283</a>, 2022.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib77"><label>Unidata(2023)</label><mixed-citation>
      
Unidata: Network Common Data Form,
<a href="https://www.unidata.ucar.edu/software/netcdf/" target="_blank"/> (last access: 13 April 2026), 2023.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib78"><label>Vallis(2017)</label><mixed-citation>
      
Vallis, G. K.: Atmospheric and oceanic fluid dynamics: fundamentals and
large-scale circulation, in: 2nd Edn., Cambridge University Press, Cambridge,
ISBN 978-0-521-84969-2, <a href="https://doi.org/10.1017/9781107588417" target="_blank">https://doi.org/10.1017/9781107588417</a>, 2017.


    </mixed-citation></ref-html>
<ref-html id="bib1.bib79"><label>Wallcraft(2000)</label><mixed-citation>
      
Wallcraft, A. J.: SPMD OpenMP versus MPI for ocean models, Concurrency, 12, 1155–1164, <a href="https://doi.org/10.1002/1096-9128(200010)12:12&lt;1155::AID-CPE532&gt;3.0.CO;2-5" target="_blank">https://doi.org/10.1002/1096-9128(200010)12:12&lt;1155::AID-CPE532&gt;3.0.CO;2-5</a>,
2000.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib80"><label>Wei et al.(2024)Wei, Han, Yu, Jiang, Liu, Lin, Yu, Xu, Zhao, Wang, Zheng, Xie, Zhou, Zhang, Zhang, Zhang, Yu, Wang, Bai, Li, Yu, Deng, Li, and Chi</label><mixed-citation>
      
Wei, J., Han, X., Yu, J., Jiang, J., Liu, H., Lin, P., Yu, M., Xu, K., Zhao,
L., Wang, P., Zheng, W., Xie, J., Zhou, Y., Zhang, T., Zhang, F., Zhang, Y.,
Yu, Y., Wang, Y., Bai, Y., Li, C., Yu, Z., Deng, H., Li, Y., and Chi, X.: A
Performance-Portable Kilometer-Scale Global Ocean Model on ORISE and New
Sunway Heterogeneous Supercomputers, in: SC24: International Conference for
High Performance Computing, Networking, Storage and Analysis, 1–12,
<a href="https://doi.org/10.1109/SC41406.2024.00009" target="_blank">https://doi.org/10.1109/SC41406.2024.00009</a>, 2024.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib81"><label>Williamson et al.(1992)Williamson, Drake, Hack, Jakob, and
Swarztrauber</label><mixed-citation>
      
Williamson, D. L., Drake, J. B., Hack, J. J., Jakob, R., and Swarztrauber,
P. N.: A standard test set for numerical approximations to the shallow water
equations in spherical geometry, J. Comput. Phys., 102, 211–224, 1992.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib82"><label>Xu et al.(2014)Xu, Huang, Zhang, Hu, Fu, and Yang</label><mixed-citation>
      
Xu, S., Huang, X., Zhang, Y., Hu, Y., Fu, H., and Yang, G.: Porting the
Princeton Ocean Model to GPUs, in: Algorithms and Architectures for
Parallel Processing, Lecture Notes in Computer Science, edited by: Sun, X.-H., Qu, W., Stojmenovic, I., Zhou, W., Li, Z., Guo, H., Min, G., Yang, T., Wu, Y., and Liu, L., Springer International Publishing, Cham, 1–14, ISBN 978-3-319-11197-1, <a href="https://doi.org/10.1007/978-3-319-11197-1_1" target="_blank">https://doi.org/10.1007/978-3-319-11197-1_1</a>, 2014.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib83"><label>Xu et al.(2015)Xu, Huang, Oey, Xu, Fu, Zhang, and
Yang</label><mixed-citation>
      
Xu, S., Huang, X., Oey, L.-Y., Xu, F., Fu, H., Zhang, Y., and Yang, G.:
POM.gpu-v1.0: a GPU-based Princeton Ocean Model, Geosci. Model Dev., 8, 2815–2827, <a href="https://doi.org/10.5194/gmd-8-2815-2015" target="_blank">https://doi.org/10.5194/gmd-8-2815-2015</a>, 2015.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib84"><label>YAML(2009)</label><mixed-citation>
      
YAML: YAML Ain't Markup Language v1.2, <a href="https://yaml.org/" target="_blank"/> (last access: 13 April 2026), 2009.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib85"><label>Zhao et al.(2017)Zhao, Liang, Sun, Zhao, Sun, and
Liu</label><mixed-citation>
      
Zhao, X.-D., Liang, S.-X., Sun, Z.-C., Zhao, X.-Z., Sun, J.-W., and Liu, Z.-B.: A GPU accelerated finite volume coastal ocean model, J. Hydrodynam., 29, 679–690, <a href="https://doi.org/10.1016/S1001-6058(16)60780-1" target="_blank">https://doi.org/10.1016/S1001-6058(16)60780-1</a>, 2017.

    </mixed-citation></ref-html>--></article>
