1 | 1 | using ParticleDA |
2 | 2 | using TimerOutputs |
3 | 3 | using MPI |
| 4 | +using ThreadPinning |
4 | 5 |
5 | 6 | # Initialise MPI |
6 | 7 | MPI.Init() |
7 | | -mpi_size = MPI.Comm_size(MPI.COMM_WORLD) |
| 8 | +comm = MPI.COMM_WORLD |
| 9 | +mpi_size = MPI.Comm_size(comm) |
| 10 | +my_rank = MPI.Comm_rank(comm) |
| 11 | + |
|    | 12 | +cores_per_numa = 16  # cores in each NUMA domain of the target machine
|    | 13 | +threads_per_rank = Threads.nthreads()
|    | 14 | +ranks_per_numa = div(cores_per_numa, threads_per_rank)  # MPI ranks sharing one NUMA domain
| 15 | + |
|    | 16 | +# Pin threads so that the threads of an MPI rank are pinned to cores with
|    | 17 | +# contiguous IDs. This ensures that
|    | 18 | +# - When running 16 or fewer threads per rank, all threads are pinned to the same
|    | 19 | +#   NUMA region as their master thread (sharing a memory controller within the Infinity Fabric)
|    | 20 | +# - When running 8 or fewer threads per rank, all threads are pinned to the same
|    | 21 | +#   Core Complex Die
|    | 22 | +# - When running 4 or fewer threads per rank, all threads are pinned to the same
|    | 23 | +#   Core Complex (sharing an L3 cache)
| 24 | + |
|    | 25 | +my_numa, my_id_in_numa = divrem(my_rank, ranks_per_numa) .+ (1, 0)  # 1-based NUMA domain, 0-based slot within it
|    | 26 | +pinthreads(numa(my_numa, 1:Threads.nthreads()) .+ threads_per_rank .* my_id_in_numa)  # shift by this rank's slot so ranks in a domain get disjoint, contiguous cores
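For illustration, here is a minimal sketch of the rank-to-core arithmetic above, assuming 16 cores per NUMA domain and 4 threads per rank (the ranks and counts below are hypothetical, not taken from the diff):

    # Map each MPI rank to a 1-based NUMA domain and a 0-based slot within it,
    # exactly as the divrem line above does.
    cores_per_numa   = 16
    threads_per_rank = 4
    ranks_per_numa   = div(cores_per_numa, threads_per_rank)   # 4 ranks share each domain
    for my_rank in 0:7
        my_numa, my_id_in_numa = divrem(my_rank, ranks_per_numa) .+ (1, 0)
        offset = threads_per_rank * my_id_in_numa
        # e.g. rank 6 maps to NUMA domain 2, cores 9:12 of that domain
        println("rank $my_rank -> NUMA domain $my_numa, cores $(offset + 1):$(offset + threads_per_rank) of that domain")
    end

The pinthreads call above then produces the corresponding CPU IDs by taking the first threads_per_rank CPU IDs of the rank's NUMA domain via ThreadPinning's numa selector and shifting them by the same offset, which assumes CPU IDs are contiguous within a domain.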
8 | 27 |
9 | 28 | # Save some variables for later use |
10 | 29 | test_dir = joinpath(dirname(pathof(ParticleDA)), "..", "test") |
11 | 30 | llw2d_src = joinpath(test_dir, "models", "llw2d.jl") |
12 | | -observation_file = "observations.h5" |
| 31 | +observation_file = "test_observations.h5" |
13 | 32 |
14 | 33 | # Instantiate the test environment |
15 | 34 | using Pkg |