Skip to content

Commit da43974

Browse files
committed
Merge branch 'master' into Simplify_history_indexing
bench: 2403242
2 parents f216f79 + 69a01b8 commit da43974

30 files changed

+1980
-300
lines changed

src/Makefile

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,7 @@ endif
435435
ifeq ($(COMP),gcc)
436436
comp=gcc
437437
CXX=g++
438-
CXXFLAGS += -pedantic -Wextra -Wshadow -Wmissing-declarations
438+
CXXFLAGS += -pedantic -Wextra -Wshadow -Wmissing-declarations -Wstack-usage=128000
439439

440440
ifeq ($(arch),$(filter $(arch),armv7 armv8 riscv64))
441441
ifeq ($(OS),Android)
@@ -618,6 +618,19 @@ ifneq ($(comp),mingw)
618618
ifneq ($(KERNEL),Haiku)
619619
ifneq ($(COMP),ndk)
620620
LDFLAGS += -lpthread
621+
622+
add_lrt = yes
623+
ifeq ($(target_windows),yes)
624+
add_lrt = no
625+
endif
626+
627+
ifeq ($(KERNEL),Darwin)
628+
add_lrt = no
629+
endif
630+
631+
ifeq ($(add_lrt),yes)
632+
LDFLAGS += -lrt
633+
endif
621634
endif
622635
endif
623636
endif
@@ -628,6 +641,7 @@ ifeq ($(debug),no)
628641
CXXFLAGS += -DNDEBUG
629642
else
630643
CXXFLAGS += -g
644+
CXXFLAGS += -D_GLIBCXX_ASSERTIONS -D_GLIBCXX_DEBUG
631645
endif
632646

633647
### 3.2.2 Debugging with undefined behavior sanitizers

src/engine.cpp

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,12 @@
3333
#include "misc.h"
3434
#include "nnue/network.h"
3535
#include "nnue/nnue_common.h"
36+
#include "nnue/nnue_misc.h"
3637
#include "numa.h"
3738
#include "perft.h"
3839
#include "position.h"
3940
#include "search.h"
41+
#include "shm.h"
4042
#include "syzygy/tbprobe.h"
4143
#include "types.h"
4244
#include "uci.h"
@@ -57,11 +59,14 @@ Engine::Engine(std::optional<std::string> path) :
5759
threads(),
5860
networks(
5961
numaContext,
60-
NN::Networks(
61-
NN::NetworkBig({EvalFileDefaultNameBig, "None", ""}, NN::EmbeddedNNUEType::BIG),
62-
NN::NetworkSmall({EvalFileDefaultNameSmall, "None", ""}, NN::EmbeddedNNUEType::SMALL))) {
63-
pos.set(StartFEN, false, &states->back());
62+
// Heap-allocate because sizeof(NN::Networks) is large
63+
std::make_unique<NN::Networks>(
64+
std::make_unique<NN::NetworkBig>(NN::EvalFile{EvalFileDefaultNameBig, "None", ""},
65+
NN::EmbeddedNNUEType::BIG),
66+
std::make_unique<NN::NetworkSmall>(NN::EvalFile{EvalFileDefaultNameSmall, "None", ""},
67+
NN::EmbeddedNNUEType::SMALL))) {
6468

69+
pos.set(StartFEN, false, &states->back());
6570

6671
options.add( //
6772
"Debug Log File", Option("", [](const Option& o) {
@@ -254,6 +259,36 @@ void Engine::set_ponderhit(bool b) { threads.main_manager()->ponder = b; }
254259
void Engine::verify_networks() const {
255260
networks->big.verify(options["EvalFile"], onVerifyNetworks);
256261
networks->small.verify(options["EvalFileSmall"], onVerifyNetworks);
262+
263+
auto statuses = networks.get_status_and_errors();
264+
for (size_t i = 0; i < statuses.size(); ++i)
265+
{
266+
const auto [status, error] = statuses[i];
267+
std::string message = "Network replica " + std::to_string(i + 1) + ": ";
268+
if (status == SystemWideSharedConstantAllocationStatus::NoAllocation)
269+
{
270+
message += "No allocation.";
271+
}
272+
else if (status == SystemWideSharedConstantAllocationStatus::LocalMemory)
273+
{
274+
message += "Local memory.";
275+
}
276+
else if (status == SystemWideSharedConstantAllocationStatus::SharedMemory)
277+
{
278+
message += "Shared memory.";
279+
}
280+
else
281+
{
282+
message += "Unknown status.";
283+
}
284+
285+
if (error.has_value())
286+
{
287+
message += " " + *error;
288+
}
289+
290+
onVerifyNetworks(message);
291+
}
257292
}
258293

259294
void Engine::load_networks() {

src/engine.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,10 +115,10 @@ class Engine {
115115
Position pos;
116116
StateListPtr states;
117117

118-
OptionsMap options;
119-
ThreadPool threads;
120-
TranspositionTable tt;
121-
LazyNumaReplicated<Eval::NNUE::Networks> networks;
118+
OptionsMap options;
119+
ThreadPool threads;
120+
TranspositionTable tt;
121+
LazyNumaReplicatedSystemWide<Eval::NNUE::Networks> networks;
122122

123123
Search::SearchManager::UpdateContext updateContext;
124124
std::function<void(std::string_view)> onVerifyNetworks;

src/evaluate.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,21 +98,21 @@ std::string Eval::trace(Position& pos, const Eval::NNUE::Networks& networks) {
9898
if (pos.checkers())
9999
return "Final evaluation: none (in check)";
100100

101-
Eval::NNUE::AccumulatorStack accumulators;
102-
auto caches = std::make_unique<Eval::NNUE::AccumulatorCaches>(networks);
101+
auto accumulators = std::make_unique<Eval::NNUE::AccumulatorStack>();
102+
auto caches = std::make_unique<Eval::NNUE::AccumulatorCaches>(networks);
103103

104104
std::stringstream ss;
105105
ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2);
106106
ss << '\n' << NNUE::trace(pos, networks, *caches) << '\n';
107107

108108
ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15);
109109

110-
auto [psqt, positional] = networks.big.evaluate(pos, accumulators, &caches->big);
110+
auto [psqt, positional] = networks.big.evaluate(pos, *accumulators, &caches->big);
111111
Value v = psqt + positional;
112112
v = pos.side_to_move() == WHITE ? v : -v;
113113
ss << "NNUE evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)\n";
114114

115-
v = evaluate(networks, pos, accumulators, *caches, VALUE_ZERO);
115+
v = evaluate(networks, pos, *accumulators, *caches, VALUE_ZERO);
116116
v = pos.side_to_move() == WHITE ? v : -v;
117117
ss << "Final evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)";
118118
ss << " [with scaled NNUE, ...]";

src/main.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,28 +17,28 @@
1717
*/
1818

1919
#include <iostream>
20+
#include <memory>
2021

2122
#include "bitboard.h"
2223
#include "misc.h"
2324
#include "position.h"
25+
#include "tune.h"
2426
#include "types.h"
2527
#include "uci.h"
26-
#include "tune.h"
2728

2829
using namespace Stockfish;
2930

3031
int main(int argc, char* argv[]) {
31-
3232
std::cout << engine_info() << std::endl;
3333

3434
Bitboards::init();
3535
Position::init();
3636

37-
UCIEngine uci(argc, argv);
37+
auto uci = std::make_unique<UCIEngine>(argc, argv);
3838

39-
Tune::init(uci.engine_options());
39+
Tune::init(uci->engine_options());
4040

41-
uci.loop();
41+
uci->loop();
4242

4343
return 0;
4444
}

src/memory.cpp

Lines changed: 8 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,6 @@
5555
// the calls at compile time), try to load them at runtime. To do this we need
5656
// first to define the corresponding function pointers.
5757

58-
extern "C" {
59-
using OpenProcessToken_t = bool (*)(HANDLE, DWORD, PHANDLE);
60-
using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID);
61-
using AdjustTokenPrivileges_t =
62-
bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD);
63-
}
6458
#endif
6559

6660

@@ -106,77 +100,14 @@ void std_aligned_free(void* ptr) {
106100

107101
static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize) {
108102

109-
#if !defined(_WIN64)
110-
return nullptr;
111-
#else
112-
113-
HANDLE hProcessToken{};
114-
LUID luid{};
115-
void* mem = nullptr;
116-
117-
const size_t largePageSize = GetLargePageMinimum();
118-
if (!largePageSize)
119-
return nullptr;
120-
121-
// Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges
122-
123-
HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll"));
124-
125-
if (!hAdvapi32)
126-
hAdvapi32 = LoadLibrary(TEXT("advapi32.dll"));
127-
128-
auto OpenProcessToken_f =
129-
OpenProcessToken_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken"));
130-
if (!OpenProcessToken_f)
131-
return nullptr;
132-
auto LookupPrivilegeValueA_f =
133-
LookupPrivilegeValueA_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA"));
134-
if (!LookupPrivilegeValueA_f)
135-
return nullptr;
136-
auto AdjustTokenPrivileges_f =
137-
AdjustTokenPrivileges_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges"));
138-
if (!AdjustTokenPrivileges_f)
139-
return nullptr;
140-
141-
// We need SeLockMemoryPrivilege, so try to enable it for the process
142-
143-
if (!OpenProcessToken_f( // OpenProcessToken()
144-
GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
145-
return nullptr;
146-
147-
if (LookupPrivilegeValueA_f(nullptr, "SeLockMemoryPrivilege", &luid))
148-
{
149-
TOKEN_PRIVILEGES tp{};
150-
TOKEN_PRIVILEGES prevTp{};
151-
DWORD prevTpLen = 0;
152-
153-
tp.PrivilegeCount = 1;
154-
tp.Privileges[0].Luid = luid;
155-
tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
156-
157-
// Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges()
158-
// succeeds, we still need to query GetLastError() to ensure that the privileges
159-
// were actually obtained.
160-
161-
if (AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp,
162-
&prevTpLen)
163-
&& GetLastError() == ERROR_SUCCESS)
164-
{
165-
// Round up size to full pages and allocate
166-
allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1);
167-
mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
168-
PAGE_READWRITE);
169-
170-
// Privilege no longer needed, restore previous state
171-
AdjustTokenPrivileges_f(hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr);
172-
}
173-
}
174-
175-
CloseHandle(hProcessToken);
176-
177-
return mem;
178-
179-
#endif
103+
return windows_try_with_large_page_priviliges(
104+
[&](size_t largePageSize) {
105+
// Round up size to full pages and allocate
106+
allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1);
107+
return VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
108+
PAGE_READWRITE);
109+
},
110+
[]() { return (void*) nullptr; });
180111
}
181112

182113
void* aligned_large_pages_alloc(size_t allocSize) {

src/memory.h

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,29 @@
2929

3030
#include "types.h"
3131

32+
#if defined(_WIN64)
33+
34+
#if _WIN32_WINNT < 0x0601
35+
#undef _WIN32_WINNT
36+
#define _WIN32_WINNT 0x0601 // Force to include needed API prototypes
37+
#endif
38+
39+
#if !defined(NOMINMAX)
40+
#define NOMINMAX
41+
#endif
42+
#include <windows.h>
43+
44+
#include <psapi.h>
45+
46+
extern "C" {
47+
using OpenProcessToken_t = bool (*)(HANDLE, DWORD, PHANDLE);
48+
using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID);
49+
using AdjustTokenPrivileges_t =
50+
bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD);
51+
}
52+
#endif
53+
54+
3255
namespace Stockfish {
3356

3457
void* std_aligned_alloc(size_t alignment, size_t size);
@@ -211,6 +234,81 @@ T* align_ptr_up(T* ptr) {
211234
reinterpret_cast<char*>((ptrint + (Alignment - 1)) / Alignment * Alignment));
212235
}
213236

237+
#if defined(_WIN32)
238+
239+
// Runs a callback with SeLockMemoryPrivilege temporarily enabled for the
// current process.
//
// fyes is invoked as fyes(largePageSize) once the privilege has been enabled,
// where largePageSize is the value reported by GetLargePageMinimum(). fno is
// invoked with no arguments whenever that is not possible: on builds that are
// not 64-bit Windows, when GetLargePageMinimum() reports 0, when advapi32 or
// one of its required entry points cannot be resolved, or when the privilege
// cannot be obtained. Exactly one of the two callbacks runs and its result is
// returned, so both callbacks must return the same type.
template<typename FuncYesT, typename FuncNoT>
auto windows_try_with_large_page_priviliges([[maybe_unused]] FuncYesT&& fyes, FuncNoT&& fno) {

#if !defined(_WIN64)
    return fno();
#else

    const size_t largePageSize = GetLargePageMinimum();
    if (!largePageSize)
        return fno();

    // Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges

    HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll"));

    if (!hAdvapi32)
        hAdvapi32 = LoadLibrary(TEXT("advapi32.dll"));

    // Without the module there is nothing to resolve the entry points from
    if (!hAdvapi32)
        return fno();

    auto OpenProcessToken_f =
      OpenProcessToken_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken"));
    if (!OpenProcessToken_f)
        return fno();
    auto LookupPrivilegeValueA_f =
      LookupPrivilegeValueA_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA"));
    if (!LookupPrivilegeValueA_f)
        return fno();
    auto AdjustTokenPrivileges_f =
      AdjustTokenPrivileges_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges"));
    if (!AdjustTokenPrivileges_f)
        return fno();

    // We need SeLockMemoryPrivilege, so try to enable it for the process

    HANDLE hProcessToken{};
    if (!OpenProcessToken_f(  // OpenProcessToken()
          GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
        return fno();

    // From here on the token handle is open: make sure it is closed on every
    // exit path, including the early fallbacks below.
    struct TokenCloser {
        HANDLE handle;
        ~TokenCloser() { CloseHandle(handle); }
    } tokenCloser{hProcessToken};

    LUID luid{};
    if (!LookupPrivilegeValueA_f(nullptr, "SeLockMemoryPrivilege", &luid))
        return fno();

    TOKEN_PRIVILEGES tp{};
    TOKEN_PRIVILEGES prevTp{};
    DWORD            prevTpLen = 0;

    tp.PrivilegeCount           = 1;
    tp.Privileges[0].Luid       = luid;
    tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;

    // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges()
    // succeeds, we still need to query GetLastError() to ensure that the privileges
    // were actually obtained.

    if (!AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp,
                                 &prevTpLen)
        || GetLastError() != ERROR_SUCCESS)
        return fno();

    auto&& ret = fyes(largePageSize);

    // Privilege no longer needed, restore previous state
    AdjustTokenPrivileges_f(hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr);

    // The token handle itself is released by tokenCloser when we return
    return std::forward<decltype(ret)>(ret);

#endif
}
310+
311+
#endif
214312

215313
} // namespace Stockfish
216314

0 commit comments

Comments
 (0)