JuliaDynamics · bergio13 · Jul 10, 2025 · Jul 10, 2025 · Jul 10, 2025 · Jul 10, 2025
diff --git a/.gitignore b/.gitignore
@@ -17,4 +17,6 @@ test/adata.arrow
 test/mdata.arrow
 *.csv
 *.arrow
-tutorial.md
+tutorial.md
+log
+examples/rl/log
diff --git a/Project.toml b/Project.toml
@@ -30,9 +30,12 @@ StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
 
 [weakdeps]
 Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
+Crux = "e51cc422-768a-4345-bb8e-2246287ae729"
+Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 GraphMakie = "1ecd5474-83a3-4783-bb4f-06765db800d2"
 Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
 OSMMakie = "76b6901f-8821-46bb-9129-841bc9cfe677"
+POMDPs = "a93abf59-7444-517b-a68a-c42f96afdd7d"
 
 [extensions]
 AgentsArrow = "Arrow"
@@ -44,11 +47,13 @@ AgentsVisualizations = "Makie"
 Arrow = "2"
 CSV = "0.9.7, 0.10"
 CommonSolve = "0.2.4"
+Crux = "0.1.2"
 DataFrames = "0.21, 0.22, 1"
 DataStructures = "0.18"
 Distributed = "1"
 Distributions = "0.25"
 Downloads = "1"
+Flux = "0.14.25"
 GraphMakie = "0.5"
 Graphs = "1.4"
 JLD2 = "0.4, 0.5"
@@ -59,6 +64,7 @@ LinearAlgebra = "1"
 MacroTools = "0.5"
 Makie = "0.20, 0.21, 0.22"
 OSMMakie = "0.0, 0.1"
+POMDPs = "0.9.0, 1.0.0"
 PrecompileTools = "1"
 ProgressMeter = "1.5"
 Random = "1"

diff --git a/docs/ReinforcementLearningABM_Guide.md b/docs/ReinforcementLearningABM_Guide.md
@@ -0,0 +1,50 @@
+# ReinforcementLearningABM: A New Agent-Based Model Type
+
+## Overview
+
+`ReinforcementLearningABM` is a new model type that extends `StandardABM` by integrating reinforcement learning (RL) functionality directly into the agent-based modeling framework. It lets you train agents with RL algorithms while maintaining full compatibility with the existing Agents.jl ecosystem.
+
+## Key Features
+
+### 1. **Integrated RL Training**
+
+- Built-in support for training agents using various RL algorithms (PPO, DQN, A2C)
+- Automatic integration with POMDPs.jl and Crux.jl
+
+### 2. **Multi-Agent Learning Support**
+
+- Train multiple agent types simultaneously or sequentially
+- Support for heterogeneous agents with different action and observation spaces
+- Automatic policy management for trained agents
+
+### 3. **Flexible Architecture**
+
+- Inherits all functionality from `StandardABM`
+- RL functionality is optional: the model can be used as a regular ABM when RL is not needed
+
+### 4. **Easy Configuration**
+
+- Simple configuration system for RL components
+- Customizable observation functions, reward functions, and termination conditions
+- Support for custom neural network architectures
+
+## Architecture
+
+```text
+ReinforcementLearningABM
+├── StandardABM components
+│   └── agents, space, scheduler, properties, rng, etc.
+└── RL-specific components
+    ├── rl_config: Configuration for RL training
+    ├── trained_policies: Storage for trained policies
+    ├── training_history: Record of training progress
+    └── is_training: Training mode flag
+```
+
+## Dependencies
+
+The RL functionality requires:
+
+- `POMDPs.jl`: For the POMDP interface
+- `Crux.jl`: For RL algorithms and neural networks
+- `Flux.jl`: For neural network components