Add Guru.yaml

DBlankvoort · DBlankvoort · commit a1ba36702a21 · 2025-07-08T13:50:04.000+02:00
diff --git a/Guru.yaml b/Guru.yaml
@@ -0,0 +1,105 @@
+---
+# Thank you for contributing!
+# In filling out this yaml file, please follow the criteria as described here:
+# https://osai-index.eu/contribute
+
+# You're free to build on this work and reuse the data. It is licensed under CC-BY 4.0, with the
+# stipulation that attribution should come in the form of a link to https://osai-index.eu/
+# and a citation to the peer-reviewed paper in which the dataset & criteria were published:
+
+# Liesenfeld, A. and Dingemanse, M., 2024. Rethinking open source generative AI: open-washing and the EU AI Act. In Proceedings of the 2024 ACM Conference on Fairness, Accountability, and Transparency (pp. 1774-1787).
+
+# Organization tags:
+# - National origin: United States
+# - Contributor type: Non-academic (Research institution)
+
+system:  # base/end model names must agree with the weights_basemodel / weights_endmodel links below
+    name: Guru
+    link: https://huggingface.co/LLM360/guru-32B
+    type: text
+    performanceclass: latest
+    basemodelname: Qwen2.5-32B
+    endmodelname: Guru-32B
+    endmodellicense: CC-BY-NC-4.0
+    releasedate: 2025-05
+    notes: Guru, a model which claims to improve upon the state-of-the-art.
+
+org:  # organization behind the system; its origin and contributor type are listed under "Organization tags" above
+    name: LLM360
+    link: https://www.llm360.ai/index.html
+    notes: LLM360, an initiative to fully open-source LLMs.
+
+# availability:
+datasources_basemodel:  # pretraining data of the base model
+    class: closed
+    link:
+    notes: Pretraining data not specified or documented.
+
+datasources_endmodel:  # dataset for the end model's training stage
+    class: open
+    link: https://huggingface.co/datasets/LLM360/guru-RL-92k
+    notes: Dataset published on HuggingFace.
+
+weights_basemodel:  # base-model checkpoint (link points to the Qwen/Qwen2.5-32B repository)
+    class: open
+    link: https://huggingface.co/Qwen/Qwen2.5-32B
+    notes: Model weights made available on HuggingFace.
+
+weights_endmodel:  # end-model checkpoint (link points to the LLM360/guru-32B repository)
+    class: open
+    link: https://huggingface.co/LLM360/guru-32B
+    notes: Model weights made available on HuggingFace.
+
+trainingcode:  # link lists the base-model source first, then the end-model source
+    class: partial
+    link: ["https://github.com/QwenLM", "https://github.com/LLM360/Reasoning360/tree/main/scripts/train"]
+    notes: Base model repository provides sparse source code and some examples for SFT. End model repository contains training script
+
+# documentation:
+code:  # documentation of the code bases; link order matches trainingcode (base model, then end model)
+    class: partial
+    link: ["https://github.com/QwenLM", "https://github.com/LLM360/Reasoning360"]
+    notes: Both repositories are fairly well-documented.
+
+hardware_architecture:  # notes mention only the end model, which presumably explains the "partial" rating
+    class: partial
+    link: ["https://arxiv.org/pdf/2506.14965", "https://github.com/LLM360/Reasoning360/blob/main/scripts/train/example_multinode_rl_qwen32b_base.sh"]
+    notes: Hardware architecture described for end model.
+
+preprint:  # link order follows the other entries: base-model (Qwen2.5) report first, then the end-model paper
+    class: open
+    link: ["https://arxiv.org/abs/2412.15115", "https://arxiv.org/pdf/2506.14965"]
+    notes: Preprints published on arXiv.
+
+paper:  # peer-reviewed publication, as opposed to the preprint entry
+    class: closed
+    link:
+    notes: No peer-reviewed paper found.
+
+modelcard:  # rated closed although a card is linked: it mostly holds usage instructions (see notes)
+    class: closed
+    link: https://huggingface.co/LLM360/guru-32B
+    notes: Model card primarily contains usage instructions.
+
+datasheet:  # points to the same dataset page as datasources_endmodel
+    class: open
+    link: https://huggingface.co/datasets/LLM360/guru-RL-92k
+    notes: Detailed datasheet provided on HuggingFace.
+
+# access:
+package:  # empty link written without trailing whitespace, like the other empty link fields in this file
+    class: closed
+    link:
+    notes: No package found.
+
+api:  # metaprompt presumably rates disclosure of the API's system prompt; confirm against the contribution criteria
+    class: closed
+    link:
+    notes: No API found.
+    metaprompt: closed
+
+licenses:
+    class: open  # NOTE(review): re-check this rating against https://osai-index.eu/contribute, since the license is not OSI-approved
+    link: https://huggingface.co/LLM360/guru-32B
+    notes: CC-BY-NC-4.0, a Creative Commons license with a non-commercial restriction; it is not OSI-approved.
+