Fixes to the distinct and distinctBy KDocs after review.

Allex-Nik · Allex-Nik · commit 690b780060cc · 2025-12-15T11:13:21.000+01:00
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt
@@ -7,7 +7,11 @@ import org.jetbrains.kotlinx.dataframe.DataRow
 import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
 import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
 import org.jetbrains.kotlinx.dataframe.annotations.Refine
+import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.DESCRIPTION
+import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.DISTINCT_PARAM
 import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.DISTINCT_RETURN
+import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.FUNCTION
+import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.PHRASE_ENDING
 import org.jetbrains.kotlinx.dataframe.api.Select.SelectSelectingOptions
 import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
 import org.jetbrains.kotlinx.dataframe.columns.SingleColumn
@@ -16,6 +20,7 @@ import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
 import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl.DslGrammarTemplate
 import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
 import org.jetbrains.kotlinx.dataframe.documentation.Indent
+import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
 import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException
 import org.jetbrains.kotlinx.dataframe.impl.columns.DistinctColumnSet
 import org.jetbrains.kotlinx.dataframe.indices
@@ -25,54 +30,60 @@ import kotlin.reflect.KProperty
 // region DataFrame
 
 /**
- * ## The {@get NAME Distinct} Operation
+ * {@get [DESCRIPTION] Removes duplicated rows based on {@get [PHRASE_ENDING]}}.
  *
- * {@get DESCRIPTION It removes duplicated rows based on {@get PHRASE_ENDING}}.
- *
- * __NOTE:__ The [rows][DataRow] in the resulting [DataFrame] are in the same order
+ * The [rows][DataRow] in the resulting [DataFrame] are in the same order
  * as they were in the original [DataFrame].
  *
- * {@get [DISTINCT_PARAM]}
+ * See also {@get [FUNCTION] [distinctBy], which removes duplicated rows based on the specified columns
+ * and keeps all the columns in the resulting [DataFrame].}
+ *
+ * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
+ *
+ * See [Selecting Columns][SelectSelectingOptions].
  *
- * {@get [DISTINCT_RETURN] @return A new [DataFrame] containing only distinct rows.}
+ * For more information:
+ *
+ * {@include [DocumentationUrls.Distinct]}
+ *
+ * {@include [DocumentationUrls.DistinctBy]}
+ *
+ * {@get [DISTINCT_PARAM]}
  *
- * @see [Selecting Columns][SelectSelectingOptions].
- * @see {@include [DocumentationUrls.Distinct]}
- * @see {@include [DocumentationUrls.DistinctBy]}
+ * @return {@get [DISTINCT_RETURN] A new [DataFrame] containing only distinct rows.}
  */
 @ExcludeFromSources
 @Suppress("ClassName")
 private interface DistinctDocs {
     interface DISTINCT_PARAM
 
     interface DISTINCT_RETURN
+
+    interface DESCRIPTION
+
+    interface PHRASE_ENDING
+
+    interface FUNCTION
 }
 
 /**
  * {@include [DistinctDocs]}
- * {@set PHRASE_ENDING all columns}.
- * {@set [DistinctDocs.DISTINCT_PARAM]}
+ * {@set [PHRASE_ENDING] all columns}
+ * {@set [DISTINCT_PARAM]}
  */
 public fun <T> DataFrame<T>.distinct(): DataFrame<T> = distinctBy { all() }
 
 /**
  * {@include [DistinctDocs]}
- * {@set DESCRIPTION It selects the specified columns and keeps only distinct rows based on these selected columns}
- * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns] The names of the columns to select
- * and to consider for evaluating distinct rows.}
- * {@set [DISTINCT_RETURN] @return A new [DataFrame] containing only selected columns and distinct rows.}
+ * {@set [DESCRIPTION] Selects the specified columns and keeps only distinct rows based on these selected columns}
+ * {@set [DISTINCT_PARAM] @param [columns\] The [ColumnsSelector] used to select columns
+ * that will be included in the resulting [DataFrame] and considered for evaluating distinct rows.}
+ * {@set [DISTINCT_RETURN] A new [DataFrame] containing only selected columns and distinct rows.}
  */
 @Refine
 @Interpretable("Distinct0")
 public fun <T, C> DataFrame<T>.distinct(columns: ColumnsSelector<T, C>): DataFrame<T> = select(columns).distinct()
 
-/**
- * {@include [DistinctDocs]}
- * {@set DESCRIPTION It selects the specified columns and keeps only distinct rows based on these selected columns}
- * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns] The names of the columns to select
- * and to consider for evaluating distinct rows.}
- * {@set [DISTINCT_RETURN] @return A new [DataFrame] containing only selected columns and distinct rows.}
- */
 @Deprecated(DEPRECATED_ACCESS_API)
 @AccessApiOverload
 public fun <T> DataFrame<T>.distinct(vararg columns: KProperty<*>): DataFrame<T> =
@@ -83,64 +94,47 @@ public fun <T> DataFrame<T>.distinct(vararg columns: KProperty<*>): DataFrame<T>
 
 /**
  * {@include [DistinctDocs]}
- * {@set DESCRIPTION It selects the specified columns and keeps only distinct rows based on these selected columns}
- * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns] The names of the columns to select
+ * {@set [DESCRIPTION] Selects the specified columns and keeps only distinct rows based on these selected columns}
+ * {@set [DISTINCT_PARAM] @param [columns\] The names of the columns to select
  * and to consider for evaluating distinct rows.}
- * {@set [DISTINCT_RETURN] @return A new [DataFrame] containing only selected columns and distinct rows.}
+ * {@set [DISTINCT_RETURN] A new [DataFrame] containing only selected columns and distinct rows.}
  */
 public fun <T> DataFrame<T>.distinct(vararg columns: String): DataFrame<T> = distinct { columns.toColumnSet() }
 
-/**
- * {@include [DistinctDocs]}
- * {@set DESCRIPTION It selects the specified columns and keeps only distinct rows based on these selected columns}
- * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns] The names of the columns to select
- * and to consider for evaluating distinct rows.}
- * {@set [DISTINCT_RETURN] @return A new [DataFrame] containing only selected columns and distinct rows.}
- */
 @Deprecated(DEPRECATED_ACCESS_API)
 @AccessApiOverload
 public fun <T> DataFrame<T>.distinct(vararg columns: AnyColumnReference): DataFrame<T> =
     distinct { columns.toColumnSet() }
 
-/**
- * {@include [DistinctDocs]}
- * {@set NAME DistinctBy}
- * {@set PHRASE_ENDING the specified columns}.
- * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns]
- * The names of the columns to consider for evaluating distinct rows.}
- */
 @Deprecated(DEPRECATED_ACCESS_API)
 @AccessApiOverload
 public fun <T> DataFrame<T>.distinctBy(vararg columns: KProperty<*>): DataFrame<T> =
     distinctBy { columns.toColumnSet() }
 
 /**
  * {@include [DistinctDocs]}
- * {@set NAME DistinctBy}
- * {@set PHRASE_ENDING the specified columns}.
- * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns]
+ * {@set [PHRASE_ENDING] the specified columns}
+ * {@set [FUNCTION] [distinct], which selects the specified columns
+ * (if the columns are not specified, selects all columns)
+ * and keeps only distinct rows based on these selected columns.}
+ * {@set [DISTINCT_PARAM] @param [columns\]
  * The names of the columns to consider for evaluating distinct rows.}
  */
 public fun <T> DataFrame<T>.distinctBy(vararg columns: String): DataFrame<T> = distinctBy { columns.toColumnSet() }
 
-/**
- * {@include [DistinctDocs]}
- * {@set NAME DistinctBy}
- * {@set PHRASE_ENDING the specified columns}.
- * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns]
- * The names of the columns to consider for evaluating distinct rows.}
- */
 @Deprecated(DEPRECATED_ACCESS_API)
 @AccessApiOverload
 public fun <T> DataFrame<T>.distinctBy(vararg columns: AnyColumnReference): DataFrame<T> =
     distinctBy { columns.toColumnSet() }
 
 /**
  * {@include [DistinctDocs]}
- * {@set NAME DistinctBy}
- * {@set PHRASE_ENDING the specified columns}.
- * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns]
- * The names of the columns to consider for evaluating distinct rows.}
+ * {@set [PHRASE_ENDING] the specified columns}
+ * {@set [FUNCTION] [distinct], which selects the specified columns
+ * (if the columns are not specified, selects all columns)
+ * and keeps only distinct rows based on these selected columns.}
+ * {@set [DISTINCT_PARAM] @param [columns\] The [ColumnsSelector] used to select columns
+ * that will be considered for evaluating distinct rows.}
  */
 public fun <T, C> DataFrame<T>.distinctBy(columns: ColumnsSelector<T, C>): DataFrame<T> {
     val cols = get(columns)
@@ -185,7 +179,7 @@ public interface DistinctColumnsSelectionDsl {
      * Returns a new [ColumnSet] from [this] [ColumnSet] containing only distinct columns (by path).
      * This is useful when you've selected the same column multiple times but only want it once.
      *
-     * NOTE: This doesn't solve [DuplicateColumnNamesException] if you've selected two columns with the same name.
+     * This doesn't solve [DuplicateColumnNamesException] if you've selected two columns with the same name.
      * For this, you'll need to [rename][ColumnsSelectionDsl.named] one of the columns.
      *
      * ### Check out: [Grammar]
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt
@@ -84,10 +84,10 @@ internal interface DocumentationUrls {
     /** [See `remove` on the documentation website.]({@include [Url]}/remove.html) */
     interface Remove
 
-    /** <a href="{@include [Url]}/distinct.html">See `distinct` on the documentation website.</a> */
+    /** [See `distinct` on the documentation website.]({@include [Url]}/distinct.html) */
     interface Distinct
 
-    /** <a href="{@include [Url]}/distinct.html#distinctby">See `distinctBy` on the documentation website.</a> */
+    /** [See `distinctBy` on the documentation website.]({@include [Url]}/distinct.html#distinctby) */
     interface DistinctBy
 
     /** <a href="{@include [Url]}/flatten.html">See `flatten` on the documentation website.</a> */