Skip to content

Commit 1b9fa19

Browse files
authored
improve precompilation coverage (#3285)
1 parent ec7b123 commit 1b9fa19

File tree

6 files changed

+96
-10
lines changed

6 files changed

+96
-10
lines changed

Project.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ version = "1.5.0"
66
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
77
DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
88
Future = "9fa8497b-333b-5362-9e8d-4d0656e87820"
9+
InlineStrings = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48"
910
InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
1011
IteratorInterfaceExtensions = "82899510-4779-5014-852e-03e436cf321d"
1112
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -22,6 +23,7 @@ SortingAlgorithms = "a2af1166-a08f-5f64-846c-94a0d3cef48c"
2223
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
2324
TableTraits = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c"
2425
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
26+
SentinelArrays = "91c51154-3ec4-41a3-a24f-3f23e20d615c"
2527
Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
2628

2729
[compat]
@@ -35,6 +37,7 @@ Missings = "0.4.2, 1"
3537
PooledArrays = "1.4.2"
3638
PrettyTables = "2.1"
3739
Reexport = "0.1, 0.2, 1"
40+
SentinelArrays = "1.2"
3841
ShiftedArrays = "1, 2"
3942
SnoopPrecompile = "1"
4043
SortingAlgorithms = "0.1, 0.2, 0.3, 1"

docs/src/man/basics.md

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ or
1616
```julia
1717
julia> ] # ']' should be pressed
1818

19-
(@v1.6) pkg> add DataFrames
19+
(@v1.9) pkg> add DataFrames
2020
```
2121

2222
If you want to make sure everything works as expected you can run the tests
@@ -35,9 +35,9 @@ you have installed with the `status` command.
3535
```julia
3636
julia> ]
3737

38-
(@v1.6) pkg> status DataFrames
39-
Status `C:\Users\TeAmp0is0N\.julia\environments\v1.6\Project.toml`
40-
[a93c6f00] DataFrames v1.1.1
38+
(@v1.9) pkg> status DataFrames
39+
Status `~\v1.9\Project.toml`
40+
[a93c6f00] DataFrames v1.5.0
4141
```
4242

4343
Throughout the rest of the tutorial we will assume that you have installed the
@@ -52,6 +52,40 @@ The most fundamental type provided by DataFrames.jl is `DataFrame`, where
5252
typically each row is interpreted as an observation and each column as a
5353
feature.
5454

55+
!!! note "Advanced installation configuration"
56+
57+
**Advanced installation settings.**
58+
DataFrames.jl puts in extra time and effort when the package is being built
59+
(precompiled) to make sure it is more responsive when you are using it.
60+
However, in some scenarios users might want to avoid this extra
61+
precompilation effort to reduce the time needed to build the package and
62+
later to load it. To disable precompilation of DataFrames.jl in your current
63+
project you need to install the
64+
[SnoopPrecompile.jl](https://timholy.github.io/SnoopCompile.jl/stable/snoop_pc/)
65+
and [Preferences.jl](https://github.com/JuliaPackaging/Preferences.jl)
66+
packages and then run the following code:
67+
```
68+
using SnoopPrecompile, Preferences
69+
Preferences.set_preferences!(SnoopPrecompile,
70+
"skip_precompile" => union(Preferences.load_preference(SnoopPrecompile,
71+
"skip_precompile",
72+
String[]),
73+
["DataFrames"]);
74+
force=true)
75+
```
76+
If you later want to re-enable precompilation of DataFrames.jl you
77+
can do it using the following commands:
78+
```
79+
using SnoopPrecompile, Preferences
80+
Preferences.set_preferences!(SnoopPrecompile,
81+
"skip_precompile" =>
82+
filter(!=("DataFrames"),
83+
Preferences.load_preference(SnoopPrecompile,
84+
"skip_precompile",
85+
String[]));
86+
force=true)
87+
```
88+
5589
## Constructors and Basic Utility Functions
5690

5791
### Constructors
@@ -1785,7 +1819,7 @@ in them:
17851819
julia> select(german, Not(["Age", "Saving accounts", "Checking account",
17861820
"Credit amount", "Purpose"]))
17871821
1000×5 DataFrame
1788-
Row │ id Sex Job Housing Duration
1822+
Row │ id Sex Job Housing Duration
17891823
│ Int64 String7 Int64 String7 Int64
17901824
──────┼──────────────────────────────────────────
17911825
1 │ 0 male 2 own 6

src/DataFrames.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ using PrettyTables
1111
using Random
1212
using Tables: ByRow
1313
import SnoopPrecompile
14+
import SentinelArrays
15+
import InlineStrings
1416

1517
import DataAPI,
1618
DataAPI.allcombinations,

src/abstractdataframe/abstractdataframe.jl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1892,6 +1892,12 @@ function Base.reduce(::typeof(vcat),
18921892
return res
18931893
end
18941894

1895+
# definition needed to avoid dispatch ambiguity
1896+
Base.reduce(::typeof(vcat),
1897+
dfs::SentinelArrays.ChainedVector{T, A} where {T<:AbstractDataFrame,
1898+
A<:AbstractVector{T}}) =
1899+
reduce(vcat, collect(AbstractDataFrame, dfs))
1900+
18951901
function _vcat(dfs::AbstractVector{AbstractDataFrame};
18961902
cols::Union{Symbol, AbstractVector{Symbol},
18971903
AbstractVector{<:AbstractString}}=:setequal)

src/other/precompile.jl

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ SnoopPrecompile.@precompile_all_calls begin
1111
combine(df, :c, [:c :f] .=> [sum, mean, std], :c => :d, [:a, :c] => cor)
1212
transform(df, :c, [:c :f] .=> [sum, mean, std], :c => :d, [:a, :c] => cor)
1313
groupby(df, :a)
14-
groupby(df, :q)
1514
groupby(df, :p)
15+
groupby(df, :q)
1616
gdf = groupby(df, :b)
1717
combine(gdf, :c, [:c :f] .=> [sum, mean, std], :c => :d, [:a, :c] => cor)
1818
transform(gdf, :c, [:c :f] .=> [sum, mean, std], :c => :d, [:a, :c] => cor)
@@ -22,16 +22,52 @@ SnoopPrecompile.@precompile_all_calls begin
2222
outerjoin(df, df, on=:a, makeunique=true)
2323
outerjoin(df, df, on=:b, makeunique=true)
2424
outerjoin(df, df, on=:c, makeunique=true)
25-
semijoin(df, df, on=:a)
26-
semijoin(df, df, on=:b)
27-
semijoin(df, df, on=:c)
2825
leftjoin!(df, DataFrame(a=[2, 5, 3, 1, 0]), on=:a)
2926
leftjoin!(df, DataFrame(b=["a", "b", "c", "d", "e"]), on=:b)
3027
leftjoin!(df, DataFrame(c=1:5), on=:c)
3128
reduce(vcat, [df, df])
3229
show(IOBuffer(), df)
3330
subset(df, :q)
34-
@view df[1:3, :]
31+
subset!(copy(df), :q)
32+
df[:, 1:2]
33+
df[1:2, :]
34+
df[1:2, 1:2]
3535
@view df[:, 1:2]
36+
@view df[1:2, :]
37+
@view df[1:2, 1:2]
3638
transform!(df, :c, [:c :f] .=> [sum, mean, std], :c => :d, [:a, :c] => cor)
39+
deleteat!(df, 1)
40+
append!(df, copy(df))
41+
push!(df, copy(df[1, :]))
42+
eachrow(df)
43+
eachcol(df)
44+
empty(df)
45+
empty!(copy(df))
46+
filter(:q => identity, df)
47+
filter!(:q => identity, df)
48+
first(df)
49+
last(df)
50+
hcat(df, df, makeunique=true)
51+
issorted(df)
52+
pop!(df)
53+
popfirst!(df)
54+
repeat(df, 2)
55+
reverse(df)
56+
reverse!(df)
57+
unique(df, :a)
58+
unique!(df, :a)
59+
wide = DataFrame(id=1:6,
60+
a=repeat(1:3, inner=2),
61+
b=repeat(1.0:2.0, inner=3),
62+
c=repeat(1.0:1.0, inner=6),
63+
d=repeat(1.0:3.0, inner=2))
64+
long = stack(wide)
65+
unstack(long)
66+
unstack(long, :variable, :value, combine=sum)
67+
flatten(DataFrame(a=[[1, 2], [3, 4]], b=[1, 2]), :a)
68+
dropmissing(DataFrame(a=[1, 2, 3, missing], b=["a", missing, "c", "d"]))
69+
df = DataFrame(rand(20, 2), :auto)
70+
df.id = repeat(1:2, 10)
71+
combine(df, AsTable(r"x") .=> [ByRow(sum), ByRow(mean)])
72+
combine(groupby(df, :id), AsTable(r"x") .=> [ByRow(sum), ByRow(mean)])
3773
end

test/dataframe.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1892,6 +1892,11 @@ end
18921892
DataFrame(c=[missing, missing]))
18931893
end
18941894

1895+
@testset "vcat ChainedVector ambiguity" begin
1896+
dfs = DataFrames.SentinelArrays.ChainedVector([[DataFrame(a=1)], [DataFrame(a=2)]])
1897+
@test reduce(vcat, dfs) == DataFrame(a=1:2)
1898+
end
1899+
18951900
@testset "names for Type, predicate + standard tests of cols" begin
18961901
df_long = DataFrame(a1=1:3, a2=[1, missing, 3],
18971902
b1=1.0:3.0, b2=[1.0, missing, 3.0],

0 commit comments

Comments
 (0)