@@ -256,48 +256,254 @@ unregistered_stdlibs = filter(all_stdlibs) do (uuid, _)
256256 return ! any (haskey (reg. pkgs, uuid) for reg in registries)
257257end
258258
259- # Helper function for getting these printed out in a nicely-sorted order
260- function print_sorted (io:: IO , d:: Dict ; indent:: Int = 0 )
261- println (io, " Dict{UUID,StdlibInfo}(" )
262- for (uuid, (name, version, deps, weakdeps)) in sort (collect (d), by = kv-> kv[2 ][1 ])
263- println (io,
264- " " ^ indent,
265- repr (uuid), " => StdlibInfo(\n " ,
266- " " ^ (indent + 4 ), repr (name), " ,\n " ,
267- " " ^ (indent + 4 ), repr (uuid), " ,\n " ,
268- " " ^ (indent + 4 ), repr (version), " ,\n " ,
269- " " ^ (indent + 4 ), repr (sort (deps)), " ,\n " ,
270- " " ^ (indent + 4 ), repr (sort (weakdeps)), " ,\n " ,
271- " " ^ indent, " )," ,
272- )
# Tally how often each UUID is mentioned across all versions.
#
# `stdlibs_by_version` is iterated as `(version, stdlib_dict)` pairs, and each
# `stdlib_dict` as `(uuid, info)` pairs. A UUID is counted once every time it
# appears as a key, and once more for every appearance in an `info.deps` or
# `info.weakdeps` list.
#
# Returns `(counts, names)`:
#   - `counts`: `Dict{UUID, Int}` of total mentions per UUID
#   - `names`:  `Dict{UUID, String}` holding `info.name` for every UUID seen as a
#     key (the last entry visited wins); UUIDs that only occur as dependencies
#     have a count but no name.
function analyze_uuid_usage(stdlibs_by_version)
    occurrences = Dict{UUID, Int}()
    names = Dict{UUID, String}()

    # Shared "add one" used for keys, deps and weakdeps alike.
    bump!(id) = (occurrences[id] = get(occurrences, id, 0) + 1)

    for (_, stdlibs) in stdlibs_by_version
        for (uuid, info) in stdlibs
            bump!(uuid)
            names[uuid] = info.name
            foreach(bump!, info.deps)
            foreach(bump!, info.weakdeps)
        end
    end

    return occurrences, names
end
281+
# Invert the per-version listing into a per-stdlib history.
#
# `stdlibs_by_version` is iterated as `(version, stdlib_dict)` pairs. The result
# maps each UUID to a vector of `(version, info)` tuples, one per version in
# which that UUID appears, sorted by the version component.
function group_by_uuid(stdlibs_by_version)
    histories = Dict{UUID, Vector{Tuple{VersionNumber, StdlibInfo}}}()

    for (julia_version, stdlibs) in stdlibs_by_version, (uuid, info) in stdlibs
        # Create the history vector on first sight of this UUID.
        history = get!(histories, uuid) do
            Tuple{VersionNumber, StdlibInfo}[]
        end
        push!(history, (julia_version, info))
    end

    # Order every history by its first tuple element (the version).
    foreach(history -> sort!(history, by = first), values(histories))

    return histories
end
301+
# Return `true` when `a` and `b` agree on `name`, `uuid`, `deps` and `weakdeps`.
# The `version` field is deliberately left out of the comparison. `deps` and
# `weakdeps` are compared with `==`, so element order matters.
function base_info_equal(a::StdlibInfo, b::StdlibInfo)
    compared = (:name, :uuid, :deps, :weakdeps)
    return all(getproperty(a, f) == getproperty(b, f) for f in compared)
end
309+
# Split one stdlib's history into runs of unchanged base info.
#
# `versions_and_infos` is indexed from 1 and holds `(version, info)` entries.
# Consecutive entries whose infos satisfy `base_info_equal` form one segment.
# Inside a segment, consecutive entries sharing the same `info.version` are
# collapsed into a single `((first_version, last_version), info.version)` item.
#
# Returns a vector of `(base_info, version_ranges)` tuples, where `base_info` is
# the info of the first entry of the segment.
#
# NOTE(review): ranges are built from neighbouring entries of the input only, so
# whether a range may span versions missing from the input depends on the
# caller — confirm against how the ranges are consumed.
function find_base_info_segments(versions_and_infos)
    segments = []
    total = length(versions_and_infos)
    seg_first = 1

    while seg_first <= total
        base = versions_and_infos[seg_first][2]

        # Extend the segment while the next entry has the same base info.
        seg_last = seg_first
        while seg_last < total &&
                base_info_equal(versions_and_infos[seg_last + 1][2], base)
            seg_last += 1
        end
        entries = versions_and_infos[seg_first:seg_last]

        # Run-length encode the package version inside this segment.
        version_ranges = []
        run_first = 1
        while run_first <= length(entries)
            pkg_version = entries[run_first][2].version

            run_last = run_first
            while run_last < length(entries) &&
                    entries[run_last + 1][2].version == pkg_version
                run_last += 1
            end

            span = (entries[run_first][1], entries[run_last][1])
            push!(version_ranges, (span, pkg_version))
            run_first = run_last + 1
        end

        push!(segments, (base, version_ranges))
        seg_first = seg_last + 1
    end

    return segments
end
359+
# Apply `find_base_info_segments` to every stdlib history.
#
# `uuid_to_versions` is iterated as `(uuid, history)` pairs; the result is a
# `Dict{UUID, Vector}` mapping each UUID to the segments of its history.
function analyze_stdlib_patterns(uuid_to_versions)
    return Dict{UUID, Vector}(
        uuid => find_base_info_segments(history)
        for (uuid, history) in uuid_to_versions
    )
end
371+
# Render `uuid` as source text: the constant name registered for it in
# `uuid_constants` if there is one, otherwise a literal `UUID("...")` call.
function format_uuid(uuid, uuid_constants)
    return get(uuid_constants, uuid) do
        "UUID(\"$(uuid)\")"
    end
end
376+
377+ # Format base info tuple
# Builds the source text of a parenthesised `name = ..., uuid = ..., deps = ...,
# weakdeps = ...` literal for `info`.
#   - `info` must expose `name`, `uuid`, `deps` and `weakdeps`.
#   - `uuid_constants` is forwarded to `format_uuid` for every UUID written, so
#     UUIDs with a registered constant are emitted by constant name.
# An empty `deps`/`weakdeps` list is written as `UUID[]`; otherwise the elements
# are joined inside `UUID[...]`.
# `info.name` is interpolated between double quotes without escaping.
# NOTE(review): the whitespace inside the string literals below (join separator,
# indentation of the multi-line literal) cannot be confirmed from this view —
# check against the real file before relying on the exact output layout.
378+ function format_base_info (info, uuid_constants)
379+ deps_str = if isempty (info. deps)
380+ " UUID[]"
381+ else
382+ " UUID[" * join ([format_uuid (d, uuid_constants) for d in info. deps], " , " ) * " ]"
383+ end
384+
385+ weakdeps_str = if isempty (info. weakdeps)
386+ " UUID[]"
387+ else
388+ " UUID[" * join ([format_uuid (d, uuid_constants) for d in info. weakdeps], " , " ) * " ]"
389+ end
390+
391+ return """ (
392+ name = "$(info. name) ",
393+ uuid = $(format_uuid (info. uuid, uuid_constants)) ,
394+ deps = $(deps_str) ,
395+ weakdeps = $(weakdeps_str) ,
396+ )"""
397+ end
398+
# Re-shape `versions_dict` for the compression helpers: one
# `version => Dict{UUID, StdlibInfo}` pair per key of `versions_dict`, in
# ascending key order. Each stored value is indexed positionally and passed to
# `StdlibInfo` as (info[1], uuid, info[2], info[3], info[4]).
stdlibs_by_version = map(sort(collect(keys(versions_dict)))) do v
    v => Dict{UUID, StdlibInfo}(
        uuid => StdlibInfo(info[1], uuid, info[2], info[3], info[4])
        for (uuid, info) in versions_dict[v]
    )
end
408+
# Gather usage statistics that drive the compressed output.
@info("Analyzing version map for compression...")
uuid_counts, uuid_to_name = analyze_uuid_usage(stdlibs_by_version)

# A UUID gets a named constant once it is mentioned at least this many times.
const_threshold = 5
uuid_constants = Dict{UUID, String}()

for (uuid, uses) in uuid_counts
    # Skip rarely used UUIDs and UUIDs for which no stdlib name was recorded.
    (uses >= const_threshold && haskey(uuid_to_name, uuid)) || continue
    # Constant name is "<name>_uuid" with every character outside
    # [a-zA-Z0-9_] replaced by an underscore.
    uuid_constants[uuid] = replace("$(uuid_to_name[uuid])_uuid", r"[^a-zA-Z0-9_]" => "_")
end

# Per-stdlib histories, then their constant-base-info segments.
uuid_to_versions = group_by_uuid(stdlibs_by_version)
uuid_to_segments = analyze_stdlib_patterns(uuid_to_versions)
428+
# Writes the generated file at `output_fname` (opened with mode "w"). In order,
# the new (`+`) side of this hunk emits:
#   1. a header comment naming the generator script and the storage scheme;
#   2. one `const <name> = UUID("...")` line per entry of `uuid_constants`,
#      sorted by constant name (skipped entirely when there are none);
#   3. `const STDLIB_SEGMENTS = Dict{...}(...)`, one entry per key of
#      `uuid_to_segments`, ordered by `uuid_to_name[u]`; every segment is the
#      `format_base_info` text followed by its `(start, end) => version` items;
#   4. an explanatory comment and `const UNREGISTERED_STDLIBS = Dict{...}(...)`
#      built from `unregistered_stdlibs`, sorted by `info.name`, each value
#      written as a five-argument `StdlibInfo(...)` call.
# UUIDs are written through `format_uuid`, so registered constants are used by
# name. Separating commas are omitted after the last element of each list.
# Lines marked `-` in this view belong to the removed `print_sorted`-based
# writer and are not part of the new code.
# NOTE(review): indentation inside the emitted string literals is not
# recoverable from this view — confirm the exact output layout in the real file.
429+ # Output compressed version map
277430output_fname = joinpath (dirname (dirname (@__DIR__ )), " src" , " version_map.jl" )
278- @info (" Outputting to $(output_fname) " )
279- sorted_versions = sort (collect (keys (versions_dict)))
431+ @info (" Outputting compressed version map to $(output_fname) " )
280432open (output_fname, " w" ) do io
281- print (io, """
282- ## This file autogenerated by ext/HistoricalStdlibGenerator/generate_historical_stdlibs.jl
433+ println (io, " ## This file autogenerated by ext/HistoricalStdlibGenerator/generate_historical_stdlibs.jl" )
434+ println (io)
435+ println (io, " # Julia standard libraries with segment-based compression:" )
436+ println (io, " # - Each stdlib split into segments where base info (name, uuid, deps, weakdeps) is constant" )
437+ println (io, " # - Within each segment, only package version numbers stored per Julia version range" )
438+ println (io)
283439
284- # Julia standard libraries with duplicate entries removed so as to store only the
285- # first release in a set of releases that all contain the same set of stdlibs.
286- const STDLIBS_BY_VERSION = [
287- """ )
288- for v in sorted_versions
289- print (io, " $(repr (v)) => " )
290- print_sorted (io, versions_dict[v]; indent= 8 )
291- println (io, " ," )
440+ # Write UUID constants
441+ if ! isempty (uuid_constants)
442+ println (io, " # UUID constants" )
443+ for (uuid, const_name) in sort (collect (uuid_constants), by= x-> x[2 ])
444+ println (io, " const $(const_name) = UUID(\" $(uuid) \" )" )
445+ end
292446 println (io)
293447 end
294- println (io, " ]" )
295448
449+ # Write stdlib info with segments
450+ println (io, " # Format: UUID => [(base_info, [(julia_version_range, package_version)]), ...]" )
451+ println (io, " const STDLIB_SEGMENTS = Dict{UUID, Vector{Tuple{NamedTuple, Vector{Pair{Tuple{VersionNumber,VersionNumber}, Union{Nothing,VersionNumber}}}}}}(" )
452+
453+ sorted_uuids = sort (collect (keys (uuid_to_segments)), by= u-> uuid_to_name[u])
454+
455+ for (idx, uuid) in enumerate (sorted_uuids)
456+ segments = uuid_to_segments[uuid]
457+ uuid_str = format_uuid (uuid, uuid_constants)
458+
459+ println (io, " $(uuid_str) => [" )
460+
461+ for (seg_idx, (base_info, version_ranges)) in enumerate (segments)
462+ print (io, " (" )
463+ println (io, format_base_info (base_info, uuid_constants), " ," )
464+ println (io, " [" )
465+
466+ for (range_idx, ((start_v, end_v), ver)) in enumerate (version_ranges)
467+ ver_str = isnothing (ver) ? " nothing" : " v\" $(ver) \" "
468+ comma = range_idx < length (version_ranges) ? " ," : " "
# NOTE(review): when start_v == end_v the two branches below appear to print
# the same text, so this conditional looks redundant — confirm before removing.
469+ if start_v == end_v
470+ println (io, " (v\" $(start_v) \" , v\" $(start_v) \" ) => $(ver_str)$(comma) " )
471+ else
472+ println (io, " (v\" $(start_v) \" , v\" $(end_v) \" ) => $(ver_str)$(comma) " )
473+ end
474+ end
475+
476+ print (io, " ])" )
477+ println (io, seg_idx < length (segments) ? " ," : " " )
478+ end
479+
480+ print (io, " ]" )
481+ println (io, idx < length (sorted_uuids) ? " ," : " " )
482+ end
483+
484+ println (io, " )" )
296485 println (io)
486+
487+ # Write UNREGISTERED_STDLIBS
297488 print (io, """
298489 # Next, we also embed a list of stdlibs that must _always_ be treated as stdlibs,
299490 # because they cannot be resolved in the registry; they have only ever existed within
300491 # the Julia stdlib source tree, and because of that, trying to resolve them will fail.
301- const UNREGISTERED_STDLIBS =""" )
302- print_sorted (io, unregistered_stdlibs; indent= 4 )
492+ """ )
493+ println (io, " const UNREGISTERED_STDLIBS = Dict{UUID,StdlibInfo}(" )
494+ for (idx, (uuid, info)) in enumerate (sort (collect (unregistered_stdlibs), by= x-> x[2 ]. name))
495+ uuid_str = format_uuid (uuid, uuid_constants)
496+ deps_str = isempty (info. deps) ? " UUID[]" : " UUID[" * join ([format_uuid (d, uuid_constants) for d in info. deps], " , " ) * " ]"
497+ weakdeps_str = isempty (info. weakdeps) ? " UUID[]" : " UUID[" * join ([format_uuid (d, uuid_constants) for d in info. weakdeps], " , " ) * " ]"
498+ ver_str = isnothing (info. version) ? " nothing" : " v\" $(info. version) \" "
499+
500+ println (io, " $(uuid_str) => StdlibInfo(" )
501+ println (io, " \" $(info. name) \" ," )
502+ println (io, " $(uuid_str) ," )
503+ println (io, " $(ver_str) ," )
504+ println (io, " $(deps_str) ," )
505+ println (io, " $(weakdeps_str) ," )
506+ println (io, " )" , idx < length (unregistered_stdlibs) ? " ," : " " )
507+ end
508+ println (io, " )" )
303509end
0 commit comments