#!/bin/bash
# Sort big files by length of lines, using zstd for compression.
# (C) F-Hauri.ch 2023 - Licensed under terms of GPL V3+
# Usage: $0 "/path/to/FileToSortByLengthOfLines"
# The file is read completely twice: first to prepare the compression steps,
#   and second to split and compress.

shopt -s extglob

tdir=$(mktemp -d)

mapfile -t sizes < <(
    awk '{ val[length($0)]+=1 };
	 END{ for (var in val) print var};' "$1")

subpids=()
for size in ${sizes[@]}; do
    printf -v file '%s/part_%08d' "$tdir" $size
    exec {cmpr[$size]}> >(zstd >$file)
    subpids+=($!)
done

awkBegin=${cmpr[@]@A}
awkBegin=${awkBegin##*\(}
awkBegin=${awkBegin%)}
awkBegin=${awkBegin// /;}

awk "BEGIN{ ${awkBegin//\[/cfd\[};};"'{
      print >(sprintf("/dev/fd/%d",cfd[length($0)]))
    };' < "$1"

for size in ${sizes[@]}; do
    exec {cmpr[$size]}>&-
done

wait ${subpids[@]}
zstdcat "$tdir"/part_*
rm "$tdir"/part_*
rmdir "$tdir"
