#!/bin/bash
# ppGrep.sh - Progress Parallel Grep bash script
#    Run many "grep" in parallel, then monitor and show progression
#    by displaying progress bars for all grep process and for overall
#    progression.
# Version: 0.1.4 -- Last update: Sat Jan 31 08:52:00 CET 2025
# (C) 2024-2025 Felix Hauri - felix@f-hauri.ch

# WARNING 1: This script uses, from:
#     packet: coreutils           readlink, cat, sync, rm, stat
#     packet: libc-bin            getent, getconf
#     packet: ncurses-bin         tput
#     packet: procps              ps
#     packet: zstd                zstd, zstdcat
#     packet: grep                grep
#   this script depends on: coreutils libc-bin ncurses-bin procps zstd grep
# WARNING 2: This script REQUIRES an interactive console terminal!
# NOTE(review): "wait -n -p" is used below; this presumably needs bash >= 5.1.

# TODO:
#  - grep on compressed files, like zgrep, zstdgrep...
#  - add option for barColor
#  - do tests on more different environments (fs, hdd, sdd, iscsi...)
#  - shuffle list to reduce chance that 1 folder with big files to 1 proc
#  - '-q' print 1st file found, quit and end all tasks

# Some default values (both may be preset from the environment)
declare -i maxProc
: "${maxProc:=3}" "${barColor:=48;5;244}"
# barColor='48;5;25;38;5;32'
declare -r barColor
shopt -s globstar dotglob

# Global state shared between the functions below.
declare -i doSpinner=0          # write FD of the spinner co-process (0: none)
declare -i dumpOutput=0 showWarn=0 failed=0
declare -a grepOpts=()          # options (and pattern) handed to every grep
declare -a resarry=()           # result file of each finished bunch, by index

# Print the help text on STDOUT.
usage() {
    cat <<EOF
Usage: ${0##*/} [OPTIONS] [PATTERN] [FILE] [FILE...]
  Options [-E|-F|-G|-P] [-l|-L] [-H] [-Z] [-a] [-c] [-b] [-i] [-l] [-n] [-o] [-s]
  and [-v] [-w] [-x], as [-e "PATTERN"] and [-f "PATTERN FILE"] are bind to
  'grep' tasks (see man grep!).
    -j NUM   Max job to run together (default: "$maxProc").
    -C PATH  'cd' to PATH before running (instead of "${chDir:-$PWD}").
    -T FILE  Files list from FILE.
    -z       Files list are null bytes separated.
    -d       Dump both STDOUT and STDERR as soon as possible (in right order)
             default is to keep everything in memory until last job finish.
    -W       Display Warnings when killing some subpid.
    -h       Show this.
  Note: FILE cannot be else than a file! There are no '-r' option.
EOF
}

# Parse the command line.
# Globals written: grepOpts, files, maxProc, filesFrom, chDir, zeroRead,
#                  showWarn, dumpOutput.
# Exits on -h (status 0) and on an unknown option (status 1).
# When no -e/-f was given, the first positional argument is the pattern and
# is stored as "-e PATTERN"; all remaining arguments are files.
readCmdLine() {
    local opt opts OPTARG OPTIND
    # grep switches passed through as-is; -e and -f additionally take a value.
    # Spelled out explicitly so parsing does not depend on 'shopt extglob'.
    local bindFlags='EFGHLPZabcilnosvwx'
    local -A optVars=([j]=maxProc [T]=filesFrom [C]=chDir)
    local -i havePattern=0
    printf -v opts '%s:' "${!optVars[@]}"
    opts="hdzW${bindFlags}e:f:$opts"
    while getopts "$opts" opt; do
        if [[ -n ${optVars[$opt]-} ]]; then
            printf -v "${optVars[$opt]}" %s "$OPTARG"
            continue
        fi
        case $opt in
            [$bindFlags] )
                grepOpts+=("-$opt") ;;
            e | f )
                grepOpts+=("-$opt" "$OPTARG")
                havePattern=1 ;;
            h ) usage; exit 0 ;;
            z ) zeroRead=1 ;;
            W ) showWarn=1 ;;
            d ) dumpOutput=1 ;;
            * ) printf >&2 'ERROR %s: Unknow arg.\n' "${0##*/}"
                usage; exit 1 ;;
        esac
    done
    shift $((OPTIND-1))
    if (( ! havePattern && $# )); then
        grepOpts+=(-e "$1")
        shift
    fi
    if (( $# )); then
        files=("$@")
    fi
    return 0
}

# Succeed when grepOpts carries a pattern source (-e or -f).
hasPattern() {
    local o
    for o in "${grepOpts[@]}"; do
        [[ $o == -[ef] ]] && return 0
    done
    return 1
}

# percentBar PERCENT WIDTH VARNAME
# Store in VARNAME a bar WIDTH columns wide, filled up to PERCENT using
# block characters (eighths of a column), padded with spaces.
percentBar () {
    local prct totlen=$((8*$2)) lastchar barstring blankstring
    printf -v prct %.2f "$1"
    # prct becomes the filled length in eighths of a column.
    if (( prct = 10#${prct/.} * totlen / 10000, prct % 8 )); then
        printf -v lastchar '\\U258%X' $(( 16 - prct%8 ))
    else
        lastchar=''
    fi
    printf -v barstring '%*s' $((prct/8)) ''
    printf -v barstring '%b' "${barstring// /\\U2588}$lastchar"
    printf -v blankstring '%*s' $(((totlen-prct)/8)) ''
    printf -v "$3" '%s%s' "$barstring" "$blankstring"
}

# percent PART TOTAL VARNAME
# Store PART/TOTAL as a percentage with two decimals in VARNAME
# ("0.00" when TOTAL is empty or zero).
percent(){
    if [[ $2 ]] && (( $2 )); then
        local p=00$(($1*100000/$2))          # thousandths, zero padded
        printf -v "$3" %.2f "${p::-3}.${p: -3}"
    else
        printf -v "$3" %.2f 0
    fi
}

# Hide the cursor and start the spinner as a co-process; its write FD is
# stored in doSpinner. Arguments are passed on to spinner().
startSpinner () {
    tput civis >&$monFD
    exec {doSpinner}> >(spinner "$@")
    trap "kill $! 2>/dev/null;tput cnorm >&$monFD" 0 1 3 6 9 15
}

# Stop the spinner (if any) by sending it one line, restore the cursor and
# erase the two lines it was drawn on.
stopSpinner () {
    if (( doSpinner )) && echo >&"$doSpinner"; then
        exec {doSpinner}>&-
        doSpinner=0
        tput cnorm >&$monFD
        printf '\r\e[K\e[A\e[K' >&$monFD
    fi
}

# spinner [DELAY]
# Draw a two-line animation on monFD until something can be read on STDIN.
# Each frame is a pair (upper row, lower row) of 4 braille characters.
spinner() {
    local str shs
    printf -v str '\e[A%s\e[B\e[4D%s,' ⠉⠉⠉⢹ ⠀⠀⠀⢸ ⠈⠉⠉⢹ ⠀⠀⠀⣸ ⠀⠉⠉⢹ ⠀⠀⢀⣸ ⠀⠈⠉⢹ \
        ⠀⠀⣀⣸ ⠀⠀⠉⢹ ⠀⢀⣀⣸ ⠀⠀⠈⢹ ⠀⣀⣀⣸ ⠀⠀⠀⢹ ⢀⣀⣀⣸ ⠀⠀⠀⢸ ⣀⣀⣀⣸ ⠀⠀⠀⢰ ⣄⣀⣀⣸ \
        ⠀⠀⠀⢠ ⣆⣀⣀⣸ ⠀⠀⠀⢀ ⣇⣀⣀⣸ ⡀⠀⠀⠀ ⣇⣀⣀⣸ ⡄⠀⠀⠀ ⣇⣀⣀⣰ ⡆⠀⠀⠀ ⣇⣀⣀⣠ ⡇⠀⠀⠀ \
        ⣇⣀⣀⣀ ⡏⠀⠀⠀ ⣇⣀⣀⡀ ⡏⠁⠀⠀ ⣇⣀⣀⠀ ⡏⠉⠀⠀ ⣇⣀⡀⠀ ⡏⠉⠁⠀ ⣇⣀⠀⠀ ⡏⠉⠉⠀ ⣇⡀⠀⠀ \
        ⡏⠉⠉⠁ ⣇⠀⠀⠀ ⡏⠉⠉⠉ ⡇⠀⠀⠀ ⡏⠉⠉⠙ ⠇⠀⠀⠀ ⡏⠉⠉⠹ ⠃⠀⠀⠀ ⡏⠉⠉⢹ ⠁⠀⠀⠀ ⡏⠉⠉⢹ \
        ⠀⠀⠀⠈ ⠏⠉⠉⢹ ⠀⠀⠀⠘ ⠋⠉⠉⢹ ⠀⠀⠀⠸
    IFS=, read -ra shs <<<"$str"
    local -i pnt
    printf '\e7' 1>&$monFD                   # save cursor position
    while ! read -rsn1 -t "${1:-.02}"; do
        printf '%s\e8' "${shs[pnt++%${#shs[@]}]}" 1>&$monFD
    done
}

# Wait for any child of runParGrep() to exit and report it on STDOUT as
# "<file count> <result file>" (read by processJobs()).
# Uses and updates the caller's 'running' array (dynamic scope).
wait4oneTask() { # Inform both main and parallel processor when any child exit
    local -i epid rpids i
    local trapCmd cmdLne
    wait -np epid
    if (( epid )); then
        echo "${running[$epid]}"  # to be read by "processJobs()"
        unset "running[$epid]"
        trapCmd="printf '%s\n' ${running[*]@Q};exit"
        trap "$trapCmd" 0 1 3 6 9 12 15
    else
        # No PID returned by wait: look for children that are still alive.
        # NOTE(review): the next lines down to the 'mapfile' redirection were
        # lost in the mangled source (only "read -ra rpids" and
        # "2>/dev/null" survived); reconstructed -- confirm against upstream.
        read -ra rpids </proc/$BASHPID/task/$BASHPID/children
        for i in "${!rpids[@]}"; do
            cmdLne=()
            mapfile -d '' -t cmdLne </proc/${rpids[i]}/cmdline 2>/dev/null
            # Ignore children that are sub-shells of this very script.
            [[ ${cmdLne[1]} == "$0" ]] && unset "rpids[$i]"
        done
        if (( ${#rpids[@]} < 1 )); then
            # Nothing left to wait for: report everything still registered.
            printf '%s\n' "${running[@]}"
            unset running
        fi
    fi
}

# runParGrep BUNCH FILE...
# Split FILEs in bunches of BUNCH files, run at most maxProc grep at once.
# STDOUT / STDERR of each grep are compressed to "${tmpLoc}_<n>.zst" and
# "${tmpLoc}_<n>.zst.e". Runs in a sub-process (see processJobs()).
runParGrep() { # Parallel grep processor distribute grep process and re-order outputs
    local tmpfile sfiles files running trapCmd
    local -i bunch=$1 gpid i
    shift
    files=("$@")
    for (( i=0 ; i <= 1 + filecnt / bunch ; i++ )); do
        sfiles=("${files[@]: bunch * i : bunch}")
        (( ${#sfiles[@]} )) || continue
        printf -v tmpfile '%s_%0*d.zst' "$tmpLoc" ${#filecnt} "$i"
        # '--' protects file names beginning with a dash.
        grep -d skip -D skip "${grepOpts[@]}" -- "${sfiles[@]}" \
             > >( zstd >"$tmpfile") 2> >(zstd >"$tmpfile.e") &
        gpid=$!
        running[gpid]="${#sfiles[*]} $tmpfile"
        # On exit or signal (USR2 is sent by processJobs), report what is left.
        trapCmd="printf '%s\n' ${running[*]@Q};exit"
        trap "$trapCmd" 0 1 3 6 9 12 15
        ((${#running[@]}>=maxProc)) && wait4oneTask
    done
    while ((${#running[@]})); do
        wait4oneTask
    done
    exit
}

# Start runParGrep() in background, then poll it and its grep children to
# draw one progress bar per grep plus an overall bar on monFD.
# Globals read: files, filecnt, maxBunch, maxProc, col, tmpLoc, barColor,
#               dumpOutput, showWarn, monFD. Written: resarry, failed, maxProc.
processJobs() { # Main *parent* process run and monitor childs
    local fmtLns fdone tfile fpos sfmt key fbars pct bar
    local -a crt fpct fbar file
    local -i i resFd bPid gpids overPos=0 penalty=5 toPrint crtPos fsize \
          tfNum lastPos=0 cnt=0 \
          bunch bdivisor=$(( maxProc == 1 ? 1 : 3 * maxProc ))
    bunch=' filecnt / bdivisor > maxBunch ? maxBunch : filecnt / bdivisor '
    if (( bunch < 1 )); then
        bunch=1
        (( maxProc > filecnt )) && maxProc=$filecnt
    fi
    exec {resFd}< <(runParGrep "$bunch" "${files[@]}")
    bPid=$!

    # One line per process; this format goes through printf three times
    # (percentages, then bars, then file names), hence the stacked '%'.
    printf -v fmtLns '%*s' ${maxProc} ''
    fmtLns=${fmtLns// /"%%%%-50s\r\e[50C\e[${barColor}m%%s\e[0m%6s%%%%%%%%\n"}

    # idxs[file name] = position of the file in the list.
    printf -v sfmt '[%q]=cnt++ ' "${files[@]}"
    local -Ai idxs="($sfmt)"
    unset sfmt

    while [[ -d /proc/$bPid ]]; do
        if read -r -u "$resFd" -t .02 fdone tfile; then
            if [[ $tfile ]]; then
                # A bunch is finished: "<file count> <result file>".
                IFS=. read -r tfNum _ <<< "10#${tfile#"${tmpLoc}"_}"
                resarry[tfNum]="$tfile"
                overPos+=10#$fdone
                for (( i = 0 ; i < maxProc ; i++ )); do
                    crt[i]=0
                done
                penalty=5
                if (( dumpOutput )); then
                    # Dump early ("-d" switch), strictly in bunch order.
                    for (( i = 0; i < ${#resarry[@]}; i ++ )); do
                        [[ -v resarry[i] ]] || break     # not finished yet
                        [[ ${resarry[i]} ]] || continue  # already dumped
                        [[ -f ${resarry[i]} ]] || break
                        sync "${resarry[i]}"{,.e}
                        zstdcat "${resarry[i]}"
                        zstdcat "${resarry[i]}.e" >&2
                        rm "${resarry[i]}" "${resarry[i]}.e"
                        resarry[i]=''
                    done
                fi
            elif (( penalty-- < 1 )); then
                # Too many empty reports in a row: stop the job processor.
                stopSpinner
                kill -USR2 $bPid
                (( showWarn )) &&
                    printf '\rWARNING: Kill %d!\e[K\n\e7' "$bPid" >&$monFD
                failed+=1
                break
            fi
        fi
        mapfile -t gpids < <(ps --ppid $bPid ho pid)
        toPrint=0 crtPos=$overPos fsize=0
        for (( i = 0 ; i < maxProc ; i++ )); do
            if (( gpids[i] )); then
                # File descriptor 3 of the child is taken as the file being
                # read; its offset comes from fdinfo.
                file[i]=$(readlink /proc/${gpids[i]}/fd/3)
                key=${file[i]#"$PWD"/}
                # NOTE(review): the fdinfo redirection below was lost in the
                # mangled source (only "read -r _ fpos" and "2>/dev/null"
                # survived); reconstructed -- confirm against upstream.
                if [[ ${file[i]} ]] && [[ ${idxs["$key"]} ]] &&
                       { read -r _ fpos </proc/${gpids[i]}/fdinfo/3 ; } \
                           2>/dev/null
                then
                    (( crt[i] = ${idxs["$key"]} % bunch ))
                    fileSize "${file[i]}" fsize
                    percent $(( fpos < fsize ? fpos : fsize )) $fsize fpct[i]
                    percentBar "${fpct[i]}" $((col-57)) fbar[i]
                    toPrint+=1
                fi
                crtPos+="crt[i]"
                file[i]=${file[i]##*/}
                file[i]=${file[i]::50}
            else
                fpct[i]=' 0.00'
                file[i]=''
                printf -v fbar[i] '%*s' $((col-57)) ''
            fi
        done
        if (( toPrint )); then
            stopSpinner
            # Never let the overall counter go backward.
            if (( crtPos < lastPos )); then
                crtPos=$lastPos
            else
                lastPos=$crtPos
            fi
            printf -v fbars "$fmtLns" "${fpct[@]}"
            printf -v fbars "$fbars" "${fbar[@]}"
            printf -v fbars "$fbars" "${file[@]}"
            percent $(( crtPos < filecnt ? crtPos : filecnt )) $filecnt pct
            percentBar $pct $((col-38-2*${#filecnt})) bar
            printf >&$monFD \
                '\r%sProc run: %d, done: %2d. Files: %*s/%s\e[%sm%s\e[0m%6s%%\e[%dA' \
                "$fbars" ${#gpids[@]} ${#resarry[@]} ${#filecnt} $crtPos \
                $filecnt "$barColor" "$bar" "$pct" ${maxProc}
        fi
    done
    stopSpinner
    # Collect reports still pending in the pipe.
    while read -r -u "$resFd" -t .005 fdone tfile && [[ $tfile ]]; do
        IFS=. read -r tfNum _ <<< "10#${tfile#"${tmpLoc}"_}"
        resarry[tfNum]="$tfile"
        overPos+=$fdone
    done
    exec {resFd}<&-
    (( overPos > filecnt )) && overPos=$filecnt
    percent $overPos $filecnt pct
    percentBar $pct $((col-38-2*${#filecnt})) bar
    printf '\e[%dB\rProc run: 0, done: %2d. Files: %*s/%s\e[%sm%s\e[0m%6s%%\n' \
           $maxProc ${#resarry[@]} ${#filecnt} $overPos $filecnt "$barColor" \
           "$bar" "$pct" >&$monFD
}

# getTTY [VARNAME]   : store in VARNAME (default "tty") the name, relative to
#                      /dev, of the first character device owned by group
#                      "tty" found among the open files of this shell or of
#                      its parent. VARNAME is left untouched if none is found.
# fileSize FILE [VARNAME] : store the size of FILE in VARNAME
#                      (default "filesize").
# Use loadable builtins if installed (failure is expected where they are not).
if enable -f /usr/lib/bash/stat stat 2>/dev/null; then
    enable -f /usr/lib/bash/sync sync 2>/dev/null
    getTTY() {
        local _target=${1:-tty} _fd _gTTY
        IFS=: read -r _ _ _gTTY _ < <(getent group tty)
        local -A _stat
        for _fd in /proc/{$$,$(($(ps ho ppid $$)))}/fd/*; do
            stat -A _stat "$(readlink "$_fd")" 2>/dev/null || continue
            case ${_stat["type"]}:${_stat["gid"]} in
                c:$_gTTY )
                    printf -v "$_target" '%s' "${_stat["link"]#/dev/}"
                    return ;;
            esac
        done
    }
    fileSize() {
        local -A _Array
        stat -A _Array "$1"
        printf -v "${2:-filesize}" %u "${_Array[size]}"
    }
else
    getTTY() {
        local _target=${1:-tty} _fd _perm _grp _nam
        for _fd in /proc/{$$,$(($(ps ho ppid $$)))}/fd/*; do
            {
                IFS=: read -r _perm _grp _nam < <(
                    stat -c %A:%G:%n "$(readlink "$_fd")")
            } 2>/dev/null
            case ${_perm::1}:$_grp in
                c:tty )
                    printf -v "$_target" '%s' "${_nam#/dev/}"
                    return ;;
            esac
        done
    }
    fileSize() {
        read -r "${2:-filesize}" < <(stat -c %s "$1")
    }
fi

# Whole job: parse arguments, build the file list, run, print the results.
# Returns 1 (after printing usage on STDERR) when no pattern was given.
main() {
    local f
    local -a outFiles errFiles

    # Parse options from command line
    grepOpts=()
    readCmdLine "$@"
    if ! hasPattern; then
        printf >&2 'ERROR %s: missing PATTERN.\n' "${0##*/}"
        usage >&2
        return 1
    fi
    if (( maxProc < 1 )); then
        printf >&2 'ERROR %s: -j NUM must be at least 1.\n' "${0##*/}"
        exit 1
    fi

    # Prepare separated FD for monitor (keeping STDOUT and STDERR untouched)
    tty=''
    getTTY tty
    if [[ -z $tty ]]; then
        printf >&2 'ERROR %s: no terminal found.\n' "${0##*/}"
        exit 1
    fi
    exec {monFD}>"/dev/$tty" || exit 1

    # As xargs do, don't exceed ARG_MAX command line length
    argMax=$(getconf ARG_MAX)

    # Change dir before else
    if [[ $chDir ]]; then
        cd -- "$chDir" || exit 1
    fi

    # Build file list (Start spinner)
    if [[ -v filesFrom ]]; then
        if [[ $filesFrom == - ]]; then
            printf '\nRead file list... ' >&$monFD ; startSpinner
            if [[ -v zeroRead ]]; then
                mapfile -d '' -t files
            else
                mapfile -t files
            fi
        elif [[ -r $filesFrom ]]; then
            printf '\nRead file list... ' >&$monFD ; startSpinner
            if [[ -v zeroRead ]]; then
                mapfile -d '' -t files <"$filesFrom"
            else
                mapfile -t files <"$filesFrom"
            fi
        else
            printf >&2 "%s: %s: %s\n" "${0##*/}" "${filesFrom}" \
                "$(TEXTDOMAIN=libc bash -c 'echo $"cannot open input file"')"
            exit 1
        fi
    elif [[ ${files[0]} ]]; then
        if [[ -f ${files[0]} ]]; then
            printf '\nBuild file list... ' >&$monFD ; startSpinner
        else
            printf >&2 "%s: %s: %s\n" "${0##*/}" "${files[*]}" \
                "$(TEXTDOMAIN=libc bash -c 'echo $"No such file or directory"')"
            exit 1
        fi
    else
        printf '\nBuild file list... ' >&$monFD ; startSpinner
        files=(**)
    fi

    # Prepare quantity of file to by submitted to each grep process: $maxBunch
    filecnt=${#files[@]}
    if (( filecnt == 0 )); then
        stopSpinner
        printf >&2 'ERROR %s: empty file list.\n' "${0##*/}"
        exit 1
    fi
    linelen=0
    for f in "${files[@]}"; do
        (( ${#f} > linelen )) && linelen=${#f}
    done
    (( linelen )) || linelen=1     # only empty names: avoid dividing by 0
    maxBunch=$(( argMax / linelen ))
    col=$(tput cols)

    if [[ ! -d /run/user/$UID ]]; then
        mkdir "/run/user/$UID" || exit 1
    fi
    resarry=()
    failed=0
    printf -v tmpLoc /run/user/%d/parGrep-%02X%02X%02X $UID ${RANDOM}{,,}

    processJobs

    # Print (in order) whatever was not already dumped by '-d'.
    if ((${#resarry[@]})); then
        printf -v digits '%*s' ${#filecnt} ''
        digits=${digits// /[0-9]}
        # $digits is a glob pattern and must stay unquoted; nullglob avoids
        # handing the literal pattern to the tools when nothing is left.
        shopt -s nullglob
        outFiles=("$tmpLoc"_$digits.zst)
        errFiles=("$tmpLoc"_$digits.zst.e)
        shopt -u nullglob
        if (( ${#outFiles[@]} + ${#errFiles[@]} )); then
            sync "${outFiles[@]}" "${errFiles[@]}"
        fi
        if (( ${#outFiles[@]} )); then
            zstdcat "${outFiles[@]}"
        fi
        if (( ${#errFiles[@]} )); then
            zstdcat "${errFiles[@]}" >&2
        fi
        if (( ${#outFiles[@]} + ${#errFiles[@]} )); then
            rm -- "${outFiles[@]}" "${errFiles[@]}"
        fi
    fi
    return 0
}

# BEGIN
main "$@"