#!/bin/bash
# 
# Problem: some UTF-8 characters may take more, and others less, than
# one character width [em] in a monospaced font.
# Run this again with any (dummy) argument for the best formatted result:
# $0 workaround

# Sample strings handed one by one to strU8DiffLen by the report loop
# further down; most of them contain non-ASCII characters.
testStrings=(
    Généralités Language Théorème Février "Left: ←" "Yin Yang ☯"
    "I'm 􏹤􏸎􏸫􏸫􏸲" "󰽞 󱀸" "🀽🁗🁊🁓" "𝙎𝙤𝙧𝙧𝙮" "  "
    "對不起" "죄송합니" "ⷨⷪⷢ" "ⶦⶰⶑ" "⸺⸻" "⽄⾃"
)
strU8DiffLen () {
    # Syntax $0 <string> <var diff> <var 1stchars>
    #
    # Measure how many more bytes than characters <string> holds.
    #   <var diff>      name of the variable that receives
    #                   (byte count - character count)
    #   <var 1stchars>  name of the variable that receives one
    #                   space-separated item per distinct non-ASCII
    #                   character, in order of first appearance:
    #                   <occurrences>x<lead byte, hex>+<extra bytes>(<char>)
    # Both names must differ from the local variable names declared below.
    # Returns 2, with a usage message on stderr, if a variable name is missing.
    local i cU8 vU8 oU8 byt1='' bytlen chrlen
    local oLcAll=${LC_ALL-} setLcAll=${LC_ALL+set}
    local -a order=()
    local -A counter=() info=()

    if [[ -z $2 || -z $3 ]]; then
        printf 'Usage: %s <string> <var diff> <var 1stchars>\n' \
            "${FUNCNAME[0]}" >&2
        return 2
    fi

    chrlen=${#1}                # length in characters (caller's locale)
    # LC_ALL outranks LANG and every LC_* variable, so it alone is enough
    # to make ${#1} count bytes.
    LC_ALL=C
    bytlen=${#1}
    # Put LC_ALL back exactly as it was found, "unset" included.
    if [[ -n $setLcAll ]]; then LC_ALL=$oLcAll; else unset LC_ALL; fi
    printf -v "$2" '%d' "$((bytlen - chrlen))"

    for ((i = 0; i < chrlen; i++)); do  # for all char in string...
        cU8=${1:i:1}
        # The argument is expanded first (one character in the caller's
        # locale); printf itself then runs under the C locale, where %q
        # writes every byte >= 0x80 as a 3-digit octal escape: $'\303\251'
        LC_ALL=C printf -v vU8 '%q' "$cU8"
        # Keep only $'\ooo...' forms whose lead byte is >= 0200.  Forms such
        # as \*, $'\t' or $'\001' are plain ASCII and must be skipped; no
        # word splitting is used, so nothing can be glob-expanded.
        [[ $vU8 == "\$'\\"[23][0-7][0-7]* ]] || continue
        oU8=${vU8#"\$'"}        # strip leading  $'
        oU8=${oU8%"'"}          # strip trailing '   ->  \303\251
        # First sighting: remember the order of appearance.
        (( ${counter[$cU8]:-0} )) || order+=("$cU8")
        counter[$cU8]=$(( ${counter[$cU8]:-0} + 1 ))
        # 1st byte hex + morecount ( char ); each escape is 4 characters
        printf -v "info[$cU8]" '%02X+%d(%s)' "0${oU8:1:3}" \
            "$(( ${#oU8} / 4 - 1 ))" "$cU8"
    done

    for cU8 in "${order[@]}"; do  # prepare answer string
        byt1+="${counter[$cU8]}x${info[$cU8]} "
    done
    printf -v "$3" '%s' "${byt1% }"
}

# Print one report line per test string: the quoted string, its character
# count, its byte count and the per-character detail from strU8DiffLen.
for string in "${testStrings[@]}"; do
    strU8DiffLen "$string" diff detail
    case $1 in
        '')
            # No (or empty) first argument: pad the string column with
            # spaces.  The width is enlarged by diff -- presumably because
            # printf pads by bytes rather than characters.
            printf ' - %-*s%2d chrs, %2d bts [%s]\n' "$((14 + diff))" \
                "'$string'" "${#string}" "$((${#string} + diff))" "$detail"
            ;;
        *)
            # Any first argument: instead of padding, emit a carriage return
            # and ESC[17C (ANSI cursor-forward, 17 columns) before the counts.
            printf ' - %s\r\e[17C%2d chrs, %2d bts [%s]\n' \
                "'$string'" "${#string}" "$((${#string} + diff))" "$detail"
            ;;
    esac
done

# When the script is run without arguments, print its own leading comment
# block as a hint: each line whose second character is a space is shown
# without its first two characters and with the literal $0 replaced by the
# script path; reading stops at the first empty line.
# $# is tested instead of BASH_ARGC, which bash only documents as reliable
# in extended debugging mode.
(( $# )) || while IFS= read -r line && [[ -n $line ]]; do
    if [[ ${line:1:1} == " " ]]; then
        line=${line:2}
        printf '%s\n' "${line//\$0/$0}"
    fi
done <"$0"
