It is currently Thu May 24, 2018 8:07 am

fieldz: pick out fields/words from each input line easily

All times are UTC - 6 hours

Post new topic Reply to topic  [ 7 posts ] 
Author Message
 PostPosted: Mon Oct 22, 2012 1:33 am   

Joined: Mon Mar 02, 2009 3:03 am
Posts: 643

that's nice.

why does bash have to be in posix mode?
main is useless: functions are mostly used for repeated code.
instead of testing the whole $* variable, just $1, or $# being 1 or more.
variables in tests should always be quoted.
expr is not bash; you can probably do the same thing using double square brackets and BASH_REMATCH
regex for [[ is easier to use in a variable
reg="^ *[\"']"
[[ $var =~ $reg ]] ...
you see that lhs var doesn't need to be quoted.
instead of using awk, you could read each line into an array, split on a defined IFS, and print its fields in any order.
use more quotes

 PostPosted: Mon Oct 22, 2012 3:38 pm   

Joined: Mon Mar 02, 2009 3:03 am
Posts: 643
[[ is not posix, so your script won't work with strictly posix shells.
--posix doesn't exactly mimic a posix shell
Change the behavior of bash where the default operation differs from the POSIX standard to match the standard (posix mode).
you can't rely on this option to test whether your code is strictly posix; it is better to use (d)ash.

what do you mean «you can't pre-declare functions in bash» ?
functions can be declared at the top of the script, just like variables.

$ var="foo bar"
$ test -z $var
bash: test: foo: binary operator expected
$ test -z "$var"

`expr` is not a shell internal command, so
why use an external command when the shell can do it by itself?

 PostPosted: Wed Apr 18, 2018 4:44 am   

Joined: Wed Apr 18, 2018 4:30 am
Posts: 2

# main: filter stdin line by line, printing the fields requested on the
# command line (default: the last field of each line).
# Arguments: optional -d/-r/-z flags, then field specs and/or literal text.
# Globals:   reads msg; assigns space, prefix, col, fs, flag and i, and
#            unsets andon/upto after every field spec.
# NOTE(review): this copy appears damaged by the forum export -- several
# `else`, `fi`, `done` and `}` lines seem to be missing, and each
# "[email protected]" is presumably a mangled "$@". Recover the original text
# before running -- TODO confirm against the author's script.
main() {
   # Print the usage text and exit 1 when stdin is a terminal.
   # NOTE(review): the backquoted regexp call is expanded to a string inside
   # [[ ]], so its exit status is presumably not what is tested -- verify that
   # --help is really detected.
   if [[ -t 0 || `regexp ^-?-help$ $1` ]] ;then echo -e $msg;exit 1;fi
   #log "fieldz main111 debug col:$col. fsep:$fsep.  arg1:$1.  arg2:$2."
   local fsep=' ' rsep;
   space=" ";
   # checkparams: recognise one leading option (-d, -r or -z) in $1/$2.
   # Its return status is used by the caller as the number of arguments to shift.
   # NOTE(review): the visible "-d <sep>" branch returns 2 without assigning
   # fsep, and the -z branch has no visible action -- lines may be missing.
   checkparams() {
      if test "$1" = -d;then
         return 2;
         if regexp "-d.*" "$1"; then
            fsep=${1#-d};return 1;fi
      if test "$1" = -r;then
         rsep=$2;return 2;
         if regexp "-r.*" "$1"; then
            rsep=${1#-r};return 1;fi
      if test "$1" = -z; then
         return 1;fi
   # Option parsing is attempted twice; each call's status drives the shift.
   checkparams "[email protected]";shift $?
   checkparams "[email protected]";shift $?
   # Leading non-numeric arguments are accumulated into $prefix until one
   # starts with "-" or matches <digits>-.
   while isNaN $1;do if test ${1:0:1} = "-" || regexp "[0-9]+-" "$1"; then break;fi;
                 prefix=$prefix"$1$space"; shift;
   #log have prefix:$prefix, fsep:$fsep, rsep:$rsep, star:$*, space=$space.
   # With a record separator, awk re-splits the input on $rsep and replaces
   # embedded newlines by spaces; otherwise the input is passed through cat.
   if test "$rsep";then awk 'BEGIN{RS="'$rsep'"} {gsub("\n"," ");print}';else cat;fi |
      while read line; do
         #log "main() read line from stdin as: $line "
         if test "$prefix";then echo -en "$prefix";fi
         if test -z $1;then field;fi;                #default, ie, last field
         for col in "[email protected]";do      #process each user specification of a field then call field() to print the required one.
            if isNaN $col;then
               #log "for loop,  NaN, col=$col"
               if regexp "-[^0-9]+" "$col"; then          #find if a dash<name> in spec.
                  fs=${col:1}; col=2;flag=\"\|\';            #it is a named field
               else if regexp "[0-9]+-" "$col"; then         #look for <num><dash>, eg, 2-, print all prior fields
                      # A negative column is rewritten as an awk expression relative to NF.
                      if test $col -lt 0;then if test $col -eq -1;then col=''; else col=$col+1;fi; col=NF$col;fi #as below, copy to func
                   else echo -en "$col$space"; continue;          #is just text in spec copy to output
            else #is numeric col(s)
               if [[ $col = *+ ]];then
               if test $col -lt 0;then
                  if test $col -eq -1;then
                  else col=$col+1;fi;
            fi #endif isNaN
            field $flag;
            let i=i+1
            # Emit the joiner unless $i equals the argument count or andon is set.
            if test $# -ne $i && test -z $andon;then echo -en "$space";fi 2>/dev/null;
            unset andon upto
         done #end of: for col in [email protected]
         #log "main(), end for col loop, wchar0:$wchar0 "`wchar`
         echo 2>/dev/null # newline, also in case of broken pipe.
      done #end of: while read line

# field: print the field selected for the current input line.
# Arguments: forwarded unchanged to field2 (optionally a quote-delimiter flag).
# Outputs:   whatever field2 produced, without a trailing newline; nothing at
#            all when field2 produced an empty result.
field() {
   #log Call field2 with "$@"
   # Declare first, assign second, so `local` does not mask field2's status.
   local res
   res=$(field2 "$@")
   if test "$res"; then echo -en "$res"; fi
}

# field2: print one field, or a run of fields, of the current input line.
# Globals (read only):
#   line  - the record being processed
#   col   - awk field expression such as 2 or NF-1; NF is used when empty
#   fs    - field separator, or the field name when $1 is given
#   andon - when non-empty, print a run of fields instead of a single one
#   upto  - with andon: when non-empty the run is 1..col, otherwise col..NF
#   space - joiner appended after every field of a run
# Arguments:
#   $1 - optional awk separator regex made of quote characters; when present,
#        fs is treated as a field name and the value following it is printed
# Outputs: the selected text on stdout, without a trailing newline.
field2() { #print the particular field , uses globals: andon, col, line
   #log "field():$fs.  col:$col.  arg1:$1. line:$line. andon:$andon."
   local column=${col:-NF}
   local after
   local quoted_re="^ *[\"']"   # value begins with optional spaces, then a quote
   printf '%s\n' "$line" |
      if test -z "$andon"; then
         if test -z "$fs"; then
            # $column is an awk expression, so it is spliced into the program
            # text rather than passed with -v.
            awk '{ printf "%s", $('"$column"') }'
         elif test -z "$1"; then
            awk -F"$fs" '{ printf "%s", $('"$column"') }'
         else                                     #flag for named field $1
            after=$(awk -F"$fs" '{ printf "%s", $2 }')  # >1 field match in? print $3, $4?
            if regexp "$quoted_re" "$after"; then
               # Quoted value: take what sits between the first pair of quotes.
               printf '%s' "$after" | awk -F"$1" '{ printf "%s", $2 }'
            else
               # Bare value: drop one leading space, keep up to the next space.
               after=${after## }
               printf '%s' "${after%% *}"
            fi
         fi
      else  # if $andon is not empty, !-z$andon,
         #log "field(): Do cut with fs:$fs,col:$col,upto:$upto."
         tr '\t' ' ' |
            if test -z "$upto"; then
               awk -v space="$space" -F"$fs" '{ for (i = ('"$column"'); i <= NF; i++) printf "%s%s", $i, space }'
            else
               awk -v space="$space" -F"$fs" '{ for (i = 1; i <= ('"$column"'); i++) printf "%s%s", $i, space }'
            fi |
            tr -d "\n"
      fi
   #log "field() end."
}

# regexp: test whether a text matches an extended regular expression.
# Arguments: $1  - pattern in grep -E syntax (may start with a dash)
#            $2… - text to search; several arguments are joined with spaces
# Returns:   0 when the pattern is found, non-zero otherwise. Prints nothing.
regexp() { #see if $1 as grep regex is in $2
   local rexp=$1
   if test "$#" -gt 0; then shift; fi
   # printf instead of echo so a text such as "-n" is not taken as an option;
   # -e protects patterns that begin with a dash.
   printf '%s\n' "$*" | grep -E -q -e "$rexp"
}

# isNaN: succeed when the arguments do NOT form an integer field number.
# A number is an optional leading '-', digits, and an optional trailing '+'.
# Arguments: $* - the candidate text
# Returns:   0 when the text is not a number; 1 when it is a number or empty.
isNaN() {
   local -r num_re='^-?[0-9]+\+?$'
   if test -z "$*";then return 1;fi
   # Matched with the shell's own regex engine, so no process is forked.
   if [[ "$*" =~ $num_re ]];then return 1;else return 0;fi  # eg, -123+ is a number but not 12.3 nor a123, nor 123a
}

# isNum: succeed when the arguments form a number as judged by isNaN.
# Arguments: $* - the candidate text
# Returns:   0 when the text is non-empty and isNaN rejects it; 1 otherwise.
isNum() {
   if test -z "$*";then return 1;fi
   ! isNaN "$*"
}

# wchar: print the numeric value of the "wchar" line of /proc/$$/io, i.e. the
# I/O counter of the shell running this script.
# Outputs: the digits of that counter on stdout.
wchar() {
   grep wchar "/proc/$$/io" | grep -o '[0-9]*'
}

# msg: usage/help text, printed with `echo -e` (so \n and \t are expanded).
# The closing double quote was missing, which left the string unterminated.
msg="\nUsage: fieldz [-d field-delimiter] [-r record-separator] [ -z ] [[-]column-number[+]|-fieldname|text]...[filename]
   \nPrint given field column(s) from input.  Default is last field.  \
   \nUse a minus number to print counting from the last field.  \
   \nFirst field is numbered from 1 not from zero.  Zero refers to all fields.\
   \nUse -<fieldname> to print the value of a named field.
   \nOrdinary text is just copied to output.
   \nUse a plus sign, '+', after a field number to print all following fields.
   \nUse a minus sign, '-', after a field number to print all prior fields.
   \n-d <delimiter> will use given delimiter instead of spaces to split input fields
   \n-r <separator> will use given separator instead of newlines to split input records (newlines are swapped for spaces)
   \n-z output is not padded with space, eg, fieldz 1 2 3 will output three fields joined together without spaces.
   \n\neg,\t\$ echo yes we can | fieldz 2 3 3 1
   \n\t==> we can can yes
   \n\t\$ echo b=\"item1\" bcost=\"99\"   | fieldz 'It costs: ' -bcost=
   \n\t==> It costs: 99
   \n\t\$ fieldz -d: userid: 1  shell: -1 /etc/passwd \
   \n\t==> userid: guest shell:  /bin/sh
   \n\t$ find . -printf %T+' '  -print |sort|fieldz 2+ | tr '\\\n' ' ';echo
   \n\t--gives a single line output with the names of all files sorted by modified time, oldest first.
   \n\t$ cat bookmarks.xml | fieldz -r \<bookmark -d url=\" 2 | fieldz -d\" 1
   \n\t--extracts the url attribute value from an xml file
   \n\t$ echo -e \"abc def ghi das\"|fieldz -2-
   \n\t==> abc def ghi
   \n\t--prints all but the last field."

# debugfile=/tmp/fieldz.log
# exec > >(tee -a $debugfile)
# exec 2>&1
# rm -f $debugfile

# log: write a debug message without touching stdout, which carries the
# filtered data. The message goes to the controlling terminal when one can be
# opened, otherwise to stderr.
# Arguments: $* - message words, joined with spaces
# (To log to a file instead, redirect the printf to $debugfile.)
log() {
   local text="{./fieldz debug, $*}"
   if { : > /dev/tty; } 2>/dev/null; then
      printf '%s\n' "$text" > /dev/tty
   else
      printf '%s\n' "$text" >&2
   fi
}
# trim: copy stdin to stdout with leading and trailing blanks removed from
# every line (a blank is a space or a tab, [ \x09]).
trim() {
   local -r strip_edges='s/^[[:blank:]]*//;s/[[:blank:]]*$//'
   sed "$strip_edges"
}

# Script entry point: if the last argument names an existing file (and is not
# a field number), read from that file; otherwise read from stdin.
lastarg=
for lastarg; do :;done;    #trick to get last arg, goes through $1, $2, to last one.
#echo Got lastarg $lastarg
if test -f "$lastarg" && isNaN "$lastarg"; then  #last arg is a file!
   set -- "${@:1:$#-1}"   #as if, shift -1; rm's lastarg reset $1 $2 ... w/o last one.
else
   # Not a file: clear it so the redirection below falls back to /dev/stdin.
   unset lastarg
fi
main "$@" < "${lastarg:-/dev/stdin}" | sed '/^$/d'| trim   #also rm empty lines & trim leading & trailing space

Display posts from previous:  Sort by  
Post new topic Reply to topic  [ 7 posts ] 

All times are UTC - 6 hours

Who is online

Users browsing this forum: No registered users and 10 guests

You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot post attachments in this forum

Jump to:  

BashScripts | Promote Your Page Too
Powered by phpBB © 2011 phpBB Group