#!/usr/bin/perl -w use strict; # OK. First of all. We know that this LOOKS ugly - but we promise -- it's not. # # like 'awk', but Joshy and Perly. # # Here are examples: # # cat file | jawk 1 3 4 # Means select out the 1st, 3rd, and 4th columns from file # # cat file | jawk 1..3 -2 -1 # Means select out the first through third, and the second to last, and last # # jawk 1..3 -2 -1 -- file # Means same as above. # Note use of -- to start list of files to read from @ARGV. # # jawk -x 1 2 -1 -- file # Means 'exclude' columns 1, 2, and -1 (the last). # # There is also a -exe='perlcode' mode where you access # the args via @F, and not via named positional args. Like so: # # cat file | jawk -e 'print "@F\n";' # # USES NO EXTERNAL MODULES (except Memoize)! (for speed, and # so we can run on machines with perl and no special modules installed). #use Memoize; # we're actually not even using memoize now. # # one test showed that using memoize() for # # convert_args_to_fields() sped up runtime by ~25% # my $prog = "jawk"; my $exclude; my $delimiter = ' '; my $joiner = ' '; my $newline = 0; # don't auto-add newlines my $awky = 0; # if we're awky then we use $1 $2 $3 and not @F my $debug = 0; my $warnings = 0; # do we show warnings for code run via -e ? # Jawk: like awk '{print $N}' . Use like 'ps -auxwww | grep ps | jawk 2' # NOTE: use -- option to pass files on the command line # # We _Don't_ use Getopt::Long, because we we're not sure how to # shoehorn our argument processing logic into it. # Specifically, we need to handle negative and positive numbers # (let's call them A and B) and various A..B-type ranges as options. # (e.g.: A, ..B, A.., and A..B) # # Note that allowing un-hyphened options which are not filenames is convenient # but probably breaks POSIX recommendations. # # Our closest 'role model' (other than awk) # is cut, which uses -f 'fields' to specify fields :/ ############################################ main(); ############################################ sub Usage { return "$prog [-x] [-e 'code'] [-d delim] fieldspec [fieldspec...] [-- (FILES..)]:\n" . " jawk 1 is somewhat like awk '{print \$1}'.\n" . " jawk also allows ranges with '..'. For example:\n" . " Fieldspec can be like A, A..B, A.., or ..B (A and B are + or - ints).\n" . " Negative values for A and B count backwards so -1 is the last field.\n" . " (NOTE: use -- or - FILENAME.txt to read from files\n" . " '--' is needed to treat FILENAME.txt as file and not fieldspec.)\n"; " -e 'perlcode' : more like awk but with \@F, and perl\n" . " -d delimiter\n" . " -D turns on DEBUG mode\n" . " -j joiner\n" . " -x means _don't_ show the numbered fields, and show the others.\n" . " -n means add a newline (when in -e mode).\n" . " -a is for old (deprecated) 'awky' mode with \$1 \$2 etc (from perl)\n" . " -w means 'use warnings' for perlcode run via -e\n"; } ############################################ sub main { #memoize( 'convert_args_to_fields' ); # this speeds up named-query-stats from 2.33 to 1.7 minutes (~25%). #memoize( 'invert_fields' ); # this doesn't work (as shown) because we pass fields by reference. my @args; my $exe = ""; # whatever perl code we should execute for each line # HANDLE Command Line Processing... Manually! # # shift items off @ARGV, processing as we go along, # putting fields like 1, 1.. or 1..2 etc into @args # # With all the code below, we've finally replicated most of Getopt::Long's -- option :). # Except Getopt::Long doesn't store away @args and @ARGV separately like we do. ARGVLOOP: while( defined($_ = shift @ARGV) ) { # manual handling of command-line options if (/^-x/) { $exclude = 1; } # -x option elsif (/^-n/) { $newline = 1; } # -n option elsif (/^-a/) { $awky = 1; } # -a is DEPRECATED # -a option, $1 instead of @F elsif (/^-D/) { $debug = 1; } # debug elsif (/^-w/) { $warnings = 1; } # turn on perl warnings -e code elsif (/^-e$/) { $exe .= shift( @ARGV ) || die "$prog: -e (exe) needs param\n"; } elsif (/^-e=(.*)$/) { $exe .= $1 || die "$prog: -e (exe) needs param\n"; } elsif (/^-d$/) { $delimiter = shift( @ARGV ) || die "$prog: -d (delim) needs param\n"; } elsif (/^-d=(.*)$/) { $delimiter = $1 || die "$prog: -d (delim) needs param\n"; } elsif (/^-j$/) { $joiner = shift( @ARGV ) || die "$prog: -j (joiner) needs param\n"; } elsif (/^-j=(.*)$/) { $joiner = $1 || die "$prog: -j (joiner) needs param\n"; } elsif (/^-\d+/) { push(@args, $_); } # negative digits elsif (/^--?$/) { last ARGVLOOP; } # stop processing at - or -- elsif (/^-/) { die "$prog: Option not understood: $_\n" . Usage(); } # other -options else { push(@args, $_); } # ok; non-hyphenated option like digit or .. } #print "$prog: args passed: @ARGV\n" if $verbose; warn "$prog: Doesn't make sense to use numbered fields and -e, fields ignored\n" if ($exe && @args); my $quote_meta_delimiter = defined($delimiter) ? quotemeta( $delimiter ) : ""; # read lines with the magical diamond operator. note use of '--' option, documented above. while( defined( my $line = <> ) ) { chomp($line); # split the line into parts my @parts; if ($delimiter eq ' ' ) { @parts = split( ' ', $line); } else { @parts = split(/$quote_meta_delimiter/, $line); # so you can split on chars like "(" #shift(@parts) while (@parts && $parts[0] =~ /^\s*$/); # should we strip leading blank fields? } if ($exe) { # if we have an exe from the command line, run it for each input line # if they passed a line to execute, then run it for each line we read my $exe_expanded = replace_exe_vars( $exe, \@parts ); # expand to perl script eval "$exe_expanded"; # string eval. warn "$prog: Error running: $exe_expanded: $@\n" if $@; print "\n" if $newline; # -n is "newline" mode. } else { # otherwise, pull out fields via numbered args. # convert the args (things from @ARGV that don't look like command-line options) # into fields. Must be done for each line, because we need the # number of elements. print STDERR "args are @args, parts are @parts\n" if $debug; my @fields = convert_args_to_fields( \@args, scalar(@parts) ); # if we're in -x mode, invert the fields to # figure out which are left after exclusions. if ($exclude) { @fields = invert_fields( \@fields, scalar(@parts) ); } # show the fields we want from @parts print (join($joiner, @parts[@fields]) . "\n"); } } exit(0); # done } ############################################ # my $exe_expanded = replace_exe_vars( $exe, \@fields ) # since we can't actually assign to $1, $2, $3, etc easily, # we manually parse out $\d+ and ${\d+} sequences from the exe string they pass # and pass back a string to be eval'ed :) # Apologies all around. sub replace_exe_vars { my ($exe, $fieldsref) = @_; my @fields = @$fieldsref; if ($awky) { # awky style, to be deprecated. Replace $1 $2 $3 etc #print "incoming exe: $exe; [@fields]\n"; #$exe =~ s/ \$ ([0-9]+) /my $c = $fields->[\$1-1];\\$c/geex; # $fields->[$1-1]/xg; while( $exe =~ m/ \$ ([0-9]+) /x) { my $field = $1; my $search = '\$' . $field; my $replace = ($field > 0 && $field <= @$fieldsref) ? $fieldsref->[$field-1] : ""; #print "$prog: Replacing field $field: $search with $replace\n"; $exe =~ s/ $search /$replace/xg; } } else { # non awky style, this is recommended # $exe holds the code to run on @F use vars qw( @F ); # make a global @F @F = @$fieldsref; # yes we 0-th element, because @F IS NORMAL PERL! my $tmpexe = "no strict; "; $tmpexe .= "no warnings; " unless $warnings; $tmpexe .= $exe; $exe = $tmpexe; } #print "outgoing exe: $exe\n"; return $exe; } ######################################################### # convert_args_to_fields( $args_ref, $numparts_in_line ) # args come in 1-based, and are returned 0-based # handles ranges like 1..3 or 3..1 # also handles negative args alone or in ranges, # like -2..1, or like -1..1 sub convert_args_to_fields { my ($args_ref, $numparts) = @_; return () unless $numparts; my @ret = @$args_ref; # convert the user's fields to field numbers within the fields # NOTE: ignores indexes outside existing fields print STDERR "$prog: 0: initially (@ret)\n" if $debug; # first, deal with negative indices by replacing them with their positive versions for (@ret) { while (/(-\d+)/) { # look for negative numbers. my $p = $1 + $numparts + 1; s/$1/$p/; # replace them with their positive versions, one at a time } } # parse the command line arguments for ints and ranges like a..b , a.. , and b.. . # --we've already replaced negative vals with their pos versions above # (This could be broken up into multiple steps for clarity) @ret = ( map { /^\d+$/ ? ($_) : # ** a single int /^(\d+)\.\.(\d+)$/ ? (get_range($1, $2)) : # ** an int range /^(\d+)\.\.$/ ? (get_range($1, max($1, $numparts))) : # ** an integer and up /^\.\.(\d+)$/ ? (get_range(1, $1)) : # ** up to an integer die "$prog: Don't know how to handle field '$_'\n" . Usage(); } @ret); print STDERR "$prog: 1: modified to (@ret)\n" if $debug; @ret = grep { $_ <= $numparts && $_ >= 1 } @ret; # match only indexes that we have a value for print STDERR "$prog: 2: modified to (@ret)\n" if $debug; @ret = map { $_ - 1 } @ret; # shift each int down by one; 1-based to 0-based print STDERR "$prog: 3: modified to (@ret)\n" if $debug; return @ret; } ############################################ # invert_fields( [ 1, 2], 3 );# ( [activated], num_fields ) # choose the opposite of whatever's currently selected from num_fields. # all indexes have been converted to non-negatives already, but are still 0-based. sub invert_fields { my ($fieldsref, $numparts) = @_; my %fieldshash; # for exclusion @fieldshash{ @$fieldsref } = (); # set @$fieldsref names as keys my @inversefields = grep { !exists($fieldshash{$_}) } ( 0 .. $numparts-1); # this is correct return @inversefields; # all the other fields } ############################################ # get_range( $a, $b ) # returns all the ints from $a to $b, inclusive # handles descending lists, unlike '..' :) sub get_range { my ($a, $b) = @_; if ( $a <= $b ) { return ($a..$b); } return (reverse($b..$a)); # they asked for a list in reverse. make it ascending, and reverse it. # or as Missy Elliott would say - flip it and reverse it. } ############################################ # max( $a, $b ) # returns the max of the two sub max { my ($a, $b) = @_; return $a if ($a >= $b); return $b; }