Awk

Overview

One-liners

awk  -F":" '{print $2}' file.txt               # Print selected fields (at a fixed position). Split up the lines of the file file.txt with ":" (colon) separated fields and print the second field ($2) of each line
awk  -F":" '{if ($2 != "") print $2}' file.txt # Same as above, but print only when the second field ($2) exists and is not empty. The field is compared against "" instead of being tested as a bare value, so a field that contains 0 is still printed
awk -F: '{ print $1 "-" $4 "-" $6 }' file.txt  # Print selected fields from each line separated by a dash
awk -F: '{ print $NF }' file.txt               # Print the last field in each line
awk '{ $2 = ""; print }' file.txt              # Print every line and delete the second field
ip route get 1.1.1.1 | awk '{for(i=1;i<=NF;i++){if($i=="src") print $(i+1)}}' # Print the field that follows the field equal to the string "src". This prints the computer's own IP address used for communication with the public IP 1.1.1.1 (for example 10.8.0.3).
awk  -F":" '/some regexp/{print $2}' file.txt                # Print field number two ($2) only on lines matching "some regexp" (field separator is ":")
awk  '/regexp a/{print};/regexp b/{printf "%s", $0}' file.txt # Print lines matching "regexp a" and lines matching "regexp b", but the latter are printed without a trailing newline (printf adds none). The line is passed as an argument to the "%s" format instead of being used as the format itself, so % characters in the input are printed literally
awk  -F":" '!/some regexp/{print $2}' file.txt               # Print field number two ($2) only on lines not matching "some regexp" (field separator is ":")
awk  -F":" '/some regexp/{next;}{print $2}' file.txt         # Same result as the previous command, written with "next": matching lines are skipped and every other line prints field number two ($2) (field separator is ":")
awk  -F":" '/some regexp/{print $2;next}{print $3}' file.txt # Print field number two ($2) only on lines matching "some regexp" otherwise print field number three ($3) (field separator is ":"). The "next" command causes awk to continue with the next line and execute "{print $3}" only for non matching lines. This is like
/regexp/{...if..regexp..matches...;next}{...else...}
awk '$2 ~ /regexp/{print;}' file.txt                         # Print lines where field number two matches regexp (apply regexp only to field 2, not the whole line)
ps aux | awk '$8 ~ /R/{print;}NR==1{print}'                  # Here is an example parsing the linux "ps aux" command. It has in the eighth column the process state. To print all processes that are in running or runnable state you would look for the letter "R" in that 8-th column. You want as well to print line 1 of the ps command printout since it contains the column header
awk '/regexp/{i=2;next;}{if(i){i--; print;}}' file.txt       # Print the next two (i=2) lines after the line matching regexp
awk '/regexp/{i=2+1;}{if(i){i--; print;}}' file.txt          # Print the line and the next two (i=2) lines after the line matching regexp
awk '/start/,/stop/' file.txt                                # AWK ranges: Print the lines from a file starting at the line matching "start" until the line matching "stop". Note: make sure that the stop pattern does not match the start line otherwise only that line will be printed.
awk '/start/,0' file.txt                                     # AWK ranges: Print the lines starting at the line matching "start" until the end of the file. The end pattern is the constant 0, which is never true, so once the range has started it stays open.
awk '/prompt.*ls/{s=1;print;next};/prompt/{s=0};s==1{print}' log.txt         # Change "prompt" to whatever string appears in your terminal prompt, e.g the hostname.
awk '/prompt.*ls/{s=1;print;next};s==0{next};{print};/prompt/{s=0};' log.txt # If you want to include the last prompt where to stop printing then try this. Change "prompt" to whatever string appears in your terminal prompt, e.g the hostname.
awk '!/regexp/{print $1 " " $2 }' file.txt                 # Print fields 1 and 2 from all lines not matching regexp
awk '/regexp1/&&!/regexp2/{print $1 " " $2 }' file.txt     # Print fields 1 and 2 from lines matching regexp1 and not matching regexp2

Regexp syntax:

  • c matches the non-metacharacter c.
  • \c matches the literal character c.
  • . matches any character including newline.
  • ^ matches the beginning of a string (example: ^1 , only lines starting with a one)
  • $ matches the end of a string (example: end$ , only lines ending in “end”)
  • [abc...] character list, matches any of the characters abc….
  • [0-9a-zA-Z] range of characters 0-9 and a-z,A-Z
  • [^abc...] negated character list, matches any character except abc….
  • r1|r2 alternation: matches either r1 or r2.
  • r1r2 concatenation: matches r1, and then r2.
  • r+ matches one or more r’s.
  • r* matches zero or more r’s.
  • r? matches zero or one r’s.
  • (r) grouping: matches r.

Insert a String After the Matching Line

awk '/regexp/{print $0; print "text inserted after matching line";next}{print}' file.txt   # This inserts a new line after the matching line. `$0` is the line where the search pattern "regexp" matches without the newline at the end. The awk print command prints the string and appends a new line.
awk '/regexp/{print $0 "text appended at end of the matching line";next}{print}' file.txt  # This appends a string to the matching line

Conditionals

awk '/regexp/{A-here;next}{B-here}' file.txt                                # If matching "do A" else "do B" (if .. then .. else in awk)
awk '/regexp/{gsub(/string/,"replacement");print $1;next}{print;}' file.txt # The example would print lines that do not match unchanged (action B is just "print;") while on lines that match /regexp/ it would replace /string/ by replacement and print the first element ($1).
awk '/regexpA/{A-do-here;}/regexpB/{B-do-here}' file.txt                    # If matching "A do..." OR if matching "B do.." (if .. then, if .. then, ...., in awk)
awk '/house/{print $1;}/cat/{print;}' file.txt

Replacement for Some Common Unix Commands

Useful in a non unix environment

awk 'END{print NR}'   # Count lines (wc -l)
awk '/regexp/'        # Search for matching lines (egrep regexp)
awk '!/regexp/'       # Print non matching lines (egrep -v regexp)
awk '/regexp/{print FNR,$0}' # Print matching lines with numbers (egrep -n regexp)
awk 'BEGIN {IGNORECASE=1};/regexp/' # Print matching lines and ignore case (egrep -i regexp)
awk '{print FNR "\t" $0}' # Number lines (cat -n)
awk 'NR == 1 || ($0 "") != prev { print } { prev = $0 }' # Remove duplicate consecutive lines (uniq). Lines are compared as plain strings (the "" concatenation forces a string comparison), not as regular expressions, so lines containing regexp metacharacters, lines that are substrings of the previous line and an empty first line are handled correctly
awk 'NR < 6' # Print first 5 lines of file (head -5)

Operations with Lines

awk '/^..*$/{ print FNR ":" $0 ;next}{print}' file.txt # Number non empty lines: prints all lines and adds a line number to non empty lines
awk '/^[ \t]*$/{next}{print}' file.txt                 # Remove empty lines: prints all lines except empty ones and lines with only space and tab:
awk 'length($0)>80{print FNR,$0}'  file.txt            # Number lines longer than 80 char and show them. This is useful to find all the lines longer than 80 characters (or any other length)
awk 'length < 80' file.txt                             # Print only lines of less than 80 characters
awk '/regexp/{gsub(/foo/, "bar")};{print}' file.txt    # Replace every foo with bar on lines matching regexp; all lines are printed
awk '{sub(/[ \t]*$/, "");print}' file.txt              # Delete trailing white space (spaces, tabs)
awk '{sub(/^[ \t]+/, ""); print}' file.txt             # Delete leading white space
awk '/regexp/{sub(/^/, "++++"); print;next;}{print}' file.txt # Add some characters at the beginning of matching lines. Add ++++ at lines matching regexp.
gcc -Wall main.c |& awk '/: warning:/{print "\x1B[01;31m" $0 "\x1B[m";next;}{print}' # Color gcc warnings in red. The `\x1B` means the ascii character with hex number 1B (ESC).

Renaming files with AWK

ls *.MP3 | awk '{ printf("mv \"%s\" \"%s\"\n", $0, tolower($0)) }'      # Rename all .MP3 file to be lower case
ls *.MP3 | awk '{ printf("mv \"%s\" \"%s\"\n", $0, tolower($0)) }' | sh # The above will just print what would happen. To actually execute it you run

Substitute a regexp pattern with a given replacement string. We can e.g replace “ “ (spaces in the file names) by “-“:

ls | awk '{ printf("mv \"%s\" \"%s\"\n", $0, gensub(/ +/,"-","g")) }'      # Dryrun: prints what would happen.
ls | awk '{ printf("mv \"%s\" \"%s\"\n", $0, gensub(/ +/,"-","g")) }' | sh #  Actually executes the substitution.

The gensub function reads the strings from $0 (=current line) and returns the modified string. The third argument, the “g”, means to find and replace everywhere (globally) on the current line.

AWK as a command-line calculator

awk 'BEGIN{print 3.1+4/2}'                 # This prints 5.1
awk 'BEGIN{print sqrt(2)}'                 # This prints 1.41421
awk 'BEGIN{print 2^(1/2)}'                 # This prints 1.41421
awk 'BEGIN{printf "%.15f\n",4*atan2(1,1)}' # This prints 3.141592653589793 (PI with 15 digits after the decimal point)
awk 'BEGIN{printf "0x%x\n", 32}'           # Print decimal number as hex (this prints 0x20)
awk 'BEGIN{print strtonum("0x20")}'        # Convert hex string to decimal (this prints 32). The value is passed as a string so that strtonum() does the conversion; a bare 0x20 in the program text relies on gawk accepting hexadecimal source constants

Math operators in gnu awk:

  • + - * /
  • ^ or **: Exponentiation
  • %: Modulo
  • exp(), log(): Exponential function and natural logarithm
  • atan2(y, x), sin(), cos(): all work in radians
  • sqrt() same as **(1/2): Square root
  • strtonum(): Convert hex (start with 0x) and octal (start with 0) to decimal

If you want to use this frequently then you could put this into your .bashrc file:

# Add the awc function to .bashrc
# Usage: awc "3.4+2+8+99.2"   (quote the expression so the shell passes it through untouched)
# All arguments are joined into one expression that awk evaluates and prints.
awc() {
  local expression="$*"
  awk "BEGIN{ print ${expression} }"
}

On the shell you can then type awc "3.4+2+8+99.2" and it will print 112.6.

Awk Program

BEGIN          {<initializations>} 
   <pattern 1> {<program actions>} 
   <pattern 2> {<program actions>} 
   ...
END            {< final actions >}

Example:

awk '
    BEGIN { print "\n>>>Start" }
    !/(login|shutdown)/ { print NR, $0 }
    END { print "<<<END\n" }
' /etc/passwd

Variables

awk -F: '{print $1,$NF}' /etc/passwd     # Prints first and last field
awk -F: '{print NR, $0}' /etc/passwd     # Prints with line number
awk -F: '{print $(NF-1)}' /etc/passwd    # Prints second last field
awk -F: '{print $1 "=" $6}' /etc/passwd  # Prints first and sixth fields and custom string = in between

Conditions & Loops

awk -F: '{if ($3>30) print $1}' /etc/passwd  # Print the first field (user name) of every line whose third field (the UID) is greater than 30. -F: is required because the fields of /etc/passwd are separated by colons
awk 'BEGIN{
    while (a++ < 1000)
        s=s " ";
    print s
}'

Arrays

awk 'BEGIN {
   fruits["mango"] = "yellow";
   fruits["orange"] = "orange"
   print fruits["orange"] 
   print fruits["mango"]
}'

Functions

# => 5
awk 'BEGIN{print length("hello")}'
# => HELLO
awk 'BEGIN{print toupper("hello")}'
# => hel
awk 'BEGIN{print substr("hello", 1, 3)}'

Variables

Built-in Variables

$0           # Whole line                    
$1, $2...$NF # First, second… last field     
NR           # `N`umber of `R`ecords read so far
NF           # `N`umber of `F`ields in the current record
OFS          # `O`utput `F`ield `S`eparator (default " ")
FS           # Input `F`ield `S`eparator (default " ")
ORS          # `O`utput `R`ecord `S`eparator (default "\n")
RS           # Input `R`ecord `S`eparator (default "\n")
FILENAME     # Name of the file  

Expressions

$1 == "root"      # First field equals root 
{print $(NF-1)}   # Second last field
NR!=1{print $0}   # Every record except the first (from the 2nd record on)
NR > 3            # From 4th record  
NR == 1           # First record     
END{print NR}     # Total records    
BEGIN{print OFMT} # Output format    
{print NR, $0}    # Line number      
{print NR "	" $0} # Line number (tab)
{$1 = NR; print}  # Replace 1st field with line number
$NF > 4           # Last field > 4      
NR % 2 == 0       # Even records        
NR==10, NR==20    # Records 10 to 20    
BEGIN{print ARGC} # Total arguments     
ORS=NR%5?",":"\n" # Join records with commas, ending the output line after every 5th record

Examples

Print sum and average

awk -F: '{sum += $3}
     END { print sum, sum/NR }
' /etc/passwd

Printing parameters

awk 'BEGIN {
    for (i = 1; i < ARGC; i++)
        print ARGV[i] }' a b c

Output field separator as a comma

awk 'BEGIN { FS=":";OFS=","}
    {print $1,$2,$3,$4}' /etc/passwd

Position of match

awk 'BEGIN {
    if (match("One Two Three", "Tw"))
        print RSTART }'

Length of match

awk 'BEGIN {
    if (match("One Two Three", "re"))
        print RLENGTH }'

More Built-in Variables

ARGC    # Number of arguments
ARGV    # Array of arguments
FNR     # Record number within the current file (`F`ile `N`umber of `R`ecords)
OFMT    # Output format for numbers (default "%.6g")
RSTART  # Start position of the last match() in the string
RLENGTH # Length of the last match()
SUBSEP  # Multi-dimensional array subscript separator (default "\034")
ARGIND  # Index in ARGV of the file being processed (gawk only)

GNU awk only

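ENVIRON     # Array of environment variables (also available in POSIX awk)
IGNORECASE  # Ignore case when set to a non-zero value
CONVFMT     # Number-to-string conversion format (also available in POSIX awk)
ERRNO       # System error message
FIELDWIDTHS # Fixed width fields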

Defining Variable

awk -v var1="Hello" -v var2="Wold" '
    END {print var1, var2}
' </dev/null

Use Shell Variables

awk -v varName="$PWD" '
    END {print varName}' </dev/null

Operators

Operators

| `{print $1}`     | First field |
| `$2 == "foo"`    | Equals      |
| `$2 != "foo"`    | Not equals  |
| `"foo" in array` | In array    |

Regular Expression

| `/regex/`       | Line matches        |
| `!/regex/`      | Line does not match |
| `$1 ~ /regex/`  | Field matches       |
| `$1 !~ /regex/` | Field does not match |

More conditions

| `($2 <= 4 || $3 < 20)` | Or  |
| `($1 == 4 && $3 < 20)` | And |

Operations

Arithmetic Operations

+
-  
*  
/  
%  
++ 
-- 

Shorthand Assignments

+= 
-= 
*= 
/= 
%= 

Comparison Operators

==
!=
<
>
<=
>=

Examples

Match

awk 'BEGIN {
    if ("foo" ~ "^fo+$")
        print "Fooey!";
}'

Not Match

awk 'BEGIN {
    if ("boo" !~ "^fo+$")
        print "Boo!";
}'

If in Array

awk 'BEGIN {
    assoc["foo"] = "bar";
    assoc["bar"] = "baz";
    if ("foo" in assoc)
        print "Fooey!";
}'

Functions

Common Functions

index(s,t)          # Position in string s where string t occurs, 0 if not found  
length(s)           # Length of string s (or $0 if no arg)                        
rand()              # Random number n with 0 <= n < 1
substr(s,index,len) # Return len-char substring of s that begins at index (counted from 1)
srand([x])          # Set seed for rand() to x (time of day if omitted) and return previous seed
int(x)              # Truncate x to integer value                                   
split(s,a,fs)       # Split string s into array a split by fs, returning length of a
match(s,r)          # Position in string s where regex r occurs, or 0 if not found  
sub(r,t,s)          # Substitute t for first occurrence of regex r in string s (or $0 if s not given)
gsub(r,t,s)         # Substitute t for all occurrences of regex r in string s
system(cmd)         # Execute cmd and return exit status                     
tolower(s)          # String s to lowercase                                  
toupper(s)          # String s to uppercase                                  
getline             # Set $0 to next input record from current input file.   

User Defined Function

awk '
    # Smaller of the two numbers
    function find_min(num1, num2) {
        return (num1 < num2) ? num1 : num2
    }
    # Larger of the two numbers
    function find_max(num1, num2) {
        return (num1 > num2) ? num1 : num2
    }
    # Report the minimum and the maximum of the pair
    function main(num1, num2) {
        result = find_min(num1, num2)
        print "Minimum =", result

        result = find_max(num1, num2)
        print "Maximum =", result
    }
    # Execution starts in the BEGIN rule, before any input is read
    BEGIN {
        main(10, 60)
    }
'

Arrays

Array with Index

awk 'BEGIN {
    arr[0] = "foo";
    arr[1] = "bar";
    print(arr[0]); # => foo
    delete arr[0];
    print(arr[0]); # => ""
}'

Array with Key

awk 'BEGIN {
    assoc["foo"] = "bar";
    assoc["bar"] = "baz";
    print("baz" in assoc); # => 0
    print("foo" in assoc); # => 1
}'

Array with Split

awk 'BEGIN {
    split("foo:bar:baz", arr, ":");
    for (key in arr)
        print arr[key];
}'

Array with Asort

awk 'BEGIN {
    arr[0] = 3
    arr[1] = 2
    arr[2] = 4
    n = asort(arr)
    for (i = 1; i <= n ; i++)
        print(arr[i])
}'

Multi-dimensional

awk 'BEGIN {
    multidim[0,0] = "foo";
    multidim[0,1] = "bar";
    multidim[1,0] = "baz";
    multidim[1,1] = "boo";
}'

Multi-Dimensional Iteration

awk 'BEGIN {
    array[1,2]=3;
    array[2,3]=5;
    for (comb in array) {
        split(comb,sep,SUBSEP);
        print sep[1], sep[2], 
        array[sep[1],sep[2]]
    }
}'

Conditions

if-else Statement

awk -v count=2 'BEGIN {
    if (count == 1)
        print "Yes";
    else
        print "Huh?";
}'

Ternary operator

awk -v count=2 'BEGIN {
    print (count==1) ? "Yes" : "Huh?";
}'

Examples

Exists:

awk 'BEGIN {
    assoc["foo"] = "bar";
    assoc["bar"] = "baz";
    if ("foo" in assoc)
        print "Fooey!";
}'

Not exists:

awk 'BEGIN {
    assoc["foo"] = "bar";
    assoc["bar"] = "baz";
    if ("Huh" in assoc == 0 )
        print "Huh!";
}'

switch

awk -F: '{
    switch (NR * 2 + 1) {
        case 3:
        case "11":
            print NR - 1
            break
        
        case /2[[:digit:]]+/:
            print NR
        
        default:
            print NR + 1
        
        case -1:
            print NR * -1
    }
}' /etc/passwd

Loops

for…i

awk 'BEGIN {
    for (i = 0; i < 10; i++)
        print "i=" i;
}'

for…in

awk 'BEGIN {
    assoc["key1"] = "val1"
    assoc["key2"] = "val2"
    for (key in assoc)
        print assoc[key];
}'

Arguments

awk 'BEGIN {
    for (argnum in ARGV)
        print ARGV[argnum];
}' a b c

while

awk 'BEGIN {
    while (a < 10) {
        print "- " " concatenation: " a
        a++;
    }
}'

do…while

awk '{
    i = 1
    do {
        print $0
        i++
    } while (i <= 5)
}' /etc/passwd

Break

awk 'BEGIN {
    break_num = 5
    for (i = 0; i < 10; i++) {
        print i
        if (i == break_num)
            break
    }
}'

Continue

awk 'BEGIN {
    for (x = 0; x <= 10; x++) {
        if (x == 5 || x == 6)
            continue
        printf "%d ", x
    }
    print ""
}'

Examples

Powers of two between 1 and 100

awk 'BEGIN {
    for (i = 1; i <= 100; i *= 2)
        print i
}'

Reverse records

awk -F: '{ x[NR] = $0 }
    END {
        for (i = NR; i > 0; i--)
        print x[i]
    }
' /etc/passwd

Reverse fields

awk -F: '{
    for (i = NF; i > 0; i--)
        printf("%s ",$i);
    print ""
}' /etc/passwd

Sum by Record

awk -F: '{
    s=0;
    for (i = 1; i <= NF; i++)
        s += $i;
    print s
}' /etc/passwd

Sum whole file

awk -F: '
    {for (i = 1; i <= NF; i++)
        s += $i;
    };
    END{print s}
' /etc/passwd

Formatted Printing

Usage

Right align

awk 'BEGIN{printf "|%10s|\n", "hello"}'

|     hello|

Left align

awk 'BEGIN{printf "|%-10s|\n", "hello"}'

|hello     |

Common Specifiers

Character Description
c ASCII character
d Decimal integer
e, E, f Floating-point format
o Unsigned octal value
s String
% Literal %

Space

awk -F: '{
    printf "%-10s %s\n", $1, $(NF-1)
}' /etc/passwd | head -n 3

Outputs

root       /root
bin        /bin
daemon     /sbin
awk -F: 'BEGIN {
    printf "%-10s %s\n", "User", "Home"
    printf "%-10s %s\n", "----","----"}
    { printf "%-10s %s\n", $1, $(NF-1) }
' /etc/passwd | head -n 5

Outputs

User       Home
----       ----
root       /root
bin        /bin
daemon     /sbin

Miscellaneous

Regex Metacharacters

\
^
$
.
[
]
|
(
)
*
+
?

Escape Sequences

\b  # Backspace 
\f  # Form feed 
\n  # Newline (line feed)
\r  # Carriage return
\t  # Horizontal tab 
\v  # Vertical tab   

Run Script

$ cat demo.awk
#!/usr/bin/awk -f
BEGIN { x = 23 }
      { x += 2 }
END   { print x }
$ awk -f demo.awk /etc/passwd
69

Source

See Also

Back to top ↑

Updated: