Awk
Overview
One-liners
awk -F":" '{print $2}' file.txt # Print selected fields (at a fixed position). Split up the lines of the file file.txt with ":" (colon) separated fields and print the second field ($2) of each line
awk -F":" '{if ($2)print $2}' file.txt # Same as above but print only output if the second field ($2) exists and is not empty
awk -F: '{ print $1 "-" $4 "-" $6 }' file.txt # Print selected fields from each line separated by a dash
awk -F: '{ print $NF }' file.txt # Print the last field in each line
awk '{ $2 = ""; print }' file.txt # Print every line and delete the second field
Print Selected Fields (at a Variable Position)
ip route get 1.1.1.1 | awk '{for(i=1;i<=NF;i++){if($i=="src") print $(i+1)}}' # Print the content of the field that follows the field containing the string "src". This always prints the local IP address that this computer uses for communication with the public IP 1.1.1.1 (for example, it would return 10.8.0.3).
Print Matching Lines
awk -F":" '/some regexp/{print $2}' file.txt # Print field number two ($2) only on lines matching "some regexp" (field separator is ":")
awk '/regexp a/{print};/regexp b/{printf "%s", $0}' file.txt # Print lines matching "regexp a" and lines matching "regexp b" but the latter ones are printed without newline (note the printf). The explicit "%s" format keeps a "%" in the input line from being interpreted as a format specifier
awk -F":" '!/some regexp/{print $2}' file.txt # Print field number two ($2) only on lines not matching "some regexp" (field separator is ":")
awk -F":" '/some regexp/{next;}{print $2}' file.txt # Print field number two ($2) only on lines not matching "some regexp" (field separator is ":")
awk -F":" '/some regexp/{print $2;next}{print $3}' file.txt # Print field number two ($2) only on lines matching "some regexp", otherwise print field number three ($3) (field separator is ":"). The "next" command makes awk skip to the next line, so "{print $3}" is executed only for non-matching lines. This is like
/regexp/{...if..regexp..matches...;next}{...else...}
awk '$2 ~ /regexp/{print;}' file.txt # Print lines where field number two matches regexp (apply regexp only to field 2, not the whole line)
ps aux | awk '$8 ~ /R/{print;}NR==1{print}' # Here is an example parsing the linux "ps aux" command. It has in the eighth column the process state. To print all processes that are in running or runnable state you would look for the letter "R" in that 8-th column. You want as well to print line 1 of the ps command printout since it contains the column header
awk '/regexp/{i=2;next;}{if(i){i--; print;}}' file.txt # Print the next two (i=2) lines after the line matching regexp
awk '/regexp/{i=2+1;}{if(i){i--; print;}}' file.txt # Print the line and the next two (i=2) lines after the line matching regexp
awk '/start/,/stop/' file.txt # AWK ranges: Print the lines from a file starting at the line matching "start" until the line matching "stop". Note: make sure that the stop pattern does not match the start line otherwise only that line will be printed.
awk '/start/,0' file.txt # AWK ranges: Print the lines starting at the line matching "start" until the end of the file. Note: make sure that the stop pattern does not match the start line otherwise only that line will be printed.
awk '/prompt.*ls/{s=1;print;next};/prompt/{s=0};s==1{print}' log.txt # Change "prompt" to whatever string appears in your terminal prompt, e.g the hostname.
awk '/prompt.*ls/{s=1;print;next};s==0{next};{print};/prompt/{s=0};' log.txt # If you want to include the last prompt where to stop printing then try this. Change "prompt" to whatever string appears in your terminal prompt, e.g the hostname.
awk '!/regexp/{print $1 " " $2 }' file.txt # Print fields 1 and 2 from all lines not matching regexp
awk '/regexp1/&&!/regexp2/{print $1 " " $2 }' file.txt # Print fields 1 and 2 from lines matching regexp1 and not matching regexp2
Regexp syntax:
c : matches the non-metacharacter c.
\c : matches the literal character c.
. : matches any character including newline.
^ : matches the beginning of a string (example: ^1 , only lines starting with a one)
$ : matches the end of a string (example: end$ , only lines ending in “end”)
[abc...] : character list, matches any of the characters abc….
[0-9a-zA-Z] : range of characters 0-9 and a-z,A-Z
[^abc...] : negated character list, matches any character except abc….
r1|r2 : alternation: matches either r1 or r2.
r1r2 : concatenation: matches r1, and then r2.
r+ : matches one or more r’s.
r* : matches zero or more r’s.
r? : matches zero or one r’s.
(r) : grouping: matches r.
Insert a String After the Matching Line
awk '/regexp/{print $0; print "text inserted after matching line";next}{print}' file.txt # This inserts a new line after the matching line. `$0` is the line where the search pattern "regexp" matches without the newline at the end. The awk print command prints the string and appends a new line.
awk '/regexp/{print $0 "text appended at end of the matching line";next}{print}' file.txt # This appends a string to the matching line
Conditionals
awk '/regexp/{A-here;next}{B-here}' file.txt # If matching "do A" else "do B" (if .. then .. else in awk)
awk '/regexp/{gsub(/string/,"replacement");print $1;next}{print;}' file.txt # The example would print lines that do not match unchanged (action B is just "print;") while on lines that match /regexp/ it would replace /string/ by replacement and print the first element ($1).
awk '/regexpA/{A-do-here;}/regexpB/{B-do-here}' file.txt # If matching "A do..." OR if matching "B do.." (if .. then, if .. then, ...., in awk)
awk '/house/{print $1;}/cat/{print;}' file.txt
Replacement for Some Common Unix Commands
Useful in a non unix environment
awk 'END{print NR}' # Count lines (wc -l)
awk '/regexp/' # Search for matching lines (egrep regexp)
awk '!/regexp/' # Print non matching lines (egrep -v regexp)
awk '/regexp/{print FNR,$0}' # Print matching lines with numbers (egrep -n regexp)
awk 'BEGIN {IGNORECASE=1};/regexp/' # Print matching lines and ignore case (egrep -i regexp)
awk '{print FNR "\t" $0}' # Number lines (cat -n)
awk 'NR==1 || $0 != prev {print}; {prev = $0 ""}' # Remove duplicate consecutive lines (uniq). The first line is always printed; every later line is printed only if it differs from the previous one. Appending "" stores the previous line as a string, so the comparison is a plain string comparison (regexp metacharacters and numeric-looking lines such as "1" vs "1.0" are compared literally)
awk 'NR < 6' # Print first 5 lines of file (head -5)
Operations with Lines
awk '/^..*$/{ print FNR ":" $0 ;next}{print}' file.txt # Number non empty lines: prints all lines and adds a line number to non empty lines
awk '/^[ \t]*$/{next}{print}' file.txt # Remove empty lines: prints all lines except empty ones and lines with only space and tab:
awk 'length($0)>80{print FNR,$0}' file.txt # Number lines longer than 80 char and show them. This is useful to find all the lines longer than 80 characters (or any other length)
awk 'length < 80' file.txt # Print only lines of less than 80 characters
awk '/regexp/{gsub(/foo/, "bar")};{print}' file.txt # Substitute foo for bar on lines matching regexp
awk '{sub(/[ \t]*$/, "");print}' file.txt # Delete trailing white space (spaces, tabs)
awk '{sub(/^[ \t]+/, ""); print}' file.txt # Delete leading white space
awk '/regexp/{sub(/^/, "++++"); print;next;}{print}' file.txt # Add some characters at the beginning of matching lines. Add ++++ at lines matching regexp.
gcc -Wall main.c |& awk '/: warning:/{print "\x1B[01;31m" $0 "\x1B[m";next;}{print}' # Color gcc warnings in red. The `\x1B` means the ascii character with hex number 1B (ESC).
Renaming files with AWK
ls *.MP3 | awk '{ printf("mv \"%s\" \"%s\"\n", $0, tolower($0)) }' # Rename all .MP3 file to be lower case
ls *.MP3 | awk '{ printf("mv \"%s\" \"%s\"\n", $0, tolower($0)) }' | sh # The above will just print what would happen. To actually execute it you run
Substitute a regexp pattern with a given replacement string. We can e.g replace “ “ (spaces in the file names) by “-“:
ls | awk '{ printf("mv \"%s\" \"%s\"\n", $0, gensub(/ +/,"-","g")) }' # Dryrun: prints what would happen.
ls | awk '{ printf("mv \"%s\" \"%s\"\n", $0, gensub(/ +/,"-","g")) }' | sh # Actually executes the substitution.
The gensub function reads the strings from $0
(=current line) and returns the modified string. The third argument, the “g”, means to find and replace everywhere (globally) on the current line.
AWK as a command-line calculator
awk 'BEGIN{print 3.1+4/2}' # This prints 5.1
awk 'BEGIN{print sqrt(2)}' # This prints 1.41421
awk 'BEGIN{print 2^(1/2)}' # This prints 1.41421
awk 'BEGIN{printf "%.15f\n",4*atan2(1,1)}' # This prints 3.141592653589793 (PI with 15 digits after the decimal point)
awk 'BEGIN{printf "0x%x\n", 32}' # Print decimal number as hex (this prints 0x20)
awk 'BEGIN{print strtonum("0x20")}' # Convert hex string to decimal (this prints 32). strtonum() is a GNU awk extension; the argument is passed as a quoted string so that strtonum itself parses the "0x" prefix
Math operators in gnu awk:
+ - * / : Addition, subtraction, multiplication, division
^ or ** : Exponentiation
% : Modulo
exp(), log() : Exponential function and natural logarithm
atan2(y, x), sin(), cos() : all work in radians (fraction of PI)
sqrt() (same as **(1/2)) : Square root
strtonum() : Convert hex (start with 0x) and octal (start with 0) to decimal
If you want to use this frequently then you could put this into your .bashrc file:
# Command-line calculator: add the awc function to your .bashrc.
# Usage: awc "3.4+2+8+99.2"   (keep the quotes so the shell leaves the expression alone)
# All arguments are joined and pasted verbatim into the awk program,
# so only pass expressions you trust.
awc() {
  local expression="$*"
  awk "BEGIN{ print ${expression} }"
}
On the shell you can then type awc "3.4+2+8+99.2" and it will print 112.6.
Awk Program
BEGIN {<initializations>}
<pattern 1> {<program actions>}
<pattern 2> {<program actions>}
...
END {< final actions >}
Example:
awk '
BEGIN { print "\n>>>Start" }
!/(login|shutdown)/ { print NR, $0 }
END { print "<<<END\n" }
' /etc/passwd
Variables
awk -F: '{print $1,$NF}' /etc/passwd # Prints first and last field
awk -F: '{print NR, $0}' /etc/passwd # Prints with line number
awk -F: '{print $(NF-1)}' /etc/passwd # Prints second last field
awk -F: '{print $1 "=" $6}' /etc/passwd # Prints first and sixth fields and custom string = in between
Conditions & Loops
awk '{if ($3>30) print $1}' /etc/passwd
awk 'BEGIN{
while (a++ < 1000)
s=s " ";
print s
}'
Arrays
awk 'BEGIN {
fruits["mango"] = "yellow";
fruits["orange"] = "orange"
print fruits["orange"]
print fruits["mango"]
}'
Functions
# => 5
awk 'BEGIN{print length("hello")}'
# => HELLO
awk 'BEGIN{print toupper("hello")}'
# => hel
awk 'BEGIN{print substr("hello", 1, 3)}'
Variables
Built-in Variables
$0 # Whole line
$1, $2...$NF # First, second… last field
NR # Total `N`umber of `R`ecords
NF # `N`umber of `F`ields
OFS # `O`utput `F`ield `S`eparator <br> _(default " ")_
FS # input `F`ield `S`eparator <br> _(default " ")_
ORS # `O`utput `R`ecord `S`eparator <br> _(default "\n")_
RS # input `R`ecord `S`eparator <br> _(default "\n")_
FILENAME # Name of the file
Expressions
$1 == "root" # First field equals root
{print $(NF-1)} # Second last field
NR!=1{print $0} # From 2nd record
NR > 3 # From 4th record
NR == 1 # First record
END{print NR} # Total records
BEGIN{print OFMT} # Output format
{print NR, $0} # Line number
{print NR " " $0} # Line number (tab)
{$1 = NR; print} # Replace 1st field with line number
$NF > 4 # Last field > 4
NR % 2 == 0 # Even records
NR==10, NR==20 # Records 10 to 20
BEGIN{print ARGC} # Total arguments
ORS=NR%5?",":"\n" # Concatenate records
Examples
Print sum and average
awk -F: '{sum += $3}
END { print sum, sum/NR }
' /etc/passwd
Printing parameters
awk 'BEGIN {
for (i = 1; i < ARGC; i++)
print ARGV[i] }' a b c
Output field separator as a comma
awk 'BEGIN { FS=":";OFS=","}
{print $1,$2,$3,$4}' /etc/passwd
Position of match
awk 'BEGIN {
if (match("One Two Three", "Tw"))
print RSTART }'
Length of match
awk 'BEGIN {
if (match("One Two Three", "re"))
print RLENGTH }'
Environment Variables
ARGC # Number of arguments
ARGV # Array of arguments
FNR # `F`ile `N`umber of `R`ecords
OFMT # Format for numbers <br> _(default "%.6g")_
RSTART # Location in the string
RLENGTH # Length of match
SUBSEP # Multi-dimensional array separator <br> _(default "\034")_
ARGIND # Argument Index
GNU awk only
ENVIRON # Environment variables
IGNORECASE # Ignore case
CONVFMT # Conversion format
ERRNO # System errors
FIELDWIDTHS # Fixed width fields
Defining Variable
awk -v var1="Hello" -v var2="Wold" '
END {print var1, var2}
' </dev/null
Use Shell Variables
awk -v varName="$PWD" '
END {print varName}' </dev/null
Operators
Operators
| `{print $1}` | First field |
| `$2 == "foo"` | Equals |
| `$2 != "foo"` | Not equals |
| `"foo" in array` | In array |
Regular Expression
| `/regex/` | Line matches |
| `!/regex/` | Line not matches |
| `$1 ~ /regex/` | Field matches |
| `$1 !~ /regex/` | Field not matches |
More conditions
| `($2 <= 4 || $3 < 20)` | Or |
| `($1 == 4 && $3 < 20)` | And |
Operations
Arithmetic Operations
+
-
*
/
%
++
--
Shorthand Assignments
+=
-=
*=
/=
%=
Comparison Operators
==
!=
<
>
<=
>=
Examples
Match
awk 'BEGIN {
if ("foo" ~ "^fo+$")
print "Fooey!";
}'
Not Match
awk 'BEGIN {
if ("boo" !~ "^fo+$")
print "Boo!";
}'
If in Array
awk 'BEGIN {
assoc["foo"] = "bar";
assoc["bar"] = "baz";
if ("foo" in assoc)
print "Fooey!";
}'
Functions
Common Functions
index(s,t) # Position in string s where string t occurs, 0 if not found
length(s) # Length of string s (or $0 if no arg)
rand # Random number between 0 and 1
substr(s,index,len) # Return len-char substring of s that begins at index (counted from 1)
srand # Set seed for rand and return previous seed
int(x) # Truncate x to integer value
split(s,a,fs) # Split string s into array a split by fs, returning length of a
match(s,r) # Position in string s where regex r occurs, or 0 if not found
sub(r,t,s) # Substitute t for first occurrence of regex r in string s (or $0 if s not given)
gsub(r,t,s) # Substitute t for all occurrences of regex r in string s
system(cmd) # Execute cmd and return exit status
tolower(s) # String s to lowercase
toupper(s) # String s to uppercase
getline # Set $0 to next input record from current input file.
User Defined Function
awk '
# Returns the smaller of num1 and num2 (num2 when they are equal).
function find_min(num1, num2) {
    return (num1 < num2) ? num1 : num2
}
# Returns the larger of num1 and num2 (num2 when they are equal).
function find_max(num1, num2) {
    return (num1 > num2) ? num1 : num2
}
# Prints the minimum and the maximum of num1 and num2.
# "result" is listed as an extra parameter: this is the awk convention
# for a function-local variable, so it does not leak into the global
# namespace. Callers still pass only two arguments.
function main(num1, num2,    result) {
    result = find_min(num1, num2)
    print "Minimum =", result
    result = find_max(num1, num2)
    print "Maximum =", result
}
# Script execution starts here
BEGIN {
    main(10, 60)
}
'
Arrays
Array with Index
awk 'BEGIN {
arr[0] = "foo";
arr[1] = "bar";
print(arr[0]); # => foo
delete arr[0];
print(arr[0]); # => ""
}'
Array with Key
awk 'BEGIN {
assoc["foo"] = "bar";
assoc["bar"] = "baz";
print("baz" in assoc); # => 0
print("foo" in assoc); # => 1
}'
Array with Split
awk 'BEGIN {
split("foo:bar:baz", arr, ":");
for (key in arr)
print arr[key];
}'
Array with Asort
awk 'BEGIN {
arr[0] = 3
arr[1] = 2
arr[2] = 4
n = asort(arr)
for (i = 1; i <= n ; i++)
print(arr[i])
}'
Multi-dimensional
awk 'BEGIN {
multidim[0,0] = "foo";
multidim[0,1] = "bar";
multidim[1,0] = "baz";
multidim[1,1] = "boo";
}'
Multi-Dimensional Iteration
awk 'BEGIN {
array[1,2]=3;
array[2,3]=5;
for (comb in array) {
split(comb,sep,SUBSEP);
print sep[1], sep[2],
array[sep[1],sep[2]]
}
}'
Conditions
if-else Statement
awk -v count=2 'BEGIN {
if (count == 1)
print "Yes";
else
print "Huh?";
}'
Ternary operator
awk -v count=2 'BEGIN {
print (count==1) ? "Yes" : "Huh?";
}'
Examples
Exists:
awk 'BEGIN {
assoc["foo"] = "bar";
assoc["bar"] = "baz";
if ("foo" in assoc)
print "Fooey!";
}'
Not exists:
awk 'BEGIN {
    # Populate two keys; "Huh" is deliberately not one of them.
    assoc["bar"] = "baz"
    assoc["foo"] = "bar"
    # Negated membership test: true because the key is absent.
    if (!("Huh" in assoc)) {
        print "Huh!"
    }
}'
switch
awk -F: '{
switch (NR * 2 + 1) {
case 3:
case "11":
print NR - 1
break
case /2[[:digit:]]+/:
print NR
default:
print NR + 1
case -1:
print NR * -1
}
}' /etc/passwd
Loops
for…i
awk 'BEGIN {
for (i = 0; i < 10; i++)
print "i=" i;
}'
for…in
awk 'BEGIN {
assoc["key1"] = "val1"
assoc["key2"] = "val2"
for (key in assoc)
print assoc[key];
}'
Arguments
awk 'BEGIN {
for (argnum in ARGV)
print ARGV[argnum];
}' a b c
while
awk 'BEGIN {
    # Start the counter explicitly at 0: an unset awk variable compares
    # as 0 but concatenates as the empty string, so without this the
    # first line would be printed without a number.
    a = 0
    # Print the counter values 0 through 9, one per line.
    while (a < 10) {
        print "- " " concatenation: " a
        a++
    }
}'
do…while
awk '{
i = 1
do {
print $0
i++
} while (i <= 5)
}' /etc/passwd
Break
awk 'BEGIN {
break_num = 5
for (i = 0; i < 10; i++) {
print i
if (i == break_num)
break
}
}'
Continue
awk 'BEGIN {
for (x = 0; x <= 10; x++) {
if (x == 5 || x == 6)
continue
printf "%d ", x
}
print ""
}'
Examples
Powers of two between 1 and 100
awk 'BEGIN {
for (i = 1; i <= 100; i *= 2)
print i
}'
Reverse records
awk -F: '{ x[NR] = $0 }
END {
for (i = NR; i > 0; i--)
print x[i]
}
' /etc/passwd
Reverse fields
awk -F: '{
for (i = NF; i > 0; i--)
printf("%s ",$i);
print ""
}' /etc/passwd
Sum by Record
awk -F: '{
s=0;
for (i = 1; i <= NF; i++)
s += $i;
print s
}' /etc/passwd
Sum whole file
awk -F: '
{for (i = 1; i <= NF; i++)
s += $i;
};
END{print s}
' /etc/passwd
Formatted Printing
Usage
Right align
awk 'BEGIN{printf "|%10s|\n", "hello"}'
| hello|
Left align
awk 'BEGIN{printf "|%-10s|\n", "hello"}'
|hello |
Common Specifiers
Character | Description |
---|---|
c | ASCII character |
d | Decimal integer |
e , E , f | Floating-point format |
o | Unsigned octal value |
s | String |
% | Literal % |
Space
awk -F: '{
printf "%-10s %s\n", $1, $(NF-1)
}' /etc/passwd | head -n 3
Outputs
root /root
bin /bin
daemon /sbin
Header
awk -F: 'BEGIN {
printf "%-10s %s\n", "User", "Home"
printf "%-10s %s\n", "----","----"}
{ printf "%-10s %s\n", $1, $(NF-1) }
' /etc/passwd | head -n 5
Outputs
User Home
---- ----
root /root
bin /bin
daemon /sbin
Miscellaneous
Regex Metacharacters
\
^
$
.
[
]
|
(
)
*
+
?
Escape Sequences
\b # Backspace
\f # Form feed
\n # Newline (line feed)
\r # Carriage return
\t # Horizontal tab
\v # Vertical tab
Run Script
$ cat demo.awk
#!/usr/bin/awk -f
BEGIN { x = 23 }
{ x += 2 }
END { print x }
$ awk -f demo.awk /etc/passwd
69