diff options
| author | Divay Prakash <divayprakash@users.noreply.github.com> | 2018-09-12 13:09:29 +0530 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-09-12 13:09:29 +0530 | 
| commit | 75ec4fe24a2e7b6ad770034ea279499624766f93 (patch) | |
| tree | 7dd7b3ff5a319cc0d7c05fa51869d4753d642996 /awk.html.markdown | |
| parent | 73a5e522f41bf39382fef2cc64ab205314da091b (diff) | |
| parent | c8284c9c959abf9662d03bc152af6c8a49a12e58 (diff) | |
Merge pull request #3215 from dtkerns/master
[English/en] Update awk.html.markdown
Diffstat (limited to 'awk.html.markdown')
| -rw-r--r-- | awk.html.markdown | 244 | 
1 files changed, 134 insertions, 110 deletions
| diff --git a/awk.html.markdown b/awk.html.markdown index de26c0a1..3d2c4ccb 100644 --- a/awk.html.markdown +++ b/awk.html.markdown @@ -6,14 +6,15 @@ contributors:  --- -AWK is a standard tool on every POSIX-compliant UNIX system. It's like a -stripped-down Perl, perfect for text-processing tasks and other scripting -needs. It has a C-like syntax, but without semicolons, manual memory -management, or static typing. It excels at text processing. You can call to it -from a shell script, or you can use it as a stand-alone scripting language. - -Why use AWK instead of Perl? Mostly because AWK is part of UNIX. You can always -count on it, whereas Perl's future is in question. AWK is also easier to read +AWK is a standard tool on every POSIX-compliant UNIX system. It's like +flex/lex, from the command-line, perfect for text-processing tasks and +other scripting needs. It has a C-like syntax, but without mandatory +semicolons (although, you should use them anyway, because they are required +when you're writing one-liners, something AWK excels at), manual memory +management, or static typing. It excels at text processing. You can call to +it from a shell script, or you can use it as a stand-alone scripting language. + +Why use AWK instead of Perl? Readability. AWK is easier to read  than Perl. For simple text-processing scripts, particularly ones that read  files line by line and split on delimiters, AWK is probably the right tool for  the job. @@ -23,8 +24,23 @@ the job.  # Comments are like this -# AWK programs consist of a collection of patterns and actions. The most -# important pattern is called BEGIN. Actions go into brace blocks. + +# AWK programs consist of a collection of patterns and actions. +pattern1 { action; } # just like lex +pattern2 { action; } + +# There is an implied loop and AWK automatically reads and parses each +# record of each file supplied. 
Each record is split by the FS delimiter, +# which defaults to white-space (multiple spaces, tabs count as one) +# You can assign FS either on the command line (-F C) or in your BEGIN +# pattern + +# One of the special patterns is BEGIN. The BEGIN pattern is true +# BEFORE any of the files are read. The END pattern is true after +# an End-of-file from the last file (or standard-in if no files specified) +# There is also an output field separator (OFS) that you can assign, which +# defaults to a single space +  BEGIN {      # BEGIN will run at the beginning of the program. It's where you put all @@ -32,114 +48,116 @@ BEGIN {      # have no text files, then think of BEGIN as the main entry point.      # Variables are global. Just set them or use them, no need to declare.. -    count = 0 +    count = 0;      # Operators just like in C and friends -    a = count + 1 -    b = count - 1 -    c = count * 1 -    d = count / 1 # integer division -    e = count % 1 # modulus -    f = count ^ 1 # exponentiation - -    a += 1 -    b -= 1 -    c *= 1 -    d /= 1 -    e %= 1 -    f ^= 1 +    a = count + 1; +    b = count - 1; +    c = count * 1; +    d = count / 1; # integer division +    e = count % 1; # modulus +    f = count ^ 1; # exponentiation + +    a += 1; +    b -= 1; +    c *= 1; +    d /= 1; +    e %= 1; +    f ^= 1;      # Incrementing and decrementing by one -    a++ -    b-- +    a++; +    b--;      # As a prefix operator, it returns the incremented value -    ++a -    --b +    ++a; +    --b;      # Notice, also, no punctuation such as semicolons to terminate statements      # Control statements      if (count == 0) -        print "Starting with count of 0" +        print "Starting with count of 0";      else -        print "Huh?" +        print "Huh?";      # Or you could use the ternary operator -    print (count == 0) ? 
"Starting with count of 0" : "Huh?";      # Blocks consisting of multiple lines use braces      while (a < 10) {          print "String concatenation is done" " with a series" " of" -            " space-separated strings" -        print a +            " space-separated strings"; +        print a; -        a++ +        a++;      }      for (i = 0; i < 10; i++) -        print "Good ol' for loop" +        print "Good ol' for loop";      # As for comparisons, they're the standards: -    a < b   # Less than -    a <= b  # Less than or equal -    a != b  # Not equal -    a == b  # Equal -    a > b   # Greater than -    a >= b  # Greater than or equal +    # a < b   # Less than +    # a <= b  # Less than or equal +    # a != b  # Not equal +    # a == b  # Equal +    # a > b   # Greater than +    # a >= b  # Greater than or equal      # Logical operators as well -    a && b  # AND -    a || b  # OR +    # a && b  # AND +    # a || b  # OR      # In addition, there's the super useful regular expression match      if ("foo" ~ "^fo+$") -        print "Fooey!" +        print "Fooey!";      if ("boo" !~ "^fo+$") -        print "Boo!" +        print "Boo!";      # Arrays -    arr[0] = "foo" -    arr[1] = "bar" -    # Unfortunately, there is no other way to initialize an array. Ya just -    # gotta chug through every value line by line like that. 
- -    # You also have associative arrays -    assoc["foo"] = "bar" -    assoc["bar"] = "baz" +    arr[0] = "foo"; +    arr[1] = "bar"; +     +    # You can also initialize an array with the built-in function split() +     +    n = split("foo:bar:baz", arr, ":"); +    +    # You also have associative arrays (actually, they're all associative arrays) +    assoc["foo"] = "bar"; +    assoc["bar"] = "baz";      # And multi-dimensional arrays, with some limitations I won't mention here -    multidim[0,0] = "foo" -    multidim[0,1] = "bar" -    multidim[1,0] = "baz" -    multidim[1,1] = "boo" +    multidim[0,0] = "foo"; +    multidim[0,1] = "bar"; +    multidim[1,0] = "baz"; +    multidim[1,1] = "boo";      # You can test for array membership      if ("foo" in assoc) -        print "Fooey!" +        print "Fooey!";      # You can also use the 'in' operator to traverse the keys of an array      for (key in assoc) -        print assoc[key] +        print assoc[key];      # The command line is in a special array called ARGV      for (argnum in ARGV) -        print ARGV[argnum] +        print ARGV[argnum];      # You can remove elements of an array      # This is particularly useful to prevent AWK from assuming the arguments      # are files for it to process -    delete ARGV[1] +    delete ARGV[1];      # The number of command line arguments is in a variable called ARGC -    print ARGC +    print ARGC;      # AWK has several built-in functions. They fall into three categories. I'll      # demonstrate each of them in their own functions, defined later. 
-    return_value = arithmetic_functions(a, b, c) -    string_functions() -    io_functions() +    return_value = arithmetic_functions(a, b, c); +    string_functions(); +    io_functions();  }  # Here's how you define a function @@ -159,26 +177,26 @@ function arithmetic_functions(a, b, c,     d) {      # Now, to demonstrate the arithmetic functions      # Most AWK implementations have some standard trig functions -    localvar = sin(a) -    localvar = cos(a) -    localvar = atan2(b, a) # arc tangent of b / a +    localvar = sin(a); +    localvar = cos(a); +    localvar = atan2(b, a); # arc tangent of b / a      # And logarithmic stuff -    localvar = exp(a) -    localvar = log(a) +    localvar = exp(a); +    localvar = log(a);      # Square root -    localvar = sqrt(a) +    localvar = sqrt(a);      # Truncate floating point to integer -    localvar = int(5.34) # localvar => 5 +    localvar = int(5.34); # localvar => 5      # Random numbers -    srand() # Supply a seed as an argument. By default, it uses the time of day -    localvar = rand() # Random number between 0 and 1. +    srand(); # Supply a seed as an argument. By default, it uses the time of day +    localvar = rand(); # Random number between 0 and 1.      # Here's how to return a value -    return localvar +    return localvar;  }  function string_functions(    localvar, arr) { @@ -188,61 +206,66 @@ function string_functions(    localvar, arr) {      # Search and replace, first instance (sub) or all instances (gsub)      # Both return number of matches replaced -    localvar = "fooooobar" -    sub("fo+", "Meet me at the ", localvar) # localvar => "Meet me at the bar" -    gsub("e+", ".", localvar) # localvar => "m..t m. at th. bar" +    localvar = "fooooobar"; +    sub("fo+", "Meet me at the ", localvar); # localvar => "Meet me at the bar" +    gsub("e+", ".", localvar); # localvar => "m..t m. at th. 
bar"      # Search for a string that matches a regular expression      # index() does the same thing, but doesn't allow a regular expression -    match(localvar, "t") # => 4, since the 't' is the fourth character +    match(localvar, "t"); # => 4, since the 't' is the fourth character      # Split on a delimiter -    split("foo-bar-baz", arr, "-") # a => ["foo", "bar", "baz"] +    n = split("foo-bar-baz", arr, "-"); # arr[1] = "foo"; arr[2] = "bar"; arr[3] = "baz"; n = 3      # Other useful stuff -    sprintf("%s %d %d %d", "Testing", 1, 2, 3) # => "Testing 1 2 3" -    substr("foobar", 2, 3) # => "oob" -    substr("foobar", 4) # => "bar" -    length("foo") # => 3 -    tolower("FOO") # => "foo" -    toupper("foo") # => "FOO" +    sprintf("%s %d %d %d", "Testing", 1, 2, 3); # => "Testing 1 2 3" +    substr("foobar", 2, 3); # => "oob" +    substr("foobar", 4); # => "bar" +    length("foo"); # => 3 +    tolower("FOO"); # => "foo" +    toupper("foo"); # => "FOO"  }  function io_functions(    localvar) {      # You've already seen print -    print "Hello world" +    print "Hello world";      # There's also printf -    printf("%s %d %d %d\n", "Testing", 1, 2, 3) +    printf("%s %d %d %d\n", "Testing", 1, 2, 3);      # AWK doesn't have file handles, per se. It will automatically open a file      # handle for you when you use something that needs one. The string you used      # for this can be treated as a file handle, for purposes of I/O. This makes -    # it feel sort of like shell scripting: +    # it feel sort of like shell scripting, but to get the same output, the string +    # must match exactly, so use a variable: +     +    outfile = "/tmp/foobar.txt"; -    print "foobar" >"/tmp/foobar.txt" +    print "foobar" > outfile; -    # Now the string "/tmp/foobar.txt" is a file handle. You can close it: -    close("/tmp/foobar.txt") +    # Now the string outfile is a file handle. 
You can close it: +    close(outfile);      # Here's how you run something in the shell -    system("echo foobar") # => prints foobar +    system("echo foobar"); # => prints foobar      # Reads a line from standard input and stores in localvar -    getline localvar +    getline localvar; -    # Reads a line from a pipe -    "echo foobar" | getline localvar # localvar => "foobar" -    close("echo foobar") +    # Reads a line from a pipe (again, use a string so you close it properly) +    cmd = "echo foobar"; +    cmd | getline localvar; # localvar => "foobar" +    close(cmd);      # Reads a line from a file and stores in localvar -    getline localvar <"/tmp/foobar.txt" -    close("/tmp/foobar.txt") +    infile = "/tmp/foobar.txt"; +    getline localvar < infile;  +    close(infile);  }  # As I said at the beginning, AWK programs consist of a collection of patterns -# and actions. You've already seen the all-important BEGIN pattern. Other +# and actions. You've already seen the BEGIN pattern. Other  # patterns are used only if you're processing lines from files or standard  # input.  # @@ -257,7 +280,7 @@ function io_functions(    localvar) {      # expression, /^fo+bar$/, and will be skipped for any line that fails to      # match it. Let's just print the line: -    print +    print;      # Whoa, no argument! That's because print has a default argument: $0.      # $0 is the name of the current line being processed. It is created @@ -268,16 +291,16 @@ function io_functions(    localvar) {      # does. And, like the shell, each field can be access with a dollar sign      # This will print the second and fourth fields in the line -    print $2, $4 +    print $2, $4;      # AWK automatically defines many other variables to help you inspect and      # process each line. 
The most important one is NF      # Prints the number of fields on this line -    print NF +    print NF;      # Print the last field on this line -    print $NF +    print $NF;  }  # Every pattern is actually a true/false test. The regular expression in the @@ -286,7 +309,7 @@ function io_functions(    localvar) {  # currently processing. Thus, the complete version of it is this:  $0 ~ /^fo+bar$/ { -    print "Equivalent to the last pattern" +    print "Equivalent to the last pattern";  }  a > 0 { @@ -315,10 +338,10 @@ a > 0 {  BEGIN {      # First, ask the user for the name -    print "What name would you like the average age for?" +    print "What name would you like the average age for?";      # Get a line from standard input, not from files on the command line -    getline name <"/dev/stdin" +    getline name < "/dev/stdin";  }  # Now, match every line whose first field is the given name @@ -335,8 +358,8 @@ $1 == name {      # ...etc. There are plenty more, documented in the man page.      # Keep track of a running total and how many lines matched -    sum += $3 -    nlines++ +    sum += $3; +    nlines++;  }  # Another special pattern is called END. It will run after processing all the @@ -348,7 +371,7 @@ $1 == name {  END {      if (nlines) -        print "The average age for " name " is " sum / nlines +        print "The average age for " name " is " sum / nlines;  }  ``` @@ -357,3 +380,4 @@ Further Reading:  * [Awk tutorial](http://www.grymoire.com/Unix/Awk.html)  * [Awk man page](https://linux.die.net/man/1/awk)  * [The GNU Awk User's Guide](https://www.gnu.org/software/gawk/manual/gawk.html) GNU Awk is found on most Linux systems. +* [AWK one-liner collection](http://tuxgraphics.org/~guido/scripts/awk-one-liner.html) | 
