From c97f310a5d81f49a202e71d12d506f3faa49b76c Mon Sep 17 00:00:00 2001 From: marshallmason Date: Tue, 9 May 2017 07:18:46 -0700 Subject: [awk/en] Added tutorial for AWK (#2716) --- awk.html.markdown | 359 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 359 insertions(+) create mode 100644 awk.html.markdown (limited to 'awk.html.markdown') diff --git a/awk.html.markdown b/awk.html.markdown new file mode 100644 index 00000000..90f88b1a --- /dev/null +++ b/awk.html.markdown @@ -0,0 +1,359 @@ +--- +language: awk +filename: learnawk.awk +contributors: + - ["Marshall Mason", "http://github.com/marshallmason"] +lang: en +--- + +AWK is a standard tool on every POSIX-compliant UNIX system. It's like a +stripped-down Perl, perfect for text-processing tasks and other scripting +needs. It has a C-like syntax, but without semicolons, manual memory +management, or static typing. It excels at text processing. You can call to it +from a shell script, or you can use it as a stand-alone scripting language. + +Why use AWK instead of Perl? Mostly because AWK is part of UNIX. You can always +count on it, whereas Perl's future is in question. AWK is also easier to read +than Perl. For simple text-processing scripts, particularly ones that read +files line by line and split on delimiters, AWK is probably the right tool for +the job. + +```awk +#!/usr/bin/awk -f + +# Comments are like this + +# AWK programs consist of a collection of patterns and actions. The most +# important pattern is called BEGIN. Actions go into brace blocks. +BEGIN { + + # BEGIN will run at the beginning of the program. It's where you put all + # the preliminary set-up code, before you process any text files. If you + # have no text files, then think of BEGIN as the main entry point. + + # Variables are global. Just set them or use them, no need to declare.. 
+ count = 0 + + # Operators just like in C and friends + a = count + 1 + b = count - 1 + c = count * 1 + d = count / 1 + e = count % 1 # modulus + f = count ^ 1 # exponentiation + + a += 1 + b -= 1 + c *= 1 + d /= 1 + e %= 1 + f ^= 1 + + # Incrementing and decrementing by one + a++ + b-- + + # As a prefix operator, it returns the incremented value + ++a + --b + + # Notice, also, no punctuation such as semicolons to terminate statements + + # Control statements + if (count == 0) + print "Starting with count of 0" + else + print "Huh?" + + # Or you could use the ternary operator + print (count == 0) ? "Starting with count of 0" : "Huh?" + + # Blocks consisting of multiple lines use braces + while (a < 10) { + print "String concatenation is done" " with a series" " of" + " space-separated strings" + print a + + a++ + } + + for (i = 0; i < 10; i++) + print "Good ol' for loop" + + # As for comparisons, they're the standards: + a < b # Less than + a <= b # Less than or equal + a != b # Not equal + a == b # Equal + a > b # Greater than + a >= b # Greater than or equal + + # Logical operators as well + a && b # AND + a || b # OR + + # In addition, there's the super useful regular expression match + if ("foo" ~ "^fo+$") + print "Fooey!" + if ("boo" !~ "^fo+$") + print "Boo!" + + # Arrays + arr[0] = "foo" + arr[1] = "bar" + # Unfortunately, there is no other way to initialize an array. Ya just + # gotta chug through every value line by line like that. + + # You also have associative arrays + assoc["foo"] = "bar" + assoc["bar"] = "baz" + + # And multi-dimensional arrays, with some limitations I won't mention here + multidim[0,0] = "foo" + multidim[0,1] = "bar" + multidim[1,0] = "baz" + multidim[1,1] = "boo" + + # You can test for array membership + if ("foo" in assoc) + print "Fooey!" 
+ + # You can also use the 'in' operator to traverse the keys of an array + for (key in assoc) + print assoc[key] + + # The command line is in a special array called ARGV + for (argnum in ARGV) + print ARGV[argnum] + + # You can remove elements of an array + # This is particularly useful to prevent AWK from assuming the arguments + # are files for it to process + delete ARGV[1] + + # The number of command line arguments is in a variable called ARGC + print ARGC + + # AWK has several built-in functions. They fall into three categories. I'll + # demonstrate each of them in their own functions, defined later. + + return_value = arithmetic_functions(a, b, c) + string_functions() + io_functions() +} + +# Here's how you define a function +function arithmetic_functions(a, b, c, localvar) { + + # Probably the most annoying part of AWK is that there are no local + # variables. Everything is global. For short scripts, this is fine, even + # useful, but for longer scripts, this can be a problem. + + # There is a work-around (ahem, hack). Function arguments are local to the + # function, and AWK allows you to define more function arguments than it + # needs. So just stick local variable in the function declaration, like I + # did above. As a convention, stick in some extra whitespace to distinguish + # between actual function parameters and local variables. In this example, + # a, b, and c are actual parameters, while d is merely a local variable. + + # Now, to demonstrate the arithmetic functions + + # Most AWK implementations have some standard trig functions + localvar = sin(a) + localvar = cos(a) + localvar = atan2(a, b) # arc tangent of b / a + + # And logarithmic stuff + localvar = exp(a) + localvar = log(a) + + # Square root + localvar = sqrt(a) + + # Truncate floating point to integer + localvar = int(5.34) # localvar => 5 + + # Random numbers + srand() # Supply a seed as an argument. 
By default, it uses the time of day + localvar = rand() # Random number between 0 and 1. + + # Here's how to return a value + return localvar +} + +function string_functions( localvar, arr) { + + # AWK, being a string-processing language, has several string-related + # functions, many of which rely heavily on regular expressions. + + # Search and replace, first instance (sub) or all instances (gsub) + # Both return number of matches replaced + localvar = "fooooobar" + sub("fo+", "Meet me at the ", localvar) # localvar => "Meet me at the bar" + gsub("e+", ".", localvar) # localvar => "m..t m. at th. bar" + + # Search for a string that matches a regular expression + # index() does the same thing, but doesn't allow a regular expression + match(localvar, "t") # => 4, since the 't' is the fourth character + + # Split on a delimiter + split("foo-bar-baz", arr, "-") # a => ["foo", "bar", "baz"] + + # Other useful stuff + sprintf("%s %d %d %d", "Testing", 1, 2, 3) # => "Testing 1 2 3" + substr("foobar", 2, 3) # => "oob" + substr("foobar", 4) # => "bar" + length("foo") # => 3 + tolower("FOO") # => "foo" + toupper("foo") # => "FOO" +} + +function io_functions( localvar) { + + # You've already seen print + print "Hello world" + + # There's also printf + printf("%s %d %d %d\n", "Testing", 1, 2, 3) + + # AWK doesn't have file handles, per se. It will automatically open a file + # handle for you when you use something that needs one. The string you used + # for this can be treated as a file handle, for purposes of I/O. This makes + # it feel sort of like shell scripting: + + print "foobar" >"/tmp/foobar.txt" + + # Now the string "/tmp/foobar.txt" is a file handle. 
You can close it: + close("/tmp/foobar.txt") + + # Here's how you run something in the shell + system("echo foobar") # => prints foobar + + # Reads a line from standard input and stores in localvar + getline localvar + + # Reads a line from a pipe + "echo foobar" | getline localvar # localvar => "foobar" + close("echo foobar") + + # Reads a line from a file and stores in localvar + getline localvar <"/tmp/foobar.txt" + close("/tmp/foobar.txt") +} + +# As I said at the beginning, AWK programs consist of a collection of patterns +# and actions. You've already seen the all-important BEGIN pattern. Other +# patterns are used only if you're processing lines from files or standard +# input. +# +# When you pass arguments to AWK, they are treated as file names to process. +# It will process them all, in order. Think of it like an implicit for loop, +# iterating over the lines in these files. these patterns and actions are like +# switch statements inside the loop. + +/^fo+bar$/ { + + # This action will execute for every line that matches the regular + # expression, /^fo+bar$/, and will be skipped for any line that fails to + # match it. Let's just print the line: + + print + + # Whoa, no argument! That's because print has a default argument: $0. + # $0 is the name of the current line being processed. It is created + # automatically for you. + + # You can probably guess there are other $ variables. Every line is + # implicitely split before every action is called, much like the shell + # does. And, like the shell, each field can be access with a dollar sign + + # This will print the second and fourth fields in the line + print $2, $4 + + # AWK automatically defines many other variables to help you inspect and + # process each line. The most important one is NF + + # Prints the number of fields on this line + print NF + + # Print the last field on this line + print $NF +} + +# Every pattern is actually a true/false test. 
The regular expression in the +# last pattern is also a true/false test, but part of it was hidden. If you +# don't give it a string to test, it will assume $0, the line that it's +# currently processing. Thus, the complete version of it is this: + +$0 ~ /^fo+bar$/ { + print "Equivalent to the last pattern" +} + +a > 0 { + # This will execute once for each line, as long as a is positive +} + +# You get the idea. Processing text files, reading in a line at a time, and +# doing something with it, particularly splitting on a delimiter, is so common +# in UNIX that AWK is a scripting language that does all of it for you, without +# you needing to ask. All you have to do is write the patterns and actions +# based on what you expect of the input, and what you want to do with it. + +# Here's a quick example of a simple script, the sort of thing AWK is perfect +# for. It will read a name from standard input and then will print the average +# age of everyone with that first name. Let's say you supply as an argument the +# name of a this data file: +# +# Bob Jones 32 +# Jane Doe 22 +# Steve Stevens 83 +# Bob Smith 29 +# Bob Barker 72 +# +# Here's the script: + +BEGIN { + + # First, ask the user for the name + print "What name would you like the average age for?" + + # Get a line from standard input, not from files on the command line + getline name <"/dev/stdin" +} + +# Now, match every line whose first field is the given name +$1 == name { + + # Inside here, we have access to a number of useful variables, already + # pre-loaded for us: + # $0 is the entire line + # $3 is the third field, the age, which is what we're interested in here + # NF is the number of fields, which should be 3 + # NR is the number of records (lines) seen so far + # FILENAME is the name of the file being processed + # FS is the field separator being used, which is " " here + # ...etc. There are plenty more, documented in the man page. 
+ + # Keep track of a running total and how many lines matched + sum += $3 + nlines++ +} + +# Another special pattern is called END. It will run after processing all the +# text files. Unlike BEGIN, it will only run if you've given it input to +# process. It will run after all the files have been read and processed +# according to the rules and actions you've provided. The purpose of it is +# usually to output some kind of final report, or do something with the +# aggregate of the data you've accumulated over the course of the script. + +END { + if (nlines) + print "The average age for " name " is " sum / nlines +} + +``` +Further Reading: + +* [Awk tutorial](http://www.grymoire.com/Unix/Awk.html) +* [Awk man page](https://linux.die.net/man/1/awk) +* [The GNU Awk User's Guide](https://www.gnu.org/software/gawk/manual/gawk.html) GNU Awk is found on most Linux systems. -- cgit v1.2.3 From 985d23a52b76593a120adff5381c2df3a80fe298 Mon Sep 17 00:00:00 2001 From: HairyFotr Date: Wed, 23 Aug 2017 10:14:39 +0200 Subject: Fix a bunch of typos --- awk.html.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'awk.html.markdown') diff --git a/awk.html.markdown b/awk.html.markdown index 90f88b1a..8a9b4fd7 100644 --- a/awk.html.markdown +++ b/awk.html.markdown @@ -264,7 +264,7 @@ function io_functions( localvar) { # automatically for you. # You can probably guess there are other $ variables. Every line is - # implicitely split before every action is called, much like the shell + # implicitly split before every action is called, much like the shell # does. 
And, like the shell, each field can be access with a dollar sign # This will print the second and fourth fields in the line -- cgit v1.2.3 From 12eafb6c49437c19ecf3b8981a37457a9df2b04d Mon Sep 17 00:00:00 2001 From: Pratik Karki Date: Fri, 25 Aug 2017 14:04:05 +0545 Subject: fix language code suffix(#2832) --- awk.html.markdown | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'awk.html.markdown') diff --git a/awk.html.markdown b/awk.html.markdown index 8a9b4fd7..0e27528d 100644 --- a/awk.html.markdown +++ b/awk.html.markdown @@ -2,8 +2,8 @@ language: awk filename: learnawk.awk contributors: - - ["Marshall Mason", "http://github.com/marshallmason"] -lang: en + - ["Marshall Mason", "http://github.com/marshallmason"] + --- AWK is a standard tool on every POSIX-compliant UNIX system. It's like a -- cgit v1.2.3 From 894ddf5ec33eb2fdaf739850114f77fd9a0679dc Mon Sep 17 00:00:00 2001 From: Pratik Karki Date: Sat, 14 Oct 2017 18:46:05 +0545 Subject: fix for #2859 --- awk.html.markdown | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'awk.html.markdown') diff --git a/awk.html.markdown b/awk.html.markdown index 0e27528d..e3ea6318 100644 --- a/awk.html.markdown +++ b/awk.html.markdown @@ -38,7 +38,7 @@ BEGIN { a = count + 1 b = count - 1 c = count * 1 - d = count / 1 + d = count / 1 # integer division e = count % 1 # modulus f = count ^ 1 # exponentiation @@ -143,7 +143,7 @@ BEGIN { } # Here's how you define a function -function arithmetic_functions(a, b, c, localvar) { +function arithmetic_functions(a, b, c, d) { # Probably the most annoying part of AWK is that there are no local # variables. Everything is global. 
For short scripts, this is fine, even -- cgit v1.2.3 From d375092374d0a71f95e3a418c08236a68426dad0 Mon Sep 17 00:00:00 2001 From: Aleksandr Mazurik Date: Sat, 28 Jul 2018 21:48:25 +0300 Subject: [awk/en] Fix atan2 args order --- awk.html.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'awk.html.markdown') diff --git a/awk.html.markdown b/awk.html.markdown index e3ea6318..de26c0a1 100644 --- a/awk.html.markdown +++ b/awk.html.markdown @@ -161,7 +161,7 @@ function arithmetic_functions(a, b, c, d) { # Most AWK implementations have some standard trig functions localvar = sin(a) localvar = cos(a) - localvar = atan2(a, b) # arc tangent of b / a + localvar = atan2(b, a) # arc tangent of b / a # And logarithmic stuff localvar = exp(a) -- cgit v1.2.3 From c8284c9c959abf9662d03bc152af6c8a49a12e58 Mon Sep 17 00:00:00 2001 From: dtkerns Date: Tue, 11 Sep 2018 13:52:30 -0700 Subject: Update awk.html.markdown --- awk.html.markdown | 244 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 134 insertions(+), 110 deletions(-) (limited to 'awk.html.markdown') diff --git a/awk.html.markdown b/awk.html.markdown index de26c0a1..3d2c4ccb 100644 --- a/awk.html.markdown +++ b/awk.html.markdown @@ -6,14 +6,15 @@ contributors: --- -AWK is a standard tool on every POSIX-compliant UNIX system. It's like a -stripped-down Perl, perfect for text-processing tasks and other scripting -needs. It has a C-like syntax, but without semicolons, manual memory -management, or static typing. It excels at text processing. You can call to it -from a shell script, or you can use it as a stand-alone scripting language. - -Why use AWK instead of Perl? Mostly because AWK is part of UNIX. You can always -count on it, whereas Perl's future is in question. AWK is also easier to read +AWK is a standard tool on every POSIX-compliant UNIX system. It's like +flex/lex, from the command-line, perfect for text-processing tasks and +other scripting needs. 
It has a C-like syntax, but without mandatory +semicolons (although, you should use them anyway, because they are required +when you're writing one-liners, something AWK excells at), manual memory +management, or static typing. It excels at text processing. You can call to +it from a shell script, or you can use it as a stand-alone scripting language. + +Why use AWK instead of Perl? Readability. AWK is easier to read than Perl. For simple text-processing scripts, particularly ones that read files line by line and split on delimiters, AWK is probably the right tool for the job. @@ -23,8 +24,23 @@ the job. # Comments are like this -# AWK programs consist of a collection of patterns and actions. The most -# important pattern is called BEGIN. Actions go into brace blocks. + +# AWK programs consist of a collection of patterns and actions. +pattern1 { action; } # just like lex +pattern2 { action; } + +# There is an implied loop and AWK automatically reads and parses each +# record of each file supplied. Each record is split by the FS delimiter, +# which defaults to white-space (multiple spaces,tabs count as one) +# You cann assign FS either on the command line (-F C) or in your BEGIN +# pattern + +# One of the special patterns is BEGIN. The BEGIN pattern is true +# BEFORE any of the files are read. The END pattern is true after +# an End-of-file from the last file (or standard-in if no files specified) +# There is also an output field separator (OFS) that you can assign, which +# defaults to a single space + BEGIN { # BEGIN will run at the beginning of the program. It's where you put all @@ -32,114 +48,116 @@ BEGIN { # have no text files, then think of BEGIN as the main entry point. # Variables are global. Just set them or use them, no need to declare.. 
- count = 0 + count = 0; # Operators just like in C and friends - a = count + 1 - b = count - 1 - c = count * 1 - d = count / 1 # integer division - e = count % 1 # modulus - f = count ^ 1 # exponentiation - - a += 1 - b -= 1 - c *= 1 - d /= 1 - e %= 1 - f ^= 1 + a = count + 1; + b = count - 1; + c = count * 1; + d = count / 1; # integer division + e = count % 1; # modulus + f = count ^ 1; # exponentiation + + a += 1; + b -= 1; + c *= 1; + d /= 1; + e %= 1; + f ^= 1; # Incrementing and decrementing by one - a++ - b-- + a++; + b--; # As a prefix operator, it returns the incremented value - ++a - --b + ++a; + --b; # Notice, also, no punctuation such as semicolons to terminate statements # Control statements if (count == 0) - print "Starting with count of 0" + print "Starting with count of 0"; else - print "Huh?" + print "Huh?"; # Or you could use the ternary operator - print (count == 0) ? "Starting with count of 0" : "Huh?" + print (count == 0) ? "Starting with count of 0" : "Huh?"; # Blocks consisting of multiple lines use braces while (a < 10) { print "String concatenation is done" " with a series" " of" - " space-separated strings" - print a + " space-separated strings"; + print a; - a++ + a++; } for (i = 0; i < 10; i++) - print "Good ol' for loop" + print "Good ol' for loop"; # As for comparisons, they're the standards: - a < b # Less than - a <= b # Less than or equal - a != b # Not equal - a == b # Equal - a > b # Greater than - a >= b # Greater than or equal + # a < b # Less than + # a <= b # Less than or equal + # a != b # Not equal + # a == b # Equal + # a > b # Greater than + # a >= b # Greater than or equal # Logical operators as well - a && b # AND - a || b # OR + # a && b # AND + # a || b # OR # In addition, there's the super useful regular expression match if ("foo" ~ "^fo+$") - print "Fooey!" + print "Fooey!"; if ("boo" !~ "^fo+$") - print "Boo!" 
+ print "Boo!"; # Arrays - arr[0] = "foo" - arr[1] = "bar" - # Unfortunately, there is no other way to initialize an array. Ya just - # gotta chug through every value line by line like that. - - # You also have associative arrays - assoc["foo"] = "bar" - assoc["bar"] = "baz" + arr[0] = "foo"; + arr[1] = "bar"; + + # You can also initialize an array with the built-in function split() + + n = split("foo:bar:baz", arr, ":"); + + # You also have associative arrays (actually, they're all associative arrays) + assoc["foo"] = "bar"; + assoc["bar"] = "baz"; # And multi-dimensional arrays, with some limitations I won't mention here - multidim[0,0] = "foo" - multidim[0,1] = "bar" - multidim[1,0] = "baz" - multidim[1,1] = "boo" + multidim[0,0] = "foo"; + multidim[0,1] = "bar"; + multidim[1,0] = "baz"; + multidim[1,1] = "boo"; # You can test for array membership if ("foo" in assoc) - print "Fooey!" + print "Fooey!"; # You can also use the 'in' operator to traverse the keys of an array for (key in assoc) - print assoc[key] + print assoc[key]; # The command line is in a special array called ARGV for (argnum in ARGV) - print ARGV[argnum] + print ARGV[argnum]; # You can remove elements of an array # This is particularly useful to prevent AWK from assuming the arguments # are files for it to process - delete ARGV[1] + delete ARGV[1]; # The number of command line arguments is in a variable called ARGC - print ARGC + print ARGC; # AWK has several built-in functions. They fall into three categories. I'll # demonstrate each of them in their own functions, defined later. 
- return_value = arithmetic_functions(a, b, c) - string_functions() - io_functions() + return_value = arithmetic_functions(a, b, c); + string_functions(); + io_functions(); } # Here's how you define a function @@ -159,26 +177,26 @@ function arithmetic_functions(a, b, c, d) { # Now, to demonstrate the arithmetic functions # Most AWK implementations have some standard trig functions - localvar = sin(a) - localvar = cos(a) - localvar = atan2(b, a) # arc tangent of b / a + localvar = sin(a); + localvar = cos(a); + localvar = atan2(b, a); # arc tangent of b / a # And logarithmic stuff - localvar = exp(a) - localvar = log(a) + localvar = exp(a); + localvar = log(a); # Square root - localvar = sqrt(a) + localvar = sqrt(a); # Truncate floating point to integer - localvar = int(5.34) # localvar => 5 + localvar = int(5.34); # localvar => 5 # Random numbers - srand() # Supply a seed as an argument. By default, it uses the time of day - localvar = rand() # Random number between 0 and 1. + srand(); # Supply a seed as an argument. By default, it uses the time of day + localvar = rand(); # Random number between 0 and 1. # Here's how to return a value - return localvar + return localvar; } function string_functions( localvar, arr) { @@ -188,61 +206,66 @@ function string_functions( localvar, arr) { # Search and replace, first instance (sub) or all instances (gsub) # Both return number of matches replaced - localvar = "fooooobar" - sub("fo+", "Meet me at the ", localvar) # localvar => "Meet me at the bar" - gsub("e+", ".", localvar) # localvar => "m..t m. at th. bar" + localvar = "fooooobar"; + sub("fo+", "Meet me at the ", localvar); # localvar => "Meet me at the bar" + gsub("e+", ".", localvar); # localvar => "m..t m. at th. 
bar" # Search for a string that matches a regular expression # index() does the same thing, but doesn't allow a regular expression - match(localvar, "t") # => 4, since the 't' is the fourth character + match(localvar, "t"); # => 4, since the 't' is the fourth character # Split on a delimiter - split("foo-bar-baz", arr, "-") # a => ["foo", "bar", "baz"] + n = split("foo-bar-baz", arr, "-"); # arr[1] = "foo"; arr[2] = "bar"; arr[3] = "baz"; n = 3 # Other useful stuff - sprintf("%s %d %d %d", "Testing", 1, 2, 3) # => "Testing 1 2 3" - substr("foobar", 2, 3) # => "oob" - substr("foobar", 4) # => "bar" - length("foo") # => 3 - tolower("FOO") # => "foo" - toupper("foo") # => "FOO" + sprintf("%s %d %d %d", "Testing", 1, 2, 3); # => "Testing 1 2 3" + substr("foobar", 2, 3); # => "oob" + substr("foobar", 4); # => "bar" + length("foo"); # => 3 + tolower("FOO"); # => "foo" + toupper("foo"); # => "FOO" } function io_functions( localvar) { # You've already seen print - print "Hello world" + print "Hello world"; # There's also printf - printf("%s %d %d %d\n", "Testing", 1, 2, 3) + printf("%s %d %d %d\n", "Testing", 1, 2, 3); # AWK doesn't have file handles, per se. It will automatically open a file # handle for you when you use something that needs one. The string you used # for this can be treated as a file handle, for purposes of I/O. This makes - # it feel sort of like shell scripting: + # it feel sort of like shell scripting, but to get the same output, the string + # must match exactly, so use a vaiable: + + outfile = "/tmp/foobar.txt"; - print "foobar" >"/tmp/foobar.txt" + print "foobar" > outfile; - # Now the string "/tmp/foobar.txt" is a file handle. You can close it: - close("/tmp/foobar.txt") + # Now the string outfile is a file handle. 
You can close it: + close(outfile); # Here's how you run something in the shell - system("echo foobar") # => prints foobar + system("echo foobar"); # => prints foobar # Reads a line from standard input and stores in localvar - getline localvar + getline localvar; - # Reads a line from a pipe - "echo foobar" | getline localvar # localvar => "foobar" - close("echo foobar") + # Reads a line from a pipe (again, use a string so you close it properly) + cmd = "echo foobar"; + cmd | getline localvar; # localvar => "foobar" + close(cmd); # Reads a line from a file and stores in localvar - getline localvar <"/tmp/foobar.txt" - close("/tmp/foobar.txt") + infile = "/tmp/foobar.txt"; + getline localvar < infile; + close(infile); } # As I said at the beginning, AWK programs consist of a collection of patterns -# and actions. You've already seen the all-important BEGIN pattern. Other +# and actions. You've already seen the BEGIN pattern. Other # patterns are used only if you're processing lines from files or standard # input. # @@ -257,7 +280,7 @@ function io_functions( localvar) { # expression, /^fo+bar$/, and will be skipped for any line that fails to # match it. Let's just print the line: - print + print; # Whoa, no argument! That's because print has a default argument: $0. # $0 is the name of the current line being processed. It is created @@ -268,16 +291,16 @@ function io_functions( localvar) { # does. And, like the shell, each field can be access with a dollar sign # This will print the second and fourth fields in the line - print $2, $4 + print $2, $4; # AWK automatically defines many other variables to help you inspect and # process each line. The most important one is NF # Prints the number of fields on this line - print NF + print NF; # Print the last field on this line - print $NF + print $NF; } # Every pattern is actually a true/false test. The regular expression in the @@ -286,7 +309,7 @@ function io_functions( localvar) { # currently processing. 
Thus, the complete version of it is this: $0 ~ /^fo+bar$/ { - print "Equivalent to the last pattern" + print "Equivalent to the last pattern"; } a > 0 { @@ -315,10 +338,10 @@ a > 0 { BEGIN { # First, ask the user for the name - print "What name would you like the average age for?" + print "What name would you like the average age for?"; # Get a line from standard input, not from files on the command line - getline name <"/dev/stdin" + getline name < "/dev/stdin"; } # Now, match every line whose first field is the given name @@ -335,8 +358,8 @@ $1 == name { # ...etc. There are plenty more, documented in the man page. # Keep track of a running total and how many lines matched - sum += $3 - nlines++ + sum += $3; + nlines++; } # Another special pattern is called END. It will run after processing all the @@ -348,7 +371,7 @@ $1 == name { END { if (nlines) - print "The average age for " name " is " sum / nlines + print "The average age for " name " is " sum / nlines; } ``` @@ -357,3 +380,4 @@ Further Reading: * [Awk tutorial](http://www.grymoire.com/Unix/Awk.html) * [Awk man page](https://linux.die.net/man/1/awk) * [The GNU Awk User's Guide](https://www.gnu.org/software/gawk/manual/gawk.html) GNU Awk is found on most Linux systems. +* [AWK one-liner collection](http://tuxgraphics.org/~guido/scripts/awk-one-liner.html) -- cgit v1.2.3 From 38945d8be9f67cc6ed6d5ba213e907a31f01ec58 Mon Sep 17 00:00:00 2001 From: Gaurang Tandon <1gaurangtandon@gmail.com> Date: Mon, 7 Jan 2019 14:26:35 +0530 Subject: [awk/en] fix minor spelling mistakes --- awk.html.markdown | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'awk.html.markdown') diff --git a/awk.html.markdown b/awk.html.markdown index 3d2c4ccb..3b6e34b4 100644 --- a/awk.html.markdown +++ b/awk.html.markdown @@ -10,7 +10,7 @@ AWK is a standard tool on every POSIX-compliant UNIX system. It's like flex/lex, from the command-line, perfect for text-processing tasks and other scripting needs. 
It has a C-like syntax, but without mandatory semicolons (although, you should use them anyway, because they are required -when you're writing one-liners, something AWK excells at), manual memory +when you're writing one-liners, something AWK excels at), manual memory management, or static typing. It excels at text processing. You can call to it from a shell script, or you can use it as a stand-alone scripting language. @@ -32,7 +32,7 @@ pattern2 { action; } # There is an implied loop and AWK automatically reads and parses each # record of each file supplied. Each record is split by the FS delimiter, # which defaults to white-space (multiple spaces,tabs count as one) -# You cann assign FS either on the command line (-F C) or in your BEGIN +# You can assign FS either on the command line (-F C) or in your BEGIN # pattern # One of the special patterns is BEGIN. The BEGIN pattern is true @@ -238,7 +238,7 @@ function io_functions( localvar) { # handle for you when you use something that needs one. The string you used # for this can be treated as a file handle, for purposes of I/O. 
This makes # it feel sort of like shell scripting, but to get the same output, the string - # must match exactly, so use a vaiable: + # must match exactly, so use a variable: outfile = "/tmp/foobar.txt"; -- cgit v1.2.3 From ed100434e33ed95e69620fc9ff6167bb130d3862 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre-Fran=C3=A7ois=20Clement?= Date: Fri, 8 Mar 2019 17:18:42 +0100 Subject: Move "awk" articles to the "tool" category --- awk.html.markdown | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'awk.html.markdown') diff --git a/awk.html.markdown b/awk.html.markdown index 3b6e34b4..1ef6b8d5 100644 --- a/awk.html.markdown +++ b/awk.html.markdown @@ -1,5 +1,6 @@ --- -language: awk +category: tool +tool: awk filename: learnawk.awk contributors: - ["Marshall Mason", "http://github.com/marshallmason"] -- cgit v1.2.3 From 2d682fdd8f77ca203714565a2991730709748d08 Mon Sep 17 00:00:00 2001 From: dubiouscript <44098276+dubiouscript@users.noreply.github.com> Date: Tue, 23 Apr 2019 11:31:13 +0000 Subject: add awk "gotchas" alpine wiki link --- awk.html.markdown | 1 + 1 file changed, 1 insertion(+) (limited to 'awk.html.markdown') diff --git a/awk.html.markdown b/awk.html.markdown index 1ef6b8d5..84985e2c 100644 --- a/awk.html.markdown +++ b/awk.html.markdown @@ -382,3 +382,4 @@ Further Reading: * [Awk man page](https://linux.die.net/man/1/awk) * [The GNU Awk User's Guide](https://www.gnu.org/software/gawk/manual/gawk.html) GNU Awk is found on most Linux systems. * [AWK one-liner collection](http://tuxgraphics.org/~guido/scripts/awk-one-liner.html) +* [Awk alpinelinux wiki](https://wiki.alpinelinux.org/wiki/Awk) a technical summary and list of "gotchas" (places where different implementations may behave in different or unexpected ways). 
-- cgit v1.2.3 From 5b51bf193499d301d8c86583ff3ab52c67636f4f Mon Sep 17 00:00:00 2001 From: dubiouscript <44098276+dubiouscript@users.noreply.github.com> Date: Tue, 23 Apr 2019 11:32:50 +0000 Subject: add link to dubiousjim/awkenough Awk utility routines --- awk.html.markdown | 1 + 1 file changed, 1 insertion(+) (limited to 'awk.html.markdown') diff --git a/awk.html.markdown b/awk.html.markdown index 84985e2c..3ff3f937 100644 --- a/awk.html.markdown +++ b/awk.html.markdown @@ -383,3 +383,4 @@ Further Reading: * [The GNU Awk User's Guide](https://www.gnu.org/software/gawk/manual/gawk.html) GNU Awk is found on most Linux systems. * [AWK one-liner collection](http://tuxgraphics.org/~guido/scripts/awk-one-liner.html) * [Awk alpinelinux wiki](https://wiki.alpinelinux.org/wiki/Awk) a technical summary and list of "gotchas" (places where different implementations may behave in different or unexpected ways). +* [basic libraries for awk](https://github.com/dubiousjim/awkenough) -- cgit v1.2.3