Perl is one of the more popular programming languages nowadays. It is a general purpose, interpreted and dynamic programming language, which is also used for scripting purposes.
Here are 20 code snippets in Perl that you can use for your projects.
1. Array Assignments
use strict;
#
# This script takes two file names, and copies the first one to the second.
#
if($#ARGV != 1) {
print STDERR "You must specify exactly two arguments.\n";
exit 4;
}
# If the output exists, confirm. This uses two standard string functions,
# substring and lower-case.
if( -e $ARGV[1]) {
print "Do you really want to overwrite $ARGV[1]? ";
my $resp =; 
chomp $resp;
if(lc(substr($resp, 0, 1)) ne 'y') { exit 0; }
}
# Open the files.
open(INFILE, $ARGV[0]) or die "Cannot open $ARGV[0]: $!.\n";
open(OUTFILE, ">$ARGV[1]") or die "Cannot write $ARGV[1]: $!.\n";
while(my $l =) { 
print OUTFILE $l;
}
close INFILE;
close OUTFILE;
2. Functions And Arguments
#
# Perl functions don't have parameters, their arguments are passed
# in an array @_. You can simulate parameters by assigning to a
# list, but you can just apply the usual array operations to @_.
#
use strict;
sub parg {
my($a, $b, $c) = @_;
print "A: $a $b $c\n";
print "B: $#_ [@_]\n\n";
}
parg("Hi", "there", "fred");
my @a1 = ("Today", "is", "the", "day");
parg(@a1);
parg("Me", @a1, "too");
my $joe = "sticks";
&parg ("Pooh $joe");
parg;
my @a2 = ("Never", "Mind");
parg @a2, "Boris", @a1, $joe;
3. Using Arguments I
#
# Some fairly random examples of
# subroutines, using different features.
#
use strict;
#
# redund(n, str) returns the string str
# concatinated to itself n times.
# This uses a standard technique for
# functions of a fixed number of arguments.
# (The perl x operator will do this.)
#
sub redun
{
# This is like a parameter list.
my($rptct, $str) = @_;
if($rptct < 1) { return ""; }
my($result) = "";
while($rptct--) {
$result .= $str;
}
return $result;
}
#
# glueem(a, b, c, d, ...) takes any number
# of arguments and returns the concatination
# of them all. This uses the array of arguments
# as an array to produce a variadic function.
#
sub glueem
{
my($result) = "";
my($arg);
foreach $arg (@_) {
$result = "$result$arg";
}
return $result;
}
#
# readall(fn) attempts to read the named
# file and return its contents as a single
# string. If it is successful, it returns
# the list (1, contents). If not, it
# returns (0, errmsg). Returning a list is really
# a way of having more than one return value.
#
sub readall
{
my($fn) = @_;
open(IN, $fn) or return (0, $!);
my($line);
my($result) = "";
while($line =) { 
$result .= $line;
}
return (1, $result);
}
#
# rev(list) reverses the contents of a list
# and returns the new list. There is a builtin
# function reverse which already does this.
#
sub rev
{
my(@arr) = @_;
my($low, $hi) = (0, $#_);
while($low < $hi) {
my($temp) = $arr[$low];
$arr[$low] = $arr[$hi];
$arr[$hi] = $temp;
++$low;
--$hi;
}
return @arr;
}
# Some repeated runduancy.
my $argle = redun(5, "smith ");
print "$argle\n";
$argle = redun(2, "verily ");
print "$argle\n\n";
# All to one. (Parens are optional in calls.)
$argle = glueem "Lets ", "see ", "if ", "this ", "works.";
print "$argle\n";
$argle = glueem ("Seems", " to.");
print "$argle\n\n";
# Can you read me?
my $fn = "f1.txt";
my ($ok, $text) = readall($fn);
if($ok) {
print "File $fn is:\n$text\n";
} else {
print "File $fn open failed: $text.\n";
}
$fn = "bogus.txt";
($ok, $text) = readall($fn);
if($ok) {
print "File $fn is:\n$text\n\n";
} else {
print "File $fn open failed: $text.\n";
}
# Suffering some reversals?
my @mike = ("How", "are", "you?");
my @sam = rev @mike;
print "@sam\n";
my $bill = glueem rev("this?", "see ", "you ", "Do ");
print "$bill\n";
# And, it makes a handy swap function, too.
$argle = 10;
my $bargle = 72;
($argle, $bargle) = rev($argle, $bargle);
print "$argle $bargle\n";
4. Using Arguments II
#
# Some more random examples of
# subroutines, using different features.
#
use strict;
#
# Some folks like to use shift to get the parmeters.
sub sent
{
my $subj = shift @_;
my $verb = shift @_;
my $adj = shift @_;
my $obj = shift @_;
print uc(substr($subj,0,1)), substr($subj,1), " $verb the $adj $obj.\n";
}
#
# The @_ array is special, because changing the parameters in it changes
# the arguments.
my $snakebreath; # We can refer to this in changeme.
sub changeme
{
my $first = shift @_;
$first = 'this'; # Does not change in caller.
$_[0] = 'that'; # Does change in caller.
$snakebreath = 77; # Plain global ref.
}
sent('alex', 'stole', 'red', 'wagon');
sent('susan', 'ignored', 'awful', 'pun');
$snakebreath = 99;
my ($x, $y) = ('today', 'tomorrow');
print "\n$snakebreath $x $y\n";
changeme($x, $y);
print "$snakebreath $x $y\n";
5. Using Arguments III
#
# One more approach is to use the array/hash conversion rules to
# build keyword parameters, with defaults.
#
use strict;
# Print a string one or more times under all sorts of controls.
sub barko {
# Check for correct pairing.
@_ % 2 == 0 or
die "barko: Odd number of arguments.\n";
# Store the parms, with defaults.
my %parms = ( 'string' => 'snake', # String to print
'between' => '', # Place between chars.
'repeat' => 1, # Repeat this many times.
'cascade' => 0, # Move each line right this much more.
'blankafter' => 1, # Extra blank line afterwards.
@_);
# Now %parms is a list of keyword => value pairs as sent, using
# defaults for keys not sent.
# Add the between to the string.
my $str = substr($parms{'string'}, 1);
$str =~ s/(.)/$parms{'between'}$1/g;
$str = substr($parms{'string'}, 0, 1) . $str;
# Printin' time!
my $preamt = 0;
for(my $n = $parms{'repeat'}; $n--; ) {
print ((' ' x $preamt), "$str\n");
$preamt += $parms{'cascade'};
}
print "\n" if $parms{'blankafter'};
}
# Call with various options. These can be sent in any order.
barko;
barko(repeat => 3, string => 'BOZON', cascade => 1);
barko(between => ' ');
barko(between => '<->', repeat => 5);->
barko(string => '** done **', blankafter => 0);
6. Count Blank Lines I
#
# This reads from the standard input and counts the number
# lines which are blank, and lines which are entirely perl-style
# comments (begin with # as the first non-blank character.) It reports
# these figures.
#
use strict;
# Counters to return.
my $nblank = 0;
my $ncomm = 0;
# Read each line into the variable $line.
while(my $line =) { 
if($line =~ /^\s*$/) { ++$nblank; }
if($line =~ /^\s*\#/) { ++$ncomm; }
}
7. Count Blank Lines II
#
# This reads each file on the command line, and for each counts the
# lines which are blank, and lines which are entirely perl-style
# comments (begin with # as the first non-blank character.) It reports
# the figures for each one, and the totals.
#
use strict;
#
# Take the file name and return either (numblank, comment) or (-1, err) if
# the file could not be opened.
sub count {
my ($fn) = @_;
# Open the file; return any possible failure.
open(IN, $fn) or return (-1, $!);
# Counters to return.
my $nblank = 0;
my $ncomm = 0;
# Read each line into the variable $line.
while(my $line =) { 
if($line =~ /^\s*$/) { ++$nblank; }
if($line =~ /^\s*\#/) { ++$ncomm; }
}
close IN;
return ($nblank, $ncomm);
}
# Keep this for later.
my $nfiles = @ARGV;
# Keep totals
my $totbl = 0;
my $totcom = 0;
# Run through the command line args and process each one.
while(my $fn = shift @ARGV) {
# Print the filename with padding to 20 positions. Printf works
# pretty much as in C, but there's less use for it in perl. I'm using
# it here to get the blank padding inserted by %20s.
printf("%-20s", "$fn:");
# Process the file, print the results.
my ($blcount, $commcount) = count($fn);
if($blcount < 0) {
print "Not readable.\n";
} else {
print "$blcount blank lines, $commcount comments.\n";
$totbl += $blcount;
$totcom += $commcount;
}
}
# Print toals, if there was more than one file. The x operator makes
# repeated copies of a string, so " " x 11 prints eleven spaces.
if($nfiles > 1) {
print "* TOTAL *", " " x 11, "$totbl blank lines, $totcom comments.\n";
}
8. Check C Programs
#
# This script scans a list of C programs and
# tries to find if statements which do assignments.
# Specifically, it looks for any line which contains
# text of the form if(....), where if is the first
# non-blank thing on the line, and the (....) contains
# an = which is not part of ==, !=, <=, or >=.
# Each such line is listed with the file name and the
# line number. This is not a perfect check, but it
# will do a decent job.
#
#
# This function analyzes one line to see if it it
# is suspicious. If so, it prints it. It also updates
# the global flag $badfound, and prints an extra message
# the first time a bad line is found. It should be
# called as
# check_line (filename, line)
# where the filename is used only when a message is
# generated. The $. line number is used also.
#
use strict;
my $badfound = 0;
sub check_line {
my($fn, $line) = @_;
# Check for that =.
if($line =~ /^\s*if\s*\(.*[^!<>=]=([^=].*\)|\))/) {
if(!$badfound) {
print("The following suspicious lines were found:\n");
$badfound = 1;
}
print "$fn:$.: $line\n";
}
}
#
# This function opens and reads one file, and calls
# check_line to analyze each line. Call it with the
# file name.
#
sub check_file {
my($fn) = @_;
if(!open(IN, $fn)) {
print "Cannot read $fn.\n";
return;
}
my($line);
while($line =) 
{
chomp $line;
check_line($fn,$line);
}
close IN;
}
#
# Go through the argument list and check each file
#
while(my $fn = shift @ARGV) {
check_file($fn);
}
if(!$badfound) { print "No suspicious lines were found.\n"; }
9. Applet Viewer
#! /usr/bin/perl
use strict;
my $width = 400;
my $height = 200;
$#ARGV > -1 or die "Usage: $0 classname [ width [ height ] ]\n";
if($#ARGV >= 1) { $width = $ARGV[1]; }
if($#ARGV >= 2) { $height = $ARGV[2]; }
my $clname = $ARGV[0];
if($clname !~ /\.class$/) { $clname = "$clname.class"; }
my $loc = $clname;
open(TMPHTML, ">.tmp.html") or die "Can't get temp file: $!";
print TMPHTML "\n$clname \n";
print TMPHTML "\n
print TMPHTML
"
print TMPHTML "
\n";Here are 20 code snippets in Perl that you can use for your projects.
1. Array Assignments
use strict;
#
# This script takes two file names, and copies the first one to the second.
#
if($#ARGV != 1) {
print STDERR "You must specify exactly two arguments.\n";
exit 4;
}
# If the output exists, confirm. This uses two standard string functions,
# substring and lower-case.
if( -e $ARGV[1]) {
print "Do you really want to overwrite $ARGV[1]? ";
my $resp =
chomp $resp;
if(lc(substr($resp, 0, 1)) ne 'y') { exit 0; }
}
# Open the files.
open(INFILE, $ARGV[0]) or die "Cannot open $ARGV[0]: $!.\n";
open(OUTFILE, ">$ARGV[1]") or die "Cannot write $ARGV[1]: $!.\n";
while(my $l =
print OUTFILE $l;
}
close INFILE;
close OUTFILE;
2. Functions And Arguments
#
# Perl functions don't have parameters, their arguments are passed
# in an array @_. You can simulate parameters by assigning to a
# list, but you can just apply the usual array operations to @_.
#
use strict;
sub parg {
my($a, $b, $c) = @_;
print "A: $a $b $c\n";
print "B: $#_ [@_]\n\n";
}
parg("Hi", "there", "fred");
my @a1 = ("Today", "is", "the", "day");
parg(@a1);
parg("Me", @a1, "too");
my $joe = "sticks";
&parg ("Pooh $joe");
parg;
my @a2 = ("Never", "Mind");
parg @a2, "Boris", @a1, $joe;
3. Using Arguments I
#
# Some fairly random examples of
# subroutines, using different features.
#
use strict;
#
# redund(n, str) returns the string str
# concatinated to itself n times.
# This uses a standard technique for
# functions of a fixed number of arguments.
# (The perl x operator will do this.)
#
sub redun
{
# This is like a parameter list.
my($rptct, $str) = @_;
if($rptct < 1) { return ""; }
my($result) = "";
while($rptct--) {
$result .= $str;
}
return $result;
}
#
# glueem(a, b, c, d, ...) takes any number
# of arguments and returns the concatination
# of them all. This uses the array of arguments
# as an array to produce a variadic function.
#
sub glueem
{
my($result) = "";
my($arg);
foreach $arg (@_) {
$result = "$result$arg";
}
return $result;
}
#
# readall(fn) attempts to read the named
# file and return its contents as a single
# string. If it is successful, it returns
# the list (1, contents). If not, it
# returns (0, errmsg). Returning a list is really
# a way of having more than one return value.
#
sub readall
{
my($fn) = @_;
open(IN, $fn) or return (0, $!);
my($line);
my($result) = "";
while($line =
$result .= $line;
}
return (1, $result);
}
#
# rev(list) reverses the contents of a list
# and returns the new list. There is a builtin
# function reverse which already does this.
#
sub rev
{
my(@arr) = @_;
my($low, $hi) = (0, $#_);
while($low < $hi) {
my($temp) = $arr[$low];
$arr[$low] = $arr[$hi];
$arr[$hi] = $temp;
++$low;
--$hi;
}
return @arr;
}
# Some repeated runduancy.
my $argle = redun(5, "smith ");
print "$argle\n";
$argle = redun(2, "verily ");
print "$argle\n\n";
# All to one. (Parens are optional in calls.)
$argle = glueem "Lets ", "see ", "if ", "this ", "works.";
print "$argle\n";
$argle = glueem ("Seems", " to.");
print "$argle\n\n";
# Can you read me?
my $fn = "f1.txt";
my ($ok, $text) = readall($fn);
if($ok) {
print "File $fn is:\n$text\n";
} else {
print "File $fn open failed: $text.\n";
}
$fn = "bogus.txt";
($ok, $text) = readall($fn);
if($ok) {
print "File $fn is:\n$text\n\n";
} else {
print "File $fn open failed: $text.\n";
}
# Suffering some reversals?
my @mike = ("How", "are", "you?");
my @sam = rev @mike;
print "@sam\n";
my $bill = glueem rev("this?", "see ", "you ", "Do ");
print "$bill\n";
# And, it makes a handy swap function, too.
$argle = 10;
my $bargle = 72;
($argle, $bargle) = rev($argle, $bargle);
print "$argle $bargle\n";
4. Using Arguments II
#
# Some more random examples of
# subroutines, using different features.
#
use strict;
#
# Some folks like to use shift to get the parmeters.
sub sent
{
my $subj = shift @_;
my $verb = shift @_;
my $adj = shift @_;
my $obj = shift @_;
print uc(substr($subj,0,1)), substr($subj,1), " $verb the $adj $obj.\n";
}
#
# The @_ array is special, because changing the parameters in it changes
# the arguments.
my $snakebreath; # We can refer to this in changeme.
sub changeme
{
my $first = shift @_;
$first = 'this'; # Does not change in caller.
$_[0] = 'that'; # Does change in caller.
$snakebreath = 77; # Plain global ref.
}
sent('alex', 'stole', 'red', 'wagon');
sent('susan', 'ignored', 'awful', 'pun');
$snakebreath = 99;
my ($x, $y) = ('today', 'tomorrow');
print "\n$snakebreath $x $y\n";
changeme($x, $y);
print "$snakebreath $x $y\n";
5. Using Arguments III
#
# One more approach is to use the array/hash conversion rules to
# build keyword parameters, with defaults.
#
use strict;
# Print a string one or more times under all sorts of controls.
sub barko {
# Check for correct pairing.
@_ % 2 == 0 or
die "barko: Odd number of arguments.\n";
# Store the parms, with defaults.
my %parms = ( 'string' => 'snake', # String to print
'between' => '', # Place between chars.
'repeat' => 1, # Repeat this many times.
'cascade' => 0, # Move each line right this much more.
'blankafter' => 1, # Extra blank line afterwards.
@_);
# Now %parms is a list of keyword => value pairs as sent, using
# defaults for keys not sent.
# Add the between to the string.
my $str = substr($parms{'string'}, 1);
$str =~ s/(.)/$parms{'between'}$1/g;
$str = substr($parms{'string'}, 0, 1) . $str;
# Printin' time!
my $preamt = 0;
for(my $n = $parms{'repeat'}; $n--; ) {
print ((' ' x $preamt), "$str\n");
$preamt += $parms{'cascade'};
}
print "\n" if $parms{'blankafter'};
}
# Call with various options. These can be sent in any order.
barko;
barko(repeat => 3, string => 'BOZON', cascade => 1);
barko(between => ' ');
barko(between => '<->', repeat => 5);->
barko(string => '** done **', blankafter => 0);
6. Count Blank Lines I
#
# This reads from the standard input and counts the number
# lines which are blank, and lines which are entirely perl-style
# comments (begin with # as the first non-blank character.) It reports
# these figures.
#
use strict;
# Counters to return.
my $nblank = 0;
my $ncomm = 0;
# Read each line into the variable $line.
while(my $line =
if($line =~ /^\s*$/) { ++$nblank; }
if($line =~ /^\s*\#/) { ++$ncomm; }
}
7. Count Blank Lines II
#
# This reads each file on the command line, and for each counts the
# lines which are blank, and lines which are entirely perl-style
# comments (begin with # as the first non-blank character.) It reports
# the figures for each one, and the totals.
#
use strict;
#
# Take the file name and return either (numblank, comment) or (-1, err) if
# the file could not be opened.
sub count {
my ($fn) = @_;
# Open the file; return any possible failure.
open(IN, $fn) or return (-1, $!);
# Counters to return.
my $nblank = 0;
my $ncomm = 0;
# Read each line into the variable $line.
while(my $line =
if($line =~ /^\s*$/) { ++$nblank; }
if($line =~ /^\s*\#/) { ++$ncomm; }
}
close IN;
return ($nblank, $ncomm);
}
# Keep this for later.
my $nfiles = @ARGV;
# Keep totals
my $totbl = 0;
my $totcom = 0;
# Run through the command line args and process each one.
while(my $fn = shift @ARGV) {
# Print the filename with padding to 20 positions. Printf works
# pretty much as in C, but there's less use for it in perl. I'm using
# it here to get the blank padding inserted by %20s.
printf("%-20s", "$fn:");
# Process the file, print the results.
my ($blcount, $commcount) = count($fn);
if($blcount < 0) {
print "Not readable.\n";
} else {
print "$blcount blank lines, $commcount comments.\n";
$totbl += $blcount;
$totcom += $commcount;
}
}
# Print toals, if there was more than one file. The x operator makes
# repeated copies of a string, so " " x 11 prints eleven spaces.
if($nfiles > 1) {
print "* TOTAL *", " " x 11, "$totbl blank lines, $totcom comments.\n";
}
8. Check C Programs
#
# This script scans a list of C programs and
# tries to find if statements which do assignments.
# Specifically, it looks for any line which contains
# text of the form if(....), where if is the first
# non-blank thing on the line, and the (....) contains
# an = which is not part of ==, !=, <=, or >=.
# Each such line is listed with the file name and the
# line number. This is not a perfect check, but it
# will do a decent job.
#
#
# This function analyzes one line to see if it it
# is suspicious. If so, it prints it. It also updates
# the global flag $badfound, and prints an extra message
# the first time a bad line is found. It should be
# called as
# check_line (filename, line)
# where the filename is used only when a message is
# generated. The $. line number is used also.
#
use strict;
my $badfound = 0;
sub check_line {
my($fn, $line) = @_;
# Check for that =.
if($line =~ /^\s*if\s*\(.*[^!<>=]=([^=].*\)|\))/) {
if(!$badfound) {
print("The following suspicious lines were found:\n");
$badfound = 1;
}
print "$fn:$.: $line\n";
}
}
#
# This function opens and reads one file, and calls
# check_line to analyze each line. Call it with the
# file name.
#
sub check_file {
my($fn) = @_;
if(!open(IN, $fn)) {
print "Cannot read $fn.\n";
return;
}
my($line);
while($line =
{
chomp $line;
check_line($fn,$line);
}
close IN;
}
#
# Go through the argument list and check each file
#
while(my $fn = shift @ARGV) {
check_file($fn);
}
if(!$badfound) { print "No suspicious lines were found.\n"; }
9. Applet Viewer
#! /usr/bin/perl
use strict;
my $width = 400;
my $height = 200;
$#ARGV > -1 or die "Usage: $0 classname [ width [ height ] ]\n";
if($#ARGV >= 1) { $width = $ARGV[1]; }
if($#ARGV >= 2) { $height = $ARGV[2]; }
my $clname = $ARGV[0];
if($clname !~ /\.class$/) { $clname = "$clname.class"; }
my $loc = $clname;
open(TMPHTML, ">.tmp.html") or die "Can't get temp file: $!";
print TMPHTML "\n
print TMPHTML "\n
$clname
\n";print TMPHTML
"
print TMPHTML "
close TMPHTML;
system "appletviewer .tmp.html";
unlink ".tmp.html";
10. UID Lookup
#
# This uses the Unix /etc/passwd file to map from a
# userid to a numerical uid. It accpets a list of
# userids on the command line, and maps each one.
#
use strict;
# The location of the password file.
my $pwd = "/etc/passwd";
#
# This function maps the indicated userid,
# and prints out the result. The file
# itself is already opened with the global
# handle PWD
#
sub getuid {
my ($userid) = @_;
# Rewind the file, and read until
# the userid is found.
my($line);
seek PWD, 0, 0;
while($line =
# Split the line to get the fields we're interested in.
my($puserid, $ppwd, $puid) = split(/:/, $line);
# If we found it, print and return.
if($puserid eq $userid) {
print "UID for $userid is $puid.\n";
return;
}
}
# If we got here, it didn't work.
print "No such user $userid.\n";
}
# Open the passwd file and scan through
# the argument list.
open (PWD, $pwd) or die "Cannot open $pwd: $!.\n";
while(my $userid = shift @ARGV) {
getuid($userid);
}
close PWD;
11. Link Finder I
#
# Takes a list of file names and lists the URLS they link by A tags.
# May have trouble with anchors which are not all on the same line.
#
use strict;
#
# Scan a file and print all the URL's it links to.
sub scan {
my ($fn) = @_;
open(IN, $fn) or return 0;
# Go through each line in the file.
while(
# Repeatedly match URLs in the line. Each one is removed by
# replacing it with the empty string. The loop body will execute
# once for each match/replace, and prints the URL part of the
# matched text.
while(s/<\s*A\s+[^>]*HREF\s*\=\s*"([^"]+)"//i) {
print " $1\n";
}
}
close IN;
return 1;
}
#
# Process each command-line argument as a file.
while(my $fn = shift @ARGV) {
print "$fn:\n";
if(!scan($fn)) { print "[Open $fn failed: $!]\n"; }
}
12. Link Finder II
#
# This is similar to links1.pl, but it takes a list of absolute URLs, downloads
# their contents, and attempts to print the URLs in absolute form. Still will
# have trouble with anchors which are not all on the same line.
#
# This won't work directly on Windows because it runs lynx to fetch the
# pages.
#
use strict;
#
# Scan a URL and print all the URL's it links to.
sub scan {
my ($url) = @_;
# URLs that denote directories are s'posed end with a /. We really
# need to talk to the server and read the response headers, but this
# sort of guess usually works.
if($url !~ m|/$| && $url !~ m|\.htm(l?)$|) {
$url .= '/';
}
# Do surgery on the url to extract certain parts.
# Find the protocol://hostname part.
$url =~ m@^([a-zA-Z]+\://[a-zA-Z\.]+)(/|$)@;
my $prothost = $1;
# Find the URL with the last component removed.
my $stem = $url;
$stem =~ s|/[^/]*$|/|;
# This form of open runs the command and pipes its output to the
# perl program through the file handle. Reads from IN will return
# a line of output from the lynx command. This form the lynx command
# simply fetchs the contents of a URL and prints its text to standard
# output. The -useragent option keeps remote sites from getting rid
# of non-text-friendly code (which some do when you are using lynx),
# and the 2>/dev/null is Unix shell notation which discards error messages.
open(IN, "lynx -useragent=fred -source $url 2>/dev/null|") or return 0;
# Go through each line in the file.
while(
# Repeatedly match URLs in the line. Each one is removed by
# replacing it with the empty string. The loop body will execute
# once for each match/replace, and prints the URL part of the
# matched text.
while(s/<\s*A\s+[^>]*HREF\s*\=\s*"([^"]+)"//i) {
my $ref = $1;
# Make the reference absolute.
if($ref =~ m|^[a-zA-Z]*\://|) {
# Already absolute.
} elsif($ref =~ m|^/|) {
# Relative to host.
$ref = "$prothost$ref";
} else {
# Relative to page location.
$ref = "$stem$ref";
}
print " $ref\n";
}
}
close IN;
return 1;
}
#
# Process each command-line argument as a file.
while(my $fn = shift @ARGV) {
print "$fn:\n";
if(!scan($fn)) { print "[Download $fn failed: $!]\n"; }
}
13. Link Finder III
#
# This is a version of links2 which uses the $/ variable to change the
# unit which reading reads. The $/ variable may contain any string, and
# it is used as the line separator. We can set it to '<' to (usually)
# avoid breaking an HTML tag. This will fail when
#
# This won't work directly on Windows because it runs lynx to fetch the
# pages.
#
use strict;
#
# Scan a URL and print all the URL's it links to.
sub scan {
my ($url) = @_;
# URLs that denote directories are s'posed end with a /. We really
# need to talk to the server and read the response headers, but this
# sort of guess usually works.
if($url !~ m|/$| && $url !~ m|\.htm(l?)$|) {
$url .= '/';
}
# Do surgery on the url to extract certain parts.
# Find the protocol://hostname part.
$url =~ m@^([a-zA-Z]+\://[a-zA-Z\.]+)(/|$)@;
my $prothost = $1;
# Find the URL with the last component removed.
my $stem = $url;
$stem =~ s|/[^/]*$|/|;
# This form of open runs the command and pipes its output to the
# perl program through the file handle. Reads from IN will return
# a line of output from the lynx command. This form the lynx command
# simply fetchs the contents of a URL and prints its text to standard
# output. The -useragent option keeps remote sites from getting rid
# of non-text-friendly code (which some do when you are using lynx),
# and the 2>/dev/null is Unix shell notation which discards error messages.
open(IN, "lynx -useragent=fred -source $url 2>/dev/null|") or return 0;
# The $/ variable determines what character separates lines. We'll set
# to '>' since we're more interested in tags than lines. We save the old
# value of $/ and restore it later. Good practice, though it doesn't
# actually matter in this program.
my $oldsl = $/;
$/ = '>';
# Go through each unit in the file. Note that each unit may contain
# multiple lines, but cannot contain more than one HTML tag. It may
# split a tag if a > occurs inside a tag, which can happen only if the
# > is inside quotes.
my $ln;
while($ln =
# Throw away through the first < and spaces.
$ln =~ s/^[^\<]*\<\s*//;
# On the off chance that someone put a > in quotes, check for
# balance. The horrid pattern checks for a string containing an
# odd number of quotes. If we find one, we simply read another
# unit, tack in on, and use perl redo to repeat the loop w/o
# running the test (and clobbering $ln). However, if you say
# while(my $ln =
# of $ln, because it gets reallocated.
if($ln =~ /^[^"]*("[^"]*")*[^"]*"[^"]*$/) {
my $more =
$ln .= $more;
redo;
}
# Extract the URL from the A tag. This pattern is different
# from the others since it also deals with > in quotes. (Now,
# how often will that happen?)
$ln =~ s/^A\s+([^>]|"[^"]*")*HREF\s*\=\s*"([^"]+)"//i or next;
my $ref = $2;
# Make the reference absolute.
if($ref =~ m|^[a-zA-Z]*\://|) {
# Already absolute.
} elsif($ref =~ m|^/|) {
# Relative to host.
$ref = "$prothost$ref";
} else {
# Relative to page location.
$ref = "$stem$ref";
}
print " $ref\n";
}
close IN;
$/ = $oldsl;
return 1;
}
#
# Process each command-line argument as a file.
while(my $fn = shift @ARGV) {
print "$fn:\n";
if(!scan($fn)) { print "[Download $fn failed: $!]\n"; }
}
14. Log Reader
#!/usr/bin/perl
#
# Takes the WWW name of a file and reports downloads recorded in the main
# Apache log file.
#
# Needed for the inet_aton function, a format conversion.
use Socket;
$one_client = 1; # Report client only the first time.
$client_name = 1; # Look up host names.
$infile = "/etc/httpd/logs/access_log"; # Read this log file
$prefix = 0; # Match any page with the indicated prefix.
$one_ref = 0; # Report each matched page only the first time.
$substring = 0; # Target is a substring.
$exten = ""; # Extension.
while($arg = shift @ARGV) {
if($arg !~ /^-/) { break; }
if($arg eq "-a") { $one_client = 0; }
elsif($arg eq "-n") { $client_name = 0; }
elsif($arg eq "-p") { $prefix = 1; }
elsif($arg eq "-r") { $one_ref = 1; }
elsif($arg eq "-s") { $substring = 1; }
elsif($arg eq "-t") {
($exten = shift @ARGV) && ($exten !~ /^-/) or die "No extension.\n";
$exten =~ s/^\.//;
}
elsif($arg eq "-i") {
($infile = shift @ARGV) && ($infile !~ /^-/) or die "No filename.\n";
}
elsif($arg =~ /^-/) { die "Bad flag $arg\n"; }
else { last; }
}
# Get the file target, strip trailing /.
$target = $arg;
$target =~ s|/$||;
if($substring) {
$target = ".*$target.*";
}
elsif($prefix) {
$target = "$target.*";
}
if($exten) {
$target .= "\\.$exten";
}
# Open the file.
open(IN, $infile) or die "Cannot read $infile: $!.\n";
# Read the log file entries.
my $line;
while($line =
chomp $line;
# Attempt to match the line, and get the parts we want.
if($line =~ m|^([0-9\.A-Za-z]+).*\[([0-9]+)/([A-Za-z]+)/([0-9]+)\:([0-9]+)\:([0-9]+)\:([0-9]+).*\]\s\[\[GET\s($target)(/?)\sHTTP|) {
# Extract the fields.
my ($client, $day, $mon, $year, $hh, $mm, $ss, $targ) =
($1, $2, $3, $4, $5, $6, $7, $8);
$hh =~ s/^0//;
# Eliminate duplicate client or URL.
if($one_client && exists $clients{$client}) { next; }
if($one_ref) {
if(exists $refed{$target}) { next; }
$refed{$target} = 1;
}
# Fix the AM/PM.
my $apm = "AM";
if($hh == 0) {
$hh = 12;
} elsif($hh == 12) {
$apm = "PM";
} elsif($hh > 12) {
$apm = "PM";
$hh -= 12;
}
# Map the client IP address, if required. Also caches results from
# previous attempts, since it tends to be slow.
my ($hname);
if(exists $clients{$client}) {
$hname = $clients{$client};
} else {
if($client_name) {
$hname = gethostbyaddr(inet_aton($client), AF_INET);
$clients{$client} = $hname;
} else {
$clients{$client} = undef;
}
}
if(defined $hname) {
$client = "$hname ($client)";
}
$outline = "$targ at $hh:$mm $apm $mon $day, $year to $client";
if(length $outline > 79) {
$outline =~ s/^(.{1,78}\S)(\s.*)$/$1\n\t$2/;
}
print "$outline\n";
}
}
15. HTML File Display
#
# This translates a file, such a program or other text file, so that
# it can be displayed literally in HTML. It brackets the code in
# the
andtags, expands tabs, and translates the characters
# which HTML treats as special so they will be displayed literally.
#
use strict;
#
# Expand tabs at 8 stops per tab.
#
sub expand {
my ($line) = @_;
my ($left, $right); # Parens needed so my applies to both.
while($line =~ /\t/) {
($left, $right) = split (/\t/, $line, 2);
my($tabamt) = 8 - length($left) % 8;
$line = $left . (" " x $tabamt) . $right;
}
return $line;
}
print "
\n";
# Copy with changes.
while(my $line =
chomp $line;
$line = expand($line);
$line =~ s/&/&/g;
$line =~ s/</</g;
$line =~ s/>/>/g;
$line =~ s/"/"/g;
print "$line\n";
}
print "\n";
16. Text to HTML Translator
#
# Translate a simple langauge into HTML.
# Blank lines become
# _stuff_ becomes stuff
# @stuff@ becomes stuff
# {stuff} becomes stuff
# [stuff] becomes stuff
# [[stuff]] becomes
header.
# [:stuff:] becomes
header and centered.
# A line of 3 or more --- becomes an
.
# A >>> at the start of a line becomes
# Other stuff is retained, with & < > " being translated into
# approprite stuff. The \ escapes substitutions.
# Generally, \ is removed. Use \\ to add a single one.
#
use strict;
# Set of plain replacements. Order is important.
my @plain = ( '\&', '&',
'^\s*\>\>\>\s*', ' ',
'\<', '<', '\>', '>', '\"', '"',
'\[\[', '
', '\]\]', '
','\[\:', '
', '\:\]', '
'\{', '', '\}', '',
'\[', '', '\]', '',
'^\s*$', '',
'^\-\-\-+$', '
' );
# Set of replacements which toggle. Replacement will be the open
# or closing version of the tag.
my @toggle = ( '\_', 'u', '\@', 'tt' );
my %toggle_status = ( '\_' => '', '\@' => '' );
# Prefix.
print '', "\n";
# Read the input.
while(defined(my $line =
chomp $line;
# Do the simple replacements.
my @rs = @plain;
while(my $r = shift @rs) {
my $rt = shift @rs;
$line =~ s/(^|[^\\])$r/$1$rt/g;
}
# Work through the toggles.
@rs = @toggle;
while(my $r = shift @rs) {
my $rt = shift @rs;
# Accumulate the new line here.
my $newline = '';
# Take each left-most match for $r. The *? construct
# is a * that matches as few as possible.
while($line =~ /^(.*?(^|[^\\]))$r(.*)$/) {
# Move the left part to newline and add the
# replacement, then continue with the right part.
$newline .= "$1<$toggle_status{$r}$rt>";
$line = $3;
# Toggle the status of the symbol.
$toggle_status{$r} = $toggle_status{$r} ? '' : '/';
}
$line = $newline . $line;
}
# Reduce escapes.
$line =~ s/\\(.)/$1/g;
print "$line\n";
}
print "\n";
17. HTML Table Generator
#
# This generates a simple HTML table from a text file. The file begins
# with zero or more lines with set options of the form option = value.
# The first line after the options contains just spaces and | which
# defines the extent of each column.
#
use strict;
#
# Expand tabs at 8 stops per tab.
#
sub expand {
my ($line) = @_;
my ($left, $right);
while($line =~ /\t/) {
($left, $right) = split (/\t/, $line, 2);
my($tabamt) = 8 - length($left) % 8;
$line = $left . (" " x $tabamt) . $right;
}
return $line;
}
#
# Legal options and default values.
#
my %options = ( color => '#bb7777',
title => '',
colsep => 3 );
#
# Read the options line, then return the first non-option line.
#
sub read_opts {
my($line);
while($line =
if($line !~ /=/) { return $line; }
# Read and extract the option.
my($name, $value) = split(/\s*=\s*/, $line, 2);
$name =~ s/^\s*//;
$value =~ s/\s*$//;
# Check it
if(! exists $options{$name}) {
print STDERR "Illegal option $name.\n";
exit 10;
}
# Set it.
$options{$name} = $value;
}
}
#
# Take the format line, and break it into an array of patterns to
# extract the data for a column.
sub first {
# Get the first line, make sure it starts with |.
my($first) = @_;
chomp $first;
$first = expand($first);
$first =~ s/^ /|/;
my(@pats) = ();
# Take successive leading |.... groups off the string.
while($first =~ /^\|/) {
$first =~ s/^(\|[^|]*)//;
my($flength) = length($1);
push @pats, ".{0,$flength}";
}
# Replace the last one to permit it to take the rest of the line.
pop @pats;
push @pats, ".*";
return @pats;
}
# One-character translations made on the data.
my %trans = ( '\[' => '', '\]' => '',
'\{' => '', '\}' => '' );
#
# Scan the input, formatting each input line as an HTML
# table, breaking them into columns according to the line
# breaking format array sent as argument.
sub scan {
while(my $line =
chomp $line;
$line = expand($line);
print "
my $sep = "";
foreach my $colpat (@_) {
print $sep;
$line =~ s/^($colpat)//;
my $colcont = $1;
$colcont =~ s/\s*$//;
foreach my $ch(keys %trans) {
$colcont =~ s/(^|[^\\])$ch/$1$trans{$ch}/g;
$colcont =~ s/\\($ch)/$1/g;
}
$colcont =~ s/\\(.)/./g;
print "
if($line =~ /^\s*$/) { last; }
$sep = qq|
}
print "\n";
}
}
#
# Read the column def line, generate the header, copy the data,
# and close the table.
my $topline = read_opts;
my @pats = first($topline);
print "\n";
if($options{"title"} ne "") {
print qq|
}
print qq|\n|;
print "
if($options{"title"} ne "") {
print qq|
$options{"title"}
scan(@pats);
print <
EOF
18. File Reader I
use strict;
#
# This script takes a file name, opens the file, and prints the
# contents.
#
if($#ARGV != 0) {
print STDERR "You must specify exactly one argument.\n";
exit 4;
}
# Open the file.
open(INFILE, $ARGV[0]) or die "Cannot open $ARGV[0]: $!.\n";
while(my $l =
print $l;
}
close INFILE;
use strict;
#
# This script takes a list of file names, and opens and prints each one.
#
while(my $fn = shift @ARGV) {
# Open the file.
if(!open(INFILE, $fn)) {
print STDERR "Cannot open $fn: $!\n";
next;
}
# Copy it.
while(my $l =
print $l;
}
close INFILE;
}
19. File Reader III
use strict;
#
# This script also prints the contents of all the listed files, but
# it first scans through the list to check that each file exists and
# is readable. It will stop if there are any errors.
#
my $bad = 0;
foreach my $fn (@ARGV) {
if(! -r $fn) {
# File cannot be read. See if it exists or not for a better
# error message.
if(-e $fn) {
print STDERR "You do not have permission to read $fn.\n";
} else {
print STDERR "File $fn does not exist.\n";
}
# One way or the other, it's bad.
$bad = 1;
}
}
# If there was a problem, bail out.
if($bad) { exit 2; }
# Copy all the files.
while(my $fn = shift @ARGV) {
# Open the file.
if(!open(INFILE, $fn)) {
# We know the file is readable, but sometimes something else goes
# wrong. It's safer to check.
print STDERR "Cannot open $fn: $!\n";
next;
}
# Copy it.
while(my $l =
print $l;
}
close INFILE;
}
# See the man page for the very long list of file tests, some of which are
# Unix-specific. To find it from perl documentation, choose the
# standard documentation pack, supporting manpages, perlfunc, then
# choose -X (first in the alphabetical list).
20. File Copier
use strict;
#
# This script takes two file names, and copies the first one to the second.
#
if($#ARGV != 1) {
print STDERR "You must specify exactly two arguments.\n";
exit 4;
}
# If the output exists, confirm. This uses two standard string functions,
# substring and lower-case.
if( -e $ARGV[1]) {
print "Do you really want to overwrite $ARGV[1]? ";
my $resp =
chomp $resp;
if(lc(substr($resp, 0, 1)) ne 'y') { exit 0; }
}
# Open the files.
open(INFILE, $ARGV[0]) or die "Cannot open $ARGV[0]: $!.\n";
open(OUTFILE, ">$ARGV[1]") or die "Cannot write $ARGV[1]: $!.\n";
while(my $l =
print OUTFILE $l;
}
close INFILE;
close OUTFILE;



0 comments
Post a Comment