#!/usr/local/bin/perl # # $Id: favicon.cgi,v 0.6 2009/03/08 10:26:40 dankogai Exp dankogai $ # use strict; use warnings; use CGI; use CGI::Carp qw/fatalsToBrowser/; use DB_File; use Fcntl; use HTML::Parser; use LWP::UserAgent; # Config parameters -- customize those our $dbfile = '/home/www/api/.favicon/favicon.db'; our $ttl = 86400; # agent name our $VERSION = sprintf "%d.%02d", q$Revision: 0.6 $ =~ /(\d+)/g; our $agent = $0; $agent =~ s,.*/,,o; $agent .= "/$VERSION"; no warnings 'uninitialized'; my $q = CGI->new; my $uri = $ENV{PATH_INFO}; #die $uri; $uri =~ s,^/,,o or die 'invalid uri: ', $q->escapeHTML($uri); $uri =~ s,^(https?:)/+,$1//,o or die 'invalid scheme: ', $q->escapeHTML($uri); $uri .= '?' . $ENV{QUERY_STRING} if $ENV{QUERY_STRING}; my ($favicon_uri, $lastmod) = get_favicon($uri, $dbfile, $ttl); if ($favicon_uri){ print $q->redirect(-uri => $favicon_uri, -status => 301, -x_last_checked => scalar localtime $lastmod); }else{ print $q->header(-status => 404, -x_last_checked => scalar localtime $lastmod); } sub get_favicon{ my $uri = URI->new(shift); my ($dbfile, $ttl) = @_; my ($furi, $lastmod) = get_cache($uri, $dbfile); # 0th attempt; via db return ($furi, $lastmod) if $lastmod and time() - $lastmod < $ttl; # 1st attempt; via my $ua = LWP::UserAgent->new( timeout => 15, keep_alive => 4, agent => $agent); # just check first 4096 bytes; my $req = HTTP::Request->new(GET => $uri); $req->headers->header(Range => "bytes=0-4095"); # warn $req->as_string; my $res = $ua->request($req); # warn $res->status_line; return set_cache($uri => '', $dbfile) unless $res->is_success; if ($res->header('Content-Type') =~ m,text/(?:x|ht)ml,io){ my $start_h = sub{ my ($self, $tagname, $attr) = @_; return unless $tagname eq 'link'; return unless $attr->{rel} =~ /\A(shortcut )?icon\z/i; $furi = URI->new($attr->{href})->abs($uri); $self->eof; }; my $hp = HTML::Parser->new(start_h => [ $start_h => "self,tagname,attr" ]); $hp->parse($res->content); return set_cache($uri => $furi, $dbfile) if $furi; } # 2nd attempt; top-level; $furi = $uri->clone; $furi->path_query("/favicon.ico"); $res = $ua->head($furi); return set_cache($uri => $furi, $dbfile) if($res->is_success); # sorry, no favicon return set_cache($uri => '', $dbfile); } sub get_cache($$){ my ($uri, $dbfile) = @_; tie my %favicon_of, 'DB_File', $dbfile, O_RDONLY|O_NONBLOCK|O_SHLOCK, 0444, $DB_HASH or return; my ($furi, $lastmod) = split /\t/, $favicon_of{$uri}; untie %favicon_of; return ($furi, $lastmod); } sub set_cache($$$){ my ($uri, $furi, $dbfile) = @_; tie my %favicon_of, 'DB_File', $dbfile, O_CREAT|O_RDWR|O_EXLOCK, 0666, $DB_HASH or die "$dbfile:$!"; my $lastmod = time(); $favicon_of{$uri} = $furi . "\t" . $lastmod; untie %favicon_of; return ($furi, $lastmod); }