summary refs log blame commit diff stats
path: root/plugins/checksum/checksum.c
blob: a4cadc19e255a13fc894845cc12fd12b25859c71 (plain) (tree)
1
2
3
4
5
6
7
8
9
          
                                        
  





                                                                                
  

                                                                             
  






                                                                                



                   
                   

                      
                     
                        
                 
 
            


                          
                    







                           
                           


                                                                                                                             
 
                                                                                                                   


                                                                 
 
                                                                 










                                                                                    
     
















                                                                  
      




















                                                 
                              
















                                                              
                      
 
                                 
 
                                           
         
                                                                     
                 
                                                                                                              

                    
                 
                                                                                 
                 
         
            
         
                                                        
         

 

            
 
                                                            
 
                                          
         


                                     
         
                            



           
              
 
                                                                                   





                                         
                                                                                                                                  
                                                                                                                                 


                         

                                                                                          
                                                                             


            
                                          
         
 
                                       

                                                                                                                                       
                                                                                  
                 
                                                                                                                                                         
                                                                                        

                                                                                                    

                    
                 

                                                                                                                                                                                 
                 

            
         
                                                            

         
                       
                                





                                          
                                                                                                                                  

                                                                                                                                 
                                         

                                                                                                                                       
                                                                                  

                                                                                                                                  
                                                                                                                               

                    
                 

                                                                                                                                                                  
                 

            
         
                                                            

         
                                

 

                                                         
 
                                                 
         

                               
                                                      
         


                                    
         
                                                                  

                                                                                                            
         
 
                                


   
                                                                                                                             


                           


                                  
 
                                                     
                                                           
         
                                                                        
         
 


                                                                                                         
 
                                                        



                 
                            
 
                                                          

                 
/* HexChat
 * Copyright (c) 2010-2012 Berke Viktor.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <openssl/sha.h>
#include <glib.h>

#ifdef WIN32
#ifndef snprintf
#define snprintf _snprintf
#endif
#define stat _stat64
#else
/* for INT_MAX */
#include <limits.h>
#define __USE_LARGEFILE64
#define _LARGEFILE_SOURCE
#define _LARGEFILE64_SOURCE
#endif

#include "hexchat-plugin.h"

#define BUFSIZE 32768
#define DEFAULT_LIMIT 256									/* default size is 256 MiB */

static hexchat_plugin *ph;									/* plugin handle */
static char name[] = "Checksum";
static char desc[] = "Calculate checksum for DCC file transfers";
static char version[] = "3.1";

/* Use of OpenSSL SHA256 interface: http://adamlamers.com/?p=5 */
static void
sha256_hash_string (unsigned char hash[SHA256_DIGEST_LENGTH], char outputBuffer[65])
{
	int i;
	for (i = 0; i < SHA256_DIGEST_LENGTH; i++)
	{
		sprintf (outputBuffer + (i * 2), "%02x", hash[i]);
	}
	outputBuffer[64] = 0;
}

#if 0
static void
sha256 (char *string, char outputBuffer[65])
{
	int i;
	unsigned char hash[SHA256_DIGEST_LENGTH];
	SHA256_CTX sha256;

	SHA256_Init (&sha256);
	SHA256_Update (&sha256, string, strlen (string));
	SHA256_Final (hash, &sha256);

	for (i = 0; i < SHA256_DIGEST_LENGTH; i++)
	{
		sprintf (outputBuffer + (i * 2), "%02x", hash[i]);
	}
	outputBuffer[64] = 0;
}
#endif

static int
sha256_file (char *path, char outputBuffer[65])
{
	int bytesRead;
	unsigned char *buffer;
	unsigned char hash[SHA256_DIGEST_LENGTH];
	SHA256_CTX sha256;

	FILE *file = fopen (path, "rb");
	if (!file)
	{
		return -534;
	}

	SHA256_Init (&sha256);
	buffer = malloc (BUFSIZE);
	bytesRead = 0;

	if (!buffer)
	{
		fclose (file);
		return ENOMEM;
	}

	while ((bytesRead = fread (buffer, 1, BUFSIZE, file)))
	{
		SHA256_Update (&sha256, buffer, bytesRead);
	}

	SHA256_Final (hash, &sha256);
	sha256_hash_string (hash, outputBuffer);

	fclose (file);
	free (buffer);
	return 0;
}

static void
set_limit (char* size)
{
	int buffer = atoi (size);

	if (buffer > 0 && buffer < INT_MAX)
	{
		if (hexchat_pluginpref_set_int (ph, "limit", buffer))
		{
			hexchat_printf (ph, "File size limit has successfully been set to: %d MiB\n", buffer);
		}
		else
		{
			hexchat_printf (ph, "File access error while saving!\n");
		}
	}
	else
	{
		hexchat_printf (ph, "Invalid input!\n");
	}
}

static int
get_limit ()
{
	int size = hexchat_pluginpref_get_int (ph, "limit");

	if (size <= -1 || size >= INT_MAX)
	{
		return DEFAULT_LIMIT;
	}
	else
	{
		return size;
	}
}

static void
print_limit ()
{
	hexchat_printf (ph, "File size limit for checksums: %d MiB", get_limit ());
}

static int
dccrecv_cb (char *word[], void *userdata)
{
	int result;
	struct stat buffer;									/* buffer for storing file info */
	char sum[65];											/* buffer for checksum */
	const char *file;
	char *cfile;

	if (hexchat_get_prefs (ph, "dcc_completed_dir", &file, NULL) == 1 && file[0] != 0)
	{
		cfile = g_strconcat (file, G_DIR_SEPARATOR_S, word[1], NULL);
	}
	else
	{
		cfile = g_strdup(word[2]);
	}

	result = stat (cfile, &buffer);
	if (result == 0)										/* stat returns 0 on success */
	{
		if (buffer.st_size <= (unsigned long long) get_limit () * 1048576)
		{
			sha256_file (cfile, sum);						/* file is the full filename even if completed dir set */
			/* try to print the checksum in the privmsg tab of the sender */
			hexchat_set_context (ph, hexchat_find_context (ph, NULL, word[3]));
			hexchat_printf (ph, "SHA-256 checksum for %s (local):  %s\n", word[1], sum);
		}
		else
		{
			hexchat_set_context (ph, hexchat_find_context (ph, NULL, word[3]));
			hexchat_printf (ph, "SHA-256 checksum for %s (local):  (size limit reached, no checksum calculated, you can increase it with /CHECKSUM INC)\n", word[1]);
		}
	}
	else
	{
		hexchat_printf (ph, "File access error!\n");
	}

	g_free (cfile);
	return HEXCHAT_EAT_NONE;
}

static int
dccoffer_cb (char *word[], void *userdata)
{
	int result;
	struct stat buffer;									/* buffer for storing file info */
	char sum[65];											/* buffer for checksum */

	result = stat (word[3], &buffer);
	if (result == 0)										/* stat returns 0 on success */
	{
		if (buffer.st_size <= (unsigned long long) get_limit () * 1048576)
		{
			sha256_file (word[3], sum);						/* word[3] is the full filename */
			hexchat_commandf (ph, "quote PRIVMSG %s :SHA-256 checksum for %s (remote): %s", word[2], word[1], sum);
		}
		else
		{
			hexchat_set_context (ph, hexchat_find_context (ph, NULL, word[3]));
			hexchat_printf (ph, "quote PRIVMSG %s :SHA-256 checksum for %s (remote): (size limit reached, no checksum calculated)", word[2], word[1]);
		}
	}
	else
	{
		hexchat_printf (ph, "File access error!\n");
	}

	return HEXCHAT_EAT_NONE;
}

static int
checksum (char *word[], char *word_eol[], void *userdata)
{
	if (!g_ascii_strcasecmp ("GET", word[2]))
	{
		print_limit ();
	}
	else if (!g_ascii_strcasecmp ("SET", word[2]))
	{
		set_limit (word[3]);
	}
	else
	{
		hexchat_printf (ph, "Usage: /CHECKSUM GET|SET\n");
		hexchat_printf (ph, "  GET - print the maximum file size (in MiB) to be hashed\n");
		hexchat_printf (ph, "  SET <filesize> - set the maximum file size (in MiB) to be hashed\n");
	}

	return HEXCHAT_EAT_NONE;
}

int
hexchat_plugin_init (hexchat_plugin *plugin_handle, char **plugin_name, char **plugin_desc, char **plugin_version, char *arg)
{
	ph = plugin_handle;

	*plugin_name = name;
	*plugin_desc = desc;
	*plugin_version = version;

	/* this is required for the very first run */
	if (hexchat_pluginpref_get_int (ph, "limit") == -1)
	{
		hexchat_pluginpref_set_int (ph, "limit", DEFAULT_LIMIT);
	}

	hexchat_hook_command (ph, "CHECKSUM", HEXCHAT_PRI_NORM, checksum, "Usage: /CHECKSUM GET|SET", 0);
	hexchat_hook_print (ph, "DCC RECV Complete", HEXCHAT_PRI_NORM, dccrecv_cb, NULL);
	hexchat_hook_print (ph, "DCC Offer", HEXCHAT_PRI_NORM, dccoffer_cb, NULL);

	hexchat_printf (ph, "%s plugin loaded\n", name);
	return 1;
}

int
hexchat_plugin_deinit (void)
{
	hexchat_printf (ph, "%s plugin unloaded\n", name);
	return 1;
}
none). --cachedir - directory for the item and directory cache files. --css - stylesheet URL --flush - flushes the item and directory caches. --[no]header - produce block header/footer (default is no headers). --help - prints this message. --hiddendirs - search hidden directories in podpath --htmldir - directory for resulting HTML files. --htmlroot - http-server base directory from which all relative paths in podpath stem (default is /). --[no]index - generate an index at the top of the resulting html (default behaviour). --infile - filename for the pod to convert (input taken from stdin by default). --libpods - colon-separated list of pages to search for =item pod directives in as targets of C<> and implicit links (empty by default). note, these are not filenames, but rather page names like those that appear in L<> links. --outfile - filename for the resulting html file (output sent to stdout by default). --podpath - colon-separated list of directories containing library pods (empty by default). --podroot - filesystem base directory from which all relative paths in podpath stem (default is .). --[no]quiet - suppress some benign warning messages (default is off). --[no]recurse - recurse on those subdirectories listed in podpath (default behaviour). --title - title that will appear in resulting html file. --[no]verbose - self-explanatory (off by default). --[no]netscape - deprecated, has no effect. for backwards compatibility only. END_OF_USAGE } sub parse_command_line { my ( $opt_backlink, $opt_cachedir, $opt_css, $opt_flush, $opt_header, $opt_help, $opt_htmldir, $opt_htmlroot, $opt_index, $opt_infile, $opt_libpods, $opt_netscape, $opt_outfile, $opt_podpath, $opt_podroot, $opt_quiet, $opt_recurse, $opt_title, $opt_verbose, $opt_hiddendirs ); unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html}; my $result = GetOptions( 'backlink=s' => \$opt_backlink, 'cachedir=s' => \$opt_cachedir, 'css=s' => \$opt_css, 'flush' => \$opt_flush, 'header!' => \$opt_header, 'help' => \$opt_help, 'hiddendirs!' => \$opt_hiddendirs, 'htmldir=s' => \$opt_htmldir, 'htmlroot=s' => \$opt_htmlroot, 'index!' => \$opt_index, 'infile=s' => \$opt_infile, 'libpods=s' => \$opt_libpods, 'netscape!' => \$opt_netscape, 'outfile=s' => \$opt_outfile, 'podpath=s' => \$opt_podpath, 'podroot=s' => \$opt_podroot, 'quiet!' => \$opt_quiet, 'recurse!' => \$opt_recurse, 'title=s' => \$opt_title, 'verbose!' => \$opt_verbose, ); usage( "-", "invalid parameters" ) if not $result; usage("-") if defined $opt_help; # see if the user asked for help $opt_help = ""; # just to make -w shut-up. @Podpath = split( ":", $opt_podpath ) if defined $opt_podpath; @Libpods = split( ":", $opt_libpods ) if defined $opt_libpods; $Backlink = $opt_backlink if defined $opt_backlink; $Cachedir = $opt_cachedir if defined $opt_cachedir; $Css = $opt_css if defined $opt_css; $Header = $opt_header if defined $opt_header; $Htmldir = $opt_htmldir if defined $opt_htmldir; $Htmlroot = $opt_htmlroot if defined $opt_htmlroot; $Doindex = $opt_index if defined $opt_index; $Podfile = $opt_infile if defined $opt_infile; $HiddenDirs = $opt_hiddendirs if defined $opt_hiddendirs; $Htmlfile = $opt_outfile if defined $opt_outfile; $Podroot = $opt_podroot if defined $opt_podroot; $Quiet = $opt_quiet if defined $opt_quiet; $Recurse = $opt_recurse if defined $opt_recurse; $Title = $opt_title if defined $opt_title; $Verbose = $opt_verbose if defined $opt_verbose; warn "Flushing item and directory caches\n" if $opt_verbose && defined $opt_flush; $Dircache = "$Cachedir/pod2htmd.tmp"; $Itemcache = "$Cachedir/pod2htmi.tmp"; if ( defined $opt_flush ) { 1 while unlink( $Dircache, $Itemcache ); } } my $Saved_Cache_Key; sub get_cache { my ( $dircache, $itemcache, $podpath, $podroot, $recurse ) = @_; my @cache_key_args = @_; # A first-level cache: # Don't bother reading the cache files if they still apply # and haven't changed since we last read them. my $this_cache_key = cache_key(@cache_key_args); return if $Saved_Cache_Key and $this_cache_key eq $Saved_Cache_Key; # load the cache of %Pages and %Items if possible. $tests will be # non-zero if successful. my $tests = 0; if ( -f $dircache && -f $itemcache ) { warn "scanning for item cache\n" if $Verbose; $tests = load_cache( $dircache, $itemcache, $podpath, $podroot ); } # if we didn't succeed in loading the cache then we must (re)build # %Pages and %Items. if ( !$tests ) { warn "scanning directories in pod-path\n" if $Verbose; scan_podpath( $podroot, $recurse, 0 ); } $Saved_Cache_Key = cache_key(@cache_key_args); } sub cache_key { my ( $dircache, $itemcache, $podpath, $podroot, $recurse ) = @_; return join( '!', $dircache, $itemcache, $recurse, @$podpath, $podroot, stat($dircache), stat($itemcache) ); } # # load_cache - tries to find if the caches stored in $dircache and $itemcache # are valid caches of %Pages and %Items. if they are valid then it loads # them and returns a non-zero value. # sub load_cache { my ( $dircache, $itemcache, $podpath, $podroot ) = @_; my ($tests); local $_; $tests = 0; open( CACHE, "<$itemcache" ) || die "$0: error opening $itemcache for reading: $!\n"; $/ = "\n"; # is it the same podpath? $_ = <CACHE>; chomp($_); $tests++ if ( join( ":", @$podpath ) eq $_ ); # is it the same podroot? $_ = <CACHE>; chomp($_); $tests++ if ( $podroot eq $_ ); # load the cache if its good if ( $tests != 2 ) { close(CACHE); return 0; } warn "loading item cache\n" if $Verbose; while (<CACHE>) { /(.*?) (.*)$/; $Items{$1} = $2; } close(CACHE); warn "scanning for directory cache\n" if $Verbose; open( CACHE, "<$dircache" ) || die "$0: error opening $dircache for reading: $!\n"; $/ = "\n"; $tests = 0; # is it the same podpath? $_ = <CACHE>; chomp($_); $tests++ if ( join( ":", @$podpath ) eq $_ ); # is it the same podroot? $_ = <CACHE>; chomp($_); $tests++ if ( $podroot eq $_ ); # load the cache if its good if ( $tests != 2 ) { close(CACHE); return 0; } warn "loading directory cache\n" if $Verbose; while (<CACHE>) { /(.*?) (.*)$/; $Pages{$1} = $2; } close(CACHE); return 1; } # # scan_podpath - scans the directories specified in @podpath for directories, # .pod files, and .pm files. it also scans the pod files specified in # @Libpods for =item directives. # sub scan_podpath { my ( $podroot, $recurse, $append ) = @_; my ( $pwd, $dir ); my ( $libpod, $dirname, $pod, @files, @poddata ); unless ($append) { %Items = (); %Pages = (); } # scan each directory listed in @Podpath $pwd = getcwd(); chdir($podroot) || die "$0: error changing to directory $podroot: $!\n"; foreach $dir (@Podpath) { scan_dir( $dir, $recurse ); } # scan the pods listed in @Libpods for =item directives foreach $libpod (@Libpods) { # if the page isn't defined then we won't know where to find it # on the system. next unless defined $Pages{$libpod} && $Pages{$libpod}; # if there is a directory then use the .pod and .pm files within it. # NOTE: Only finds the first so-named directory in the tree. # if ($Pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) { if ( $Pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/ ) { # find all the .pod and .pm files within the directory $dirname = $1; opendir( DIR, $dirname ) || die "$0: error opening directory $dirname: $!\n"; @files = grep( /(\.pod|\.pm)\z/ && !-d $_, readdir(DIR) ); closedir(DIR); # scan each .pod and .pm file for =item directives foreach $pod (@files) { open( POD, "<$dirname/$pod" ) || die "$0: error opening $dirname/$pod for input: $!\n"; @poddata = <POD>; close(POD); clean_data( \@poddata ); scan_items( \%Items, "$dirname/$pod", @poddata ); } # use the names of files as =item directives too. ### Don't think this should be done this way - confuses issues.(WL) ### foreach $pod (@files) { ### $pod =~ /^(.*)(\.pod|\.pm)$/; ### $Items{$1} = "$dirname/$1.html" if $1; ### } } elsif ( $Pages{$libpod} =~ /([^:]*\.pod):/ || $Pages{$libpod} =~ /([^:]*\.pm):/ ) { # scan the .pod or .pm file for =item directives $pod = $1; open( POD, "<$pod" ) || die "$0: error opening $pod for input: $!\n"; @poddata = <POD>; close(POD); clean_data( \@poddata ); scan_items( \%Items, "$pod", @poddata ); } else { warn "$0: shouldn't be here (line " . __LINE__ . "\n" unless $Quiet; } } @poddata = (); # clean-up a bit chdir($pwd) || die "$0: error changing to directory $pwd: $!\n"; # cache the item list for later use warn "caching items for later use\n" if $Verbose; open( CACHE, ">$Itemcache" ) || die "$0: error open $Itemcache for writing: $!\n"; print CACHE join( ":", @Podpath ) . "\n$podroot\n"; foreach my $key ( keys %Items ) { print CACHE "$key $Items{$key}\n"; } close(CACHE); # cache the directory list for later use warn "caching directories for later use\n" if $Verbose; open( CACHE, ">$Dircache" ) || die "$0: error open $Dircache for writing: $!\n"; print CACHE join( ":", @Podpath ) . "\n$podroot\n"; foreach my $key ( keys %Pages ) { print CACHE "$key $Pages{$key}\n"; } close(CACHE); } # # scan_dir - scans the directory specified in $dir for subdirectories, .pod # files, and .pm files. notes those that it finds. this information will # be used later in order to figure out where the pages specified in L<> # links are on the filesystem. # sub scan_dir { my ( $dir, $recurse ) = @_; my ( $t, @subdirs, @pods, $pod, $dirname, @dirs ); local $_; @subdirs = (); @pods = (); opendir( DIR, $dir ) || die "$0: error opening directory $dir: $!\n"; while ( defined( $_ = readdir(DIR) ) ) { if ( -d "$dir/$_" && $_ ne "." && $_ ne ".." && ( $HiddenDirs || !/^\./ ) ) { # directory $Pages{$_} = "" unless defined $Pages{$_}; $Pages{$_} .= "$dir/$_:"; push( @subdirs, $_ ); } elsif (/\.pod\z/) { # .pod s/\.pod\z//; $Pages{$_} = "" unless defined $Pages{$_}; $Pages{$_} .= "$dir/$_.pod:"; push( @pods, "$dir/$_.pod" ); } elsif (/\.html\z/) { # .html s/\.html\z//; $Pages{$_} = "" unless defined $Pages{$_}; $Pages{$_} .= "$dir/$_.pod:"; } elsif (/\.pm\z/) { # .pm s/\.pm\z//; $Pages{$_} = "" unless defined $Pages{$_}; $Pages{$_} .= "$dir/$_.pm:"; push( @pods, "$dir/$_.pm" ); } elsif ( -T "$dir/$_" ) { # script(?) local *F; if ( open( F, "$dir/$_" ) ) { my $line; while ( defined( $line = <F> ) ) { if ( $line =~ /^=(?:pod|head1)/ ) { $Pages{$_} = "" unless defined $Pages{$_}; $Pages{$_} .= "$dir/$_.pod:"; last; } } close(F); } } } closedir(DIR); # recurse on the subdirectories if necessary if ($recurse) { foreach my $subdir (@subdirs) { scan_dir( "$dir/$subdir", $recurse ); } } } # # scan_headings - scan a pod file for head[1-6] tags, note the tags, and # build an index. # sub scan_headings { my ( $sections, @data ) = @_; my ( $tag, $which_head, $otitle, $listdepth, $index ); local $Ignore = 0; $listdepth = 0; $index = ""; # scan for =head directives, note their name, and build an index # pointing to each of them. foreach my $line (@data) { if ( $line =~ /^=(head)([1-6])\s+(.*)/ ) { ( $tag, $which_head, $otitle ) = ( $1, $2, $3 ); my $title = depod($otitle); my $name = anchorify($title); $$sections{$name} = 1; $title = process_text( \$otitle ); while ( $which_head != $listdepth ) { if ( $which_head > $listdepth ) { $index .= "\n" . ( "\t" x ($listdepth) ) . ( $listdepth > 0 ? qq{<li class="branch">\n} . "\t"x($listdepth + 1): "" ) . "<ul>"; $listdepth++; } elsif ( $which_head < $listdepth ) { $listdepth--; $index .= "\n" . ( "\t" x $listdepth ) . ( $listdepth > 0 ? "\t" : "" ) . "</ul>" . ( $listdepth >= 0 ? "\n" . ("\t"x$listdepth) . "</li>" : "" ) . "\n"; } } $index .= "\n" . ( "\t" x $listdepth ) . "<li>" . "<a href=\"#" . $name . "\">" . $title . "</a></li>"; } } # finish off the lists while ( $listdepth-- ) { $index .= "\n" . ( "\t" x $listdepth ) . ($listdepth > 0 ? "\t" : "") ."</ul>\n" . ($listdepth > 0 ? ("\t" x $listdepth) . "</li>" : "" ); } # get rid of bogus lists $index =~ s,\t*<ul>\s*</ul>\n,,g; return $index; } # # scan_items - scans the pod specified by $pod for =item directives. we # will use this information later on in resolving C<> links. # sub scan_items { my ( $itemref, $pod, @poddata ) = @_; my ( $i, $item ); local $_; $pod =~ s/\.pod\z//; $pod .= ".html" if $pod; foreach $i ( 0 .. $#poddata ) { my $txt = depod( $poddata[$i] ); # figure out what kind of item it is. # Build string for referencing this item. if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet next unless $1; $item = $1; } elsif ( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list $item = $1; } elsif ( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item $item = $1; } else { next; } my $fid = fragment_id($item); $$itemref{$fid} = "$pod" if $fid; } } # # process_head - convert a pod head[1-6] tag and convert it to HTML format. # sub process_head { my ( $tag, $heading, $hasindex ) = @_; # figure out the level of the =head $tag =~ /head([1-6])/; my $level = $1; if ($Listlevel) { warn "$0: $Podfile: unterminated list at =head in paragraph $Paragraph. ignoring.\n" unless $Quiet; while ($Listlevel) { process_back(); } } print HTML "<p>\n"; if ( $level == 1 && !$Top ) { print HTML "<a href=\"#__index__\"><small>$Backlink</small></a>\n" if $hasindex and $Backlink; print HTML "</p>\n<hr />\n"; } else { print HTML "</p>\n"; } my $name = anchorify( depod($heading) ); my $convert = process_text( \$heading ); $convert =~ s{</?a[^>]+>}{}g; print HTML "<h$level><a name=\"$name\" />$convert</h$level>\n"; } # # emit_item_tag - print an =item's text # Note: The global $EmittedItem is used for inhibiting self-references. # my $EmittedItem; sub emit_item_tag($$$) { my ( $otext, $text, $compact ) = @_; my $item = fragment_id( depod($text), -generate ); Carp::confess( "Undefined fragment '$text' (" . depod($text) . ") from fragment_id() in emit_item_tag() in $Podfile" ) if !defined $item; $EmittedItem = $item; ### print STDERR "emit_item_tag=$item ($text)\n"; print HTML '<strong>'; if ( $Items_Named{$item}++ ) { print HTML process_text( \$otext ); } else { my $name = $item; $name = anchorify($name); print HTML #qq{<a name="$name" class="item">}, process_text( \$otext ), # '</a>' ; } print HTML "</strong>\n"; undef($EmittedItem); } sub emit_li { my ($tag) = @_; if ( $Items_Seen[$Listlevel]++ == 0 ) { push( @Listend, "</$tag>" ); print HTML "<$tag>\n"; } my $emitted = $tag eq 'dl' ? 'dt' : 'li'; print HTML "<$emitted>"; return $emitted; } # # process_item - convert a pod item tag and convert it to HTML format. # sub process_item { my ($otext) = @_; my $need_dd = 0; # set to 1 if we need a <dd></dd> after an item # lots of documents start a list without doing an =over. this is # bad! but, the proper thing to do seems to be to just assume # they did do an =over. so warn them once and then continue. if ( $Listlevel == 0 ) { warn "$0: $Podfile: unexpected =item directive in paragraph $Paragraph. ignoring.\n" unless $Quiet; process_over(); } # formatting: insert a paragraph if preceding item has >1 paragraph if ($After_Lpar) { print HTML $need_dd ? "</dd>\n" : "</li>\n" if $After_Lpar; $After_Lpar = 0; } # remove formatting instructions from the text my $text = depod($otext); my $emitted; # the tag actually emitted, used for closing # all the list variants: if ( $text =~ /\A\*/ ) { # bullet $emitted = emit_li('ul'); if ( $text =~ /\A\*\s+(\S.*)\Z/s ) { # with additional text my $tag = $1; $otext =~ s/\A\*\s+//; emit_item_tag( $otext, $tag, 1 ); } print HTML "</li>" } elsif ( $text =~ /\A\d+/ ) { # numbered list $emitted = emit_li('ol'); if ( $text =~ /\A(?>\d+\.?)\s*(\S.*)\Z/s ) { # with additional text my $tag = $1; $otext =~ s/\A\d+\.?\s*//; emit_item_tag( $otext, $tag, 1 ); } print HTML "</li>"; } else { # definition list $emitted = emit_li('dl'); if ( $text =~ /\A(.+)\Z/s ) { # should have text emit_item_tag( $otext, $text, 1 ); } $need_dd = 1; } print HTML "\n"; return $need_dd; } # # process_over - process a pod over tag and start a corresponding HTML list. # sub process_over { # start a new list $Listlevel++; push( @Items_Seen, 0 ); $After_Lpar = 0; } # # process_back - process a pod back tag and convert it to HTML format. # sub process_back { my $need_dd = shift; if ( $Listlevel == 0 ) { warn "$0: $Podfile: unexpected =back directive in paragraph $Paragraph. ignoring.\n" unless $Quiet; return; } # close off the list. note, I check to see if $Listend[$Listlevel] is # defined because an =item directive may have never appeared and thus # $Listend[$Listlevel] may have never been initialized. $Listlevel--; if ( defined $Listend[$Listlevel] ) { print HTML $need_dd ? "</dd>\n" : "</li>\n" if $After_Lpar; print HTML $Listend[$Listlevel]; print HTML "\n"; pop(@Listend); } $After_Lpar = 0; # clean up item count pop(@Items_Seen); } # # process_cut - process a pod cut tag, thus start ignoring pod directives. # sub process_cut { $Ignore = 1; } # # process_pod - process a pod tag, thus stop ignoring pod directives # until we see a corresponding cut. # sub process_pod { # no need to set $Ignore to 0 cause the main loop did it } # # process_for - process a =for pod tag. if it's for html, spit # it out verbatim, if illustration, center it, otherwise ignore it. # sub process_for { my ( $whom, $text ) = @_; if ( $whom =~ /^(pod2)?html$/i ) { print HTML $text; } elsif ( $whom =~ /^illustration$/i ) { 1 while chomp $text; for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) { $text .= $ext, last if -r "$text$ext"; } print HTML qq{<p align="center"><img src="$text" alt="$text illustration" /></p>}; } } # # process_begin - process a =begin pod tag. this pushes # whom we're beginning on the begin stack. if there's a # begin stack, we only print if it us. # sub process_begin { my ( $whom, $text ) = @_; $whom = lc($whom); push( @Begin_Stack, $whom ); if ( $whom =~ /^(pod2)?html$/ ) { print HTML $text if $text; } } # # process_end - process a =end pod tag. pop the # begin stack. die if we're mismatched. # sub process_end { my ( $whom, $text ) = @_; $whom = lc($whom); if ( !defined $Begin_Stack[-1] or $Begin_Stack[-1] ne $whom ) { Carp::confess( "Unmatched begin/end at chunk $Paragraph in pod $Podfile\n"); } pop(@Begin_Stack); } # # process_pre - indented paragraph, made into <pre></pre> # sub process_pre { my ($text) = @_; my ($rest); return if $Ignore; $rest = $$text; # insert spaces in place of tabs $rest =~ s#(.+)# my $line = $1; 1 while $line =~ s/(\t+)/' ' x ((length($1) * 8) - $-[0] % 8)/e; $line; #eg; # convert some special chars to HTML escapes $rest = html_escape($rest); # try and create links for all occurrences of perl.* within # the preformatted text. $rest =~ s{ (\s*)(perl\w+) }{ if ( defined $Pages{$2} ){ # is a link qq($1<a href="$Htmlroot/$Pages{$2}">$2</a>); } elsif (defined $Pages{dosify($2)}) { # is a link qq($1<a href="$Htmlroot/$Pages{dosify($2)}">$2</a>); } else { "$1$2"; } }xeg; $rest =~ s{ (<a\ href="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)? }{ my $url ; if ( $Htmlfileurl ne '' ){ # Here, we take advantage of the knowledge # that $Htmlfileurl ne '' implies $Htmlroot eq ''. # Since $Htmlroot eq '', we need to prepend $Htmldir # on the fron of the link to get the absolute path # of the link's target. We check for a leading '/' # to avoid corrupting links that are #, file:, etc. my $old_url = $3 ; $old_url = "$Htmldir$old_url" if $old_url =~ m{^\/}; $url = relativize_url( "$old_url.html", $Htmlfileurl ); } else { $url = "$3.html" ; } "$1$url" ; }xeg; # Look for embedded URLs and make them into links. We don't # relativize them since they are best left as the author intended. my $urls = '(' . join( '|', qw{ http telnet mailto news gopher file wais ftp } ) . ')'; my $ltrs = '\w'; my $gunk = '/#~:.?+=&%@!\-'; my $punc = '.:!?\-;'; my $any = "${ltrs}${gunk}${punc}"; $rest =~ s{ \b # start at word boundary ( # begin $1 { $urls : # need resource and a colon (?!:) # Ignore File::, among others. [$any] +? # followed by one or more of any valid # character, but be conservative and # take only what you need to.... ) # end $1 } (?= &quot; &gt; # maybe pre-quoted '<a href="...">' | # or: [$punc]* # 0 or more punctuation (?: # followed [^$any] # by a non-url char | # or $ # end of the string ) # | # or else $ # then end of the string ) }{<a href="$1">$1</a>}igox; # text should be as it is (verbatim) $$text = $rest; } # # pure text processing # # pure_text/inIS_text: differ with respect to automatic C<> recognition. # we don't want this to happen within IS # sub pure_text($) { my $text = shift(); process_puretext( $text, 1 ); } sub inIS_text($) { my $text = shift(); process_puretext( $text, 0 ); } # # process_puretext - process pure text (without pod-escapes) converting # double-quotes and handling implicit C<> links. # sub process_puretext { my ( $text, $notinIS ) = @_; ## Guessing at func() or [\$\@%&]*var references in plain text is destined ## to produce some strange looking ref's. uncomment to disable: ## $notinIS = 0; my ( @words, $lead, $trail ); # keep track of leading and trailing white-space $lead = ( $text =~ s/\A(\s+)//s ? $1 : "" ); $trail = ( $text =~ s/(\s+)\Z//s ? $1 : "" ); # split at space/non-space boundaries @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text ); # process each word individually foreach my $word (@words) { # skip space runs next if $word =~ /^\s*$/; # see if we can infer a link or a function call # # NOTE: This is a word based search, it won't automatically # mark "substr($var, 1, 2)" because the 1st word would be "substr($var" # User has to enclose those with proper C<> if ( $notinIS && $word =~ m/ ^([a-z_]{2,}) # The function name \( ([0-9][a-z]* # Manual page(1) or page(1M) |[^)]*[\$\@\%][^)]+ # ($foo), (1, @foo), (%hash) | # () ) \) ([.,;]?)$ # a possible punctuation follows /xi ) { # has parenthesis so should have been a C<> ref ## try for a pagename (perlXXX(1))? my ( $func, $args, $rest ) = ( $1, $2, $3 || '' ); if ( $args =~ /^\d+$/ ) { my $url = page_sect( $word, '' ); if ( defined $url ) { $word = qq(<a href="$url" class="man">the $word manpage</a>$rest); next; } } ## try function name for a link, append tt'ed argument list $word = emit_C( $func, '', "($args)" ) . $rest; #### disabled. either all (including $\W, $\w+{.*} etc.) or nothing. ## } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) { ## # perl variables, should be a C<> ref ## $word = emit_C( $word ); } elsif ( $word =~ m,^\w+://\w, ) { # looks like a URL # Don't relativize it: leave it as the author intended $word = qq(<a href="$word">$word</a>); } elsif ( $word =~ /[\w.-]+\@[\w-]+\.\w/ ) { # looks like an e-mail address my ( $w1, $w2, $w3 ) = ( "", $word, "" ); ( $w1, $w2, $w3 ) = ( "(", $1, ")$2" ) if $word =~ /^\((.*?)\)(,?)/; ( $w1, $w2, $w3 ) = ( "&lt;", $1, "&gt;$2" ) if $word =~ /^<(.*?)>(,?)/; $word = qq($w1<a href="mailto:$w2">$w2</a>$w3); } else { $word = html_escape($word) if $word =~ /["&<>]/; } } # put everything back together return $lead . join( '', @words ) . $trail; } # # process_text - handles plaintext that appears in the input pod file. # there may be pod commands embedded within the text so those must be # converted to html commands. # sub process_text1($$;$$); sub pattern ($) { $_[0] ? '\s+' . ( '>' x ( $_[0] + 1 ) ) : '>' } sub closing ($) { local ($_) = shift; ( defined && s/\s+\z// ) ? length : 0 } sub process_text { return if $Ignore; my ($tref) = @_; my $res = process_text1( 0, $tref ); $res =~ s/\s+$//s; $$tref = $res; } sub process_text_rfc_links { my $text = shift; # For every "RFCnnnn" or "RFC nnn", link it to the authoritative # ource. Do not use the /i modifier here. Require "RFC" to be written in # in capital letters. $text =~ s{ (?<=[^<>[:alpha:]]) # Make sure this is not an URL already (RFC\s*([0-9]{1,5}))(?![0-9]) # max 5 digits } {<a href="http://www.ietf.org/rfc/rfc$2.txt" class="rfc">$1</a>}gx; $text; } sub process_text1($$;$$) { my ( $lev, $rstr, $func, $closing ) = @_; my $res = ''; unless ( defined $func ) { $func = ''; $lev++; } if ( $func eq 'B' ) { # B<text> - boldface $res = '<strong>' . process_text1( $lev, $rstr ) . '</strong>'; } elsif ( $func eq 'C' ) { # C<code> - can be a ref or <code></code> # need to extract text my $par = go_ahead( $rstr, 'C', $closing ); ## clean-up of the link target my $text = depod($par); ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ; ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n"; $res = emit_C( $text, $lev > 1 || ( $par =~ /[BI]</ ) ); } elsif ( $func eq 'E' ) { # E<x> - convert to character $$rstr =~ s/^([^>]*)>//; my $escape = $1; $escape =~ s/^(\d+|X[\dA-F]+)$/#$1/i; $res = "&$escape;"; } elsif ( $func eq 'F' ) { # F<filename> - italicize $res = '<em class="file">' . process_text1( $lev, $rstr ) . '</em>'; } elsif ( $func eq 'I' ) { # I<text> - italicize $res = '<em>' . process_text1( $lev, $rstr ) . '</em>'; } elsif ( $func eq 'L' ) { # L<link> - link ## L<text|cross-ref> => produce text, use cross-ref for linking ## L<cross-ref> => make text from cross-ref ## need to extract text my $par = go_ahead( $rstr, 'L', $closing ); # some L<>'s that shouldn't be: # a) full-blown URL's are emitted as-is if ( $par =~ m{^\w+://}s ) { return make_URL_href($par); } # b) C<...> is stripped and treated as C<> if ( $par =~ /^C<(.*)>$/ ) { my $text = depod($1); return emit_C( $text, $lev > 1 || ( $par =~ /[BI]</ ) ); } # analyze the contents $par =~ s/\n/ /g; # undo word-wrapped tags my $opar = $par; my $linktext; if ( $par =~ s{^([^|]+)\|}{} ) { $linktext = $1; } if( $par =~ m{^\w+://}s ) { return make_URL_href( $par, $linktext ); } # make sure sections start with a / $par =~ s{^"}{/"}; my ( $page, $section, $ident ); # check for link patterns if ( $par =~ m{^([^/]+?)/(?!")(.*?)$} ) { # name/ident # we've got a name/ident (no quotes) if ( length $2 ) { ( $page, $ident ) = ( $1, $2 ); } else { ( $page, $section ) = ( $1, $2 ); } ### print STDERR "--> L<$par> to page $page, ident $ident\n"; } elsif ( $par =~ m{^(.*?)/"?(.*?)"?$} ) { # [name]/"section" # even though this should be a "section", we go for ident first ( $page, $ident ) = ( $1, $2 ); ### print STDERR "--> L<$par> to page $page, section $section\n"; } elsif ( $par =~ /\s/ ) { # this must be a section with missing quotes ( $page, $section ) = ( '', $par ); ### print STDERR "--> L<$par> to void page, section $section\n"; } else { ( $page, $section ) = ( $par, '' ); ### print STDERR "--> L<$par> to page $par, void section\n"; } # now, either $section or $ident is defined. the convoluted logic # below tries to resolve L<> according to what the user specified. # failing this, we try to find the next best thing... my ( $url, $ltext, $fid ); RESOLVE: { if ( defined $ident ) { ## try to resolve $ident as an item ( $url, $fid ) = coderef( $page, $ident ); if ($url) { if ( !defined($linktext) ) { $linktext = $ident; $linktext .= " in " if $ident && $page; $linktext .= "the $page manpage" if $page; } ### print STDERR "got coderef url=$url\n"; last RESOLVE; } ## no luck: go for a section (auto-quoting!) $section = $ident; } ## now go for a section my $htmlsection = htmlify($section); $url = page_sect( $page, $htmlsection ); if ($url) { if ( !defined($linktext) ) { $linktext = $section; $linktext .= " in " if $section && $page; $linktext .= "the $page manpage" if $page; } ### print STDERR "got page/section url=$url\n"; last RESOLVE; } ## no luck: go for an ident if ($section) { $ident = $section; } else { $ident = $page; $page = undef(); } ( $url, $fid ) = coderef( $page, $ident ); if ($url) { if ( !defined($linktext) ) { $linktext = $ident; $linktext .= " in " if $ident && $page; $linktext .= "the $page manpage" if $page; } ### print STDERR "got section=>coderef url=$url\n"; last RESOLVE; } # warning; show some text. $linktext = $opar unless defined $linktext; warn "$0: $Podfile: cannot resolve L<$opar> in paragraph $Paragraph.\n" unless $Quiet; } # now we have a URL or just plain code $$rstr = $linktext . '>' . $$rstr; if ( defined($url) ) { $res = "<a href=\"$url\">" . process_text1( $lev, $rstr ) . '</a>'; } else { $res = '<em>' . process_text1( $lev, $rstr ) . '</em>'; } } elsif ( $func eq 'S' ) { # S<text> - non-breaking spaces $res = process_text1( $lev, $rstr ); $res =~ s/ /&nbsp;/g; } elsif ( $func eq 'X' ) { # X<> - ignore warn "$0: $Podfile: invalid X<> in paragraph $Paragraph.\n" unless $$rstr =~ s/^[^>]*>// or $Quiet; } elsif ( $func eq 'Z' ) { # Z<> - empty warn "$0: $Podfile: invalid Z<> in paragraph $Paragraph.\n" unless $$rstr =~ s/^>// or $Quiet; } else { my $term = pattern $closing; while ( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ) { # all others: either recurse into new function or # terminate at closing angle bracket(s) my $pt = $1; $pt .= $2 if !$3 && $lev == 1; $res .= $lev == 1 ? pure_text($pt) : inIS_text($pt); return $res if !$3 && $lev > 1; if ($3) { $res .= process_text1( $lev, $rstr, $3, closing $4 ); } } if ( $lev == 1 ) { $res .= pure_text($$rstr); } elsif ( !$Quiet ) { my $snippet = substr( $$rstr, 0, 60 ); warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph: '$snippet'.\n" } $res = process_text_rfc_links($res); } return $res; } # # go_ahead: extract text of an IS (can be nested) # sub go_ahead($$$) { my ( $rstr, $func, $closing ) = @_; my $res = ''; my @closing = ($closing); while ( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+\s+)?|@{[pattern $closing[0]]})//s ) { $res .= $1; unless ($3) { shift @closing; return $res unless @closing; } else { unshift @closing, closing $4; } $res .= $2; } unless ($Quiet) { my $snippet = substr( $$rstr, 0, 60 ); warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph (go_ahead): '$snippet'.\n"; } return $res; } # # emit_C - output result of C<text> # $text is the depod-ed text # sub emit_C($;$$) { my ( $text, $nocode, $args ) = @_; $args = '' unless defined $args; my $res; my ( $url, $fid ) = coderef( undef(), $text ); # need HTML-safe text my $linktext = html_escape("$text$args"); if ( $text !~ /^[\$@%]/ && defined($url) && ( !defined($EmittedItem) || $EmittedItem ne $fid ) ) { $res = "<a href=\"$url\"><code>$linktext</code></a>"; } elsif ( 0 && $nocode ) { $res = $linktext; } else { $res = "<code>$linktext</code>"; } return $res; } # # html_escape: make text safe for HTML # sub html_escape { my $rest = $_[0]; $rest =~ s/&/&amp;/g; $rest =~ s/</&lt;/g; $rest =~ s/>/&gt;/g; $rest =~ s/"/&quot;/g; # &apos; is only in XHTML, not HTML4. Be conservative #$rest =~ s/'/&apos;/g; return $rest; } # # dosify - convert filenames to 8.3 # sub dosify { my ($str) = @_; return lc($str) if $^O eq 'VMS'; # VMS just needs casing if ($Is83) { $str = lc $str; $str =~ s/(\.\w+)/substr ($1,0,4)/ge; $str =~ s/(\w+)/substr ($1,0,8)/ge; } return $str; } # # page_sect - make a URL from the text of a L<> # sub page_sect($$) { my ( $page, $section ) = @_; my ( $linktext, $page83, $link ); # work strings # check if we know that this is a section in this page if ( !defined $Pages{$page} && defined $Sections{$page} ) { $section = $page; $page = ""; ### print STDERR "reset page='', section=$section\n"; } $page83 = dosify($page); $page = $page83 if ( defined $Pages{$page83} ); if ( $page eq "" ) { $link = "#" . anchorify($section); } elsif ( $page =~ /::/ ) { $page =~ s,::,/,g; # Search page cache for an entry keyed under the html page name, # then look to see what directory that page might be in. NOTE: # this will only find one page. A better solution might be to produce # an intermediate page that is an index to all such pages. my $page_name = $page; $page_name =~ s,^.*/,,s; if ( defined( $Pages{$page_name} ) && $Pages{$page_name} =~ /([^:]*$page)\.(?:pod|pm):/ ) { $page = $1; } else { # NOTE: This branch assumes that all A::B pages are located in # $Htmlroot/A/B.html . This is often incorrect, since they are # often in $Htmlroot/lib/A/B.html or such like. Perhaps we could # analyze the contents of %Pages and figure out where any # cousins of A::B are, then assume that. So, if A::B isn't found, # but A::C is found in lib/A/C.pm, then A::B is assumed to be in # lib/A/B.pm. This is also limited, but it's an improvement. # Maybe a hints file so that the links point to the correct places # nonetheless? } $link = "$Htmlroot/$page.html"; $link .= "#" . anchorify($section) if ($section); } elsif ( !defined $Pages{$page} ) { $link = ""; } else { $section = anchorify($section) if $section ne ""; ### print STDERR "...section=$section\n"; # if there is a directory by the name of the page, then assume that an # appropriate section will exist in the subdirectory # if ($section ne "" && $Pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) { if ( $section ne "" && $Pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/ ) { $link = "$Htmlroot/$1/$section.html"; ### print STDERR "...link=$link\n"; # since there is no directory by the name of the page, the section will # have to exist within a .html of the same name. thus, make sure there # is a .pod or .pm that might become that .html } else { $section = "#$section" if $section; ### print STDERR "...section=$section\n"; # check if there is a .pod with the page name. # for L<Foo>, Foo.(pod|pm) is preferred to A/Foo.(pod|pm) if ( $Pages{$page} =~ /([^:]*)\.(?:pod|pm):/ ) { $link = "$Htmlroot/$1.html$section"; } else { $link = ""; } } } if ($link) { # Here, we take advantage of the knowledge that $Htmlfileurl ne '' # implies $Htmlroot eq ''. This means that the link in question # needs a prefix of $Htmldir if it begins with '/'. The test for # the initial '/' is done to avoid '#'-only links, and to allow # for other kinds of links, like file:, ftp:, etc. my $url; if ( $Htmlfileurl ne '' ) { $link = "$Htmldir$link" if $link =~ m{^/}s; $url = relativize_url( $link, $Htmlfileurl ); # print( " b: [$link,$Htmlfileurl,$url]\n" ); } else { $url = $link; } return $url; } else { return undef(); } } # # relativize_url - convert an absolute URL to one relative to a base URL. # Assumes both end in a filename. # sub relativize_url { my ( $dest, $source ) = @_; my ( $dest_volume, $dest_directory, $dest_file ) = File::Spec::Unix->splitpath($dest); $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ); my ( $source_volume, $source_directory, $source_file ) = File::Spec::Unix->splitpath($source); $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ); my $rel_path = ''; if ( $dest ne '' ) { $rel_path = File::Spec::Unix->abs2rel( $dest, $source ); } if ( $rel_path ne '' && substr( $rel_path, -1 ) ne '/' && substr( $dest_file, 0, 1 ) ne '#' ) { $rel_path .= "/$dest_file"; } else { $rel_path .= "$dest_file"; } return $rel_path; } # # coderef - make URL from the text of a C<> # sub coderef($$) { my ( $page, $item ) = @_; my ($url); my $fid = fragment_id($item); if ( defined($page) && $page ne "" ) { # we have been given a $page... $page =~ s{::}{/}g; Carp::confess( "Undefined fragment '$item' from fragment_id() in coderef() in $Podfile" ) if !defined $fid; # Do we take it? Item could be a section! my $base = $Items{$fid} || ""; $base =~ s{[^/]*/}{}; if ( $base ne "$page.html" ) { ### print STDERR "coderef( $page, $item ): items{$fid} = $Items{$fid} = $base => discard page!\n"; $page = undef(); } } else { # no page - local items precede cached items if ( defined($fid) ) { if ( exists $Local_Items{$fid} ) { $page = $Local_Items{$fid}; } else { $page = $Items{$fid}; } } } # if there was a pod file that we found earlier with an appropriate # =item directive, then create a link to that page. if ( defined $page ) { if ($page) { if ( exists $Pages{$page} and $Pages{$page} =~ /([^:.]*)\.[^:]*:/ ) { $page = $1 . '.html'; } my $link = "$Htmlroot/$page#" . anchorify($fid); # Here, we take advantage of the knowledge that $Htmlfileurl # ne '' implies $Htmlroot eq ''. if ( $Htmlfileurl ne '' ) { $link = "$Htmldir$link"; $url = relativize_url( $link, $Htmlfileurl ); } else { $url = $link; } } else { $url = "#" . anchorify($fid); } confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/; } return ( $url, $fid ); } # # Adapted from Nick Ing-Simmons' PodToHtml package. sub relative_url { my $source_file = shift; my $destination_file = shift; my $source = URI::file->new_abs($source_file); my $uo = URI::file->new( $destination_file, $source )->abs; return $uo->rel->as_string; } # # finish_list - finish off any pending HTML lists. this should be called # after the entire pod file has been read and converted. # sub finish_list { while ( $Listlevel > 0 ) { print HTML "</dl>\n"; $Listlevel--; } } # # htmlify - converts a pod section specification to a suitable section # specification for HTML. Note that we keep spaces and special characters # except ", ? (Netscape problem) and the hyphen (writer's problem...). # sub htmlify { my ($heading) = @_; $heading =~ s/(\s+)/ /g; $heading =~ s/\s+\Z//; $heading =~ s/\A\s+//; # The hyphen is a disgrace to the English language. # $heading =~ s/[-"?]//g; $heading =~ s/["?]//g; $heading = lc($heading); return $heading; } # # similar to htmlify, but turns non-alphanumerics into underscores # sub anchorify { my ($anchor) = @_; $anchor =~ s/\([^)]*\)//; $anchor = htmlify($anchor); $anchor =~ s/\W/_/g; $anchor =~ tr/_/_/s; return $anchor; } # # depod - convert text by eliminating all interior sequences # Note: can be called with copy or modify semantics # my %E2c; $E2c{lt} = '<'; $E2c{gt} = '>'; $E2c{sol} = '/'; $E2c{verbar} = '|'; $E2c{amp} = '&'; # in Tk's pods sub depod1($;$$); sub depod($) { my $string; if ( ref( $_[0] ) ) { $string = ${ $_[0] }; ${ $_[0] } = depod1( \$string ); } else { $string = $_[0]; depod1( \$string ); } } sub depod1($;$$) { my ( $rstr, $func, $closing ) = @_; my $res = ''; return $res unless defined $$rstr; if ( !defined($func) ) { # skip to next begin of an interior sequence while ( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?//s ) { # recurse into its text $res .= $1 . depod1( $rstr, $2, closing $3); } $res .= $$rstr; } elsif ( $func eq 'E' ) { # E<x> - convert to character $$rstr =~ s/^([^>]*)>//; $res .= $E2c{$1} || ""; } elsif ( $func eq 'X' ) { # X<> - ignore $$rstr =~ s/^[^>]*>//; } elsif ( $func eq 'Z' ) { # Z<> - empty $$rstr =~ s/^>//; } else { # all others: either recurse into new function or # terminate at closing angle bracket my $term = pattern $closing; while ( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ) { $res .= $1; last unless $3; $res .= depod1( $rstr, $3, closing $4 ); } ## If we're here and $2 ne '>': undelimited interior sequence. ## Ignored, as this is called without proper indication of where we are. ## Rely on process_text to produce diagnostics. } return $res; } { my %seen; # static fragment record hash sub fragment_id_readable { my $text = shift; my $generate = shift; # optional flag my $orig = $text; # leave the words for the fragment identifier, # change everything else to underbars. $text =~ s/[^A-Za-z0-9_]+/_/g; # do not use \W to avoid locale dependency. $text =~ s/_{2,}/_/g; $text =~ s/\A_//; $text =~ s/_\Z//; unless ($text) { # Nothing left after removing punctuation, so leave it as is # E.g. if option is named: "=item -#" $text = $orig; } if ($generate) { if ( exists $seen{$text} ) { # This already exists, make it unique $seen{$text}++; $text = $text . $seen{$text}; } else { $seen{$text} = 1; # first time seen this fragment } } $text; } } my @HC; sub fragment_id_obfuscated { # This was the old "_2d_2d__" my $text = shift; my $generate = shift; # optional flag # text? Normalize by obfuscating the fragment id to make it unique $text =~ s/\s+/_/sg; $text =~ s{(\W)}{ defined( $HC[ord($1)] ) ? $HC[ord($1)] : ( $HC[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe; $text = substr( $text, 0, 50 ); $text; } # # fragment_id - construct a fragment identifier from: # a) =item text # b) contents of C<...> # sub fragment_id { my $text = shift; my $generate = shift; # optional flag $text =~ s/\s+\Z//s; if ($text) { # a method or function? return $1 if $text =~ /(\w+)\s*\(/; return $1 if $text =~ /->\s*(\w+)\s*\(?/; # a variable name? return $1 if $text =~ /^([\$\@%*]\S+)/; # some pattern matching operator? return $1 if $text =~ m|^(\w+/).*/\w*$|; # fancy stuff... like "do { }" return $1 if $text =~ m|^(\w+)\s*{.*}$|; # honour the perlfunc manpage: func [PAR[,[ ]PAR]...] # and some funnies with ... Module ... return $1 if $text =~ m{^([a-z\d_]+)(\s+[A-Z,/& ][A-Z\d,/& ]*)?$}; return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$}; return fragment_id_readable( $text, $generate ); } else { return; } } # # make_URL_href - generate HTML href from URL # Special treatment for CGI queries. # sub make_URL_href($;$) { my ($url) = shift; my $linktext = shift || $url; if ( $url !~ s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<a href="$1$2">$1</a>}i ) { $url = "<a href=\"$url\">$linktext</a>"; } return $url; } 1;