#!/usr/bin/env python
# -*- coding: latin-1 -*-
#
# Time-stamp: <2004-04-25 15:01:58 Graham.Williams togaware.com>
#
#  COPYRIGHT
# Togaware 2004 All rights are reserved.
#
# Authors: Graham Williams
#

"""Check that href links exist, and if not print a warning.

The home and documentation elements will have href attributes.  Each
of these is checked and if a connection can not be made an error is
reported.

The "ctan:" links are ignored for now.

"""

########################################################################
# IMPORTS
#

import re
import sys
import os
import shutil
import urllib2

from tcutils import commify, list_packages, loadXML

########################################################################
# SYSTEM VARIABLES
#
# Version details are carved out of CVS keyword strings, which CVS
# rewrites on every commit.  Each keyword expands to
# "$Name: value ... $", so whitespace-splitting puts the value(s) at
# index 1 onwards.
__version__ = "$Revision: 1.2 $".split()[1]
__verdate__ = "$Date: 2004/04/25 05:06:42 $".split()[1:3]
__source__ = (
    "$Source: /cvsroot/texcatalogue/texcatalogue/src/checklinks.py,v $"
    .split()[1]
)
# Program name: the RCS file name with its ".py,v" suffix removed.
__program__ = re.sub(
    '^.RCSfile: (.*).py,v .',
    "\\1",
    "$RCSfile: checklinks.py,v $",
)

########################################################################
#
# READ AND PROCESS
#

def link_okay(href):
    """Return True if href can be opened with urllib2, False otherwise.

    Any exception raised while opening the URL is printed and treated
    as a broken link.

    The response is closed as soon as the open succeeds: only
    reachability matters here, and leaving it open would leak one
    connection (or file handle) per checked link.
    """
    try:
        site = urllib2.urlopen(href)
    except Exception as e:
        # Deliberately broad: whatever went wrong, the link is unusable.
        print(e)
        return False
    site.close()
    return True

def check_links(packages):
    progress = ""
    for p in packages:
        #
        # Progress meter!
        #
        if p[0] <> progress:
            print p[0],
            sys.stdout.flush()
            progress = p[0]
        # xmlfname = path + "/" + p[0] + "/" + p + ".xml"
        # print p
        doc = loadXML(p)
        if not doc: continue
        #
        # Modifications - USE XML
        #------------------------------------------------------------
        for t in ['home', 'documentation']:
            nodes = doc.getElementsByTagName(t)
            for n in nodes:
                href = n.getAttribute('href')
                protocol = href.split(":")[0]
                if protocol <> 'ctan' and not link_okay(href):
                    print "Warning: %s.%s not found: %s" %\
                          (p, t, href)
        #------------------------------------------------------------
        # Finished
        #
            
###################################################################
# MAIN PROGRAM
#
def _usage():
    """Write this module's docstring to standard output."""
    print(__doc__)

def _main():
    """Fetch the package list, print its size, then check every link.

    The packages come from list_packages(); the entry count is
    formatted with commify() before being printed, and the list is then
    handed to check_links().
    """
    packages = list_packages()
    entry_count = commify(len(packages))
    print("The catalogue contains %s entries." % entry_count)
    # For a quick manual run, substitute a short list here, e.g.
    # ['a2ac', 'a0poster', 'a4', 'a2ping'].
    check_links(packages)

########################################################################
#
# INTERACTIVE
#
# Test to see if we are running as a main program, rather than being
# imported as a library. If it's a main program, then let's do some
# work. Otherwise let's quietly proceed.
#
if __name__ == "__main__":
    # Executed as a script (not imported): run the link check.
    _main()
