#!/bin/bash

# author: H Lally Singh
# this is just a skeletal script
# it does not contain error checking (which you are expected to do)
# and the extra credit options

# Path of the document to check, taken from the first command-line argument.
# Read by the functions below when they print their verdict.
FILENAME="${1}"

#
# Processing
#

# Tests whether a line is the closing tag of a given element.
#   $1 - the line to examine
#   $2 - the element name
# Returns 0 (shell "true") when $1 is exactly "</$2>", 1 otherwise.
sametag() {
        [[ "$1" == "</$2>" ]] && return 0
        return 1
}

# Validates the element opened by the tag in $1, consuming its children and
# its closing tag from standard input (one tag per line).
#   $1 - the opening or self-closing tag, e.g. '<a href="x">' or '<br/>'
#   $2 - indentation prefix; it grows by two spaces per nesting level and is
#        only forwarded to the recursive calls
# Globals:
#   FILENAME (read) - name printed in the verdict message
# Returns 0 when the element is self-closing or is properly closed.
# Prints "$FILENAME is ill-formed." and exits the shell with status 1 when
# the tag is malformed, a child is malformed or mismatched, or the input ends
# before the closing tag is seen.
matchtag() {
        local tag="$1" depth="$2"
        # Element/attribute names: a letter, '_' or ':' followed by letters,
        # digits, '_', ':', '.' or '-' (so <h1>, <my-tag>, xml:lang work).
        local name_re='[A-Za-z_:][A-Za-z0-9_:.-]*'
        local lname cleantag line

        # Extract the element name. printf with a quoted argument keeps the
        # tag text away from pathname expansion and echo option parsing.
        lname=$(printf '%s\n' "$tag" | sed "s/<\(${name_re}\).*/\1/")

        # Strip every validly-formed name="value" attribute, then all
        # remaining whitespace, leaving "<name>" or "<name/>" for a good tag.
        cleantag=$(printf '%s\n' "$tag" \
                | sed -e "s/[[:space:]]${name_re}=\"[^\">]*\"//g" \
                      -e 's/[[:space:]]//g')

        # Self-closing element: there is nothing further to consume.
        if [[ "$cleantag" == "<${lname}/>" ]]; then
                return 0
        fi

        # Anything other than a plain opening tag is malformed.
        if [[ "$cleantag" != "<${lname}>" ]]; then
                printf '%s is ill-formed.\n' "$FILENAME"
                exit 1
        fi

        # Read and validate children until our own closing tag shows up.
        # The "|| [[ -n ... ]]" keeps a final line without a newline.
        while read -r line || [[ -n "$line" ]]; do
                if sametag "$line" "$lname"; then
                        return 0
                fi
                matchtag "$line" "${depth}  "
        done

        # Input ended before the closing tag appeared.
        printf '%s is ill-formed.\n' "$FILENAME"
        exit 1
}

# Validates the whole normalized tag stream arriving on standard input.
# The first line is taken as the root element and handed to matchtag; any
# non-empty line still left once the root element has been consumed makes
# the document ill-formed.
# Globals:
#   FILENAME (read) - name printed in the verdict message
# Prints the verdict on stdout and exits the current shell:
#   status 0 - "$FILENAME is well-formed."
#   status 1 - "$FILENAME is ill-formed."
toplevel() {
        local root trailing

        read -r root
        # Quoted on purpose: a root tag with attributes or spaces, such as
        # <root id="1">, must reach matchtag as ONE argument. Unquoted it is
        # word-split and matchtag would only see "<root".
        matchtag "$root" ""

        # Nothing may follow the root element.
        read -r trailing
        if [[ -n "$trailing" ]]; then
                printf '%s is ill-formed.\n' "$FILENAME"
                exit 1
        fi

        printf '%s is well-formed.\n' "$FILENAME"
        exit 0
}

#
# Normalize
#

# Turn the document into one tag per line and hand it to the validator:
#   1. join all lines into a single line (newlines become spaces);
#   2. delete each <? ... ?> construct individually. The pattern stops at
#      the first "?>" so that text between two such constructs is kept;
#   3. put every <...> tag on a line of its own;
#   4. drop blank lines;
#   5. keep only lines containing "<", which discards plain text;
#   6. feed the result to toplevel, which prints the verdict and sets the
#      exit status.
# The filename is quoted so paths with spaces or glob characters work. With
# no filename argument the document is read from standard input.
tr '\012' ' ' < "${FILENAME:-/dev/stdin}" \
   | sed -E -e 's/<\?([^?]|\?+[^?>])*\?+>//g' \
   | sed -e 's/[ \t]*\(<[^>]*>\)/\n\1\n/g' \
   | sed -n -e '/^[ \t]*$/!p' \
   | sed -n -e '/<.*/p' \
   | ( toplevel  )