#!/bin/bash
# author: H Lally Singh
#
# Simplified XML well-formedness checker.
#
# Usage: <script> FILE
#
# Prints "FILE is well-formed." and finishes with status 0 when every start
# tag in FILE is closed by a matching end tag in properly nested order and
# there is exactly one root element; otherwise prints "FILE is ill-formed."
# and finishes with status 1.  A missing argument or an unreadable FILE is
# reported on stderr with status 2.
#
# This is still a skeletal checker (the extra credit options are not
# implemented):
#   - only tags are examined; character data between or around tags is
#     discarded before validation;
#   - comments, CDATA sections, DOCTYPE declarations and processing
#     instructions that do not start with "<?xml" look like malformed tags
#     and make the document ill-formed.

# Name of the file being checked; set by main, used in the verdict message.
FILENAME=''

# POSIX extended regular expressions used to recognise a start tag.
# An XML name: a letter, '_' or ':' followed by letters, digits, '_', '.',
# ':' or '-'.
readonly XML_NAME_RE='[A-Za-z_:][A-Za-z0-9_.:-]*'
# One attribute: whitespace, a name, '=', and a single- or double-quoted
# value that contains neither its own quote character nor '<'.
readonly XML_ATTR_RE="[[:space:]]+${XML_NAME_RE}[[:space:]]*=[[:space:]]*(\"[^\"<]*\"|'[^'<]*')"
# A complete start tag or self-closing tag; capture group 1 is the tag name.
readonly XML_START_TAG_RE="^<(${XML_NAME_RE})(${XML_ATTR_RE})*[[:space:]]*/?>\$"

#
# Processing
#

#######################################
# Tests whether a line is the end tag of a given element.
# Arguments: $1 - line to test
#            $2 - element name
# Returns:   0 (true!) if $1 is exactly "</$2>", 1 otherwise
#######################################
sametag() {
  [[ "$1" == "</$2>" ]]
}

#######################################
# Validates one element: the start tag passed in $1 plus, unless the tag is
# self-closing, all of its children and its end tag, which are read from
# stdin one tag per line.  Recurses for every child element.
# Arguments: $1 - start tag of the element, e.g. '<a href="x">'
#            $2 - indentation prefix for the current depth (optional;
#                 only extended and passed on to the recursive calls)
# Returns:   0 if the element is well-formed; 1 if $1 is not a valid start
#            tag, a child is ill-formed, or input ends before the end tag
#######################################
matchtag() {
  local tag=$1 lname line

  [[ $tag =~ $XML_START_TAG_RE ]] || return 1
  # Copy the name out immediately: BASH_REMATCH is global and is
  # overwritten by the recursive calls below.
  lname=${BASH_REMATCH[1]}

  # A self-closing element has no children and no end tag.
  if [[ $tag == *"/>" ]]; then
    return 0
  fi

  # Consume children until this element's own end tag shows up.  The
  # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n $line ]]; do
    if sametag "$line" "$lname"; then
      return 0
    fi
    # Anything else must be a complete child element; a stray end tag is
    # not a valid start tag and is rejected by the recursive call.
    matchtag "$line" "${2-} " || return 1
  done

  # Input ended before the end tag was seen.
  return 1
}

#######################################
# Validates the whole normalized document on stdin and prints the verdict.
# Globals:   FILENAME (read)
# Outputs:   "$FILENAME is well-formed." or "$FILENAME is ill-formed."
# Returns:   0 if stdin holds exactly one well-formed root element,
#            1 otherwise
#######################################
toplevel() {
  local root='' extra=''

  IFS= read -r root
  if [[ -n $root ]] && matchtag "$root" ''; then
    # Any tag left over after the root element has been closed means a
    # second root or a stray tag.
    IFS= read -r extra
    if [[ -z $extra ]]; then
      printf '%s is well-formed.\n' "$FILENAME"
      return 0
    fi
  fi

  printf '%s is ill-formed.\n' "$FILENAME"
  return 1
}

#
# Normalize
#

#######################################
# Kills off the <?xml ... ?> line, concatenates the lines together, puts
# each tag on a line of its own (text ends up on separate lines), and keeps
# only the lines that contain a '<' so the validator sees nothing but tags.
# Arguments: $1 - path of the XML file
# Outputs:   one tag per line on stdout
#######################################
normalize() {
  local -r nl=$'\n'

  # The tag-splitting expression uses backslash-newline in the replacement
  # because "\n" there is not portable across sed implementations.  The
  # filter runs in a second sed so that it sees the inserted newlines as
  # line boundaries.
  tr '\n' ' ' < "$1" \
    | sed -e 's/<?xml[^>]*?>//g' \
      -e "s/[[:space:]]*\(<[^>]*>\)/\\${nl}\1\\${nl}/g" \
    | sed -n -e '/</p'
}

#######################################
# Entry point: checks the argument, then feeds the normalized file into
# the validator.
# Globals:   FILENAME (written)
# Arguments: $1 - path of the XML file to check
# Returns:   0 well-formed, 1 ill-formed, 2 usage error or unreadable file
#######################################
main() {
  if (( $# < 1 )); then
    printf 'Usage: %s FILE\n' "${0##*/}" >&2
    return 2
  fi

  FILENAME=$1
  if [[ ! -r $FILENAME || -d $FILENAME ]]; then
    printf '%s: cannot read file\n' "$FILENAME" >&2
    return 2
  fi

  # toplevel is the last pipeline stage, so its status is the result.
  normalize "$FILENAME" | toplevel
}

# Last command of the script: its status becomes the script's exit status.
main "$@"