#!/bin/sh
#
# Corrects many of the problems in the 2nd version of the
# OASIS/NIST XML conformance test data (3/15/2001).
#
# This corrects problems noticed with respect to
# the 2/5/2000 patch (to the 1st edition).  Potential
# issues in the tests from IBM haven't been looked at.
#
# - See "UNRESOLVED PROBLEMS" diagnostic
#
# - ibm-valid-P02-ibm02v01 may have corrupt UTF-8;
#   some parsers (including IE5) reject it.
#
# - XSL stylesheets still have merge marks
#
# GET THE BUGFIXED VERSION FROM CVS AT SOURCEFORGE!!
# Or (new) see http://www.w3.org/XML/Test/
#

# Today's date as YYYY-MM-DD; it becomes part of the output archive
# and directory names.  An explicit format is used because "date -I"
# is an extension that not every date(1) implements.
DATE=$(date +%Y-%m-%d)

#
# Archives are "tar.gz" rather than ".zip" because:
# - zip has no standard text mangling support
# - tar handles empty files correctly
# - "tar cvz" is twice as compact as "zip -9qr"
#
# ORIGINAL ... optional older archive, used only for the audit compare
# OLD_FILE ... the archive this script unpacks and repairs
# NEW_FILE ... the repaired archive this script writes
#
ORIGINAL=xmlconf-feb05.tar.gz
OLD_FILE=xmlconf-20010315.tar.gz
NEW_FILE=xmlconf-$DATE.tar.gz

# Do all work inside a scratch directory named after this shell's PID.
# Every later relative path (../$OLD_FILE, xmlconf/...) assumes that
# directory is the current one, so stop if it cannot be created or
# entered instead of running the remaining steps somewhere else.
mkdir "tmp.$$" || exit 1
cd "tmp.$$" || exit 1

echo "Extracting $OLD_FILE"
# Nothing below is meaningful without the unpacked tree: on failure,
# report it, remove the scratch directory again and give up.
tar xfz "../$OLD_FILE" || {
    echo "$0: cannot extract ../$OLD_FILE" >&2
    cd .. && rm -rf "tmp.$$"
    exit 1
}

#
# Re-create the files that are absent from the extracted tree.
# They are all zero-length, so "touch" is enough to bring them back.
# Most of these omissions cause test failures.
#
echo "Restoring missing zero length files (5 files) ..."
for EMPTY in oasis/p39fail3.xml \
             sun/valid/null.ent \
             xmltest/not-wf/sa/050.xml \
             xmltest/not-wf/sa/null.ent \
             xmltest/valid/not-sa/003-2.ent
do
    touch "xmlconf/$EMPTY"
done


#
# Restore original EOL formatting, affecting
# - metadata (all of it),
# - test input data (most of Sun's ???),
# - test output data (most of Sun's ???),
# - some documentation
#
# A number of these cause test failures.
# The input changes can affect test results.
# All cause big trouble auditing the changes!!!
#
# Each listed file has every carriage return deleted.  The filtered
# copy is written to a temporary file first and only replaces the
# real file when "tr" succeeded, so a missing input or a failed write
# can never leave a truncated file in the tree.
#

echo "Restoring EOL formatting (190 files) ..."
for F in \
    "testcases.dtd" \
    "xmlconf.xml" \
    "japanese/japanese.xml" \
    "oasis/oasis.xml" \
    "xmltest/xmltest.xml" \
    "sun/sun-error.xml" \
    "sun/sun-invalid.xml" \
    "sun/sun-not-wf.xml" \
    "sun/sun-valid.xml" \
    \
    "sun/cxml.html" \
    \
    "sun/invalid/attr01.xml" \
    "sun/invalid/attr02.xml" \
    "sun/invalid/attr03.xml" \
    "sun/invalid/attr04.xml" \
    "sun/invalid/attr05.xml" \
    "sun/invalid/attr06.xml" \
    "sun/invalid/attr07.xml" \
    "sun/invalid/attr08.xml" \
    "sun/invalid/attr09.xml" \
    "sun/invalid/attr10.xml" \
    "sun/invalid/attr13.xml" \
    "sun/invalid/attr14.xml" \
    "sun/invalid/attr15.xml" \
    "sun/invalid/attr16.xml" \
    "sun/invalid/dtd01.xml" \
    "sun/invalid/dtd02.xml" \
    "sun/invalid/dtd03.xml" \
    "sun/invalid/dtd06.xml" \
    "sun/invalid/el01.xml" \
    "sun/invalid/el02.xml" \
    "sun/invalid/el03.xml" \
    "sun/invalid/el04.xml" \
    "sun/invalid/el05.xml" \
    "sun/invalid/el06.xml" \
    "sun/invalid/id01.xml" \
    "sun/invalid/id02.xml" \
    "sun/invalid/id03.xml" \
    "sun/invalid/id04.xml" \
    "sun/invalid/id05.xml" \
    "sun/invalid/id06.xml" \
    "sun/invalid/id07.xml" \
    "sun/invalid/id08.xml" \
    "sun/invalid/id09.xml" \
    "sun/invalid/not-sa01.xml" \
    "sun/invalid/not-sa02.xml" \
    "sun/invalid/not-sa03.xml" \
    "sun/invalid/not-sa04.xml" \
    "sun/invalid/not-sa05.xml" \
    "sun/invalid/not-sa06.xml" \
    "sun/invalid/not-sa07.xml" \
    "sun/invalid/not-sa08.xml" \
    "sun/invalid/not-sa09.xml" \
    "sun/invalid/not-sa10.xml" \
    "sun/invalid/not-sa11.xml" \
    "sun/invalid/not-sa12.xml" \
    "sun/invalid/not-sa13.xml" \
    "sun/invalid/not-sa14.xml" \
    "sun/invalid/optional01.xml" \
    "sun/invalid/optional02.xml" \
    "sun/invalid/optional03.xml" \
    "sun/invalid/optional04.xml" \
    "sun/invalid/optional05.xml" \
    "sun/invalid/optional06.xml" \
    "sun/invalid/optional07.xml" \
    "sun/invalid/optional08.xml" \
    "sun/invalid/optional09.xml" \
    "sun/invalid/optional10.xml" \
    "sun/invalid/optional11.xml" \
    "sun/invalid/optional12.xml" \
    "sun/invalid/optional13.xml" \
    "sun/invalid/optional14.xml" \
    "sun/invalid/optional15.xml" \
    "sun/invalid/optional16.xml" \
    "sun/invalid/optional17.xml" \
    "sun/invalid/optional18.xml" \
    "sun/invalid/optional19.xml" \
    "sun/invalid/optional20.xml" \
    "sun/invalid/optional21.xml" \
    "sun/invalid/optional22.xml" \
    "sun/invalid/optional23.xml" \
    "sun/invalid/optional24.xml" \
    "sun/invalid/optional25.xml" \
    "sun/invalid/pe01.dtd" \
    "sun/invalid/pe01.ent" \
    "sun/invalid/pe01.xml" \
    "sun/invalid/required00.xml" \
    "sun/invalid/required01.xml" \
    "sun/invalid/required02.xml" \
    "sun/invalid/root.xml" \
    "sun/not-wf/attlist01.xml" \
    "sun/not-wf/attlist02.xml" \
    "sun/not-wf/attlist03.xml" \
    "sun/not-wf/attlist04.xml" \
    "sun/not-wf/attlist05.xml" \
    "sun/not-wf/attlist06.xml" \
    "sun/not-wf/attlist07.xml" \
    "sun/not-wf/attlist08.xml" \
    "sun/not-wf/attlist09.xml" \
    "sun/not-wf/attlist10.xml" \
    "sun/not-wf/attlist11.xml" \
    "sun/not-wf/cond01.xml" \
    "sun/not-wf/cond02.xml" \
    "sun/not-wf/cond.dtd" \
    "sun/not-wf/content01.xml" \
    "sun/not-wf/content02.xml" \
    "sun/not-wf/content03.xml" \
    "sun/not-wf/decl01.ent" \
    "sun/not-wf/decl01.xml" \
    "sun/not-wf/dtd00.xml" \
    "sun/not-wf/dtd01.xml" \
    "sun/not-wf/dtd02.xml" \
    "sun/not-wf/dtd03.xml" \
    "sun/not-wf/dtd04.xml" \
    "sun/not-wf/dtd05.xml" \
    "sun/not-wf/dtd07.dtd" \
    "sun/not-wf/dtd07.xml" \
    "sun/not-wf/element00.xml" \
    "sun/not-wf/element01.xml" \
    "sun/not-wf/element02.xml" \
    "sun/not-wf/element03.xml" \
    "sun/not-wf/element04.xml" \
    "sun/not-wf/encoding01.xml" \
    "sun/not-wf/encoding02.xml" \
    "sun/not-wf/encoding03.xml" \
    "sun/not-wf/encoding04.xml" \
    "sun/not-wf/encoding05.xml" \
    "sun/not-wf/encoding06.xml" \
    "sun/not-wf/encoding07.xml" \
    "sun/not-wf/pi.xml" \
    "sun/not-wf/pubid01.xml" \
    "sun/not-wf/pubid02.xml" \
    "sun/not-wf/pubid03.xml" \
    "sun/not-wf/pubid04.xml" \
    "sun/not-wf/pubid05.xml" \
    "sun/not-wf/sgml01.xml" \
    "sun/not-wf/sgml02.xml" \
    "sun/not-wf/sgml03.xml" \
    "sun/not-wf/sgml04.xml" \
    "sun/not-wf/sgml05.xml" \
    "sun/not-wf/sgml06.xml" \
    "sun/not-wf/sgml07.xml" \
    "sun/not-wf/sgml08.xml" \
    "sun/not-wf/sgml09.xml" \
    "sun/not-wf/sgml10.xml" \
    "sun/not-wf/sgml11.xml" \
    "sun/not-wf/sgml12.xml" \
    "sun/not-wf/sgml13.xml" \
    "sun/not-wf/uri01.xml" \
    "sun/valid/dtd00.xml" \
    "sun/valid/dtd01.xml" \
    "sun/valid/dtdtest.dtd" \
    "sun/valid/element.xml" \
    "sun/valid/ext01.ent" \
    "sun/valid/ext01.xml" \
    "sun/valid/ext02.xml" \
    "sun/valid/notation01.dtd" \
    "sun/valid/notation01.xml" \
    "sun/valid/not-sa01.xml" \
    "sun/valid/not-sa02.xml" \
    "sun/valid/not-sa03.xml" \
    "sun/valid/not-sa04.xml" \
    "sun/valid/optional.xml" \
    "sun/valid/pe00.dtd" \
    "sun/valid/pe00.xml" \
    "sun/valid/pe01.xml" \
    "sun/valid/pe02.xml" \
    "sun/valid/required00.xml" \
    "sun/valid/sa01.xml" \
    "sun/valid/sa02.xml" \
    "sun/valid/sa03.xml" \
    "sun/valid/sa04.xml" \
    "sun/valid/sa05.xml" \
    "sun/valid/sa.dtd" \
    "sun/valid/sgml01.xml" \
    "sun/valid/v-lang01.xml" \
    "sun/valid/v-lang02.xml" \
    "sun/valid/v-lang03.xml" \
    "sun/valid/v-lang04.xml" \
    "sun/valid/v-lang05.xml" \
    "sun/valid/v-lang06.xml" \
    \
    "xmltest/valid/sa/out/098.xml" \
    \
    "sun/valid/out/notation01.xml" \
    "sun/valid/out/not-sa01.xml" \
    "sun/valid/out/not-sa02.xml" \
    "sun/valid/out/not-sa03.xml" \
    "sun/valid/out/not-sa04.xml" \
    "sun/valid/out/sa02.xml" \
    "sun/valid/out/sa03.xml" \
    "sun/valid/out/sa04.xml" \
    "sun/valid/out/sa05.xml"
do
    if tr -d '\r' < "xmlconf/$F" > "tmp-$$"; then
        mv "tmp-$$" "xmlconf/$F"
    else
        # keep the file as it was and drop any partial output
        echo "$0: could not strip carriage returns from xmlconf/$F" >&2
        rm -f "tmp-$$"
    fi
done


#
# Patch:
# - DTD ... restore validity, document changes
# - metadata ... restore validity, fix new bug
#
# In detail, the embedded unified diff (applied with -p0, so the
# "xmlconf/..." names are used as given, relative to the current
# directory) does the following:
# - xmlconf/testcases.dtd: drops the NOTE about the OASIS/NIST DTD,
#   documents how URIs in <TEST> elements are resolved, declares an
#   optional xml:base attribute on TESTCASES, and documents that
#   tests not conforming to XML Namespaces need NAMESPACE="no"
# - xmlconf/sun/sun-valid.xml: removes the <TEST> entry with ID "empty"
# - xmlconf/xmltest/xmltest.xml: changes the OUTPUT of valid-sa-069
#   from "valid/sa/out069.xml" to "valid/sa/out/069.xml"
#
# NOTE: the here-document delimiter is unquoted, so the shell would
# expand any "$" or backquote that appeared in the patch text; the
# text below contains neither.  The exit status of patch is not checked.
#
patch -p0 << EOT
--- xmlconf-2001-04-27/testcases.dtd	Fri Apr 27 12:37:15 2001
+++ xmlconf/testcases.dtd	Fri Apr 27 12:42:07 2001
@@ -9,11 +9,6 @@
 
     DTD describing a database of XML tests.
 
-    NOTE:  the OASIS/NIST test effort also has a DTD for its test
-    environment, but that DTD is not yet suited for automated test
-    processing.  (Among other issues, it doesn't record information
-    putting any test case into the test matrix.)  If that gets fixed,
-    it may be useful to switch over to that DTD.
 -->
 
 <!--
@@ -33,11 +33,17 @@
     in standard Japanese encodings, using Japanese characters used
     inside names and name tokens as well as inside text.  Or the
     profile might be associated with the test supplier.
+
+    URIs in enclosed <TEST ...> elements are intended to be resolved
+    with respect to the URI of the file in which each <TEST...>
+    element is found.  To work around APIs which, like DOM, hide those
+    URIs, xml:base may be used (iff it agrees with the real URI).
 -->
 
 <!ELEMENT TESTCASES (TEST|TESTCASES)*>
 <!ATTLIST TESTCASES
     PROFILE	CDATA		#IMPLIED
+    xml:base    CDATA		#IMPLIED
     >
 
 <!--
@@ -79,6 +85,9 @@
     Each test is in a particular XML document, with a URI.  If these
     tests are accessed over a network, the path will be relative to
     the base URI of the document holding the testcase.
+
+    Test cases which conform to the XML 1.0 REC, but not to the
+    XML Namespaces REC, must have a NAMESPACE attribute of "no".
 -->
 
 <!ELEMENT TEST (#PCDATA | EM | B)*>
--- xmlconf-2001-04-27/sun/sun-valid.xml	Fri Apr 27 14:56:38 2001
+++ xmlconf/sun/sun-valid.xml	Fri Apr 27 14:57:43 2001
@@ -19,10 +19,6 @@
     Tests clauses 1, 3, and 4 of the Element Valid
 	validity constraint.</TEST>
 
-<TEST URI="valid/empty.xml" ID="empty" TYPE="invalid" SECTIONS="2.4 2.7 [18]">
-    Whitespace found in CDATA sections (and entity references?)
-    is unlike other whitespace</TEST>
-
 <TEST URI="valid/ext01.xml" ID="ext01"
 	OUTPUT="valid/out/ext01.xml"
 	SECTIONS="4.3.1 4.3.2 [77] [78]" TYPE="valid">
--- xmlconf-2001-04-27/xmltest/xmltest.xml	Fri Apr 27 14:56:38 2001
+++ xmlconf/xmltest/xmltest.xml	Fri Apr 27 14:57:59 2001
@@ -1001,7 +1001,7 @@
     break normalization only occurs when parsing external parsed entities.</TEST>
 <TEST TYPE="valid" ENTITIES="none" ID="valid-sa-069"
 	URI="valid/sa/069.xml" SECTIONS="4.7"
-	OUTPUT="valid/sa/out069.xml">
+	OUTPUT="valid/sa/out/069.xml">
     Verifies that an XML parser will parse a NOTATION
     declaration; the output phase of this test ensures that
     it's reported to the application. </TEST>
EOT

# Print the list of known defects that this script leaves unrepaired.
# The text is emitted verbatim (quoted delimiter: no expansion), framed
# by one empty line before and one after.
cat <<'EOT'

UNRESOLVED PROBLEMS:
  xmlconf/sun/valid/out/not-sa02.xml
  xmlconf/sun/valid/out/sa02.xml
    ... output files are now missing a space (bad canon form)
  xmlconf/xmltest/not-wf/sa/176.xml
  xmlconf/xmltest/valid/sa/093.xml
    ... input files changed line-ends (legal cases are now untested)
  ibm/not-wf/P13/ibm13n03.xml
    ... input appears to be in ISO-8859-1, not UTF-8

EOT

# Give the repaired tree a dated directory name, then pack it into a
# gzip-compressed tar archive placed next to the scratch directory.
printf '%s\n' "Creating $NEW_FILE ..."
mv xmlconf "xmlconf-$DATE"
tar -c -z -f "../$NEW_FILE" "xmlconf-$DATE"


#
# To audit changes in the 3/15/2001 version, they
# were compared to the 2/5/2000 version and against
# the "changes.html".  (Except for the new IBM cases,
# where that can't be done.)
#
# This step runs only when the $ORIGINAL archive sits next to the
# scratch directory.  It unpacks that archive (expected to produce
# an "xmlconf" directory) and lists the files that differ from the
# repaired tree, leaving out everything already reviewed.
#
if [ -f "../$ORIGINAL" ]; then
    echo ''
    echo "Selective compare of $ORIGINAL against $NEW_FILE"

    tar xfz "../$ORIGINAL"

    # things checking out as ok to ignore, or checked separately
    # (file name patterns handed to "diff --exclude-from")
    cat > EXCLUDES <<'EOT'
README
readme.html
ibm
japanese
xmlconf-*
sun-*.xml
oasis.xml
xmltest.xml
xmlconf.xml
testcases.dtd
xmlconformance.*xsl
top3.jpe
changes.html
xmlconf.htm
cxml.html
e2.xml
p02pass1.xml
empty.xml
EOT
    # Report differing files only: "Only in ..." lines are dropped, and
    # so are the individual files named in the filters below.
    # "grep -E" is used because the "egrep" command is obsolescent.
    diff -r --brief --exclude-from=EXCLUDES xmlconf "xmlconf-$DATE" |
        grep -v "^Only" |
        grep -E -v "valid/sa/out/069|valid/sa/out/076" |
        grep -E -v "valid/sa/out/090|valid/sa/out/091" |
        grep -E -v "invalid/attr11|invalid/attr12" |
        grep -E -v "valid/out/not-sa02|valid/out/sa02" |
        grep -E -v "not-wf/sa/176.xml|valid/sa/093.xml"
    rm EXCLUDES

    echo ''
    echo "Diff of metadata that still needs checking ..."
    # other changes looked OK
    # The list is currently empty, so the loop body never runs; add
    # file names (relative to xmlconf/) here to get a full diff of them.
    for F in \

    do
        echo '** xmlconf/'"$F"
        diff "xmlconf/$F" "xmlconf-$DATE/$F"
        echo ''
    done
fi

# Step back out of the scratch directory and delete it together with
# everything that was unpacked or generated inside it.
cd ..
rm -r -f "tmp.$$"
