diff -Nurd texinfo-7.2.orig/ChangeLog texinfo-7.2/ChangeLog --- texinfo-7.2.orig/ChangeLog 2024-12-23 04:46:57.000000000 -0500 +++ texinfo-7.2/ChangeLog 2025-02-11 18:42:57.000000000 -0500 @@ -1,3 +1,27 @@ +2025-02-11 Patrice Dumas + + * tta/tests/many_input_files/*.sh: comment out escape_file_names.pl + call when there are no non-ASCII output file names. + Add --utf8-argument option to escape_file_names.pl to normalize UTF-8 + encoded files before escaping. Report from Daniel Macks. + +2025-02-10 Patrice Dumas + + Normalize UTF-8 tests output file names for reproducibility + + * tta/tests/escape_file_names.pl: if --utf8-argument is given, decode, + normalize and reencode input file name. + + * tta/tests/run_parser_all.sh (escape_file_names): if + OUTPUT_FILE_NAME_ENCODING=UTF-8 appears on the command line pass + utf8_output_file argument to escape_file_names and have + escape_file_names pass the --utf8-argument command-line option to + escape_file_names.pl. + + This allows to normalize output files to get the same result + independently of the platform notmalization of UTF-8 file names. + Report from Daniel Macks. + 2024-12-23 Gavin Smith 7.2 diff -Nurd texinfo-7.2.orig/tp/tests/escape_file_names.pl texinfo-7.2/tp/tests/escape_file_names.pl --- texinfo-7.2.orig/tp/tests/escape_file_names.pl 2024-12-19 13:31:43.000000000 -0500 +++ texinfo-7.2/tp/tests/escape_file_names.pl 2025-02-11 18:42:23.000000000 -0500 @@ -26,8 +26,17 @@ use File::Spec; use File::Path; +use Encode; +use Unicode::Normalize; + my @files; +my $utf8_argument = 0; +if (defined($ARGV[0]) and $ARGV[0] eq '--utf8-argument') { + $utf8_argument = 1; + shift; +} + # Read all of input first while (<>) { chomp; @@ -40,12 +49,21 @@ my @moved_files; -for my $file (@files) { - if ($file =~ /[^[:ascii:]]/) { - unshift @moved_files, $file; +for my $in_file (@files) { + if ($in_file =~ /[^[:ascii:]]/) { + my $file_name; + if ($utf8_argument) { + # normalize to get a file name independent of the platform normalization + my $decoded_file_name = Encode::decode('UTF-8', $in_file); + my $normalized_file_name = Unicode::Normalize::NFC($decoded_file_name); + $file_name = Encode::encode('UTF-8', $normalized_file_name); + } else { + $file_name = $in_file; + } + unshift @moved_files, $in_file; my $ascii_name = ''; - for my $char (split('', $file)) { + for my $char (split('', $file_name)) { if (ord($char) < 0x80) { $ascii_name .= $char; } else { @@ -55,12 +73,12 @@ my $dest_path = $ascii_name; - if (-d $file) { + if (-d $in_file) { mkdir $dest_path; } else { - my $copy_succeeded = copy($file, $dest_path); + my $copy_succeeded = copy($in_file, $dest_path); if (not $copy_succeeded) { - warn "could not move $file: $!\n"; + warn "could not move $in_file: $!\n"; exit(1); } } diff -Nurd texinfo-7.2.orig/tp/tests/many_input_files/different_encodings.sh texinfo-7.2/tp/tests/many_input_files/different_encodings.sh --- texinfo-7.2.orig/tp/tests/many_input_files/different_encodings.sh 2024-12-19 13:31:43.000000000 -0500 +++ texinfo-7.2/tp/tests/many_input_files/different_encodings.sh 2025-02-11 18:44:26.000000000 -0500 @@ -55,7 +55,8 @@ else outdir=$basename cp -pr $outdir $raw_output_dir - find "${outdir}" | $PERL ${srcdir}/../escape_file_names.pl + # no non-ASCII output file names + #find "${outdir}" | $PERL ${srcdir}/../escape_file_names.pl dir=$basename if [ -d "$srcdir/${dir}_res" ]; then diff -Nurd texinfo-7.2.orig/tp/tests/many_input_files/different_languages_gen_master_menu.sh texinfo-7.2/tp/tests/many_input_files/different_languages_gen_master_menu.sh --- texinfo-7.2.orig/tp/tests/many_input_files/different_languages_gen_master_menu.sh 2024-12-19 13:31:43.000000000 -0500 +++ texinfo-7.2/tp/tests/many_input_files/different_languages_gen_master_menu.sh 2025-02-11 18:44:26.000000000 -0500 @@ -49,7 +49,8 @@ else outdir=$basename cp -pr $outdir $raw_output_dir - find $outdir | $PERL $srcdir/../escape_file_names.pl + # no non-ASCII output file names + #find $outdir | $PERL $srcdir/../escape_file_names.pl dir=$basename if [ -d "$srcdir/${dir}_res" ]; then diff -Nurd texinfo-7.2.orig/tp/tests/many_input_files/first_file_not_found.sh texinfo-7.2/tp/tests/many_input_files/first_file_not_found.sh --- texinfo-7.2.orig/tp/tests/many_input_files/first_file_not_found.sh 2024-12-19 13:31:43.000000000 -0500 +++ texinfo-7.2/tp/tests/many_input_files/first_file_not_found.sh 2025-02-11 18:44:26.000000000 -0500 @@ -50,7 +50,8 @@ else outdir=$basename cp -pr $outdir $raw_output_dir - find "${outdir}" | $PERL ${srcdir}/../escape_file_names.pl + # no non-ASCII output file names + #find "${outdir}" | $PERL ${srcdir}/../escape_file_names.pl dir=$basename if [ -d "$srcdir/${dir}_res" ]; then diff -Nurd texinfo-7.2.orig/tp/tests/many_input_files/for_nbspaces_different_encodings.sh texinfo-7.2/tp/tests/many_input_files/for_nbspaces_different_encodings.sh --- texinfo-7.2.orig/tp/tests/many_input_files/for_nbspaces_different_encodings.sh 2024-12-19 13:31:43.000000000 -0500 +++ texinfo-7.2/tp/tests/many_input_files/for_nbspaces_different_encodings.sh 2025-02-11 18:44:26.000000000 -0500 @@ -53,7 +53,8 @@ else outdir=$basename cp -pr $outdir $raw_output_dir - find $outdir | $PERL $srcdir/../escape_file_names.pl + # no non-ASCII output file names + #find $outdir | $PERL $srcdir/../escape_file_names.pl dir=$basename if [ -d "$srcdir/${dir}_res" ]; then diff -Nurd texinfo-7.2.orig/tp/tests/many_input_files/input_dir_non_ascii.sh texinfo-7.2/tp/tests/many_input_files/input_dir_non_ascii.sh --- texinfo-7.2.orig/tp/tests/many_input_files/input_dir_non_ascii.sh 2024-12-19 13:31:43.000000000 -0500 +++ texinfo-7.2/tp/tests/many_input_files/input_dir_non_ascii.sh 2025-02-11 18:44:26.000000000 -0500 @@ -58,7 +58,8 @@ else outdir=$basename cp -pr $outdir $raw_output_dir - find $outdir | $PERL $srcdir/../escape_file_names.pl + # no non-ASCII output file names + #find $outdir | $PERL $srcdir/../escape_file_names.pl dir=$basename if [ -d "$srcdir/${dir}_res" ]; then diff -Nurd texinfo-7.2.orig/tp/tests/many_input_files/output_dir_file_non_ascii.sh texinfo-7.2/tp/tests/many_input_files/output_dir_file_non_ascii.sh --- texinfo-7.2.orig/tp/tests/many_input_files/output_dir_file_non_ascii.sh 2024-12-19 13:31:43.000000000 -0500 +++ texinfo-7.2/tp/tests/many_input_files/output_dir_file_non_ascii.sh 2025-02-11 18:44:26.000000000 -0500 @@ -60,7 +60,7 @@ else outdir=$basename cp -pr $outdir $raw_output_dir - find $outdir | $PERL $srcdir/../escape_file_names.pl + find $outdir | $PERL $srcdir/../escape_file_names.pl --utf8-argument dir=$basename if [ -d "$srcdir/${dir}_res" ]; then diff -Nurd texinfo-7.2.orig/tp/tests/many_input_files/output_dir_non_ascii.sh texinfo-7.2/tp/tests/many_input_files/output_dir_non_ascii.sh --- texinfo-7.2.orig/tp/tests/many_input_files/output_dir_non_ascii.sh 2024-12-19 13:31:43.000000000 -0500 +++ texinfo-7.2/tp/tests/many_input_files/output_dir_non_ascii.sh 2025-02-11 18:44:26.000000000 -0500 @@ -60,7 +60,7 @@ else outdir=$basename cp -pr $outdir $raw_output_dir - find $outdir | $PERL $srcdir/../escape_file_names.pl + find $outdir | $PERL $srcdir/../escape_file_names.pl --utf8-argument dir=$basename if [ -d "$srcdir/${dir}_res" ]; then diff -Nurd texinfo-7.2.orig/tp/tests/many_input_files/tex_l2h.sh texinfo-7.2/tp/tests/many_input_files/tex_l2h.sh --- texinfo-7.2.orig/tp/tests/many_input_files/tex_l2h.sh 2024-10-10 14:00:30.000000000 -0400 +++ texinfo-7.2/tp/tests/many_input_files/tex_l2h.sh 2025-02-11 18:44:26.000000000 -0500 @@ -81,11 +81,13 @@ -e 's/^# LaTeX2HTML.*/# LaTeX2HTML/' \ "$file" > "$outdir/$filename" done - + for file in "$raw_outdir/"*_l2h_labels.pl; do filename=`basename "$file"` sed -e 's/^# LaTeX2HTML.*/# LaTeX2HTML/' "$file" > "$outdir/$filename" done + # no non-ASCII output file names + #find $outdir | $PERL $srcdir/../escape_file_names.pl dir=$basename if [ -d "$srcdir/${dir}_res" ]; then diff -Nurd texinfo-7.2.orig/tp/tests/many_input_files/tex_l2h_output_dir_non_ascii.sh texinfo-7.2/tp/tests/many_input_files/tex_l2h_output_dir_non_ascii.sh --- texinfo-7.2.orig/tp/tests/many_input_files/tex_l2h_output_dir_non_ascii.sh 2024-12-19 13:31:43.000000000 -0500 +++ texinfo-7.2/tp/tests/many_input_files/tex_l2h_output_dir_non_ascii.sh 2025-02-11 18:44:26.000000000 -0500 @@ -97,7 +97,7 @@ filename=`basename "$file"` sed -e 's/^# LaTeX2HTML.*/# LaTeX2HTML/' "$file" > "$destination_outdir/$filename" done - find $outdir | $PERL $srcdir/../escape_file_names.pl + find $outdir | $PERL $srcdir/../escape_file_names.pl --utf8-argument dir=$basename if [ -d "$srcdir/${dir}_res" ]; then diff -Nurd texinfo-7.2.orig/tp/tests/many_input_files/tex_t4ht.sh texinfo-7.2/tp/tests/many_input_files/tex_t4ht.sh --- texinfo-7.2.orig/tp/tests/many_input_files/tex_t4ht.sh 2024-12-19 13:31:43.000000000 -0500 +++ texinfo-7.2/tp/tests/many_input_files/tex_t4ht.sh 2025-02-11 18:44:26.000000000 -0500 @@ -51,7 +51,8 @@ rm -f $outdir/*_tex4ht_*.log \ $outdir/*_tex4ht_*.idv $outdir/*_tex4ht_*.dvi \ $outdir/*_tex4ht_tex.html $outdir/*.png $outdir/$stdout_file - find $outdir | $PERL $srcdir/../escape_file_names.pl + # no non-ASCII output file names + #find $outdir | $PERL $srcdir/../escape_file_names.pl dir=${basename} if [ -d $srcdir/${dir}_res ]; then diff -Nurd texinfo-7.2.orig/tp/tests/many_input_files/tex_t4ht_output_dir_non_ascii.sh texinfo-7.2/tp/tests/many_input_files/tex_t4ht_output_dir_non_ascii.sh --- texinfo-7.2.orig/tp/tests/many_input_files/tex_t4ht_output_dir_non_ascii.sh 2024-12-19 13:31:43.000000000 -0500 +++ texinfo-7.2/tp/tests/many_input_files/tex_t4ht_output_dir_non_ascii.sh 2025-02-11 18:44:26.000000000 -0500 @@ -63,7 +63,7 @@ rm -f $destination_outdir/*_tex4ht_*.log \ $destination_outdir/*_tex4ht_*.idv $destination_outdir/*_tex4ht_*.dvi \ $destination_outdir/*_tex4ht_tex.html $destination_outdir/*.png $outdir/$stdout_file - find $outdir | $PERL $srcdir/../escape_file_names.pl + find $outdir | $PERL $srcdir/../escape_file_names.pl --utf8-argument dir=${basename} if [ -d $srcdir/${dir}_res ]; then diff -Nurd texinfo-7.2.orig/tp/tests/run_parser_all.sh texinfo-7.2/tp/tests/run_parser_all.sh --- texinfo-7.2.orig/tp/tests/run_parser_all.sh 2024-12-19 13:31:43.000000000 -0500 +++ texinfo-7.2/tp/tests/run_parser_all.sh 2025-02-11 18:42:23.000000000 -0500 @@ -176,7 +176,11 @@ # ensure only ASCII filenames are used in output escape_file_names () { - find "${outdir}${dir}" | $PERL ${srcdir}/escape_file_names.pl + utf8_argument= + if test "z$1" = zutf8_output_file ; then + utf8_argument=--utf8-argument + fi + find "${outdir}${dir}" | $PERL ${srcdir}/escape_file_names.pl $utf8_argument } LC_ALL=C; export LC_ALL @@ -430,6 +434,11 @@ continue 2 fi + utf8_output_file=no + if echo "$remaining" | grep 'OUTPUT_FILE_NAME_ENCODING=UTF-8' >/dev/null; then + utf8_output_file=utf8_output_file + fi + dir=$current test -d "${outdir}$dir" && rm -rf "${outdir}$dir" mkdir "${outdir}$dir" @@ -453,7 +462,7 @@ rm -rf "${raw_outdir}$dir" post_process_output - escape_file_names + escape_file_names $utf8_output_file if test "z$res_dir_used" != 'z' ; then diff $DIFF_OPTIONS -r "$res_dir_used" "${outdir}$dir" 2>>$logfile > "$testdir/$diffs_dir/$diff_base.diff"