commit 901aadb474e06693524e3fc6f2092f4d94bb1d07
parent 60cba53ab7684b466185fd3886fb19d22b688e53
Author: lumidify <nobody@lumidify.org>
Date:   Wed, 27 Dec 2023 15:26:40 +0100
Improve book sorting
Diffstat:
5 files changed, 257 insertions(+), 61 deletions(-)
diff --git a/LSG/Config.pm b/LSG/Config.pm
@@ -25,6 +25,7 @@ use Exporter qw(import);
 our @EXPORT_OK = qw($config);
 
 # Yes, I know this isn't just used for real config
+# FIXME: separate fields from config.ini from other parts to avoid conflicts
 our $config;
 
 sub read_modified_dates {
@@ -81,6 +82,7 @@ sub read_config {
 			$section = $1;
 			next;
 		}
+		# FIXME: report errors properly
 		my ($key, $value) = split("=", $_, 2);
 		if ($value =~ /:/) {
 			my @value = split(":", $value);
diff --git a/LSG/Generate.pm b/LSG/Generate.pm
@@ -26,32 +26,60 @@ use File::Path qw(make_path);
 use LSG::Markdown;
 use LSG::Config qw($config);
 
-sub gen_files() {
-        foreach my $pageid (keys %{$config->{"metadata"}}) {
-                foreach my $lang (keys %{$config->{"langs"}}) {
-                        my $template = $config->{"metadata"}->{$pageid}->{"template"} . ".$lang.html";
-                        if (
-                                exists($config->{"modified_dates"}->{"pages"}->{"$pageid.$lang"}) &&
-                                exists($config->{"modified_dates"}->{"templates"}->{$template}) &&
-                                $config->{"modified_dates"}->{"pages"}->{"$pageid.$lang"} eq $config->{"metadata"}->{$pageid}->{"modified"}->{$lang} &&
-                                $config->{"modified_dates"}->{"templates"}->{$template} eq $config->{"templates"}->{$template}->{"modified"} &&
-				(!exists $config->{"metadata"}->{$pageid}->{$lang}->{"always_update"} ||
-				 $config->{"metadata"}->{$pageid}->{$lang}->{"always_update"} ne "true")
-                        ) {
-                                next;
-                        }
-                        print("Processing $pageid.$lang\n");
-                        my $html_dir = catfile("site", $lang, $config->{"metadata"}->{$pageid}->{"dirname"});
-                        make_path($html_dir);
-                        my $fullname = catfile("pages", "$pageid.$lang");
-                        my $html = LSG::Markdown::parse_md($lang, $pageid, $fullname);
-                        my $final_html = LSG::Template::render_template($html, $lang, $pageid);
-                        my $html_file = catfile("site", $lang, $pageid) . ".html";
-                        open(my $in, ">", $html_file) or die "ERROR: can't open $html_file for writing\n";
-                        print $in $final_html;
-                        close($in);
-                }
+# Render one page in every configured language and write the result
+# below "site/".
+#
+# $pageid:     id of the page (key in $config->{"metadata"})
+# $html_pages: optional hash reference mapping language => finished body
+#              html. If given, it is used instead of parsing the markdown
+#              source in "pages/"; a language missing from it is fatal.
+#
+# Languages for which the recorded modification dates of both the page
+# and its template are unchanged are skipped, unless the page metadata
+# sets always_update to "true".
+#
+# Returns the list of extra pages that LSG::Markdown::parse_md returned
+# while parsing the markdown sources (empty if $html_pages was given).
+# Dies if the output file cannot be opened, written or closed.
+sub gen_page {
+	my ($pageid, $html_pages) = @_;
+	my @ret_pages;
+	foreach my $lang (keys %{$config->{"langs"}}) {
+		my $template = $config->{"metadata"}->{$pageid}->{"template"} . ".$lang.html";
+		# FIXME: also check if the html file actually exists
+		# -> maybe compare with modified date of html instead of markdown
+		if (
+			exists($config->{"modified_dates"}->{"pages"}->{"$pageid.$lang"}) &&
+			exists($config->{"modified_dates"}->{"templates"}->{$template}) &&
+			$config->{"modified_dates"}->{"pages"}->{"$pageid.$lang"} eq $config->{"metadata"}->{$pageid}->{"modified"}->{$lang} &&
+			$config->{"modified_dates"}->{"templates"}->{$template} eq $config->{"templates"}->{$template}->{"modified"} &&
+			(!exists $config->{"metadata"}->{$pageid}->{$lang}->{"always_update"} ||
+			 $config->{"metadata"}->{$pageid}->{$lang}->{"always_update"} ne "true")
+		) {
+			next;
+		}
+		#print("Processing $pageid.$lang\n");
+		my $html_dir = catfile("site", $lang, $config->{"metadata"}->{$pageid}->{"dirname"});
+		make_path($html_dir);
+		my $html;
+		if (defined($html_pages)) {
+			# pre-rendered page: take the body html as-is
+			if (!exists($html_pages->{$lang})) {
+				die "Page $pageid does not exist for language $lang\n";
+			}
+			$html = $html_pages->{$lang};
+		} else {
+			my $fullname = catfile("pages", "$pageid.$lang");
+			my @tmp_pages;
+			($html, @tmp_pages) = LSG::Markdown::parse_md($lang, $pageid, $fullname);
+			push(@ret_pages, @tmp_pages);
+		}
+		my $final_html = LSG::Template::render_template($html, $lang, $pageid);
+		my $html_file = catfile("site", $lang, $pageid) . ".html";
+		open(my $out, ">", $html_file) or die "ERROR: can't open $html_file for writing: $!\n";
+		print $out $final_html or die "ERROR: can't write to $html_file: $!\n";
+		# buffered write errors may only surface when the handle is closed
+		close($out) or die "ERROR: can't close $html_file: $!\n";
+	}
+	return @ret_pages;
+}
+
+# Generate every page listed in the metadata, collect the extra pages
+# returned by gen_page, and then generate those extra pages as well.
+# Dies if the same extra page is returned twice for one language.
+sub gen_files {
+	# $generated{$pageid}->{$lang} holds the body html of an extra page
+	my %generated;
+	foreach my $source_id (keys %{$config->{"metadata"}}) {
+		foreach my $page (gen_page($source_id)) {
+			my ($new_id, $new_lang) = @{$page}[0, 1];
+			die "Duplicate page $page->[0] for language $page->[1]\n"
+				if (exists $generated{$new_id}->{$new_lang});
+			$generated{$new_id}->{$new_lang} = $page->[2];
+		}
         }
+	foreach my $new_id (keys %generated) {
+		gen_page($new_id, $generated{$new_id});
+	}
 }
 
 sub delete_obsolete_recurse {
@@ -74,6 +102,13 @@ sub delete_obsolete_recurse {
                 }
         }
         closedir($dh);
+        opendir($dh, $dir) or die "Unable to open directory \"" . getcwd() . "/$dir\": $!\n";
+	if (scalar(grep { $_ ne "." && $_ ne ".." } readdir($dh)) == 0) {
+		print("Deleting old directory \"" . getcwd() . "/$dir\".\n");
+		rmdir($dir);
+	}
+	closedir($dh);
+	# FIXME: remove empty dirs
         foreach (@dirs) {
                 delete_obsolete_recurse($_);
         }
diff --git a/LSG/Markdown.pm b/LSG/Markdown.pm
@@ -128,15 +128,17 @@ sub finish_child {
 	my $file = shift;
 	my $parent = $child->{"parent"};
 
+	my ($html, @extra_pages);
 	if ($child->{"type"} eq "img") {
-		$parent->{"txt"} .= handle_img($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"});
+		$html = handle_img($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"});
 	} elsif ($child->{"type"} eq "lnk") {
-		$parent->{"txt"} .= handle_lnk($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"});
+		$html = handle_lnk($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"});
 	} elsif ($child->{"type"} eq "fnc") {
-		$parent->{"txt"} .= handle_fnc($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"});
+		($html, @extra_pages) = handle_fnc($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"});
 	}
+	$parent->{"txt"} .= $html;
 
-	return $parent;
+	return ($parent, @extra_pages);
 }
 
 # FIXME: This whole "parser" is extremely primitive and will break on a lot of input.
@@ -160,6 +162,7 @@ sub parse_md {
 	my %structure = (txt => "", child => {});
 	my $cur_child_ref = \%structure;
 	my @states = (0);
+	my @extra_pages;
 	foreach (<$in>) {
 		foreach my $char (split //, $_) {
 			if ($char eq "\\") {
@@ -196,7 +199,9 @@ sub parse_md {
 				$states[-1] |= $IN_URL;
 			} elsif ($char eq ")" && ($states[-1] & $IN_URL)) {
 				pop(@states);
-				$cur_child_ref = finish_child($cur_child_ref, $pageid, $lang, $_, $inpath);
+				my @tmp_pages;
+				($cur_child_ref, @tmp_pages) = finish_child($cur_child_ref, $pageid, $lang, $_, $inpath);
+				push(@extra_pages, @tmp_pages);
 			} else {
 				if ($states[-1] & $IN_IMG_START) {
 					pop(@states);
@@ -213,7 +218,7 @@ sub parse_md {
 		}
 	}
 
-	return markdown($structure{"txt"});
+	return (markdown($structure{"txt"}), @extra_pages);
 }
 
 1;
diff --git a/LSG/UserFuncs.pm b/LSG/UserFuncs.pm
@@ -31,6 +31,14 @@ use LSG::Misc;
 # 2:  page language
 # 3-: other args (e.g. for func call)
 
+# Return value:
+# Usually just the html text.
+# Optionally, the html text may be followed by a list of array references
+# of the form [$pageid, $lang, $html], each defining a further page together
+# with the complete body html text of that page. The html of such a page is
+# always taken verbatim as the html code of the page body, there is no
+# option to interpret it as markdown.
+# When called from templates, the extra pages are ignored.
+
 # Yeah, this is extremely inefficient, but it's
 # not like we're comparing billions of books.
 sub sort_numeric {
@@ -58,33 +66,176 @@ sub sort_numeric {
 }
 
 sub sort_books {
-	my $pageid = shift;
-	my $lang = shift;
-	my $sort_by = shift;
-	my $create_subheadings = shift;
-	if (!$sort_by) {die "ERROR: not enough arguments to function call in $pageid\n"}
-	my $output = "";
-	my %tmp_md = ();
+	# FIXME: 'list' currently doesn't make much sense - the
+	# sorting should be changed to just be alphabetical by
+	# title when 'list' is used
+
+	# $mode == list: just list books
+	# $mode == combined: create subheadings for @sort_by
+	# $mode == separate: create separate pages for @sort_by
+	# $dir: directory to search for pages to sort
+	# (new pages are also created in this directory)
+	# @sort_by: list of metadata attributes to sort by
+	# (this is a hierarchical sorting, i.e. if the second
+	# category in @sort_by is the same for two pages,
+	# the first category must also be the same, and so
+	# on, otherwise there will probably be an error at
+	# some point, or the result will just be weird)
+	#
+	# Returns the html for the page that called this function.
+	# In 'separate' mode, the html is followed by a list of
+	# array references [$pageid, $lang, $html], one for each
+	# generated category page; metadata for these pages is
+	# also added to $config->{"metadata"}.
+	my ($pageid, $lang, $dir, $mode, @sort_by) = @_;
+	if (!defined($dir) || !defined($mode)) {
+		die "ERROR: Too few arguments to sort_books.\n";
+	}
+	if ($mode eq "list") {
+		$mode = 0;
+	} elsif ($mode eq "combined") {
+		$mode = 1;
+	} elsif ($mode eq "separate") {
+		$mode = 2;
+	} else {
+		die "ERROR: Invalid mode $mode for sort_books.\n";
+	}
+	my %tmp_md;
 	foreach my $id (keys %{$config->{"metadata"}}) {
-		if ($config->{"metadata"}->{$id}->{"dirname"} eq "books") {
+		# pages generated by sort_books need to be skipped so when this
+		# function is called again for other languages, it doesn't try
+		# to sort all the generated pages (yes, this is really ugly)
+
+		# prevent autovivification of $config->{"metadata"}->{$id}->{$lang}
+		next if (exists($config->{"metadata"}->{$id}->{$lang}) &&
+			$config->{"metadata"}->{$id}->{$lang}->{"generated:sort_books"});
+		if ($config->{"metadata"}->{$id}->{"dirname"} eq $dir) {
 			$tmp_md{$id} = $config->{"metadata"}->{$id};
-			if (!exists($config->{"metadata"}->{$id}->{$lang}->{$sort_by})) {
-				die "ERROR: $pageid: can't sort by \"$sort_by\"\n";
+			# set as soon as one sort key is missing for this page
+			my $found_undef = 0;
+			for my $sb (@sort_by) {
+				if (!exists($config->{"metadata"}->{$id}->{$lang}) ||
+				    !exists($config->{"metadata"}->{$id}->{$lang}->{$sb})) {
+					$found_undef = 1;
+				} else {
+					if ($found_undef) {
+						# there can't be any "undef gaps" - as soon as one sort key
+						# is undef, all the ones afterwards are ignored (in the
+						# final output, the page is located on the same "level" as
+						# the category of the first undef sort key)
+						die "ERROR: $pageid: metadata $sb defined but previous " .
+						     "sort key already undef.\n";
+					}
+					my $val = $config->{"metadata"}->{$id}->{$lang}->{$sb};
+					if (!exists($config->{"$sb:$lang"}->{$val})) {
+						die "No display value configured for sort key $sb=$val (language $lang).\n";
+					}
+				}
 			}
 		}
 	}
-	my $current = "";
-	foreach my $id (sort {$tmp_md{$a}->{$lang}->{$sort_by} cmp $tmp_md{$b}->{$lang}->{$sort_by} or
-	                      sort_numeric($tmp_md{$a}->{$lang}->{"title"}, $tmp_md{$b}->{$lang}->{"title"})} (keys %tmp_md)) {
-		if ($create_subheadings && $create_subheadings eq "true" && $current ne $tmp_md{$id}->{$lang}->{$sort_by}) {
-			$current = $tmp_md{$id}->{$lang}->{$sort_by};
-			$output .= "<h3>$current</h3>\n";
+	# I could do a Schwartzian transform here, but I won't because I'm too lazy.
+	my @sorted = sort {
+		for my $sb (@sort_by) {
+			# if a sort_by value is undef, use the title of the page instead
+			# so entries on the same level are sorted properly even if some
+			# are actual pages and others are categories
+			my $sort_a = exists($tmp_md{$a}->{$lang}->{$sb}) ?
+				$config->{"$sb:$lang"}->{$tmp_md{$a}->{$lang}->{$sb}} :
+				$tmp_md{$a}->{$lang}->{"title"};
+			my $sort_b = exists($tmp_md{$b}->{$lang}->{$sb}) ?
+				$config->{"$sb:$lang"}->{$tmp_md{$b}->{$lang}->{$sb}} :
+				$tmp_md{$b}->{$lang}->{"title"};
+			if ((my $ret = sort_numeric($sort_a, $sort_b))) {
+				return $ret;
+			}
 		}
+		return sort_numeric($tmp_md{$a}->{$lang}->{"title"}, $tmp_md{$b}->{$lang}->{"title"});
+	} (keys %tmp_md);
+	my $output = "";
+	my %current;
+	my @extra_pages;
+	my @page_stack = ([$pageid, $lang, ""]);
+	my $margin_dir = $config->{"lang_dirs"}->{$lang} eq "rtl" ? "right" : "left";
+	foreach my $id (@sorted) {
 		my $rel_lnk = LSG::Misc::gen_relative_link("$lang/$pageid", "$lang/$id.html");
-		$output .= "<p><a href=\"$rel_lnk\">" . $tmp_md{$id}->{$lang}->{"title"} . "</a></p>\n";
+		if ($mode == 1 || $mode == 2) {
+			my $indent = 0;
+			my $found_unequal = 0;
+			for my $i (0..$#sort_by) {
+				my $sb = $sort_by[$i];
+				# Note: it would be possible to use exists instead of
+				# defined here, but using defined makes the code a bit simpler
+				if (defined($current{$sb}) != defined($tmp_md{$id}->{$lang}->{$sb}) ||
+				    (defined($current{$sb}) && $current{$sb} ne $tmp_md{$id}->{$lang}->{$sb})) {
+					$found_unequal = 1;
+					$current{$sb} = $tmp_md{$id}->{$lang}->{$sb};
+					for my $j ($indent+1..$#page_stack) {
+						push(@extra_pages, pop(@page_stack));
+					}
+					if (defined($current{$sb})) {
+						my $name = $config->{"$sb:$lang"}->{$current{$sb}};
+						# This is currently hard-coded. Up to four heading sizes are
+						# used (starting at <h3>), then they just stay the same
+						if ($mode == 1) {
+							my $h_sz = $indent + 3 > 6 ? 6 : $indent + 3;
+							$output .= "<h$h_sz style=\"margin-$margin_dir: " .
+								($indent * 15). "pt;\">$name</h$h_sz>\n";
+						} else {
+							my $new_id = "$dir/$sb/$current{$sb}";
+							if (exists $config->{"metadata"}->{$new_id}->{$lang}) {
+								die "ERROR: Duplicate page $new_id (lang $lang).\n";
+							}
+							my $cat_lnk = LSG::Misc::gen_relative_link(
+								"$lang/$page_stack[-1]->[0]", "$lang/$new_id.html"
+							);
+							$page_stack[-1]->[2] .= "<p><a href=\"$cat_lnk\">$name</a></p>\n";
+							push(@page_stack, [
+								$new_id,
+								$lang,
+								"<h3>$name</h3>\n"
+							]);
+							$config->{"metadata"}->{$new_id}->{$lang} = {
+								title => $name,
+								"generated:sort_books" => 1
+							};
+							# FIXME: maybe check if these overwrite a different value
+							$config->{"metadata"}->{$new_id}->{"template"} = $config->{"metadata"}->{$pageid}->{"template"};
+							$config->{"metadata"}->{$new_id}->{"dirname"} = "$dir/$sb";
+							$config->{"metadata"}->{$new_id}->{"basename"} = $current{$sb};
+						}
+					}
+				} elsif ($found_unequal && defined($current{$sb})) {
+					die "ERROR: $sb same as previous page in list for page $id, but higher-level category different (lang $lang).\n";
+				}
+				if (!defined($current{$sb})) {
+					# as soon as one sort key is undef, the other ones should
+					# also be undef for it to make sense
+					for my $j ($i+1..$#sort_by) {
+						if (defined($tmp_md{$id}->{$lang}->{$sort_by[$j]})) {
+							die "ERROR: $sort_by[$j] set for page $id, but $sb unset (lang $lang).\n";
+						}
+						$current{$sort_by[$j]} = undef;
+					}
+					last;
+				}
+				$indent++;
+			}
+			if ($mode == 1) {
+				$output .= "<p style=\"margin-$margin_dir: " . ($indent * 15) . "pt;\">" .
+					"<a href=\"$rel_lnk\">" . $tmp_md{$id}->{$lang}->{"title"} . "</a></p>\n";
+			} else {
+				$rel_lnk = LSG::Misc::gen_relative_link("$lang/$page_stack[-1]->[0]", "$lang/$id.html");
+				$page_stack[-1]->[2] .= "<p><a href=\"$rel_lnk\">" . $tmp_md{$id}->{$lang}->{"title"} . "</a></p>\n";
+			}
+		} else {
+			$output .= "<p><a href=\"$rel_lnk\">" . $tmp_md{$id}->{$lang}->{"title"} . "</a></p>\n";
+		}
 	}
 
-	return $output;
+	if ($mode == 2) {
+		for my $i (1..$#page_stack) {
+			push(@extra_pages, pop(@page_stack));
+		}
+		$output = $page_stack[0]->[2];
+		shift @page_stack;
+		return ($output, @extra_pages);
+	} else {
+		return $output;
+	}
 }
 
 sub gen_lang_selector {
@@ -115,6 +266,9 @@ sub gen_nav {
 	#}
 	foreach my $nav_page (@nav) {
 		my $title = $config->{"metadata"}->{$nav_page}->{$lang}->{"title"};
+		if (!defined($title)) {
+			die "Unable to find title for navigation page \"$nav_page\"\n";
+		}
 		my $url = LSG::Misc::gen_relative_link("$lang/$pageid", "$lang/$nav_page.html");
 		$output .= "<li><a href=\"$url\">$title</a></li>\n";
 	}
diff --git a/lsg.pl b/lsg.pl
@@ -222,22 +222,22 @@ B<sort_books>
 
 B<Parameters:>
 
-- attribute to sort by
+- directory to take books from
 
-- create heading when attribute changes or not
+- mode
 
-B<Purpose:>
-
-Generate sorted list of all books, first by the given attribute, which can be anything
-in the metadata, then by the titles. The second attribute can be used to create, for
-instance, category titles. This does not make sense though when the attribute is just
-the title which changes every time anyways. If the second argument is left out, it
-defaults to "false". The attribute to be sorted by (obviously) needs to be defined for
-each book.
+- attribute(s) to sort by
 
-B<Example:>
+B<Purpose:>
 
-C<{sort_books}(category false)>
+Generate sorted list of all pages in the given directory, first by the given attribute(s),
+which can be anything in the metadata, then by the titles. The mode argument can be used
+to either just list all books ("list"), list all books with subheadings for the different
+sorting attributes ("combined"), or generate separate pages for the different values of
+the sorting attributes ("separate"). Note that the display names for the attributes need
+to be defined in the configuration file config.ini. For instance, if a page contains
+metadata "category:stuff", config.ini must contain, for each language, a section
+"[category:$lang]" with a line "stuff=Display Name".
 
 This function was created for a book site, but it could probably be used for articles
 as well.