Skip to content

Commit 0d88750

Browse files
authored
Merge pull request #1607 from ISSOtm/preproc-order
Allow specifying preprocessor order
2 parents 6c20736 + 6b790b8 commit 0d88750

File tree

4 files changed

+257
-26
lines changed

4 files changed

+257
-26
lines changed

Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ serde_json = "1.0"
3333
shlex = "1"
3434
tempfile = "3.0"
3535
toml = "0.5.1"
36+
topological-sort = "0.1.0"
3637

3738
# Watch feature
3839
notify = { version = "4.0", optional = true }

guide/src/format/configuration/preprocessors.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,25 @@ be overridden by adding a `command` field.
5656
[preprocessor.random]
5757
command = "python random.py"
5858
```
59+
60+
### Require A Certain Order
61+
62+
The order in which preprocessors are run can be controlled with the `before` and `after` fields.
63+
For example, suppose you want your `linenos` preprocessor to process lines that may have been `{{#include}}`d; then you want it to run after the built-in `links` preprocessor, which you can require using either the `before` or `after` field:
64+
65+
```toml
66+
[preprocessor.linenos]
67+
after = [ "links" ]
68+
```
69+
70+
or
71+
72+
```toml
73+
[preprocessor.links]
74+
before = [ "linenos" ]
75+
```
76+
77+
It would also be possible, though redundant, to specify both of the above in the same config file.
78+
79+
Preprocessors having the same priority specified through `before` and `after` are sorted by name.
80+
Any infinite loops will be detected and produce an error.

src/book/mod.rs

Lines changed: 227 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ use std::process::Command;
2020
use std::string::ToString;
2121
use tempfile::Builder as TempFileBuilder;
2222
use toml::Value;
23+
use topological_sort::TopologicalSort;
2324

2425
use crate::errors::*;
2526
use crate::preprocess::{
@@ -372,12 +373,7 @@ fn determine_renderers(config: &Config) -> Vec<Box<dyn Renderer>> {
372373
renderers
373374
}
374375

375-
fn default_preprocessors() -> Vec<Box<dyn Preprocessor>> {
376-
vec![
377-
Box::new(LinkPreprocessor::new()),
378-
Box::new(IndexPreprocessor::new()),
379-
]
380-
}
376+
const DEFAULT_PREPROCESSORS: &[&'static str] = &["links", "index"];
381377

382378
fn is_default_preprocessor(pre: &dyn Preprocessor) -> bool {
383379
let name = pre.name();
@@ -386,36 +382,127 @@ fn is_default_preprocessor(pre: &dyn Preprocessor) -> bool {
386382

387383
/// Look at the `MDBook` and try to figure out what preprocessors to run.
388384
fn determine_preprocessors(config: &Config) -> Result<Vec<Box<dyn Preprocessor>>> {
389-
let mut preprocessors = Vec::new();
385+
// Collect the names of all preprocessors intended to be run, and the order
386+
// in which they should be run.
387+
let mut preprocessor_names = TopologicalSort::<String>::new();
390388

391389
if config.build.use_default_preprocessors {
392-
preprocessors.extend(default_preprocessors());
390+
for name in DEFAULT_PREPROCESSORS {
391+
preprocessor_names.insert(name.to_string());
392+
}
393393
}
394394

395395
if let Some(preprocessor_table) = config.get("preprocessor").and_then(Value::as_table) {
396-
for key in preprocessor_table.keys() {
397-
match key.as_ref() {
398-
"links" => preprocessors.push(Box::new(LinkPreprocessor::new())),
399-
"index" => preprocessors.push(Box::new(IndexPreprocessor::new())),
400-
name => preprocessors.push(interpret_custom_preprocessor(
401-
name,
402-
&preprocessor_table[name],
403-
)),
396+
for (name, table) in preprocessor_table.iter() {
397+
preprocessor_names.insert(name.to_string());
398+
399+
let exists = |name| {
400+
(config.build.use_default_preprocessors && DEFAULT_PREPROCESSORS.contains(&name))
401+
|| preprocessor_table.contains_key(name)
402+
};
403+
404+
if let Some(before) = table.get("before") {
405+
let before = before.as_array().ok_or_else(|| {
406+
Error::msg(format!(
407+
"Expected preprocessor.{}.before to be an array",
408+
name
409+
))
410+
})?;
411+
for after in before {
412+
let after = after.as_str().ok_or_else(|| {
413+
Error::msg(format!(
414+
"Expected preprocessor.{}.before to contain strings",
415+
name
416+
))
417+
})?;
418+
419+
if !exists(after) {
420+
// Only warn so that preprocessors can be toggled on and off (e.g. for
421+
// troubleshooting) without having to worry about order too much.
422+
warn!(
423+
"preprocessor.{}.after contains \"{}\", which was not found",
424+
name, after
425+
);
426+
} else {
427+
preprocessor_names.add_dependency(name, after);
428+
}
429+
}
430+
}
431+
432+
if let Some(after) = table.get("after") {
433+
let after = after.as_array().ok_or_else(|| {
434+
Error::msg(format!(
435+
"Expected preprocessor.{}.after to be an array",
436+
name
437+
))
438+
})?;
439+
for before in after {
440+
let before = before.as_str().ok_or_else(|| {
441+
Error::msg(format!(
442+
"Expected preprocessor.{}.after to contain strings",
443+
name
444+
))
445+
})?;
446+
447+
if !exists(before) {
448+
// See equivalent warning above for rationale
449+
warn!(
450+
"preprocessor.{}.before contains \"{}\", which was not found",
451+
name, before
452+
);
453+
} else {
454+
preprocessor_names.add_dependency(before, name);
455+
}
456+
}
404457
}
405458
}
406459
}
407460

408-
Ok(preprocessors)
461+
// Now that all links have been established, queue preprocessors in a suitable order
462+
let mut preprocessors = Vec::with_capacity(preprocessor_names.len());
463+
// `pop_all()` returns an empty vector when no more items are not being depended upon
464+
for mut names in std::iter::repeat_with(|| preprocessor_names.pop_all())
465+
.take_while(|names| !names.is_empty())
466+
{
467+
// The `topological_sort` crate does not guarantee a stable order for ties, even across
468+
// runs of the same program. Thus, we break ties manually by sorting.
469+
// Careful: `str`'s default sorting, which we are implicitly invoking here, uses code point
470+
// values ([1]), which may not be an alphabetical sort.
471+
// As mentioned in [1], doing so depends on locale, which is not desirable for deciding
472+
// preprocessor execution order.
473+
// [1]: https://doc.rust-lang.org/stable/std/cmp/trait.Ord.html#impl-Ord-14
474+
names.sort();
475+
for name in names {
476+
let preprocessor: Box<dyn Preprocessor> = match name.as_str() {
477+
"links" => Box::new(LinkPreprocessor::new()),
478+
"index" => Box::new(IndexPreprocessor::new()),
479+
_ => {
480+
// The only way to request a custom preprocessor is through the `preprocessor`
481+
// table, so it must exist, be a table, and contain the key.
482+
let table = &config.get("preprocessor").unwrap().as_table().unwrap()[&name];
483+
let command = get_custom_preprocessor_cmd(&name, table);
484+
Box::new(CmdPreprocessor::new(name, command))
485+
}
486+
};
487+
preprocessors.push(preprocessor);
488+
}
489+
}
490+
491+
// "If `pop_all` returns an empty vector and `len` is not 0, there are cyclic dependencies."
492+
// Normally, `len() == 0` is equivalent to `is_empty()`, so we'll use that.
493+
if preprocessor_names.is_empty() {
494+
Ok(preprocessors)
495+
} else {
496+
Err(Error::msg("Cyclic dependency detected in preprocessors"))
497+
}
409498
}
410499

411-
fn interpret_custom_preprocessor(key: &str, table: &Value) -> Box<CmdPreprocessor> {
412-
let command = table
500+
fn get_custom_preprocessor_cmd(key: &str, table: &Value) -> String {
501+
table
413502
.get("command")
414503
.and_then(Value::as_str)
415504
.map(ToString::to_string)
416-
.unwrap_or_else(|| format!("mdbook-{}", key));
417-
418-
Box::new(CmdPreprocessor::new(key.to_string(), command))
505+
.unwrap_or_else(|| format!("mdbook-{}", key))
419506
}
420507

421508
fn interpret_custom_renderer(key: &str, table: &Value) -> Box<CmdRenderer> {
@@ -515,8 +602,8 @@ mod tests {
515602

516603
assert!(got.is_ok());
517604
assert_eq!(got.as_ref().unwrap().len(), 2);
518-
assert_eq!(got.as_ref().unwrap()[0].name(), "links");
519-
assert_eq!(got.as_ref().unwrap()[1].name(), "index");
605+
assert_eq!(got.as_ref().unwrap()[0].name(), "index");
606+
assert_eq!(got.as_ref().unwrap()[1].name(), "links");
520607
}
521608

522609
#[test]
@@ -563,9 +650,123 @@ mod tests {
563650

564651
// make sure the `preprocessor.random` table exists
565652
let random = cfg.get_preprocessor("random").unwrap();
566-
let random = interpret_custom_preprocessor("random", &Value::Table(random.clone()));
653+
let random = get_custom_preprocessor_cmd("random", &Value::Table(random.clone()));
654+
655+
assert_eq!(random, "python random.py");
656+
}
657+
658+
#[test]
659+
fn preprocessor_before_must_be_array() {
660+
let cfg_str = r#"
661+
[preprocessor.random]
662+
before = 0
663+
"#;
567664

568-
assert_eq!(random.cmd(), "python random.py");
665+
let cfg = Config::from_str(cfg_str).unwrap();
666+
667+
assert!(determine_preprocessors(&cfg).is_err());
668+
}
669+
670+
#[test]
671+
fn preprocessor_after_must_be_array() {
672+
let cfg_str = r#"
673+
[preprocessor.random]
674+
after = 0
675+
"#;
676+
677+
let cfg = Config::from_str(cfg_str).unwrap();
678+
679+
assert!(determine_preprocessors(&cfg).is_err());
680+
}
681+
682+
#[test]
683+
fn preprocessor_order_is_honored() {
684+
let cfg_str = r#"
685+
[preprocessor.random]
686+
before = [ "last" ]
687+
after = [ "index" ]
688+
689+
[preprocessor.last]
690+
after = [ "links", "index" ]
691+
"#;
692+
693+
let cfg = Config::from_str(cfg_str).unwrap();
694+
695+
let preprocessors = determine_preprocessors(&cfg).unwrap();
696+
let index = |name| {
697+
preprocessors
698+
.iter()
699+
.enumerate()
700+
.find(|(_, preprocessor)| preprocessor.name() == name)
701+
.unwrap()
702+
.0
703+
};
704+
let assert_before = |before, after| {
705+
if index(before) >= index(after) {
706+
eprintln!("Preprocessor order:");
707+
for preprocessor in &preprocessors {
708+
eprintln!(" {}", preprocessor.name());
709+
}
710+
panic!("{} should come before {}", before, after);
711+
}
712+
};
713+
714+
assert_before("index", "random");
715+
assert_before("index", "last");
716+
assert_before("random", "last");
717+
assert_before("links", "last");
718+
}
719+
720+
#[test]
721+
fn cyclic_dependencies_are_detected() {
722+
let cfg_str = r#"
723+
[preprocessor.links]
724+
before = [ "index" ]
725+
726+
[preprocessor.index]
727+
before = [ "links" ]
728+
"#;
729+
730+
let cfg = Config::from_str(cfg_str).unwrap();
731+
732+
assert!(determine_preprocessors(&cfg).is_err());
733+
}
734+
735+
#[test]
736+
fn dependencies_dont_register_undefined_preprocessors() {
737+
let cfg_str = r#"
738+
[preprocessor.links]
739+
before = [ "random" ]
740+
"#;
741+
742+
let cfg = Config::from_str(cfg_str).unwrap();
743+
744+
let preprocessors = determine_preprocessors(&cfg).unwrap();
745+
746+
assert!(preprocessors
747+
.iter()
748+
.find(|preprocessor| preprocessor.name() == "random")
749+
.is_none());
750+
}
751+
752+
#[test]
753+
fn dependencies_dont_register_builtin_preprocessors_if_disabled() {
754+
let cfg_str = r#"
755+
[preprocessor.random]
756+
before = [ "links" ]
757+
758+
[build]
759+
use-default-preprocessors = false
760+
"#;
761+
762+
let cfg = Config::from_str(cfg_str).unwrap();
763+
764+
let preprocessors = determine_preprocessors(&cfg).unwrap();
765+
766+
assert!(preprocessors
767+
.iter()
768+
.find(|preprocessor| preprocessor.name() == "links")
769+
.is_none());
569770
}
570771

571772
#[test]

0 commit comments

Comments
 (0)