diff options
author | Federico Mena Quintero <federico@gnome.org> | 2022-03-08 16:45:57 -0600 |
---|---|---|
committer | Federico Mena Quintero <federico@gnome.org> | 2022-03-08 19:37:44 -0600 |
commit | 8dc81944708e92aff573f9badb708d7de421b721 (patch) | |
tree | 42daa5d3714e23f3a138c512d540a561c504214f /tests | |
parent | 874c16ec8f46822a0a7d7cfdbdceb65bdb37b452 (diff) | |
download | librsvg-8dc81944708e92aff573f9badb708d7de421b721.tar.gz |
PdfPredicate.with_text() - simple way to test for a PDF containing some textual content
This is *really* basic; lopdf's Document::extract_text() simply
concatenates all the text strings in a page and returns them, so the
predicate just does String::contains().
If we ever need something more sophisticated, we can walk the PDF
structure manually with lopdf (gulp) and extract individual commands
for text.
Part-of: <https://gitlab.gnome.org/GNOME/librsvg/-/merge_requests/673>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/src/predicates/pdf.rs | 29 |
1 files changed, 29 insertions, 0 deletions
diff --git a/tests/src/predicates/pdf.rs b/tests/src/predicates/pdf.rs
index 862e64bb..62a3da4b 100644
--- a/tests/src/predicates/pdf.rs
+++ b/tests/src/predicates/pdf.rs
@@ -50,6 +50,13 @@ impl PdfPredicate {
             d: Detail::Link(link.to_string()),
         }
     }
+
+    pub fn with_text(self: Self, text: &str) -> DetailPredicate<Self> {
+        DetailPredicate::<Self> {
+            p: self,
+            d: Detail::Text(text.to_string()),
+        }
+    }
 }
 
 impl Predicate<[u8]> for PdfPredicate {
@@ -86,6 +93,7 @@ enum Detail {
     PageSize(Dimensions, usize),
     CreationDate(DateTime<Utc>),
     Link(String),
+    Text(String),
 }
 
 /// A PDF page's dimensions from its `MediaBox`.
@@ -160,6 +168,7 @@ impl DetailPredicate<PdfPredicate> {
             Detail::PageSize(d, idx) => doc.get_page_size(*idx).map_or(false, |dim| dim == *d),
             Detail::CreationDate(d) => doc.get_creation_date().map_or(false, |date| date == *d),
             Detail::Link(link) => document_has_link(doc, &link),
+            Detail::Text(text) => document_has_text(doc, &text),
         }
     }
 
@@ -193,6 +202,9 @@ impl DetailPredicate<PdfPredicate> {
                 "actual link contents",
                 "FIXME: who knows, but it's not what we expected".to_string(),
             ),
+            Detail::Text(_) => {
+                Product::new("actual text contents", doc.extract_text(&[1]).unwrap())
+            }
         }
     }
 }
@@ -290,10 +302,21 @@ impl fmt::Display for DetailPredicate<PdfPredicate> {
             Detail::PageSize(d, _) => write!(f, "is a PDF sized {}", d),
             Detail::CreationDate(d) => write!(f, "is a PDF created {:?}", d),
             Detail::Link(l) => write!(f, "is a PDF with a link to {}", l),
+            Detail::Text(t) => write!(f, "is a PDF with \"{}\" in its text content", t),
         }
     }
 }
 
+// This is an extremely trivial test for a string being present in the document's
+// text objects.
+fn document_has_text(document: &lopdf::Document, needle: &str) -> bool {
+    if let Ok(haystack) = text_from_first_page(document) {
+        haystack.contains(needle)
+    } else {
+        false
+    }
+}
+
 // We do a super simple test that a PDF actually contains an Annotation object
 // with a particular link.  We don't test that this annotation is actually linked
 // from a page; that would be nicer.
@@ -327,3 +350,9 @@ fn dict_has_a_with_link(dict: &Dictionary, link_text: &str) -> bool {
         .map(|string| string == link_text.as_bytes())
         .unwrap_or(false)
 }
+
+fn text_from_first_page(doc: &lopdf::Document) -> lopdf::Result<String> {
+    // This is extremely simplistic; lopdf just concatenates all the text in the page
+    // into a single string.
+    doc.extract_text(&[1])
+}