audiorip.sh: add audiobook mode
[cmccabe-bin] / pdfgrep.sh
1 #!/bin/bash
2
3 #
4 # pdfgrep
5 #
6 # A script for searching through PDFs to find some words.
7 #
8 # I guess that everyone knows that PDFs aren't greppable. However, if you use
9 # this script, you can grep through PDFs for various key words, just as if
10 # they were text.
11 #
12 # usage: pdfgrep [pattern] [PDFs to search ...]
13 #
14 # Colin McCabe
15 #
16
# Print an error message on stderr and terminate the script.
# Arguments: $1 - the message to display
# Exits with status 1; never returns to the caller.
die() {
    # printf with a quoted argument prints the message verbatim: no word
    # splitting, no glob expansion, and no echo-style option parsing.
    printf '%s\n' "$1" >&2
    exit 1
}
21
# Succeed (status 0) when the given name ends in ".pdf", ignoring case;
# fail (status 1) otherwise.
# Arguments: $1 - file name to check
ispdf() {
    # A glob with a literal dot is used so that names such as "notapdf",
    # which merely end in the letters "pdf", are rejected.
    case "$1" in
        *.[pP][dD][fF]) return 0 ;;
        *) return 1 ;;
    esac
}
25
# Both external tools are required below; stop early with a clear message if
# either one is missing. `command -v` is a shell builtin lookup, so the check
# does not itself depend on an external `which` program being installed.
command -v pdftotext > /dev/null 2>&1 || die "you must have pdftotext installed"
command -v fold > /dev/null 2>&1 || die "you must have fold installed"
28
# The first argument is the search pattern; every remaining argument is
# treated as a file to search. Without a pattern there is nothing to do, so
# report the usage instead of silently running `shift` on an empty list.
[ "$#" -ge 1 ] || die "usage: pdfgrep [pattern] [PDFs to search ...]"
PATTERN=$1
shift
#echo "PATTERN=${PATTERN}"
32
# Search every remaining argument in turn. Arguments that ispdf rejects are
# skipped without comment.
for PDF in "$@"; do
    if ispdf "${PDF}"; then
        #echo "PDF = $PDF"
        # The extracted text is streamed straight into fold and grep rather
        # than staged in temporary files named after the PDF. That keeps
        # PDFs given with a directory component (e.g. "docs/a.pdf") working
        # and leaves nothing behind to clean up.
        #   pdftotext ... -   writes the text to stdout
        #   fold -s -w 120    wraps long lines at blanks so matches stay short
        #   --label           makes grep report stdin under the PDF's own name
        #   -e "${PATTERN}"   passes the pattern as one word, even when it
        #                     contains spaces or starts with a dash
        pdftotext "${PDF}" - \
            | fold -s -w 120 \
            | grep --with-filename --label="${PDF}" -i --color=always \
                -e "${PATTERN}"
    fi
done

# The script reports success whether or not any match was found.
exit 0