#!/usr/bin/env bash
# =======================================================================================
# This is part of the How-To article submitted to TranslatorsCafe.com by Daniel Storbeck
# http://www.translatorscafe.com/cafe/Articles.asp?ArtID=109
# =======================================================================================
#
# Convert one PDF document to a set of JPEG images, each representing one
# page of the source document.
#
# Usage:    transform file.pdf
# Output:   <name><NN>.jpg next to the input, where <name> is the argument
#           without its .pdf extension and <NN> is the page number,
#           zero-padded to the width of the total page count.
# Requires: pdfinfo, pdftops, psselect, convert
# Exit:     0 on success, 1 on a usage error or when any conversion step fails.

set -uo pipefail

# Print an error message to stderr and terminate with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print an error message followed by the usage line, then terminate.
usage_error() {
  printf '%s\n' "$1" >&2
  die "usage: transform file.pdf"
}

# Remove intermediate PostScript files; runs on every exit path via trap.
cleanup() {
  if [[ -n "$page_ps" ]]; then
    rm -f -- "$page_ps"
  fi
  rm -f -- "$doc_ps"
}

# check syntax of given command line
if (( $# != 1 )); then
  usage_error "Error. The script takes exactly one argument."
fi

input=$1
# A leading "-" would be read as an option by the tools below; anchoring the
# path to the current directory keeps it a plain file name.
if [[ "$input" == -* ]]; then
  input="./$input"
fi

# get file name without extension (case-insensitive match on ".pdf")
fname=${input%.[pP][dD][fF]}
if [[ -z "$fname" ]]; then
  usage_error "Error. Cannot process the given file name."
fi

# fail early if a required tool is missing
for tool in pdfinfo pdftops psselect convert; do
  command -v "$tool" >/dev/null 2>&1 \
    || die "Error. Required command not found: $tool"
done

# get the number of pages in the PDF document
info=$(pdfinfo "$input") || die "Error. pdfinfo failed for $input."
# Match on the first field only, so another line that merely contains
# "Pages:" (for example in a title) is not picked up.
nr_pages=$(awk '$1 == "Pages:" { print $2; exit }' <<<"$info")
if [[ ! "$nr_pages" =~ ^[1-9][0-9]*$ ]]; then
  die "Error. Cannot determine the number of pages in $input."
fi

# The page count has no leading zeros (checked above), so its string length
# is the number of digits to pad every page number to.
digits=${#nr_pages}

doc_ps="$fname.ps"
page_ps=""
trap cleanup EXIT

# convert from PDF to PS
# The output name is passed explicitly: when left to pdftops, the name it
# derives need not agree with $fname (NOTE(review): believed to happen for
# mixed-case extensions such as ".Pdf" -- confirm against the pdftops docs).
pdftops "$input" "$doc_ps" || die "Error. pdftops failed for $input."

# split the PS document in individual pages
for (( n = 1; n <= nr_pages; n++ )); do
  # zero-pad the page number to the common width
  printf -v nr_padded '%0*d' "$digits" "$n"
  page_ps="$fname$nr_padded.ps"

  # extract actual page; the prefix labels the status line psselect prints
  printf '%s' "psselect: "
  psselect -p"$n" "$doc_ps" "$page_ps" \
    || die "Error. psselect failed on page $n."

  # convert to JPEG
  convert -density 300 "$page_ps" "$fname$nr_padded.jpg" \
    || die "Error. convert failed on page $n."
  echo "converted page nr. $n to JPEG"

  # delete PS page
  rm -f -- "$page_ps"
  page_ps=""
done

# the full PS document is removed by the EXIT trap (cleanup)