#!/bin/sh
# Usage: ppttitles 01/index.htm 01_3.xml
#
# Read in 01/01/index.htm and generate a list of the PowerPoint slide titles
# in xml format.
#
# The output will look like
#   NOTE(review): the sample output that belongs here is missing from this
#   header (only empty comment lines remain); restore it from a pristine copy.
#
# The second argument is optional.  If it is present, then
# it is assumed to name a UTF-8 file that was generated by Windows Media
# File Editor and then converted to UTF-8.  We look in this file
# for Script tags that have Command Attributes and adjust the
# output to match the base of the Command Attribute.  For example
# if the index.htm file contains links to files like sld002.htm, and
# the second argument contains links to files like 01/slide2.gif, then
# we adjust the output so that we have links to files like slide2.gif
# instead of sld002.htm.
#
# Exit status: 9 when called with the wrong number of arguments.

# Exactly one or two arguments are accepted.  The usage text is a
# diagnostic, so it is written to stderr to keep it out of any file that
# stdout is being redirected to.
if [ "$#" -ne 1 ] && [ "$#" -ne 2 ]; then
    echo "Usage: $0 index.htm [toc.xml ]" >&2
    echo " Read in index.htm and generate a list of the PowerPoint slide titles" >&2
    exit 9
fi

if [ "$#" -eq 2 ]; then
    # Look inside argument $2 and determine whether to use Slide or slide.
    # We use the second Command= because the first one is sometimes
    # different from the second one: it might have been added by hand.
    #
    # The awk program takes the text after ' Command="' on the second
    # matching line, keeps what precedes the next double quote, takes the
    # last "/"-separated component of that, and prints "slide" or "Slide"
    # depending on which spelling the component contains.  It prints
    # nothing if neither spelling is found or if there is no second match.
    commandurl=$(awk '
        $0 ~ / Command="/ {
            commandCount++
            if (commandCount == 2) {
                # p is the index of the first character after the opening
                # double quote of the attribute value.
                p = match($0, / Command=/) + length(" Command=") + 1
                commandEtc = substr($0, p, length($0) - p)
                split(commandEtc, command, "\"")
                np = split(command[1], pa, "/")
                baseurl = pa[np]
                if (baseurl ~ /slide/) {
                    print "slide"
                } else if (baseurl ~ /Slide/) {
                    print "Slide"
                }
                exit
            }
        }' "$2")
else
    commandurl=Slide
fi

if [ -z "$commandurl" ]; then
    echo "$0: Warning, 'commandurl' = '', which is likely a bug" 1>&2
    echo " The problem is likely in '$2'" 1>&2
fi

# Pipeline stages:
#   1. keep the lines that link to a sldNNN page (case-insensitive),
#   2. drop the "Click here to start" link,
#   3. map the bytes \013 \205 \222 \223 \224 \225 \226 \227 one-for-one
#      onto the characters of the second tr argument,
#   4. sed pass over "&",
#   5. awk turns each remaining link into one output record.
#
# NOTE(review): as written, sed 's/&/&/g' replaces every "&" with itself
# and so changes nothing.  Presumably an XML escape such as "&amp;" was
# intended - confirm against a pristine copy before changing it.
#
# NOTE(review): grep matches 'a href' case-insensitively, but the awk
# program below only recognises upper-case "A HREF", so lower-case anchors
# pass grep and are then silently ignored - confirm whether that is wanted.
#
# NOTE(review): every print statement in the awk program below emits an
# empty string or a single space.  The XML markup that the header describes
# looks as if it was stripped from these literals; they are reproduced
# unchanged here and should be checked against a pristine copy.
grep -i -e 'a href="sld' -- "$1" |
    grep -vi "Click here to start" |
    tr "\013\205\222\223\224\225\226\227" " .'' \-\-_" |
    sed 's/&/&/g' |
    awk -v commandurl="$commandurl" '
    BEGIN {
        print ""
        print ""
    }

    {
        nf = split($0, f, "<")
        # Look for the A HREF tag and print out the part after the >
        for (i = 1; i <= nf; i++) {
            if (f[i] ~ /A HREF/) {
                # tag[1] is the text inside the tag, tag[2] is the link
                # text (the slide title); href[2] is the quoted URL.
                split(f[i], tag, ">")
                split(tag[1], href, "\"")

                if (tag[2] ~ /^PPT Slide$/ && href[2] ~ /^sld/) {
                    # Generic title: replace it with "Slide N", where N is
                    # the number from sldNNN.htm without leading zeros.
                    talkSlideNumber = substr(href[2], 4, length(href[2]) - 7)
                    talkSlideNumberFixed = talkSlideNumber + 0
                    tag[2] = "Slide " talkSlideNumberFixed
                }

                if (href[2] ~ /^sld/ && commandurl != "sld") {
                    # Convert sld to whatever the value of commandurl is,
                    # get rid of leading zeros, so sld001.htm -> slide1.gif
                    slideNumber = substr(href[2], 4, length(href[2]) - 7)
                    previousSlideNumberFixed = slideNumberFixed
                    slideNumberFixed = slideNumber + 0

                    # We use talkSlideNumberFixed below.
                    # FIXME: We need two variables here, slideNumberFixed and
                    # talkSlideNumberFixed to handle checking against the
                    # previous slide number.
                    talkSlideNumberFixed = slideNumberFixed

                    if (slideNumberFixed > previousSlideNumberFixed) {
                        if (mergedPPTFile == 0) {
                            # If we are handling slides for only one
                            # powerpoint file, then we will end up here.
                            href[2] = commandurl slideNumberFixed ".gif"
                        } else {
                            # We are handling a merged powerpoint file
                            # situation
                            #print ""
                            mergedCount++
                            href[2] = commandurl mergedCount ".gif"
                        }
                    } else {
                        # We are handling a file that contains the
                        # concatenation of multiple index.htm files.
                        # This would occur if we combined multiple talks
                        # into a single ppt file and then generated
                        # slides for each individual talk and then
                        # created an htm file with:
                        #   cat */*/index.htm > all.htm
                        # However, the talks themselves will have URL
                        # events that point to an ever increasing
                        # SlideNN.gif file.
                        # We assume that each talk is separated by a blank
                        # gif.
                        # So, the first talk will have
                        #   Slide1.gif ... SlideN.gif
                        # SlideN+1 will be blank
                        # The second talk will have
                        #   SlideN+2.gif ... SlideM.gif
                        # Thus if slideNumberFixed is not greater than
                        # previousSlideNumberFixed, we set a flag
                        # indicating we are ignoring the count and using
                        # our own count, and increment the count by two
                        # to skip the blank.
                        if (mergedPPTFile == 0) {
                            print ""
                            mergedPPTFile = 1
                            mergedCount = previousSlideNumberFixed + 2
                        } else {
                            print ""
                            mergedCount += 2
                        }
                        href[2] = commandurl mergedCount ".gif"
                    }
                }

                if (tag[2] == "") {
                    # FIXME: The title is empty, which can cause problems
                    # when playing the stream.  The problem here is that
                    # the title of the slide is likely so long that it is
                    # on the line after the A HREF line.
                    tag[2] = "Slide " talkSlideNumberFixed
                }
                print " "
            }
        }
    }

    END {
        print ""
    }
    '