#!/bin/sh
# ===== Script to extract data from SoC open applications =====
#
# A very simple shell script to extract essential data from Google SoC
# application list pages.
# M. Connor Fri May 12 03:05:08 GMT 2006
#
# Usage: feed the page's HTML on stdin. One TAB-separated record is written
# to stdout per application row:
#
#   name <TAB> email <TAB> project_title <TAB> score <TAB> has_mentor
#
# has_mentor is 1 for rows whose class is "listrequestapp" and 0 for rows
# whose class is "listapp". The script always exits 0.
#
# All patterns below match '<', '>' and ':' literally. Escaped forms such as
# '\<' and '\>' are avoided on purpose: they are undefined in POSIX basic
# regular expressions and act as word-boundary anchors in GNU sed.

# Read the next input line into $soc_line. Leading/trailing whitespace is
# trimmed (default IFS) and backslashes are kept as-is (-r). A final line
# without a trailing newline is still accepted. Fails only when the input
# is exhausted.
soc_next_line() {
  read -r soc_line || [ -n "$soc_line" ]
}

# Consume the lines that follow a row tag and print one record.
#   $1 - number of lines to discard between the title line and the score line
#   $2 - value printed in the final (has_mentor) column
# Returns non-zero, printing nothing, if the input ends inside the record.
soc_emit_record() {
  # The line directly after the row tag carries nothing we need.
  soc_next_line || return 1

  # E-mail: the text after the last "csaid=", up to the first ':'.
  soc_next_line || return 1
  soc_email=${soc_line##*csaid=}
  soc_email=${soc_email%%:*}

  # Name: everything before the first ':'.
  soc_next_line || return 1
  soc_name=${soc_line%%:*}

  # Title: drop a leading "<b><i>" and the first "</i></b>".
  soc_next_line || return 1
  soc_title=${soc_line#'<b><i>'}
  case "$soc_title" in
    *'</i></b>'*)
      soc_title=${soc_title%%'</i></b>'*}${soc_title#*'</i></b>'}
      ;;
  esac

  # Discard the filler lines that precede the score.
  soc_skip=$1
  while [ "$soc_skip" -gt 0 ]; do
    soc_next_line || return 1
    soc_skip=$((soc_skip - 1))
  done

  # Score: the whole (trimmed) line.
  soc_next_line || return 1
  printf '%s\t%s\t%s\t%s\t%s\n' \
    "$soc_name" "$soc_email" "$soc_title" "$soc_line" "$2"
}

# Scan stdin (blank lines already removed) for application rows and print a
# record for each one. Always returns 0.
soc_parse() {
  while soc_next_line; do
    # Row class: the text after the last "class=", cut at the first '>',
    # with every double quote removed.
    soc_class=${soc_line##*class=}
    soc_class=${soc_class%%'>'*}
    while :; do
      case "$soc_class" in
        *\"*) soc_class=${soc_class%%\"*}${soc_class#*\"} ;;
        *) break ;;
      esac
    done

    # The two row kinds differ only in how many lines sit between the title
    # and the score, and in the has_mentor flag.
    soc_status=0
    case "$soc_class" in
      listrequestapp) soc_emit_record 4 1 || soc_status=1 ;;
      listapp) soc_emit_record 3 0 || soc_status=1 ;;
    esac

    if [ "$soc_status" -ne 0 ]; then
      printf '%s\n' \
        'warning: input ended inside an application record; record dropped' >&2
      break
    fi
  done
  return 0
}

# Entry point: delete empty lines first so that the fixed line offsets inside
# each record hold, then parse what is left.
extract_soc_applications() {
  sed -e '/^$/d' | soc_parse
}

extract_soc_applications