#!/bin/sh
# Download command. The functions below run it as: $GET -q URL
GET='fetch'
# Alternative value left over for debugging: GET=echo
# Extensions (matched case-insensitively at the end of a link) to download.
FILETYPES='jpg jpeg png gif'
# filename PATH_OR_URL
# Print the part of the argument that follows its last '/', i.e. the whole
# argument when it contains no '/', and an empty line when it ends in '/'.
#
# Uses parameter expansion instead of `echo $1 | grep -o`, so the argument
# is never word-split, glob-expanded, whitespace-collapsed or mistaken for
# an echo option (e.g. "-n").
filename() {
  printf '%s\n' "${1##*/}"
}
# get_img URL
# Download URL into the current directory with "$GET -q URL", unless a
# regular file named like the URL's last path component already exists
# there. Prints "getting URL" after a successful download; a failed
# download is silent. Always returns 0.
#
# Globals:  GET (read) - download command
# Uses:     filename
get_img() {
  # Nothing to name the local file after: skip, as the unquoted original
  # effectively did for an empty argument or one ending in '/'.
  case $1 in
    '' | */) return 0 ;;
  esac

  # The URL is quoted everywhere: '?' and '*' are ordinary URL characters
  # but would otherwise be treated as glob patterns by the shell.
  if [ ! -f "$(filename "$1")" ]; then
    # $GET stays unquoted so that it may carry extra options.
    if $GET -q "$1" 2> /dev/null; then
      printf 'getting %s\n' "$1"
    fi
  fi
}
# insert_header FILE
# Create (or truncate) FILE and write the page header into it:
#   1. a leading blank block,
#   2. ../index.html without its first two and last two lines, where the
#      first double-quoted string on each line gets "../" prepended,
#   3. a blank block, the output of `date`, and another blank block.
# Step 2 is skipped when ../index.html is unreadable or has four lines or
# fewer, instead of calling tail/head with zero or negative counts.
#
# Sets the globals `file` and `lines`, as the original did.
insert_header() {
  file=$1
  echo '
' > "$file"
  if [ -r ../index.html ]; then
    lines=$(wc -l < ../index.html | awk '{ print $1 }')
    if [ "$lines" -gt 4 ]; then
      # tail drops the first two lines, head then drops the last two.
      tail -n "$((lines - 2))" ../index.html \
        | head -n "$((lines - 4))" \
        | sed 's,"\([^"]*\)","../\1",' >> "$file"
    fi
  fi
  echo '
' >> "$file"
  date >> "$file"
  echo '
' >> "$file"
}
# grab_board URL DIR
# Mirror the images reachable from the board page URL into DIR and rebuild
# the index pages there.
#
#   1. Fetch URL into DIR/threads, then fetch every link on that page whose
#      target contains "res" (absolute http/https links as they are, all
#      others prefixed with URL minus its last path component).
#   2. For every DIR/threads/*html file, download the "img src" targets
#      containing "thumb" into DIR/thumbs and the "a href" targets ending
#      in one of $FILETYPES into DIR.
#   3. Walk the downloaded files in reverse numeric name order and spread
#      them over index.tmphtml, 1.tmphtml, 2.tmphtml, ... with 100 entries
#      per page. A .gif whose 15th `hd` output line contains "GGG" is
#      recorded in snowcrash.html and left out.
#   4. Append the page list to every page and rename *.tmphtml to *.html.
#
# Globals:  GET, FILETYPES (read); plain variables assigned below are global
# Uses:     filename, get_img, insert_header
# Returns:  0 on success, 2 on missing arguments, 1 if a working directory
#           cannot be entered. The caller's working directory is restored.
grab_board() {
  url=$1
  dir=$2
  if [ -z "$url" ] || [ -z "$dir" ]; then
    echo "usage: grab_board url dir" >&2
    return 2
  fi
  origin=$PWD
  prefix=$(printf '%s\n' "$url" | sed 's/[^/]*$//')
  board=$(filename "$url")

  # Every later step creates, deletes or renames files relative to the
  # current directory, so a failed cd must stop the function.
  mkdir "$dir" 2> /dev/null
  if ! cd "$dir" 2> /dev/null; then
    echo "grab_board: cannot enter directory: $dir" >&2
    return 1
  fi
  mkdir thumbs 2> /dev/null
  mkdir threads 2> /dev/null
  if [ ! -d thumbs ] || [ ! -d threads ] || ! cd threads; then
    echo "grab_board: cannot use thumbs/ and threads/ in $dir" >&2
    cd "$origin"
    return 1
  fi

  # $GET is left unquoted so that it may carry extra options.
  $GET -q "$url" > /dev/null
  for link in $(grep -E -o 'a href="[^"]+' "$board" \
    | grep -E -o '[^"=]*res[^" #]+' | uniq); do
    # grep's output is not discarded here, so every absolute link is also
    # printed to stdout (the original behaved the same for http:// links).
    if printf '%s\n' "$link" | grep -E '^https?://'; then
      $GET -q "$link" > /dev/null
    else
      $GET -q "$prefix$link" > /dev/null
    fi
  done
  cd ..

  for thread in threads/*html; do
    # An unmatched glob stays literal; skip it.
    [ -f "$thread" ] || continue
    thumbs=$(grep -E -io 'img src="?[^" #]+' "$thread" \
      | grep -E -o '[^="]+$' | grep thumb)
    if cd thumbs; then
      for img in $thumbs; do
        get_img "$img"
      done
      cd .. > /dev/null
    fi
    for type in $FILETYPES; do
      imgs=$(grep -E -io 'a href="?[^" #]+' "$thread" \
        | grep -E -o '[^="]+$' | grep -i "$type\$")
      for img in $imgs; do
        get_img "$img"
      done
    done
  done

  rm -f snowcrash.html
  num=0
  file_num=0
  file_name=index.tmphtml
  insert_header "$file_name"
  # Candidates: names in DIR that contain a dot and do not end in "html".
  for img in $(ls | grep -v 'html$' | grep '\.' | sort -rn); do
    thumb=${img%%.*}s.jpg
    case $img in
      *.gif)
        # Reference dump line from the original author:
        # 000000e0 46 46 47 47 47 48 48 48 49 49 49 4a 4a 4a 4b 4b |FFGGGHHHIIIJJJKK|
        if hd "$img" | head -n 15 | tail -n 1 | grep GGG > /dev/null; then
          echo "$img thumbs/$thumb" >> snowcrash.html
          continue
        fi
        ;;
    esac
    num=$((num + 1))
    if [ "$num" -gt 100 ]; then
      # The current image is the first entry of the new page; restarting at
      # 0 would let every page after the first hold 101 entries.
      num=1
      file_num=$((file_num + 1))
      file_name=${file_num}.tmphtml
      insert_header "$file_name"
    fi
    # Name stem without its last three characters, handed to `date -r`
    # (presumably a millisecond timestamp cut down to seconds - confirm).
    date=${img%%.*}
    date=${date%???}
    date=$(date -r "$date" '+%m/%d/%y %H:%M:%S')
    # NOTE(review): the entry written here is an empty line and neither
    # $thumb nor $date is referenced; the entry text may have been lost
    # from this file - confirm against the intended page layout.
    echo '' >> "$file_name"
  done

  # Build the page list once, before anything is renamed, so that every
  # page lists every numbered page (the original listed only the *.html
  # files that already existed when each page was finished).
  pages=$(printf '%s\n' *.tmphtml | sort -n)
  for page in $pages; do
    [ -f "$page" ] || continue
    echo '
0' >> "$page"
    for other in $pages; do
      if [ "$other" = index.tmphtml ] || [ ! -f "$other" ]; then
        continue
      fi
      echo "${other%.tmphtml}" >> "$page"
    done
    echo '' >> "$page"
  done
  for page in $pages; do
    [ -f "$page" ] || continue
    mv "$page" "${page%.tmphtml}.html"
  done
  cd "$origin"
}
#n=`wc -l $1 | awk '{ print $1 }'`
#for i in `jot $n 1`; do
# line=`cat $1 | head -$i | tail -1`
# url=`echo $line | awk '{ print $1 }'`
# dir=`echo $line | awk '{ print $2 }'`
# grab_board $url $dir
#done
# Entry point: $1 is the board URL, $2 the directory to mirror into.
# Both are required; with an empty directory argument the work would be
# done outside any board directory.
if [ "$#" -lt 2 ]; then
  echo "usage: $0 url dir" >&2
  exit 2
fi
grab_board "$1" "$2"