#!/bin/bash
#
# Download a single web page into a per-notebook directory.
#
# Usage: <script> URL [NOTEBOOK] [FLAGS]
#   $1  URL       page to fetch; when missing the script exits silently.
#   $2  NOTEBOOK  sub-directory of /mnt/tabula/nevernote (default: 'read').
#   $3  FLAGS     if this argument contains the text "todo", TODO is 'yes'.
#
# State kept under $HOME/.nevernote:
#   nevernote-list-excluded    (read)   URLs that must not be downloaded
#   nevernote-list-downloaded  (read)   URLs that were already downloaded
#   nevernote-error-excluded   (append) rejected because excluded
#   nevernote-error-dups       (append) rejected because already downloaded
#   nevernote-error-404        (append) download produced no file
#   nevernote.XXXXXX/          scratch directory for the current download
#
# Every early-exit path below ends with a bare `exit`, i.e. the exit status
# is that of the last command run before it.

# Succeed when URL $1 is present as a complete line of list file $2.
#   -F  match the URL literally ('.' and friends are not regex wildcards)
#   --  a URL starting with '-' is not mistaken for a grep option
# A missing or unreadable list file simply counts as "not listed".
url_listed() {
  grep -qxF -- "$1" "$2" 2> /dev/null
}

# Print reason $1, record the URL in error file $2, remove the scratch
# directory and stop the script.
give_up() {
  echo "$1"
  printf '%s\n' "$URL" >> "$2"
  rm -r -- "$TMP_DIR"
  exit
}

if [[ -z "$1" ]]; then
  exit
fi

URL=$1
NOTEBOOK=${2:-read}

TODO='no'
if [[ "$3" == *todo* ]]; then
  TODO='yes'
fi

mkdir -p "$HOME/.nevernote"

# Without a scratch directory the later steps would work on the wrong
# paths (wget -P with an empty prefix, ls of the current directory), so
# stop right away if it cannot be created.
if ! TMP_DIR=$(mktemp -d "$HOME/.nevernote/nevernote.XXXXXX"); then
  echo "cannot create temporary directory in $HOME/.nevernote" >&2
  exit 1
fi

NEVERNOTE_DIR="/mnt/tabula/nevernote/$NOTEBOOK"
TODO_DIR="/mnt/tabula/nevernote/todo"

## Log header: blank line, current time, requested URL
echo
date '+%H:%M:%S'
printf '%s\n' "$URL"

## Check if URL is forbidden to download
if url_listed "$URL" "$HOME/.nevernote/nevernote-list-excluded"; then
  give_up "exclude" "$HOME/.nevernote/nevernote-error-excluded"
fi

## Check if it is downloading now
#ps ax | grep "./scripts/nevernote.sh" | awk '{print($7)}' | grep -x "$URL"
#if [ "$?" -eq 0 ]; then
#  echo "downloading now"
#  rm -r $TMP_DIR
#  exit
#fi

## Check downloaded urls for duplicates
#head -qn 1 ${NEVERNOTE_DIR}/*/wget.log | awk '{print($3)}' | grep -x "$URL" > /dev/null 2>&1
if url_listed "$URL" "$HOME/.nevernote/nevernote-list-downloaded"; then
  # The message spelling is kept as-is; it is part of the script's output.
  give_up "dublicate" "$HOME/.nevernote/nevernote-error-dups"
fi

## Fetch the page: 15 s timeout, 5 tries, empty User-Agent, saved in TMP_DIR
wget -T 15 -t 5 --user-agent="" -P "$TMP_DIR" "$URL" > /dev/null 2>&1

# Whatever wget saved is the page; an empty directory means nothing arrived.
INDEX_PAGE=$(ls "$TMP_DIR")
if [[ -z "$INDEX_PAGE" ]]; then
  give_up "download error" "$HOME/.nevernote/nevernote-error-404"
fi

## Convert page to system's charset
enconv "$TMP_DIR/$INDEX_PAGE" > /dev/null 2>&1

## Remove CR and LF symbols
#tr -d '\n' < "$TMP_DIR/$INDEX_PAGE" | tr -d '\r' > "$TMP_DIR/${INDEX_PAGE}.plain"
#mv "$TMP_DIR/${INDEX_PAGE}.plain" "$TMP_DIR/$INDEX_PAGE"

## Extract title and leave non-destruct chars
# NOTE(review): the statement that follows is cut off in the reviewed source
# right after the text shown below. It is kept verbatim and commented out so
# the script still parses; restore the complete command from the full file.
# PAGE_DIR=$(sed -n -e 's/.*