#!/bin/sh
#
#   subtitleripper - execute the whole chain for extracting subtitles
#                    from a vob file
#
#   Copyright (C) 2015 Joachim Wiedorn <joodevel@joonet.de>
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 2 of the License, or
#   (at your option) any later version.
#
#   This package is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# -------------------------------------------------------------------------

# Uncomment the following line to trace every executed command.
#set -x

# Release number of the package; shown by -V and in the usage text.
VERSION=0.5.2

# Leave the script with status 1 when it is interrupted (SIGINT, Control-C).
trap 'echo "Control-C pressed."; exit 1;' INT

# Print the command line help on stdout and terminate the script.
#
# Reads the global VERSION for the closing "Version:" paragraph.
# Does not return: the script always exits with status 1 afterwards,
# also when the help was requested explicitly with -h.
usage()
{
cat << _END_
Usage:
    subtitleripper -i vob -a num [-f lang] [-s num]
    subtitleripper -h
    subtitleripper -V

    -a num          Number of subtitle inside the input .vob file.
                    It can be given in original notation of the
                    vob file, e.g. 0x20, but also only as single
                    number, e.g. "0" for 0x20 or "1" for 0x21,
                    default is "0".

    -f lang         You can optionally specify a language filter
                    using this option. Currently Dutch, English,
                    French and German are supported.
                    lang = {de|en|fr|nl|none}, default is "none".

    -h              Print this usage help.

    -i vob          Input mpeg2 filename, usually with suffix .vob
                    which have the needed subtitle inside.

    -s num          Set spacewidth between words in units of dots
                    which will be used by GOCR. Default is "0" for
                    autodetection (details see: man gocr).

    -V              Print version number.

Example:
    Extract subtitle number 2 from VOB file with dutch language
    filter:

        subtitleripper  -i title.vob  -a 2  -f nl

Version:
    Part of subtitleripper ${VERSION} package.

_END_
exit 1
}

# Called without any argument: show the help text (usage ends the script).
[ $# -ne 0 ] || usage

# Return success when $1 is a non-empty string made of decimal digits only.
is_uint()
{
    case "$1" in
        ''|*[!0-9]*) return 1 ;;
        *)           return 0 ;;
    esac
}

# Start from the documented defaults. Assigning them explicitly also keeps
# equally named variables of the calling environment (LANGUAGE is commonly
# exported for message translation) from leaking into the script.
SUB_TITLE="0"
LANGUAGE="none"
GOCR_SPACE=""
MPEG_FILE=""

# process command line options
while getopts "a:f:hi:s:V" OPTION
do
  case $OPTION in
    a)
        # subtitle number: 1 to 4 characters, the exact notation is
        # checked later when the number gets normalised
        if [ ${#OPTARG} -gt 0 ] && [ ${#OPTARG} -lt 5 ]; then
            SUB_TITLE="${OPTARG}"
        else
            echo "Subtitle number '${OPTARG}' not usable - Abort!"
            exit 1
        fi
        ;;
    f)
        # language filter: 1 to 3 characters
        if [ ${#OPTARG} -gt 0 ] && [ ${#OPTARG} -lt 4 ]; then
            LANGUAGE="${OPTARG}"
        else
            echo "Language filter '${OPTARG}' not usable - Abort!"
            exit 1
        fi
        ;;
    i)
        # input mpeg2 file
        MPEG_FILE="${OPTARG}"
        ;;
    s)
        # minimum wordspace: only 1..39 is turned into a "-s" option.
        # Everything else (including "0") is ignored as before, so
        # GOCR_SPACE stays empty. The digit and length checks keep the
        # numeric test below from failing on non-numeric or huge input.
        if is_uint "${OPTARG}" && [ ${#OPTARG} -le 2 ] \
           && [ "${OPTARG}" -gt 0 ] && [ "${OPTARG}" -lt 40 ]; then
            GOCR_SPACE="-s ${OPTARG}"
        fi
        ;;
    V)
        # print version number (exit status 1 kept as in earlier releases)
        echo "${VERSION}"
        exit 1
        ;;
    *)
        # print usage help (also with -h)
        usage
        ;;
  esac
done

# Validate the input file and replace MPEG_FILE by its absolute path.
# The absolute path is needed because the script changes into the output
# directory before the file is read.
# The explicit emptiness test catches a missing -i option: an unquoted empty
# operand would make "[ ! -e ]" evaluate to false.
if [ -z "${MPEG_FILE}" ] || [ ! -e "${MPEG_FILE}" ]; then
    echo "Input file '${MPEG_FILE}' not found - Abort!"
    exit 1
fi

MPEG_FILENAME=$(basename -- "${MPEG_FILE}")
# CDPATH is cleared for this cd so that a relative directory is resolved
# against the current directory only and cd prints nothing itself.
MPEG_ABSPATH=$(CDPATH= cd -- "$(dirname -- "${MPEG_FILE}")" && pwd)
if [ -z "${MPEG_ABSPATH}" ]; then
    echo "Input file '${MPEG_FILE}' not found - Abort!"
    exit 1
fi
MPEG_FILE="${MPEG_ABSPATH}/${MPEG_FILENAME}"

# Convert a subtitle number given on the command line into both notations
# used by the rest of the script.
#
#   $1 - "0".."9", "10".."19" or the vob notation "0x2N" / "0x3N"
#        (N being one hexadecimal digit)
#
# On success sets the globals
#   SUB_NUM   - two character short form  (e.g. "05", "12")
#   SUB_TITLE - vob notation              (e.g. "0x25", "0x32")
# and returns 0. Returns 1 and leaves both untouched for any other input.
#
# The mapping is done on the text only; no shell arithmetic is used, since
# values with a leading zero such as "08" are not valid octal constants.
#
# NOTE(review): 10..19 are mapped to 0x30..0x39 (not 0x2a..0x33), exactly
# as before - confirm that this matches the intended stream numbering.
normalize_subtitle_id()
{
    case "$1" in
        [0-9])
            SUB_NUM="0$1"
            SUB_TITLE="0x2$1"
            ;;
        1[0-9])
            SUB_NUM="$1"
            SUB_TITLE="0x3${1#1}"
            ;;
        0x2[0-9a-fA-F])
            SUB_NUM="0${1#0x2}"
            SUB_TITLE="$1"
            ;;
        0x3[0-9a-fA-F])
            SUB_NUM="1${1#0x3}"
            SUB_TITLE="$1"
            ;;
        *)
            return 1
            ;;
    esac
    return 0
}

# recognize notation of subtitle number and convert to long/short subtitle
# number; an unset or empty value means the documented default "0"
if ! normalize_subtitle_id "${SUB_TITLE:-0}"; then
    echo "Subtitle number '${SUB_TITLE}' not usable - Abort!"
    exit 1
fi

# An unset or empty language filter means "none", the documented default.
# Without this the names below would end in a dangling "_" / "-".
LANGUAGE="${LANGUAGE:-none}"

# Working directory (BASE_PATH) and file name stem (BASE_NAME) for this
# subtitle; the language is appended only when a filter is in use.
BASE_PATH="subtitle_${SUB_NUM}"
BASE_NAME="subtitle-${SUB_NUM}"
if [ "${LANGUAGE}" != "none" ]; then
    BASE_PATH="${BASE_PATH}_${LANGUAGE}"
    BASE_NAME="${BASE_NAME}-${LANGUAGE}"
fi

# Check that the needed external programs are in the search path and abort
# with the matching message otherwise. Each program is looked up under its
# own name (the tcextract check used to probe for gocr).
for PROGRAM in tcextract gocr; do
    if ! command -v "${PROGRAM}" >/dev/null 2>&1; then
        echo "Cannot find needed program '${PROGRAM}' - Abort!"
        exit 1
    fi
done

echo "Extracting subtitles from ${MPEG_FILENAME} ..."
echo "(this can take a while)"

# All intermediate and result files live in ${BASE_PATH} below the current
# directory. Stop if it cannot be created or entered: otherwise the rm and
# the extraction below would work in the wrong directory.
if ! mkdir -p "${BASE_PATH}" || ! cd "${BASE_PATH}"; then
    echo "Cannot use working directory '${BASE_PATH}' - Abort!"
    exit 1
fi

if [ -f "${BASE_NAME}0001.pgm" ]; then
    # Images of an earlier run exist: reuse them, but drop the old text and
    # .srt results so that they are generated again.
    echo
    echo "Extracted subtitle image files (.pgm) found - will use it!"
    rm -f "${BASE_NAME}"*.txt "${BASE_NAME}.srt" 2>/dev/null
    sleep 1
else
    echo
    echo "Extract stream of subtitles and convert to image files (.pgm) ..."
    tcextract -x ps1 -t vob -a "${SUB_TITLE}" -i "${MPEG_FILE}" |
        subtitle2pgm -o "${BASE_NAME}"
fi

# The .srtx file is required as input for srttool below; without it there
# is nothing to work on.
if [ ! -f "${BASE_NAME}.srtx" ]; then
    echo "Cannot find subtitle .srtx file - Abort!"
    exit 1
fi

echo
echo "Recognize subtitle strings and save into text files (.txt) ..."
echo
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
# GOCR_SPACE is left unquoted on purpose: it is either empty or "-s <num>"
# and has to expand to zero or two words. An empty language falls back to
# "none" so that -f never swallows the base name as its argument.
pgm2txt ${GOCR_SPACE} -f "${LANGUAGE:-none}" "${BASE_NAME}"
echo
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"

echo
echo "Add all subtitles its time to .srt file ..."
srttool -s -w -i "${BASE_NAME}.srtx" -o "${BASE_NAME}.srt"

# Hand the result over to the directory the script was started from (the
# parent of the working directory); do not claim success if that fails.
if ! cp -p "${BASE_NAME}.srt" ..; then
    echo "Cannot copy subtitle .srt file - Abort!"
    exit 1
fi

echo
echo "Ready."
