Blame gst/subparse/tmplayerparse.c

Packit 971217
/* GStreamer tmplayer format subtitle parser
Packit 971217
 * Copyright (C) 2006-2008 Tim-Philipp Müller <tim centricular net>
Packit 971217
 *
Packit 971217
 * This library is free software; you can redistribute it and/or
Packit 971217
 * modify it under the terms of the GNU Library General Public
Packit 971217
 * License as published by the Free Software Foundation; either
Packit 971217
 * version 2 of the License, or (at your option) any later version.
Packit 971217
 *
Packit 971217
 * This library is distributed in the hope that it will be useful,
Packit 971217
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 971217
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 971217
 * Library General Public License for more details.
Packit 971217
 *
Packit 971217
 * You should have received a copy of the GNU Library General Public
Packit 971217
 * License along with this library; if not, write to the
Packit 971217
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
Packit 971217
 * Boston, MA 02110-1301, USA.
Packit 971217
 */
Packit 971217
Packit 971217
#include "tmplayerparse.h"
Packit 971217
Packit 971217
#include <stdio.h>
Packit 971217
#include <string.h>
Packit 971217
Packit 971217
/* From http://forum.doom9.org/archive/index.php/t-81059.html:
Packit 971217
 * 
Packit 971217
 * TMPlayer format, which comes in five varieties:
Packit 971217
 * 
Packit 971217
 * time-base 00:00:00:
Packit 971217
 * 00:00:50:This is the Earth at a time|when the dinosaurs roamed...
Packit 971217
 * 00:00:53:
Packit 971217
 * 00:00:54:a lush and fertile planet.
Packit 971217
 * 00:00:56:
Packit 971217
 * 
Packit 971217
 * time-base 0:00:00:
Packit 971217
 * 0:00:50:This is the Earth at a time|when the dinosaurs roamed...
Packit 971217
 * 0:00:53:
Packit 971217
 * 0:00:54:a lush and fertile planet.
Packit 971217
 * 0:00:56:
Packit 971217
 * 
Packit 971217
 * time-base 00:00:00=
Packit 971217
 * 00:00:50=This is the Earth at a time|when the dinosaurs roamed...
Packit 971217
 * 00:00:53=
Packit 971217
 * 00:00:54=a lush and fertile planet.
Packit 971217
 * 00:00:56=
Packit 971217
 * 
Packit 971217
 * time-base 0:00:00=
Packit 971217
 * 0:00:50=This is the Earth at a time|when the dinosaurs roamed...
Packit 971217
 * 0:00:53=
Packit 971217
 * 0:00:54=a lush and fertile planet.
Packit 971217
 * 0:00:56=
Packit 971217
 * 
Packit 971217
 * and multiline time-base 00:00:00,1=
Packit 971217
 * 00:00:50,1=This is the Earth at a time
Packit 971217
 * 00:00:50,2=when the dinosaurs roamed...
Packit 971217
 * 00:00:53,1=
Packit 971217
 * 00:00:54,1=a lush and fertile planet.
Packit 971217
 * 00:00:56,1=
Packit 971217
 *
Packit 971217
 * --------------------------------------------------------------------------
Packit 971217
 *
Packit 971217
 * And another variety (which is 'time-base 00:00:00:' but without empty lines):
Packit 971217
 *
Packit 971217
 * 00:00:01:This is the Earth at a time|when the dinosaurs roamed...
Packit 971217
 * 00:00:03:a lush and fertile planet.
Packit 971217
 * 00:00:06:More text here
Packit 971217
 * 00:00:12:Yet another line
Packit 971217
 *
Packit 971217
 */
Packit 971217
Packit 971217
static gchar *
Packit 971217
tmplayer_process_buffer (ParserState * state)
Packit 971217
{
Packit 971217
  gchar *ret;
Packit 971217
Packit 971217
  ret = g_strndup (state->buf->str, state->buf->len);
Packit 971217
  g_strdelimit (ret, "|", '\n');
Packit 971217
  g_string_truncate (state->buf, 0);
Packit 971217
  return ret;
Packit 971217
}
Packit 971217
Packit 971217
static gchar *
Packit 971217
tmplayer_parse_line (ParserState * state, const gchar * line, guint line_num)
Packit 971217
{
Packit 971217
  GstClockTime ts = GST_CLOCK_TIME_NONE;
Packit 971217
  const gchar *text_start = NULL;
Packit 971217
  gchar *ret = NULL;
Packit 971217
  gchar divc = '\0';
Packit 971217
  guint h, m, s, l = 1;
Packit 971217
Packit 971217
  if (sscanf (line, "%u:%02u:%02u,%u%c", &h, &m, &s, &l, &divc) == 5 &&
Packit 971217
      (divc == '=')) {
Packit 971217
    GST_LOG ("multiline format %u %u %u %u", h, m, s, l);
Packit 971217
    ts = GST_SECOND * ((((h * 60) + m) * 60) + s);
Packit 971217
    text_start = strchr (line, '=');
Packit 971217
  } else if (sscanf (line, "%u:%02u:%02u%c", &h, &m, &s, &divc) == 4 &&
Packit 971217
      (divc == '=' || divc == ':')) {
Packit 971217
    GST_LOG ("single line format %u %u %u %u %c", h, m, s, l, divc);
Packit 971217
    ts = GST_SECOND * ((((h * 60) + m) * 60) + s);
Packit 971217
    text_start = strchr (line + 6, divc);
Packit 971217
  } else if (line[0] == '\0' && state->buf->len > 0 &&
Packit 971217
      GST_CLOCK_TIME_IS_VALID (state->start_time)) {
Packit 971217
    /* if we get an empty line (could be the end of the file, but doesn't have
Packit 971217
     * to be), just push whatever is still in the buffer without a duration */
Packit 971217
    GST_LOG ("empty line, and there's still text in the buffer");
Packit 971217
    ret = tmplayer_process_buffer (state);
Packit 971217
    state->duration = GST_CLOCK_TIME_NONE;
Packit 971217
    return ret;
Packit 971217
  } else {
Packit 971217
    GST_WARNING ("failed to parse line: '%s'", line);
Packit 971217
    return NULL;
Packit 971217
  }
Packit 971217
Packit 971217
  /* if this is a line without text, or the first line in a multiline file,
Packit 971217
   * process and return the data in the buffer, which is the previous line(s) */
Packit 971217
  if (text_start == NULL || text_start[1] == '\0' ||
Packit 971217
      (l == 1 && state->buf->len > 0)) {
Packit 971217
Packit 971217
    if (GST_CLOCK_TIME_IS_VALID (state->start_time) &&
Packit 971217
        state->start_time < ts && line_num > 0) {
Packit 971217
      ret = tmplayer_process_buffer (state);
Packit 971217
      state->duration = ts - state->start_time;
Packit 971217
      /* ..and append current line's text (if there is any) for the next round.
Packit 971217
       * We don't have to store ts as pending_start_time, since we deduce the
Packit 971217
       * durations from the start times anyway, so as long as the parser just
Packit 971217
       * forwards state->start_time by duration after it pushes the line we
Packit 971217
       * are about to return it will all be good. */
Packit 971217
      g_string_append (state->buf, text_start + 1);
Packit 971217
    } else if (line_num > 0) {
Packit 971217
      GST_WARNING ("end of subtitle unit but no valid start time?!");
Packit 971217
    }
Packit 971217
  } else {
Packit 971217
    if (l > 1)
Packit 971217
      g_string_append_c (state->buf, '\n');
Packit 971217
    g_string_append (state->buf, text_start + 1);
Packit 971217
    state->start_time = ts;
Packit 971217
  }
Packit 971217
Packit 971217
  GST_LOG ("returning: '%s'", GST_STR_NULL (ret));
Packit 971217
  return ret;
Packit 971217
}
Packit 971217
Packit 971217
gchar *
Packit 971217
parse_tmplayer (ParserState * state, const gchar * line)
Packit 971217
{
Packit 971217
  gchar *ret;
Packit 971217
Packit 971217
  /* GST_LOG ("Parsing: %s", line); */
Packit 971217
Packit 971217
  ret = tmplayer_parse_line (state, line, state->state);
Packit 971217
  ++state->state;
Packit 971217
Packit 971217
  return ret;
Packit 971217
}