/* -*- Mode: C; tab-width: 2; indent-tabs-mode: t; c-basic-offset: 2 -*- */ /* Copyright 2005 Jan Schmidt * Copyright 2002,2003 Scott Wheeler (portions from taglib) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301, USA. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include "id3v2.h" #define HANDLE_INVALID_SYNCSAFE static gboolean id3v2_frames_to_tag_list (ID3TagsWorking * work, guint size); #ifndef GST_DISABLE_GST_DEBUG #define GST_CAT_DEFAULT id3v2_ensure_debug_category() GstDebugCategory * id3v2_ensure_debug_category (void) { static gsize cat_gonce = 0; if (g_once_init_enter (&cat_gonce)) { gsize cat; cat = (gsize) _gst_debug_category_new ("id3v2", 0, "ID3v2 tag parsing"); g_once_init_leave (&cat_gonce, cat); } return (GstDebugCategory *) cat_gonce; } #endif /* GST_DISABLE_GST_DEBUG */ /* Synch safe uints have 28 bits (256MB max) available. */ guint id3v2_read_synch_uint (const guint8 * data, guint size) { gint i; guint result = 0; gint invalid = 0; g_assert (size <= 4); size--; for (i = 0; i <= size; i++) { invalid |= data[i] & 0x80; result |= (data[i] & 0x7f) << ((size - i) * 7); } #ifdef HANDLE_INVALID_SYNCSAFE if (invalid) { GST_WARNING ("Invalid synch-safe integer in ID3v2 frame " "- using the actual value instead"); result = 0; for (i = 0; i <= size; i++) { result |= data[i] << ((size - i) * 8); } } #endif return result; } /** * gst_tag_get_id3v2_tag_size: * @buffer: buffer holding ID3v2 tag (or at least the start of one) * * Determines size of an ID3v2 tag on buffer containing at least ID3v2 header, * i.e. at least #GST_TAG_ID3V2_HEADER_SIZE (10) bytes; * * Returns: Size of tag, or 0 if header is invalid or too small. */ guint gst_tag_get_id3v2_tag_size (GstBuffer * buffer) { GstMapInfo info; guint8 flags; guint result = 0; g_return_val_if_fail (buffer != NULL, 0); gst_buffer_map (buffer, &info, GST_MAP_READ); if (info.size < ID3V2_HDR_SIZE) goto too_small; /* Check for 'ID3' string at start of buffer */ if (info.data[0] != 'I' || info.data[1] != 'D' || info.data[2] != '3') goto no_tag; /* Read the flags */ flags = info.data[5]; /* Read the size from the header */ result = id3v2_read_synch_uint (info.data + 6, 4); if (result == 0) goto empty; result += ID3V2_HDR_SIZE; /* Expand the read size to include a footer if there is one */ if ((flags & ID3V2_HDR_FLAG_FOOTER)) result += 10; GST_DEBUG ("ID3v2 tag, size: %u bytes", result); done: gst_buffer_unmap (buffer, &info); return result; too_small: { GST_DEBUG ("size too small"); goto done; } no_tag: { GST_DEBUG ("No ID3v2 tag in data"); goto done; } empty: { GST_DEBUG ("Empty tag size"); result = ID3V2_HDR_SIZE; goto done; } } guint8 * id3v2_ununsync_data (const guint8 * unsync_data, guint32 * size) { const guint8 *end; guint8 *out, *uu; guint out_size; uu = out = g_malloc (*size); for (end = unsync_data + *size; unsync_data < end - 1; ++unsync_data, ++uu) { *uu = *unsync_data; if (G_UNLIKELY (*unsync_data == 0xff && *(unsync_data + 1) == 0x00)) ++unsync_data; } /* take care of last byte (if last two bytes weren't 0xff 0x00) */ if (unsync_data < end) { *uu = *unsync_data; ++uu; } out_size = uu - out; GST_DEBUG ("size after un-unsyncing: %u (before: %u)", out_size, *size); *size = out_size; return out; } /** * gst_tag_list_from_id3v2_tag: * @buffer: buffer to convert * * Creates a new tag list that contains the information parsed out of a * ID3 tag. * * Returns: A new #GstTagList with all tags that could be extracted from the * given vorbiscomment buffer or NULL on error. */ GstTagList * gst_tag_list_from_id3v2_tag (GstBuffer * buffer) { GstMapInfo info; guint8 *uu_data = NULL; guint read_size; ID3TagsWorking work; guint8 flags; guint16 version; gst_tag_register_musicbrainz_tags (); read_size = gst_tag_get_id3v2_tag_size (buffer); /* Ignore tag if it has no frames attached, but skip the header then */ if (read_size < ID3V2_HDR_SIZE) return NULL; gst_buffer_map (buffer, &info, GST_MAP_READ); /* Read the version */ version = GST_READ_UINT16_BE (info.data + 3); /* Read the flags */ flags = info.data[5]; /* Validate the version. At the moment, we only support up to 2.4.0 */ if (ID3V2_VER_MAJOR (version) > 4 || ID3V2_VER_MINOR (version) > 0) goto wrong_version; GST_DEBUG ("ID3v2 header flags: %s %s %s %s", (flags & ID3V2_HDR_FLAG_UNSYNC) ? "UNSYNC" : "", (flags & ID3V2_HDR_FLAG_EXTHDR) ? "EXTENDED_HEADER" : "", (flags & ID3V2_HDR_FLAG_EXPERIMENTAL) ? "EXPERIMENTAL" : "", (flags & ID3V2_HDR_FLAG_FOOTER) ? "FOOTER" : ""); /* This shouldn't really happen! Caller should have checked first */ if (info.size < read_size) goto not_enough_data; GST_DEBUG ("Reading ID3v2 tag with revision 2.%d.%d of size %u", version >> 8, version & 0xff, read_size); GST_MEMDUMP ("ID3v2 tag", info.data, read_size); memset (&work, 0, sizeof (ID3TagsWorking)); work.buffer = buffer; work.hdr.version = version; work.hdr.size = read_size; work.hdr.flags = flags; work.hdr.frame_data = info.data + ID3V2_HDR_SIZE; if (flags & ID3V2_HDR_FLAG_FOOTER) { if (read_size < ID3V2_HDR_SIZE + 10) goto not_enough_data; /* Invalid frame size */ work.hdr.frame_data_size = read_size - ID3V2_HDR_SIZE - 10; } else { g_assert (read_size >= ID3V2_HDR_SIZE); /* checked above */ work.hdr.frame_data_size = read_size - ID3V2_HDR_SIZE; } /* in v2.3 the frame sizes are not syncsafe, so the entire tag had to be * unsynced. In v2.4 the frame sizes are syncsafe so it's just the frame * data that needs un-unsyncing, but not the frame headers. */ if ((flags & ID3V2_HDR_FLAG_UNSYNC) != 0 && ID3V2_VER_MAJOR (version) <= 3) { GST_DEBUG ("Un-unsyncing entire tag"); uu_data = id3v2_ununsync_data (work.hdr.frame_data, &work.hdr.frame_data_size); work.hdr.frame_data = uu_data; GST_MEMDUMP ("ID3v2 tag (un-unsyced)", uu_data, work.hdr.frame_data_size); } id3v2_frames_to_tag_list (&work, work.hdr.frame_data_size); g_free (uu_data); gst_buffer_unmap (buffer, &info); return work.tags; /* ERRORS */ wrong_version: { GST_WARNING ("ID3v2 tag is from revision 2.%d.%d, " "but decoder only supports 2.%d.%d. Ignoring as per spec.", version >> 8, version & 0xff, ID3V2_VERSION >> 8, ID3V2_VERSION & 0xff); gst_buffer_unmap (buffer, &info); return NULL; } not_enough_data: { GST_DEBUG ("Found ID3v2 tag with revision 2.%d.%d - need %u more bytes to read", version >> 8, version & 0xff, (guint) (read_size - info.size)); gst_buffer_unmap (buffer, &info); return NULL; } } static guint id3v2_frame_hdr_size (guint id3v2ver) { /* ID3v2 < 2.3.0 only had 6 byte header */ switch (ID3V2_VER_MAJOR (id3v2ver)) { case 0: case 1: case 2: return 6; case 3: case 4: default: return 10; } } static const gchar obsolete_frame_ids[][5] = { {"CRM"}, {"EQU"}, {"LNK"}, {"RVA"}, {"TIM"}, {"TSI"}, /* From 2.2 */ {"EQUA"}, {"RVAD"}, {"TIME"}, {"TRDA"}, {"TSIZ"} /* From 2.3 */ }; static const struct ID3v2FrameIDConvert { const gchar orig[5]; const gchar new[5]; } frame_id_conversions[] = { /* 2.3.x frames */ { "TORY", "TDOR"}, { "TYER", "TDRC"}, /* 2.2.x frames */ { "BUF", "RBUF"}, { "CNT", "PCNT"}, { "COM", "COMM"}, { "CRA", "AENC"}, { "ETC", "ETCO"}, { "GEO", "GEOB"}, { "IPL", "TIPL"}, { "MCI", "MCDI"}, { "MLL", "MLLT"}, { "PIC", "APIC"}, { "POP", "POPM"}, { "REV", "RVRB"}, { "SLT", "SYLT"}, { "STC", "SYTC"}, { "TAL", "TALB"}, { "TBP", "TBPM"}, { "TCM", "TCOM"}, { "TCO", "TCON"}, { "TCR", "TCOP"}, { "TDA", "TDAT"}, { /* obsolete, but we need to parse it anyway */ "TDY", "TDLY"}, { "TEN", "TENC"}, { "TFT", "TFLT"}, { "TKE", "TKEY"}, { "TLA", "TLAN"}, { "TLE", "TLEN"}, { "TMT", "TMED"}, { "TOA", "TOAL"}, { "TOF", "TOFN"}, { "TOL", "TOLY"}, { "TOR", "TDOR"}, { "TOT", "TOAL"}, { "TP1", "TPE1"}, { "TP2", "TPE2"}, { "TP3", "TPE3"}, { "TP4", "TPE4"}, { "TPA", "TPOS"}, { "TPB", "TPUB"}, { "TRC", "TSRC"}, { "TRD", "TDRC"}, { "TRK", "TRCK"}, { "TSS", "TSSE"}, { "TT1", "TIT1"}, { "TT2", "TIT2"}, { "TT3", "TIT3"}, { "TXT", "TOLY"}, { "TXX", "TXXX"}, { "TYE", "TDRC"}, { "UFI", "UFID"}, { "ULT", "USLT"}, { "WAF", "WOAF"}, { "WAR", "WOAR"}, { "WAS", "WOAS"}, { "WCM", "WCOM"}, { "WCP", "WCOP"}, { "WPB", "WPUB"}, { "WXX", "WXXX"} }; static gboolean convert_fid_to_v240 (gchar * frame_id) { gint i; for (i = 0; i < G_N_ELEMENTS (obsolete_frame_ids); ++i) { if (strncmp (frame_id, obsolete_frame_ids[i], 5) == 0) return TRUE; } for (i = 0; i < G_N_ELEMENTS (frame_id_conversions); ++i) { if (strncmp (frame_id, frame_id_conversions[i].orig, 5) == 0) { strcpy (frame_id, frame_id_conversions[i].new); return FALSE; } } return FALSE; } /* add unknown or unhandled ID3v2 frames to the taglist as binary blobs */ static void id3v2_add_id3v2_frame_blob_to_taglist (ID3TagsWorking * work, guint8 * frame_data, guint frame_size) { GstBuffer *blob; GstSample *sample; #if 0 GstCaps *caps; gchar *media_type; #endif guint i; blob = gst_buffer_new_and_alloc (frame_size); gst_buffer_fill (blob, 0, frame_data, frame_size); sample = gst_sample_new (blob, NULL, NULL, NULL); gst_buffer_unref (blob); /* Sanitize frame id */ for (i = 0; i < 4; i++) { if (!g_ascii_isalnum (frame_data[i])) frame_data[i] = '_'; } #if 0 media_type = g_strdup_printf ("application/x-gst-id3v2-%c%c%c%c-frame", g_ascii_tolower (frame_data[0]), g_ascii_tolower (frame_data[1]), g_ascii_tolower (frame_data[2]), g_ascii_tolower (frame_data[3])); caps = gst_caps_new_simple (media_type, "version", G_TYPE_INT, (gint) ID3V2_VER_MAJOR (work->hdr.version), NULL); gst_buffer_set_caps (blob, caps); gst_caps_unref (caps); g_free (media_type); #endif /* gst_util_dump_mem (GST_BUFFER_DATA (blob), GST_BUFFER_SIZE (blob)); */ gst_tag_list_add (work->tags, GST_TAG_MERGE_APPEND, GST_TAG_ID3V2_FRAME, sample, NULL); gst_sample_unref (sample); } static gboolean id3v2_frames_to_tag_list (ID3TagsWorking * work, guint size) { guint frame_hdr_size; /* Extended header if present */ if (work->hdr.flags & ID3V2_HDR_FLAG_EXTHDR) { work->hdr.ext_hdr_size = id3v2_read_synch_uint (work->hdr.frame_data, 4); /* In id3v2.4.x the header size is the size of the *whole* * extended header. * In id3v2.3.x the header size does *not* include itself. * In older versions it's undefined but let's assume it follow 2.3.x */ switch (ID3V2_VER_MAJOR (work->hdr.version)) { case 0: case 1: case 2: case 3: work->hdr.ext_hdr_size += 4; break; case 4: break; default: GST_WARNING ("Don't know how to handled Extended Header for this id3 version"); break; } GST_LOG ("extended header size %d", work->hdr.ext_hdr_size); if (work->hdr.ext_hdr_size < 6 || work->hdr.ext_hdr_size > work->hdr.frame_data_size) { GST_DEBUG ("Invalid extended header. Broken tag"); return FALSE; } work->hdr.ext_flag_bytes = work->hdr.frame_data[4]; if (5 + work->hdr.ext_flag_bytes > work->hdr.frame_data_size) { GST_DEBUG ("Tag claims extended header, but doesn't have enough bytes. Broken tag"); return FALSE; } work->hdr.ext_flag_data = work->hdr.frame_data + 5; work->hdr.frame_data += work->hdr.ext_hdr_size; work->hdr.frame_data_size -= work->hdr.ext_hdr_size; } frame_hdr_size = id3v2_frame_hdr_size (work->hdr.version); if (work->hdr.frame_data_size <= frame_hdr_size) { GST_DEBUG ("Tag has no data frames. Broken tag"); return FALSE; /* Must have at least one frame */ } work->tags = gst_tag_list_new_empty (); while (work->hdr.frame_data_size > frame_hdr_size) { guint frame_size = 0; gchar frame_id[5] = ""; guint16 frame_flags = 0x0; gboolean obsolete_id = FALSE; gboolean read_synch_size = TRUE; guint i; /* Read the header */ switch (ID3V2_VER_MAJOR (work->hdr.version)) { case 0: case 1: case 2: frame_id[0] = work->hdr.frame_data[0]; frame_id[1] = work->hdr.frame_data[1]; frame_id[2] = work->hdr.frame_data[2]; frame_id[3] = 0; frame_id[4] = 0; obsolete_id = convert_fid_to_v240 (frame_id); /* 3 byte non-synchsafe size */ frame_size = work->hdr.frame_data[3] << 16 | work->hdr.frame_data[4] << 8 | work->hdr.frame_data[5]; frame_flags = 0; break; case 3: read_synch_size = FALSE; /* 2.3 frame size is not synch-safe */ case 4: default: frame_id[0] = work->hdr.frame_data[0]; frame_id[1] = work->hdr.frame_data[1]; frame_id[2] = work->hdr.frame_data[2]; frame_id[3] = work->hdr.frame_data[3]; frame_id[4] = 0; if (read_synch_size) frame_size = id3v2_read_synch_uint (work->hdr.frame_data + 4, 4); else frame_size = GST_READ_UINT32_BE (work->hdr.frame_data + 4); frame_flags = GST_READ_UINT16_BE (work->hdr.frame_data + 8); if (ID3V2_VER_MAJOR (work->hdr.version) == 3) { frame_flags &= ID3V2_3_FRAME_FLAGS_MASK; obsolete_id = convert_fid_to_v240 (frame_id); if (obsolete_id) GST_DEBUG ("Ignoring v2.3 frame %s", frame_id); } break; } work->hdr.frame_data += frame_hdr_size; work->hdr.frame_data_size -= frame_hdr_size; if (frame_size > work->hdr.frame_data_size || strcmp (frame_id, "") == 0) break; /* No more frames to read */ /* Sanitize frame id */ switch (ID3V2_VER_MAJOR (work->hdr.version)) { case 0: case 1: case 2: for (i = 0; i < 3; i++) { if (!g_ascii_isalnum (frame_id[i])) frame_id[i] = '_'; } break; default: for (i = 0; i < 4; i++) { if (!g_ascii_isalnum (frame_id[i])) frame_id[i] = '_'; } } #if 1 #if 0 GST_LOG ("Frame @ %ld (0x%02lx) id %s size %u, next=%ld (0x%02lx) obsolete=%d", (glong) (work->hdr.frame_data - start), (glong) (work->hdr.frame_data - start), frame_id, frame_size, (glong) (work->hdr.frame_data + frame_hdr_size + frame_size - start), (glong) (work->hdr.frame_data + frame_hdr_size + frame_size - start), obsolete_id); #endif #define flag_string(flag,str) \ ((frame_flags & (flag)) ? (str) : "") GST_LOG ("Frame header flags: 0x%04x %s %s %s %s %s %s %s", frame_flags, flag_string (ID3V2_FRAME_STATUS_FRAME_ALTER_PRESERVE, "ALTER_PRESERVE"), flag_string (ID3V2_FRAME_STATUS_READONLY, "READONLY"), flag_string (ID3V2_FRAME_FORMAT_GROUPING_ID, "GROUPING_ID"), flag_string (ID3V2_FRAME_FORMAT_COMPRESSION, "COMPRESSION"), flag_string (ID3V2_FRAME_FORMAT_ENCRYPTION, "ENCRYPTION"), flag_string (ID3V2_FRAME_FORMAT_UNSYNCHRONISATION, "UNSYNC"), flag_string (ID3V2_FRAME_FORMAT_DATA_LENGTH_INDICATOR, "LENGTH_IND")); #undef flag_str #endif if (!obsolete_id) { /* Now, read, decompress etc the contents of the frame * into a TagList entry */ work->cur_frame_size = frame_size; work->frame_id = frame_id; work->frame_flags = frame_flags; if (id3v2_parse_frame (work)) { GST_LOG ("Extracted frame with id %s", frame_id); } else { GST_LOG ("Failed to extract frame with id %s", frame_id); /* Rewind the frame data / size to pass the header too */ id3v2_add_id3v2_frame_blob_to_taglist (work, work->hdr.frame_data - frame_hdr_size, frame_hdr_size + frame_size); } work->frame_id = NULL; /* clear ref to loop-local storage */ } work->hdr.frame_data += frame_size; work->hdr.frame_data_size -= frame_size; } if (gst_tag_list_n_tags (work->tags) == 0) { GST_DEBUG ("Could not extract any frames from tag. Broken or empty tag"); gst_tag_list_unref (work->tags); work->tags = NULL; return FALSE; } /* Set day/month now if they were in a separate (obsolete) TDAT frame */ /* FIXME: we could extract the time as well now */ if (work->pending_day != 0 && work->pending_month != 0) { GstDateTime *dt = NULL; if (gst_tag_list_get_date_time (work->tags, GST_TAG_DATE_TIME, &dt)) { GstDateTime *dt2; /* GstDateTime is immutable, so create new one and replace old one */ dt2 = gst_date_time_new_ymd (gst_date_time_get_year (dt), work->pending_month, work->pending_day); gst_tag_list_add (work->tags, GST_TAG_MERGE_REPLACE, GST_TAG_DATE_TIME, dt2, NULL); gst_date_time_unref (dt2); gst_date_time_unref (dt); } } return TRUE; }