Blob Blame History Raw
////////////////////////////////////////////////////////////////////////////
//                           **** WAVPACK ****                            //
//                  Hybrid Lossless Wavefile Compressor                   //
//                Copyright (c) 1998 - 2017 David Bryant                  //
//                          All Rights Reserved.                          //
//      Distributed under the BSD Software License (see license.txt)      //
////////////////////////////////////////////////////////////////////////////

// import_id3.c

// This module provides limited support for importing existing ID3 tags
// (from DSF files, for example) into WavPack files

#include <sys/stat.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "wavpack.h"

// Mapping from ID3v2.3 text frame identifiers to the APEv2 item names that
// ImportID3v2() stores them under. The entries are string literals and the
// table is only ever read, so both the table and its strings are const.

static const struct {
    const char *id3_item, *ape_item;
} text_tag_table [] = {
    { "TALB", "Album" },
    { "TIT2", "Title" },
    { "TPE1", "Artist" },
    { "TCON", "Genre" },
    { "TYER", "Year" },
    { "TRCK", "Track" },
    { "TCOM", "Composer" },
    { "TPE3", "Conductor" }
};

// Number of entries in text_tag_table (only valid on the array itself)
#define NUM_TEXT_TAG_ITEMS (sizeof (text_tag_table) / sizeof (text_tag_table [0]))

// Forward declarations of the character-set conversion helpers defined at the
// end of this file. Both write a NUL-terminated UTF-8 string and take "len" as
// the size in bytes of the destination buffer.

static int WideCharToUTF8 (const wchar_t *Wide, unsigned char *pUTF8, int len);
static void Latin1ToUTF8 (void *string, int len);

// Import specified ID3v2.3 tag. The WavPack context accepts the tag items, and can be
// NULL for doing a dry-run through the tag. If errors occur then a description will be
// written to "error" (which must be 80 characters) and -1 will be returned. If no
// errors occur then the number of tag items successfully written will be returned, or
// zero in the case of no applicable tags. An optional integer pointer can be provided
// to accept the total number of bytes consumed by the tag (name and value).
//
// Only a subset of the tag is imported: text frames listed in text_tag_table (encoded
// as Latin-1 or as UTF-16 with a little-endian BOM) and APIC picture frames whose
// description uses encoding 0. Tags using unsynchronization, an extended header, or
// the experimental flag are rejected, as are frames with any bit set in the second
// flags byte. Note that items appended before an error is detected are not removed
// from the context.
//
// All temporary buffers are owned by this function and are released on every exit
// path (the "fail" label handles the error exits).

int ImportID3v2 (WavpackContext *wpc, unsigned char *tag_data, int tag_size, char *error, int32_t *bytes_used)
{
    int tag_size_from_header, items_imported = 0, done_cover = 0;
    unsigned char *frame_body = NULL, *utf8_string = NULL;
    char *binary_tag_image = NULL;
    wchar_t *wide_string = NULL;
    unsigned char id3_header [10];

    if (bytes_used)
        *bytes_used = 0;

    // compare as int: against a size_t a negative tag_size would be promoted to a huge
    // unsigned value and slip through

    if (tag_size < (int) sizeof (id3_header)) {
        strcpy (error, "can't read tag header");
        return -1;
    }

    memcpy (id3_header, tag_data, sizeof (id3_header));
    tag_size -= (int) sizeof (id3_header);
    tag_data += sizeof (id3_header);

    if (strncmp ((char *) id3_header, "ID3", 3)) {
        strcpy (error, "no ID3v2 tag found");
        return -1;
    }

    if (id3_header [3] != 3 || id3_header [4] == 0xFF || (id3_header [5] & 0x1F)) {
        strcpy (error, "not valid ID3v2.3");
        return -1;
    }

    if (id3_header [5] & 0x80) {
        strcpy (error, "unsynchonization detected");
        return -1;
    }

    if (id3_header [5] & 0x40) {
        strcpy (error, "extended header detected");
        return -1;
    }

    if (id3_header [5] & 0x20) {
        strcpy (error, "experimental indicator detected");
        return -1;
    }

    // the tag size is stored as four 7-bit ("synchsafe") bytes, so no high bit may be set

    if ((id3_header [6] | id3_header [7] | id3_header [8] | id3_header [9]) & 0x80) {
        strcpy (error, "not valid ID3v2.3 (bad size)");
        return -1;
    }

    tag_size_from_header = id3_header [9] + (id3_header [8] << 7) + (id3_header [7] << 14) + (id3_header [6] << 21);

    if (tag_size_from_header > tag_size) {
        strcpy (error, "tag is truncated");
        return -1;
    }

    // from here on tag_size is never negative: every subtraction below is preceded by
    // a check that enough bytes remain

    while (1) {
        unsigned char frame_header [10];
        unsigned long raw_frame_size;
        int frame_size, i;

        if (tag_size < (int) sizeof (frame_header))
            break;

        memcpy (frame_header, tag_data, sizeof (frame_header));
        tag_size -= (int) sizeof (frame_header);
        tag_data += sizeof (frame_header);

        // an all-zero frame ID marks the start of padding

        if (!frame_header [0] && !frame_header [1] && !frame_header [2] && !frame_header [3])
            break;

        // frame IDs consist of the characters 0-9 and A-Z only

        for (i = 0; i < 4; ++i)
            if (frame_header [i] < '0' ||
                (frame_header [i] > '9' && frame_header [i] < 'A') ||
                frame_header [i] > 'Z') {
                    strcpy (error, "bad frame identity");
                    goto fail;
            }

        if (frame_header [9]) {
            strcpy (error, "unknown frame_header flag set");
            goto fail;
        }

        // assemble the big-endian frame size in an unsigned type so that a set top bit
        // cannot overflow a signed int and masquerade as a negative (i.e. "small") size

        raw_frame_size = ((unsigned long) frame_header [4] << 24) | ((unsigned long) frame_header [5] << 16) |
            ((unsigned long) frame_header [6] << 8) | (unsigned long) frame_header [7];

        if (!raw_frame_size) {
            strcpy (error, "empty frame not allowed");
            goto fail;
        }

        // validate the size against the remaining data BEFORE allocating or copying

        if (raw_frame_size > (unsigned long) tag_size) {
            strcpy (error, "can't read frame body");
            goto fail;
        }

        frame_size = (int) raw_frame_size;
        frame_body = malloc (frame_size + 4);

        if (!frame_body) {
            strcpy (error, "can't allocate memory");
            goto fail;
        }

        memcpy (frame_body, tag_data, frame_size);
        tag_size -= frame_size;
        tag_data += frame_size;

        // text frames (all "T***" frames except the user-defined "TXXX")

        if (frame_header [0] == 'T' && strncmp ((char *) frame_header, "TXXX", 4)) {

            // the first byte of the body selects the encoding: 0 = Latin-1, 1 = UTF-16 with BOM
            // (only the little-endian BOM FF FE is accepted here)

            if (frame_body [0] == 0) {
                int nchars = frame_size - 1;
                unsigned char *fp = frame_body + 1;

                // each Latin-1 character needs at most 2 UTF-8 bytes, so 3 per char is ample
                utf8_string = malloc ((nchars + 1) * 3);

                if (!utf8_string) {
                    strcpy (error, "can't allocate memory");
                    goto fail;
                }

                // copy up to the first NUL, or terminate ourselves if the frame has none

                for (i = 0; i < nchars; ++i)
                    if (!(utf8_string [i] = *fp++))
                        break;

                if (i == nchars)
                    utf8_string [nchars] = 0;

                Latin1ToUTF8 (utf8_string, (nchars + 1) * 3);
            }
            else if (frame_body [0] == 1 && frame_size > 2 && frame_body [1] == 0xFF && frame_body [2] == 0xFE) {
                int nchars = (frame_size - 3) / 2;
                unsigned char *fp = frame_body + 3;

                wide_string = malloc ((nchars + 1) * sizeof (wchar_t));
                utf8_string = malloc ((nchars + 1) * 3);

                if (!wide_string || !utf8_string) {
                    strcpy (error, "can't allocate memory");
                    goto fail;
                }

                // assemble little-endian 16-bit units up to the first NUL (or the end of the frame)

                for (i = 0; i < nchars; ++i, fp += 2)
                    if (!(wide_string [i] = fp [0] | (fp [1] << 8)))
                        break;

                if (i == nchars)
                    wide_string [nchars] = 0;

                WideCharToUTF8 (wide_string, utf8_string, (nchars + 1) * 3);
                free (wide_string);
                wide_string = NULL;
            }
            else {
                strcpy (error, "unknown character encoding");
                goto fail;
            }

            // we have a text string, so look through the table and find an equivalent APEv2 tag item

            for (i = 0; i < (int) NUM_TEXT_TAG_ITEMS; ++i)
                if (!strncmp ((char *) frame_header, text_tag_table [i].id3_item, 4)) {
                    if (wpc && !WavpackAppendTagItem (wpc, text_tag_table [i].ape_item, (char *) utf8_string, (int) strlen ((char *) utf8_string))) {
                        strcpy (error, WavpackGetErrorMessage (wpc));
                        goto fail;
                    }

                    items_imported++;
                    if (bytes_used) *bytes_used += (int) (strlen ((char *) utf8_string) + strlen (text_tag_table [i].ape_item) + 1);
                }

            free (utf8_string);
            utf8_string = NULL;
        }
        else if (!strncmp ((char *) frame_header, "APIC", 4)) {

            // attached picture: encoding byte, NUL-terminated mime type, picture type byte,
            // NUL-terminated description, then the image data

            if (frame_body [0] == 0) {
                char *mime_type, *extension, *item = NULL;
                unsigned char *frame_ptr = frame_body + 1;
                int frame_bytes = frame_size - 1;
                unsigned char picture_type;

                mime_type = (char *) frame_ptr;

                // skip past the terminator; frame_bytes ends up negative if none was found
                while (frame_bytes-- && *frame_ptr++);

                if (frame_bytes < 0) {
                    strcpy (error, "unterminated picture mime type");
                    goto fail;
                }

                if (frame_bytes == 0) {
                    strcpy (error, "no picture type");
                    goto fail;
                }

                picture_type = *frame_ptr++;
                frame_bytes--;

                // the description itself is not imported, only skipped over
                while (frame_bytes-- && *frame_ptr++);

                if (frame_bytes < 0) {
                    strcpy (error, "unterminated picture description");
                    goto fail;
                }

                if (frame_bytes < 2) {
                    strcpy (error, "no picture data");
                    goto fail;
                }

                // choose a file extension from the mime type, falling back to the first two
                // bytes of the image data

                if (strstr (mime_type, "jpeg") || strstr (mime_type, "JPEG"))
                    extension = ".jpg";
                else if (strstr (mime_type, "png") || strstr (mime_type, "PNG"))
                    extension = ".png";
                else if (frame_ptr [0] == 0xFF && frame_ptr [1] == 0xD8)
                    extension = ".jpg";
                else if (frame_ptr [0] == 0x89 && frame_ptr [1] == 0x50)
                    extension = ".png";
                else
                    extension = "";

                // type 3 is stored as the front cover and type 4 as the back cover; types 1 and 2
                // are skipped; the first picture of any other type becomes the front cover if
                // none has been stored yet

                if (picture_type == 3) {
                    item = "Cover Art (Front)";
                    done_cover = 1;
                }
                else if (picture_type == 4)
                    item = "Cover Art (Back)";
                else if (picture_type != 1 && picture_type != 2 && !done_cover) {
                    item = "Cover Art (Front)";
                    done_cover = 1;
                }

                if (item) {
                    // binary item value layout: "<item><extension>\0" followed by the raw image bytes
                    int binary_tag_size = (int) strlen (item) + (int) strlen (extension) + 1 + frame_bytes;

                    binary_tag_image = malloc (binary_tag_size);

                    if (!binary_tag_image) {
                        strcpy (error, "can't allocate memory");
                        goto fail;
                    }

                    strcpy (binary_tag_image, item);
                    strcat (binary_tag_image, extension);
                    memcpy (binary_tag_image + binary_tag_size - frame_bytes, frame_ptr, frame_bytes);

                    if (wpc && !WavpackAppendBinaryTagItem (wpc, item, binary_tag_image, binary_tag_size)) {
                        strcpy (error, WavpackGetErrorMessage (wpc));
                        goto fail;
                    }

                    items_imported++;
                    if (bytes_used) *bytes_used += (int) strlen (item) + 1 + binary_tag_size;
                    free (binary_tag_image);
                    binary_tag_image = NULL;
                }
            }
            else {
                strcpy (error, "unhandled APIC character encoding");
                goto fail;
            }
        }

        free (frame_body);
        frame_body = NULL;
    }

    return items_imported;

fail:
    // "error" has already been filled in; release whatever the current frame still holds
    free (binary_tag_image);
    free (wide_string);
    free (utf8_string);
    free (frame_body);
    return -1;
}

// Convert the Unicode wide-format string into a UTF-8 string using no more
// than the specified buffer length (in bytes, including the terminator). The
// wide-format string must be NUL terminated and the resulting string will be
// NUL terminated. The number of wide elements consumed (not counting the
// terminator) is returned, which may be less than the number of elements in
// the wide string if the buffer length is exceeded. A character is never
// split: if its whole UTF-8 sequence does not fit, conversion stops before it.
//
// A UTF-16 high surrogate immediately followed by a low surrogate is combined
// into a single code point and written as one 4-byte sequence (and counts as
// two consumed elements). Values that are not valid code points (greater than
// U+10FFFF, which includes negative values when wchar_t is signed) are written
// as U+FFFD. If "len" is less than 1 nothing is written and 0 is returned.

static int WideCharToUTF8 (const wchar_t *Wide, unsigned char *pUTF8, int len)
{
    const wchar_t *pWide = Wide;
    int outndx = 0;

    if (len < 1)
        return 0;

    while (*pWide) {
        unsigned long code = (unsigned long) *pWide;
        int units = 1, nbytes;

        // pWide [1] is always readable here: *pWide is not the terminator, so the
        // next element is at worst the terminator itself

        if (code >= 0xD800 && code <= 0xDBFF) {
            unsigned long low = (unsigned long) pWide [1];

            if (low >= 0xDC00 && low <= 0xDFFF) {
                code = 0x10000 + ((code - 0xD800) << 10) + (low - 0xDC00);
                units = 2;
            }
        }

        if (code > 0x10FFFF)
            code = 0xFFFD;

        nbytes = code < 0x80 ? 1 : code < 0x800 ? 2 : code < 0x10000 ? 3 : 4;

        // leave room for the terminator
        if (outndx + nbytes >= len)
            break;

        switch (nbytes) {
            case 1:
                pUTF8 [outndx++] = (unsigned char) code;
                break;

            case 2:
                pUTF8 [outndx++] = (unsigned char) (0xc0 | ((code >> 6) & 0x1f));
                pUTF8 [outndx++] = (unsigned char) (0x80 | (code & 0x3f));
                break;

            case 3:
                pUTF8 [outndx++] = (unsigned char) (0xe0 | ((code >> 12) & 0xf));
                pUTF8 [outndx++] = (unsigned char) (0x80 | ((code >> 6) & 0x3f));
                pUTF8 [outndx++] = (unsigned char) (0x80 | (code & 0x3f));
                break;

            default:
                pUTF8 [outndx++] = (unsigned char) (0xf0 | ((code >> 18) & 0x7));
                pUTF8 [outndx++] = (unsigned char) (0x80 | ((code >> 12) & 0x3f));
                pUTF8 [outndx++] = (unsigned char) (0x80 | ((code >> 6) & 0x3f));
                pUTF8 [outndx++] = (unsigned char) (0x80 | (code & 0x3f));
                break;
        }

        pWide += units;
    }

    pUTF8 [outndx] = 0;
    return (int)(pWide - Wide);
}

// Convert a Latin1 string into its Unicode UTF-8 format equivalent. The
// conversion is done in-place so the maximum length of the string buffer must
// be specified because the string may become longer or shorter. If the
// resulting string will not fit in the specified buffer size then it is
// truncated.

#ifdef _WIN32

#include <windows.h>

// Windows implementation: widen the Latin-1 string with MultiByteToWideChar()
// and then encode the wide string back into the caller's buffer as UTF-8.
// "string" must be NUL terminated and "len" is the size of its buffer in bytes.
// If the temporary buffer cannot be allocated or the widening fails, the
// string is left unchanged.

static void Latin1ToUTF8 (void *string, int len)
{
    int max_chars = (int) strlen (string);
    wchar_t *temp = malloc ((max_chars + 1) * sizeof *temp);

    if (!temp)
        return;

    // 28591 is the Windows code page identifier for ISO 8859-1; the call returns
    // zero on failure, in which case "temp" holds nothing usable
    if (MultiByteToWideChar (28591, 0, string, -1, temp, max_chars + 1))
        WideCharToUTF8 (temp, (unsigned char *) string, len);

    free (temp);
}

#else

#include <iconv.h>

// iconv implementation: convert the NUL-terminated Latin-1 string in "string"
// to UTF-8 in place, where "len" is the size of its buffer in bytes. The
// conversion goes through a zero-filled temporary buffer of the same size, so
// the result is always NUL terminated. If the UTF-8 form does not fit, the
// result is truncated after the last character that fits completely. If the
// converter cannot be created, memory cannot be allocated, or the conversion
// fails for any reason other than lack of space, the string is left unchanged.

static void Latin1ToUTF8 (void *string, int len)
{
    char *temp, *outp, *inp = string;
    size_t insize, outsize, converted;
    iconv_t converter;

    // with len < 1 there is no room even for a terminator (and len - 1 would
    // wrap to a huge output size)
    if (len < 1)
        return;

    converter = iconv_open ("UTF-8", "ISO-8859-1");

    if (converter == (iconv_t) -1)
        return;

    temp = calloc (1, len);

    if (!temp) {
        iconv_close (converter);
        return;
    }

    insize = strlen (string);
    outsize = len - 1;          // reserve the last byte for the terminator
    outp = temp;

    errno = 0;
    converted = iconv (converter, &inp, &insize, &outp, &outsize);

    // E2BIG only means the output buffer filled up; everything converted so
    // far is complete characters, which is exactly the truncated result we want
    if (converted != (size_t) -1 || errno == E2BIG)
        memcpy (string, temp, len);

    iconv_close (converter);
    free (temp);
}

#endif