Blob Blame History Raw
/*
 *  LAME MP3 encoder for DirectShow
 *  LAME encoder wrapper
 *
 *  Copyright (c) 2000-2005 Marie Orlova, Peter Gubanov, Vitaly Ivanov, Elecard Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <streams.h>
#include "Encoder.h"


//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Builds an idle encoder: neither the input nor the output type is set, no
// LAME handle exists, and the output-buffer bookkeeping is zeroed. The only
// resource acquired here is the OUT_BUFFER_SIZE-byte output buffer, which
// the destructor releases.
CEncoder::CEncoder()
    : m_bInpuTypeSet(FALSE)
    , m_bOutpuTypeSet(FALSE)
    , m_bFinished(FALSE)
    , m_outOffset(0)
    , m_outReadOffset(0)
    , m_frameCount(0)
    , pgf(NULL)
{
    // Encoded MP3 bytes are accumulated in this buffer.
    m_outFrameBuf = new unsigned char[OUT_BUFFER_SIZE];
}

// Releases the LAME handle (if any) and the output buffer.
CEncoder::~CEncoder()
{
    // No stream is passed, so Close() skips the VBR tag rewrite and only
    // tears down the encoder handle.
    Close(NULL);

    // delete[] on a null pointer is a no-op, so no guard is required.
    delete [] m_outFrameBuf;
}

//////////////////////////////////////////////////////////////////////
// SetInputType - check if given input type is supported
//////////////////////////////////////////////////////////////////////
// Validates a PCM input format and, unless bJustCheck is set, stores it.
//
// Accepted formats: WAVE_FORMAT_PCM, 1 or 2 channels, 16 bits per sample,
// and one of the sample rates 8000/11025/12000/16000/22050/24000/32000/
// 44100/48000 Hz.
//
// Parameters:
//   lpwfex     - format to examine
//   bJustCheck - true: only report whether the format is acceptable;
//                false: also copy it into m_wfex and update m_bInpuTypeSet
//
// Returns:
//   S_OK          - format is supported
//   E_POINTER     - lpwfex is NULL
//   E_INVALIDARG  - format is not supported
// On any failure with bJustCheck == false the "input type set" flag is
// cleared.
HRESULT CEncoder::SetInputType(LPWAVEFORMATEX lpwfex, bool bJustCheck)
{
    CAutoLock l(&m_lock);

    bool supported = false;

    if (lpwfex != NULL &&
        lpwfex->wFormatTag == WAVE_FORMAT_PCM &&
        (lpwfex->nChannels == 1 || lpwfex->nChannels == 2) &&
        lpwfex->wBitsPerSample == 16)
    {
        switch (lpwfex->nSamplesPerSec)
        {
        case 48000:
        case 44100:
        case 32000:
        case 24000:
        case 22050:
        case 16000:
        case 12000:
        case 11025:
        case  8000:
            supported = true;
            break;

        default:
            break;
        }
    }

    if (supported)
    {
        if (!bJustCheck)
        {
            // NOTE(review): copies sizeof(WAVEFORMATEX) bytes - assumes the
            // caller's format block is at least that large; confirm against
            // the callers.
            memcpy(&m_wfex, lpwfex, sizeof(WAVEFORMATEX));
            m_bInpuTypeSet = true;
        }

        return S_OK;
    }

    if (!bJustCheck)
        m_bInpuTypeSet = false;

    // A missing format block is reported distinctly from an unsupported one.
    return lpwfex ? E_INVALIDARG : E_POINTER;
}

//////////////////////////////////////////////////////////////////////
// SetOutputType - store the output configuration to be used by Init()
//////////////////////////////////////////////////////////////////////
// Records the requested MPEG encoder configuration.
//
// The configuration is only copied into m_mabsi and flagged as present; no
// validation is performed and the LAME encoder is not touched here.
// Always returns S_OK.
HRESULT CEncoder::SetOutputType(MPEG_ENCODER_CONFIG &mabsi)
{
    CAutoLock l(&m_lock);

    // Keep a private copy so later changes by the caller have no effect.
    m_mabsi         = mabsi;
    m_bOutpuTypeSet = true;

    return S_OK;
}

//////////////////////////////////////////////////////////////////////
// SetDefaultOutputType - sets default MPEG audio properties according
// to input type
//////////////////////////////////////////////////////////////////////
// Adjusts the stored output configuration (m_mabsi) so that it is
// compatible with the given input format.
//
//  - Channel mode is forced to MONO when the input is mono or when
//    bForceMono is set in the configuration.
//  - The output sample rate is replaced by the input sample rate when the
//    configured rate is zero, is higher than the input rate, or does not
//    divide the input rate evenly.
//
// Returns S_OK on success, E_POINTER when lpwfex is NULL.
HRESULT CEncoder::SetDefaultOutputType(LPWAVEFORMATEX lpwfex)
{
    CAutoLock l(&m_lock);

    if (!lpwfex)
        return E_POINTER;

    if (lpwfex->nChannels == 1 || m_mabsi.bForceMono)
        m_mabsi.ChMode = MONO;

    // The zero test must come first: it protects the modulo below from a
    // division by zero when no output sample rate has been configured.
    if ((m_mabsi.dwSampleRate == 0) ||
        (lpwfex->nSamplesPerSec < m_mabsi.dwSampleRate) ||
        (lpwfex->nSamplesPerSec % m_mabsi.dwSampleRate != 0))
    {
        m_mabsi.dwSampleRate = lpwfex->nSamplesPerSec;
    }

    return S_OK;
}

//////////////////////////////////////////////////////////////////////
// Init - initializes or reinitializes the encoder SDK with the given input
// and output settings
//////////////////////////////////////////////////////////////////////
// Creates and configures the LAME encoder from the stored input format
// (m_wfex) and output configuration (m_mabsi).
//
// The output-buffer bookkeeping (offsets, finished flag, frame counter) is
// reset on every call. If an encoder handle already exists it is reused
// as-is: the settings are not re-applied until Close() has released it.
//
// Returns:
//   S_OK          - encoder is ready (newly created or already existing)
//   E_UNEXPECTED  - input or output type has not been set yet
//   E_FAIL        - lame_init() or lame_init_params() failed
HRESULT CEncoder::Init()
{
    CAutoLock l(&m_lock);

    m_outOffset     = 0;
    m_outReadOffset = 0;
    m_bFinished     = FALSE;
    m_frameCount    = 0;

    // An existing handle is kept untouched.
    if (pgf)
        return S_OK;

    if (!m_bInpuTypeSet || !m_bOutpuTypeSet)
        return E_UNEXPECTED;

    pgf = lame_init();
    if (!pgf)
        return E_FAIL;

    // --- sample rates and bitrate -------------------------------------
    lame_set_num_channels(pgf, m_wfex.nChannels);
    lame_set_in_samplerate(pgf, m_wfex.nSamplesPerSec);
    lame_set_out_samplerate(pgf, m_mabsi.dwSampleRate);

    // For output rates of 32 kHz and above, bitrates below 32 are raised
    // to 32.
    if ((lame_get_out_samplerate(pgf) >= 32000) && (m_mabsi.dwBitrate < 32))
        lame_set_brate(pgf, 32);
    else
        lame_set_brate(pgf, m_mabsi.dwBitrate);

    lame_set_VBR(pgf, m_mabsi.vmVariable);
    lame_set_VBR_min_bitrate_kbps(pgf, m_mabsi.dwVariableMin);
    lame_set_VBR_max_bitrate_kbps(pgf, m_mabsi.dwVariableMax);

    // --- header flags --------------------------------------------------
    lame_set_copyright(pgf, m_mabsi.bCopyright);
    lame_set_original(pgf, m_mabsi.bOriginal);
    lame_set_error_protection(pgf, m_mabsi.bCRCProtect);

    lame_set_bWriteVbrTag(pgf, m_mabsi.dwXingTag);
    lame_set_strict_ISO(pgf, m_mabsi.dwStrictISO);
    lame_set_VBR_hard_min(pgf, m_mabsi.dwEnforceVBRmin);

    // --- channel mode --------------------------------------------------
    // The configured mode is honoured for stereo input (automatic
    // selection of joint stereo was disabled and left to the user);
    // everything else is encoded as mono.
    if (lame_get_num_channels(pgf) == 2 && !m_mabsi.bForceMono)
        lame_set_mode(pgf, m_mabsi.ChMode);
    else
        lame_set_mode(pgf, MONO);

    // Forced mid/side coding only applies to joint stereo.
    if (lame_get_mode(pgf) == JOINT_STEREO)
        lame_set_force_ms(pgf, m_mabsi.dwForceMS);
    else
        lame_set_force_ms(pgf, 0);

    // --- filters and quality -------------------------------------------
    if (m_mabsi.dwVoiceMode != 0)
        lame_set_lowpassfreq(pgf, 12000);

    // m_mabsi.dwKeepAllFreq is currently ignored: the low/high-pass
    // overrides it used to control are not available anymore.

    lame_set_quality(pgf, m_mabsi.dwQuality);
    lame_set_VBR_q(pgf, m_mabsi.dwVBRq);

    // A rejected parameter set must not leave a half-initialized handle
    // behind: Encode()/Finish() only test pgf for NULL.
    if (lame_init_params(pgf) < 0)
    {
        lame_close(pgf);
        pgf = NULL;
        return E_FAIL;
    }

    // --- encoder delay compensation --------------------------------------
    // Feed 48 silent samples per channel. The buffer is sized for two
    // channels, the maximum SetInputType() accepts, so no heap allocation
    // (and no allocation-failure path) is needed.
    {
        const int padSamples = 48;
        short     silence[padSamples * 2] = { 0 };

        int const nch = lame_get_num_channels(pgf);
        int out_bytes = 0;

        if (nch == 2)
            out_bytes = lame_encode_buffer_interleaved(pgf, silence, padSamples, m_outFrameBuf, OUT_BUFFER_SIZE);
        else
            out_bytes = lame_encode_buffer(pgf, silence, silence, padSamples, m_outFrameBuf, OUT_BUFFER_SIZE);

        // Errors (negative results) from the padding step are not fatal.
        if (out_bytes > 0)
            m_outOffset += out_bytes;
    }

    return S_OK;
}

//////////////////////////////////////////////////////////////////////
// Close - closes encoder
//////////////////////////////////////////////////////////////////////
// Shuts the encoder down.
//
// When a stream is supplied and the encoder was configured to write a VBR
// tag, the tag frame in the stream is rewritten first; the outcome of that
// rewrite is not reported to the caller. Does nothing when no encoder
// handle exists. Always returns S_OK.
HRESULT CEncoder::Close(IStream* pStream)
{
    CAutoLock l(&m_lock);

    if (!pgf)
        return S_OK;

    if (lame_get_bWriteVbrTag(pgf) && pStream)
        updateLameTagFrame(pStream);

    lame_close(pgf);
    pgf = NULL;

    return S_OK;
}

//////////////////////////////////////////////////////////////////////
// Encode - encodes data placed on pdata and returns
// the number of processed bytes
//////////////////////////////////////////////////////////////////////
// Feeds 16-bit PCM to LAME and appends the produced MP3 bytes to the
// output buffer.
//
// Parameters:
//   pdata     - PCM samples (interleaved when the encoder has 2 channels)
//   data_size - size of pdata in bytes; must be non-negative and a multiple
//               of sizeof(short)
//
// Returns the number of input bytes consumed (possibly less than data_size
// when the output buffer fills up or a partial sample frame remains), or -1
// on invalid state/arguments or when LAME reports an error.
int CEncoder::Encode(const short * pdata, int data_size)
{
    CAutoLock l(&m_lock);

    if (!pgf || !m_outFrameBuf)
        return -1;

    if (!pdata || data_size < 0 || (data_size & (sizeof(short) - 1)))
        return -1;

    // Compact the buffer: move bytes that have not been read yet to the
    // front so new output can be appended behind them.
    if (m_outReadOffset > 0)
    {
        if (m_outOffset > m_outReadOffset)
            memmove(m_outFrameBuf, m_outFrameBuf + m_outReadOffset, m_outOffset - m_outReadOffset);

        m_outOffset -= m_outReadOffset;
    }
    m_outReadOffset = 0;

    // New input means the stream is no longer in the flushed state.
    m_bFinished = FALSE;

    int const nch = lame_get_num_channels(pgf);
    int consumed  = 0;

    for (;;)
    {
        // Whole sample frames still waiting in the input, at most 1152
        // per call into LAME.
        int chunk = (data_size - consumed) / (sizeof(short) * nch);

        if (chunk <= 0 || m_outOffset >= OUT_BUFFER_MAX)
            break;

        if (chunk > 1152)
            chunk = 1152;

        const short   * const src     = pdata + (consumed / sizeof(short));
        unsigned char * const dst     = m_outFrameBuf + m_outOffset;
        int const             dstRoom = OUT_BUFFER_SIZE - m_outOffset;

        // Mono input is handed to LAME as identical left/right buffers.
        int const produced = (nch == 2)
            ? lame_encode_buffer_interleaved(pgf, (short *)src, chunk, dst, dstRoom)
            : lame_encode_buffer(pgf, src, src, chunk, dst, dstRoom);

        if (produced < 0)
            return -1;

        m_outOffset += produced;
        consumed    += chunk * nch * sizeof(short);
    }

    return consumed;
}

//
// Finish - flush the buffered samples
//
// Flushes the samples still buffered inside LAME into the output buffer and
// marks the stream as finished.
//
// Returns:
//   S_OK   - flush succeeded; m_bFinished is set
//   E_FAIL - no encoder/output buffer, the output buffer is already filled
//            up to OUT_BUFFER_MAX, or lame_encode_flush() reported an error
HRESULT CEncoder::Finish()
{
    CAutoLock l(&m_lock);

    if (!pgf || !m_outFrameBuf || (m_outOffset >= OUT_BUFFER_MAX))
        return E_FAIL;

    int const flushed = lame_encode_flush(pgf, m_outFrameBuf + m_outOffset, OUT_BUFFER_SIZE - m_outOffset);

    // A negative result is an error code, not a byte count: adding it to
    // the write offset would corrupt the buffer bookkeeping.
    if (flushed < 0)
        return E_FAIL;

    m_outOffset += flushed;
    m_bFinished  = TRUE;

    return S_OK;
}


// Field values of the 4-byte MPEG audio frame header that getFrameLength()
// tests for.
#define MPEG_VERSION_RESERVED   1
#define MPEG_VERSION_1          3

#define LAYER_III               1

#define BITRATE_FREE            0
#define BITRATE_RESERVED        15

#define SRATE_RESERVED          3

#define EMPHASIS_RESERVED       2

// Computes the length of the Layer III frame whose 4-byte header starts at
// pdata.
//
// Returns (bitrate * samples_per_frame) / (8 * sample_rate) + padding, or
// -1 when pdata is NULL, the 11 sync bits are not all set, the layer is not
// Layer III, or the version / bitrate / sample-rate / emphasis field holds
// a reserved (or, for bitrate, "free") value.
//
// The caller must guarantee that at least 4 bytes are readable at pdata.
int getFrameLength(const unsigned char * pdata)
{
    if (!pdata)
        return -1;

    // Frame sync: 0xFF followed by three set bits.
    if (pdata[0] != 0xff || (pdata[1] & 0xe0) != 0xe0)
        return -1;

    // Sample rates indexed by [version id][sample-rate id]; 1 is a
    // placeholder for combinations that are rejected below.
    static const int sample_rate_tab[4][4] =
    {
        { 11025, 12000,  8000, 1 },
        {     1,     1,     1, 1 },
        { 22050, 24000, 16000, 1 },
        { 44100, 48000, 32000, 1 }
    };

    const int version_id     = (pdata[1] >> 3) & 0x03;
    const int layer          = (pdata[1] >> 1) & 0x03;
    const int bitrate_id     =  pdata[2] >> 4;
    const int sample_rate_id = (pdata[2] >> 2) & 0x03;
    const int padding        = (pdata[2] >> 1) & 0x01;
    const int emphasis       =  pdata[3] & 0x03;

    const bool acceptable =
        version_id     != MPEG_VERSION_RESERVED &&
        layer          == LAYER_III &&
        bitrate_id     != BITRATE_FREE &&
        bitrate_id     != BITRATE_RESERVED &&
        sample_rate_id != SRATE_RESERVED &&
        emphasis       != EMPHASIS_RESERVED;

    if (!acceptable)
        return -1;

    const bool is_mpeg1   = (version_id == MPEG_VERSION_1);
    const int spf         = is_mpeg1 ? 1152 : 576;
    const int sample_rate = sample_rate_tab[version_id][sample_rate_id];

    // dwBitRateValue row 0 is used for MPEG version 1, row 1 otherwise;
    // the entries are scaled by 1000 (presumably kbit/s - confirm against
    // the table's definition).
    const int bitrate     = dwBitRateValue[is_mpeg1 ? 0 : 1][bitrate_id - 1] * 1000;

    return (bitrate * spf) / (8 * sample_rate) + padding;
}


int CEncoder::GetFrame(const unsigned char ** pframe)
{
    if (!pgf || !m_outFrameBuf || !pframe)
        return -1;

	while ((m_outOffset - m_outReadOffset) > 4)
    {
        int frame_length = getFrameLength(m_outFrameBuf + m_outReadOffset);

        if (frame_length < 0)
        {
            m_outReadOffset++;
        }
        else if (frame_length <= (m_outOffset - m_outReadOffset))
        {
            *pframe = m_outFrameBuf + m_outReadOffset;
            m_outReadOffset += frame_length;

            m_frameCount++;

            // don't deliver the first and the last frames
            if (m_frameCount != 1 && !(m_bFinished && (m_outOffset - m_outReadOffset) < 5))
                return frame_length;
        }
        else
            break;
    }

    return 0;
}

////////////////////////////////////////////////////////////////////////////////
// Returns a block of the MP3 stream whose size is an integer multiple of
// cbAlign, or the unaligned remainder once encoding has finished
////////////////////////////////////////////////////////////////////////////////
// Hands out the pending encoded bytes as one block.
//
// While encoding is in progress the block is truncated to a multiple of
// cbAlign and *piBufferSize equals the block length. Once the stream is
// finished, everything that is left is returned and *piBufferSize is the
// block length rounded UP to a multiple of cbAlign (the buffer size the
// caller needs to hold the padded block).
//
// Parameters:
//   pblock       - receives a pointer into the internal buffer; only
//                  written when the returned length is non-zero
//   piBufferSize - receives the buffer size described above
//   cbAlign      - alignment in bytes; values <= 0 disable alignment
//
// Returns the number of bytes in the block (0 if nothing is available) or
// -1 on invalid state/arguments.
//
// NOTE(review): unlike Encode()/Finish() this method does not take m_lock -
// confirm that callers serialize access.
int CEncoder::GetBlockAligned(const unsigned char ** pblock, int* piBufferSize, const long& cbAlign)
{
    ASSERT(piBufferSize);
    if (!pgf || !m_outFrameBuf || !pblock || !piBufferSize)
        return -1;

    int iBlockLen = m_outOffset - m_outReadOffset;
    ASSERT(iBlockLen >= 0);

    // Bytes beyond the last alignment boundary. Both branches use the same
    // "cbAlign > 0" guard, so a zero or negative alignment can never yield
    // a buffer size smaller than the block.
    int const remainder = (cbAlign > 0) ? static_cast<int>(iBlockLen % cbAlign) : 0;

    if (!m_bFinished)
    {
        // Keep the unaligned tail for a later call.
        iBlockLen     -= remainder;
        *piBufferSize  = iBlockLen;
    }
    else if (remainder)
    {
        *piBufferSize = static_cast<int>(iBlockLen + cbAlign - remainder);
    }
    else
    {
        *piBufferSize = iBlockLen;
    }

    if (iBlockLen)
    {
        *pblock          = m_outFrameBuf + m_outReadOffset;
        m_outReadOffset += iBlockLen;
    }

    return iBlockLen;
}

// Reads 4 bytes at the stream's current position and tests whether they
// begin with an MPEG audio sync pattern (0xFF followed by three set bits).
// The stream position is advanced by the bytes read.
//
// Returns:
//   S_OK     - the bytes look like a frame header
//   S_FALSE  - 4 bytes were read but they do not start with a sync pattern
//   E_FAIL   - fewer than 4 bytes could be read
//   other failure code - propagated from IStream::Read
HRESULT CEncoder::maybeSyncWord(IStream *pStream)
{
    unsigned char header[4];
    ULONG         bytesRead = 0;

    const HRESULT hr = pStream->Read(header, sizeof(header), &bytesRead);
    if (FAILED(hr))
        return hr;

    if (bytesRead != sizeof(header))
        return E_FAIL;

    const bool hasSync = (header[0] == 0xffu) && ((header[1] & 0xE0u) == 0xE0u);

    return hasSync ? S_OK : S_FALSE;
}

// Positions pStream at an absolute offset from its beginning.
static HRESULT seekStreamTo(IStream *pStream, size_t offset)
{
    LARGE_INTEGER target;
    target.QuadPart = offset;
    return pStream->Seek(target, STREAM_SEEK_SET, NULL);
}

// Locates the LAME-tag/Xing frame at the start of the stream, skipping an
// ID3v2 tag if one is present, and leaves the stream positioned on it.
//
// The frame is considered found when a sync pattern is present both at the
// expected frame position and lametag_frame_size bytes further on (where
// the first audio frame should begin).
//
// Returns S_OK on success; E_FAIL when the stream is too short or either
// sync check does not match; any failure code from Seek/Read otherwise.
HRESULT CEncoder::skipId3v2(IStream *pStream, size_t lametag_frame_size)
{
    HRESULT hr;

    /* seek to the beginning of the stream; fails if it is not seekable */
    if (FAILED(hr = seekStreamTo(pStream, 0)))
        return hr;

    /* read 10 bytes in case there's an ID3 version 2 header here */
    unsigned char tagHeader[10];
    ULONG         bytesRead;

    if (FAILED(hr = pStream->Read(tagHeader, sizeof(tagHeader), &bytesRead)))
        return hr;

    /* not readable, maybe opened Write-Only */
    if (bytesRead != sizeof(tagHeader))
        return E_FAIL;

    /* offset of the first byte after the ID3v2 tag (0 when there is none) */
    size_t audioStart = 0;

    const bool hasId3v2 = tagHeader[0] == 'I' &&
                          tagHeader[1] == 'D' &&
                          tagHeader[2] == '3';
    if (hasId3v2)
    {
        /* the tag size (minus the 10-byte header) is encoded into four
         * bytes where the most significant bit is clear in each byte
         */
        const size_t payload = ((tagHeader[6] & 0x7f) << 21)
                             | ((tagHeader[7] & 0x7f) << 14)
                             | ((tagHeader[8] & 0x7f) << 7)
                             |  (tagHeader[9] & 0x7f);

        audioStart = payload + sizeof(tagHeader);
    }

    /* a frame header must sit right behind the tag ... */
    if (FAILED(hr = seekStreamTo(pStream, audioStart)))
        return hr;

    hr = maybeSyncWord(pStream);
    if (hr != S_OK)
        return SUCCEEDED(hr) ? E_FAIL : hr;

    /* ... and another one directly behind the LAME-tag frame */
    if (FAILED(hr = seekStreamTo(pStream, audioStart + lametag_frame_size)))
        return hr;

    hr = maybeSyncWord(pStream);
    if (hr != S_OK)
        return SUCCEEDED(hr) ? E_FAIL : hr;

    /* OK, it seems we found our LAME-Tag/Xing frame again;
     * rewind to its first byte so the caller can overwrite it */
    if (FAILED(hr = seekStreamTo(pStream, audioStart)))
        return hr;

    return S_OK;
}

// Updates VBR tag
// Rewrites the LAME/VBR tag frame in pStream with the final values held by
// the encoder.
//
// Returns:
//   S_OK           - tag rewritten, or LAME reports there is no tag frame
//   E_OUTOFMEMORY  - the frame buffer could not be allocated
//   E_FAIL         - the number of bytes written differs from the frame size
//   other failure code - propagated from skipId3v2() or IStream::Write
HRESULT CEncoder::updateLameTagFrame(IStream* pStream)
{
    /* ask for the required buffer size */
    const size_t frameSize = lame_get_lametag_frame(pgf, 0, 0);
    if (frameSize == 0)
        return S_OK;

    /* position the stream on the old tag frame */
    HRESULT hr = skipId3v2(pStream, frameSize);
    if (FAILED(hr))
        return hr;

    unsigned char* frame = (unsigned char*)malloc(frameSize);
    if (frame == 0)
        return E_OUTOFMEMORY;

    /* Pre-set to the expected size so that a zero-length result from LAME
     * below is reported as a mismatch. */
    ULONG written = frameSize;

    /* Put it all to disk again */
    const size_t produced = lame_get_lametag_frame(pgf, frame, frameSize);
    if (produced > 0)
        hr = pStream->Write(frame, produced, &written);

    free(frame);

    if (written != produced)
        return E_FAIL;

    return hr;
}