/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/*
* This file is part of the libmspub project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#include "MSPUBMetaData.h"
#include <cmath>
#include <cstdio>
#include <cstring>
#include <ctime>
#include <string>
#include "libmspub_utils.h"
libmspub::MSPUBMetaData::MSPUBMetaData()
: m_idsAndOffsets(), m_typedPropertyValues(), m_metaData()
{
}
libmspub::MSPUBMetaData::~MSPUBMetaData()
{
}
enum PIDDSI
{
PIDDSI_CODEPAGE = 0x00000001,
PIDDSI_CATEGORY = 0x00000002,
PIDDSI_PRESFORMAT = 0x00000003,
PIDDSI_BYTECOUNT = 0x00000004,
PIDDSI_LINECOUNT = 0x00000005,
PIDDSI_PARACOUNT = 0x00000006,
PIDDSI_SLIDECOUNT = 0x00000007,
PIDDSI_NOTECOUNT = 0x00000008,
PIDDSI_HIDDENCOUNT = 0x00000009,
PIDDSI_MMCLIPCOUNT = 0x0000000A,
PIDDSI_SCALE = 0x0000000B,
PIDDSI_HEADINGPAIR = 0x0000000C,
PIDDSI_DOCPARTS = 0x0000000D,
PIDDSI_MANAGER = 0x0000000E,
PIDDSI_COMPANY = 0x0000000F,
PIDDSI_LINKSDIRTY = 0x00000010,
PIDDSI_CCHWITHSPACES = 0x00000011,
PIDDSI_SHAREDDOC = 0x00000013,
PIDDSI_LINKBASE = 0x00000014,
PIDDSI_HLINKS = 0x00000015,
PIDDSI_HYPERLINKSCHANGED = 0x00000016,
PIDDSI_VERSION = 0x00000017,
PIDDSI_DIGSIG = 0x00000018,
PIDDSI_CONTENTTYPE = 0x0000001A,
PIDDSI_CONTENTSTATUS = 0x0000001B,
PIDDSI_LANGUAGE = 0x0000001C,
PIDDSI_DOCVERSION = 0x0000001D
};
enum PIDSI
{
CODEPAGE_PROPERTY_IDENTIFIER = 0x00000001,
PIDSI_TITLE = 0x00000002,
PIDSI_SUBJECT = 0x00000003,
PIDSI_AUTHOR = 0x00000004,
PIDSI_KEYWORDS = 0x00000005,
PIDSI_COMMENTS = 0x00000006,
PIDSI_TEMPLATE = 0x00000007,
PIDSI_LASTAUTHOR = 0x00000008,
PIDSI_REVNUMBER = 0x00000009,
PIDSI_EDITTIME = 0x0000000A,
PIDSI_LASTPRINTED = 0x0000000B,
PIDSI_CREATE_DTM = 0x0000000C,
PIDSI_LASTSAVE_DTM = 0x0000000D,
PIDSI_PAGECOUNT = 0x0000000E,
PIDSI_WORDCOUNT = 0x0000000F,
PIDSI_CHARCOUNT = 0x00000010,
PIDSI_THUMBNAIL = 0x00000011,
PIDSI_APPNAME = 0x00000012,
PIDSI_DOC_SECURITY = 0x00000013
};
bool libmspub::MSPUBMetaData::parse(librevenge::RVNGInputStream *input)
{
if (!input)
return false;
readPropertySetStream(input);
return true;
}
void libmspub::MSPUBMetaData::readPropertySetStream(librevenge::RVNGInputStream *input)
{
// ByteOrder
input->seek(2, librevenge::RVNG_SEEK_CUR);
// Version
input->seek(2, librevenge::RVNG_SEEK_CUR);
// SystemIdentifier
input->seek(4, librevenge::RVNG_SEEK_CUR);
// CLSID
input->seek(16, librevenge::RVNG_SEEK_CUR);
// NumPropertySets
input->seek(4, librevenge::RVNG_SEEK_CUR);
// FMTID0
//input->seek(16, librevenge::RVNG_SEEK_CUR);
uint32_t data1 = readU32(input);
uint16_t data2 = readU16(input);
uint16_t data3 = readU16(input);
uint8_t data4[8];
for (unsigned char &i : data4)
{
i = readU8(input);
}
// Pretty-printed GUID is 36 bytes + the terminating null-character.
char FMTID0[37];
sprintf(FMTID0, "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", data1, data2, data3,
data4[0], data4[1], data4[2], data4[3], data4[4], data4[5], data4[6], data4[7]);
uint32_t offset0 = readU32(input);
readPropertySet(input, offset0, FMTID0);
}
void libmspub::MSPUBMetaData::readPropertySet(librevenge::RVNGInputStream *input, uint32_t offset, char *FMTID)
{
input->seek(offset, librevenge::RVNG_SEEK_SET);
// Size
input->seek(4, librevenge::RVNG_SEEK_CUR);
uint32_t numProperties = readU32(input);
for (uint32_t i = 0; i < numProperties; ++i)
readPropertyIdentifierAndOffset(input);
for (uint32_t i = 0; i < numProperties; ++i)
{
if (i >= m_idsAndOffsets.size())
break;
readTypedPropertyValue(input, i, offset + m_idsAndOffsets[i].second, FMTID);
}
}
uint32_t libmspub::MSPUBMetaData::getCodePage()
{
for (size_t i = 0; i < m_idsAndOffsets.size(); ++i)
{
if (m_idsAndOffsets[i].first == PIDSI::CODEPAGE_PROPERTY_IDENTIFIER)
{
if (i >= m_typedPropertyValues.size())
break;
return m_typedPropertyValues[i];
}
}
return 0;
}
void libmspub::MSPUBMetaData::readPropertyIdentifierAndOffset(librevenge::RVNGInputStream *input)
{
uint32_t propertyIdentifier = readU32(input);
uint32_t offset = readU32(input);
m_idsAndOffsets.push_back(std::make_pair(propertyIdentifier, offset));
}
#define VT_I2 0x0002
#define VT_LPSTR 0x001E
void libmspub::MSPUBMetaData::readTypedPropertyValue(librevenge::RVNGInputStream *input,
uint32_t index,
uint32_t offset,
char *FMTID)
{
input->seek(offset, librevenge::RVNG_SEEK_SET);
uint16_t type = readU16(input);
// Padding
input->seek(2, librevenge::RVNG_SEEK_CUR);
if (type == VT_I2)
{
uint16_t value = readU16(input);
m_typedPropertyValues[index] = value;
}
else if (type == VT_LPSTR)
{
librevenge::RVNGString string = readCodePageString(input);
if (!string.empty())
{
if (index >= m_idsAndOffsets.size())
return;
if (!strcmp(FMTID, "f29f85e0-4ff9-1068-ab91-08002b27b3d9"))
{
switch (m_idsAndOffsets[index].first)
{
case PIDSI::PIDSI_TITLE:
m_metaData.insert("dc:title", string);
break;
case PIDSI::PIDSI_SUBJECT:
m_metaData.insert("dc:subject", string);
break;
case PIDSI::PIDSI_AUTHOR:
m_metaData.insert("meta:initial-creator", string);
m_metaData.insert("dc:creator", string);
break;
case PIDSI::PIDSI_KEYWORDS:
m_metaData.insert("meta:keyword", string);
break;
case PIDSI::PIDSI_COMMENTS:
m_metaData.insert("dc:description", string);
break;
case PIDSI::PIDSI_TEMPLATE:
std::string templateHref(string.cstr());
size_t found = templateHref.find_last_of("/\\");
if (found != std::string::npos)
string = librevenge::RVNGString(templateHref.substr(found+1).c_str());
m_metaData.insert("librevenge:template", string);
break;
}
}
else if (!strcmp(FMTID,"d5cdd502-2e9c-101b-9397-08002b2cf9ae"))
{
switch (m_idsAndOffsets[index].first)
{
case PIDDSI_CATEGORY:
m_metaData.insert("librevenge:category", string);
break;
case PIDDSI_LINECOUNT:
// this should actually be PIDDSI_COMPANY but this
// is what company is mapped to
m_metaData.insert("librevenge:company", string);
break;
case PIDDSI_LANGUAGE:
m_metaData.insert("dc:language", string);
break;
}
}
}
}
}
librevenge::RVNGString libmspub::MSPUBMetaData::readCodePageString(librevenge::RVNGInputStream *input)
{
uint32_t size = readU32(input);
if (size == 0)
return librevenge::RVNGString();
std::vector<unsigned char> characters;
for (uint32_t i = 0; i < size; ++i)
characters.push_back(readU8(input));
uint32_t codepage = getCodePage();
librevenge::RVNGString string;
if (codepage == 65001)
{
// http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130%28v=vs.85%29.aspx
// says this is UTF-8.
characters.push_back(0);
string.append(reinterpret_cast<const char *>(characters.data()));
}
else
{
switch (codepage)
{
case 1252:
// http://msdn.microsoft.com/en-us/goglobal/bb964654
appendCharacters(string, characters, "windows-1252");
break;
default:
MSPUB_DEBUG_MSG(("MSPUBMetaData::readCodePageString: Unknown codepage %u found\n", unsigned(codepage)));
}
}
return string;
}
bool libmspub::MSPUBMetaData::parseTimes(librevenge::RVNGInputStream *input)
{
// Parse the header
// HeaderSignature: 8 bytes
// HeaderCLSID: 16 bytes
// MinorVersion: 2 bytes
// MajorVersion: 2 bytes
// ByteOrder: 2 bytes
input->seek(30, librevenge::RVNG_SEEK_CUR);
uint16_t sectorShift = readU16(input);
// MiniSectorShift: 2 bytes
// Reserved: 6 bytes
// NumDirectorySectors: 4 bytes
// NumFATSectors: 4 bytes
input->seek(16, librevenge::RVNG_SEEK_CUR);
uint32_t firstDirSectorLocation = readU32(input);
// Seek to the Root Directory Entry
size_t sectorSize = std::pow(2, sectorShift);
input->seek((firstDirSectorLocation + 1) * sectorSize, librevenge::RVNG_SEEK_SET);
// DirectoryEntryName: 64 bytes
// DirectoryEntryNameLength: 2 bytes
// ObjectType: 1 byte
// ColorFlag: 1 byte
// LeftSiblingID: 4 bytes
// RightSiblingID: 4 bytes
// ChildID: 4 bytes
// CLSID: 16 bytes
// StateBits: 4 bytes
// CreationTime: 8 bytes
input->seek(108, librevenge::RVNG_SEEK_CUR);
uint64_t modifiedTime = readU64(input);
// modifiedTime is number of 100ns since Jan 1 1601
const uint64_t epoch = uint64_t(116444736UL) * 100;
time_t sec = (modifiedTime / 10000000) - epoch;
const struct tm *time = localtime(&sec);
if (time)
{
static const int MAX_BUFFER = 1024;
char buffer[MAX_BUFFER];
strftime(&buffer[0], MAX_BUFFER-1, "%Y-%m-%dT%H:%M:%SZ", time);
librevenge::RVNGString result;
result.append(buffer);
// Visio UI uses modifiedTime for both purposes.
m_metaData.insert("meta:creation-date", result);
m_metaData.insert("dc:date", result);
return true;
}
return false;
}
const librevenge::RVNGPropertyList &libmspub::MSPUBMetaData::getMetaData()
{
return m_metaData;
}
/* vim:set shiftwidth=2 softtabstop=2 expandtab: */