/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
* soup-content-sniffer.c
*
* Copyright (C) 2009, 2013 Gustavo Noronha Silva.
*
* This code implements the following specification:
*
* http://mimesniff.spec.whatwg.org/ as of 11 June 2013
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <string.h>
#include "soup-content-sniffer.h"
#include "soup.h"
#include "soup-content-processor.h"
#include "soup-content-sniffer-stream.h"
#include "soup-message-private.h"
/**
* SECTION:soup-content-sniffer
* @short_description: Content sniffing for SoupSession
*
* A #SoupContentSniffer tries to detect the actual content type of
* the files that are being downloaded by looking at some of the data
* before the #SoupMessage emits its #SoupMessage::got-headers signal.
* #SoupContentSniffer implements #SoupSessionFeature, so you can add
* content sniffing to a session with soup_session_add_feature() or
* soup_session_add_feature_by_type().
*
* Since: 2.28
**/
/* Forward declarations of the interface initializers that the type
 * registration below refers to; both are defined later in this file.
 */
static void soup_content_sniffer_session_feature_init (SoupSessionFeatureInterface *feature_interface, gpointer interface_data);
/* Assigned in soup_content_sniffer_content_processor_init() from
 * g_type_default_interface_peek (SOUP_TYPE_CONTENT_PROCESSOR).
 */
static SoupContentProcessorInterface *soup_content_sniffer_default_content_processor_interface;
static void soup_content_sniffer_content_processor_init (SoupContentProcessorInterface *interface, gpointer interface_data);
/* Registers SoupContentSniffer as a G_TYPE_OBJECT subclass that implements
 * the SoupSessionFeature and SoupContentProcessor interfaces.
 */
G_DEFINE_TYPE_WITH_CODE (SoupContentSniffer, soup_content_sniffer, G_TYPE_OBJECT,
G_IMPLEMENT_INTERFACE (SOUP_TYPE_SESSION_FEATURE,
soup_content_sniffer_session_feature_init)
G_IMPLEMENT_INTERFACE (SOUP_TYPE_CONTENT_PROCESSOR,
soup_content_sniffer_content_processor_init))
/* wrap_input implementation for the SoupContentProcessor interface.
 *
 * Creates a SOUP_TYPE_CONTENT_SNIFFER_STREAM object whose "base-stream",
 * "message" and "sniffer" properties are set to @base_stream, @msg and
 * @processor (as a SoupContentSniffer) respectively, and returns it.
 * @error is never written by this implementation.
 */
static GInputStream *
soup_content_sniffer_content_processor_wrap_input (SoupContentProcessor *processor,
						   GInputStream *base_stream,
						   SoupMessage *msg,
						   GError **error)
{
	SoupContentSniffer *self = SOUP_CONTENT_SNIFFER (processor);
	GInputStream *sniffing_stream;

	sniffing_stream = g_object_new (SOUP_TYPE_CONTENT_SNIFFER_STREAM,
					"base-stream", base_stream,
					"message", msg,
					"sniffer", self,
					NULL);

	return sniffing_stream;
}
/* Interface initializer for SoupContentProcessor.
 *
 * Installs the wrap_input hook, declares that this processor works at the
 * SOUP_STAGE_BODY_DATA stage, and caches the default interface vtable in
 * soup_content_sniffer_default_content_processor_interface.
 */
static void
soup_content_sniffer_content_processor_init (SoupContentProcessorInterface *processor_interface,
					     gpointer interface_data)
{
	processor_interface->wrap_input = soup_content_sniffer_content_processor_wrap_input;
	processor_interface->processing_stage = SOUP_STAGE_BODY_DATA;

	soup_content_sniffer_default_content_processor_interface =
		g_type_default_interface_peek (SOUP_TYPE_CONTENT_PROCESSOR);
}
/* Instance initializer named by G_DEFINE_TYPE_WITH_CODE above. The body is
 * empty: no per-instance setup is performed here.
 */
static void
soup_content_sniffer_init (SoupContentSniffer *content_sniffer)
{
}
/* One row of a media signature table, as consumed by sniff_media(). */
typedef struct {
/* Byte mask ANDed with the data before comparing against @pattern. */
const guchar *mask;
/* Expected bytes after masking. */
const guchar *pattern;
/* Number of bytes of @mask and @pattern that sniff_media() compares. */
guint pattern_length;
/* MIME type reported when the row matches. */
const char *sniffed_type;
} SoupContentSnifferMediaPattern;
/* Matches the start of @buffer against each row of @table, in order.
 *
 * Only the first 512 bytes of @buffer (or fewer, if it is shorter) are
 * considered. A row matches when the data is at least pattern_length bytes
 * long and (mask[k] & data[k]) == pattern[k] for every k below
 * pattern_length.
 *
 * Returns a newly allocated copy of the first matching row's sniffed_type,
 * or NULL when no row matches. @sniffer is not used.
 */
static char*
sniff_media (SoupContentSniffer *sniffer,
	     SoupBuffer *buffer,
	     SoupContentSnifferMediaPattern table[],
	     int table_length)
{
	const guchar *data = (const guchar *)buffer->data;
	guint data_length = MIN (512, buffer->length);
	int row;

	for (row = 0; row < table_length; row++) {
		SoupContentSnifferMediaPattern *candidate = table + row;
		guint matched = 0;

		/* Not enough data to hold this signature. */
		if (candidate->pattern_length > data_length)
			continue;

		while (matched < candidate->pattern_length &&
		       (candidate->mask[matched] & data[matched]) == candidate->pattern[matched])
			matched++;

		/* Every byte of the signature agreed. */
		if (matched == candidate->pattern_length)
			return g_strdup (candidate->sniffed_type);
	}

	return NULL;
}
/* This table is based on the MIMESNIFF spec;
* See 6.1 Matching an image type pattern
*
* Each row is { mask, pattern, pattern_length, sniffed_type } and is
* evaluated by sniff_media(): a row matches when
* (mask[k] & data[k]) == pattern[k] for every k below pattern_length.
* A \x00 mask byte paired with a \x00 pattern byte therefore accepts any
* data byte at that position (see the WEBP row, where the four bytes
* following "RIFF" are ignored).
*/
static SoupContentSnifferMediaPattern image_types_table[] = {
/* Windows icon signature. */
{ (const guchar *)"\xFF\xFF\xFF\xFF",
(const guchar *)"\x00\x00\x01\x00",
4,
"image/x-icon" },
/* Windows cursor signature. */
{ (const guchar *)"\xFF\xFF\xFF\xFF",
(const guchar *)"\x00\x00\x02\x00",
4,
"image/x-icon" },
/* BMP. */
{ (const guchar *)"\xFF\xFF",
(const guchar *)"BM",
2,
"image/bmp" },
/* GIFs. */
{ (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF",
(const guchar *)"GIF87a",
6,
"image/gif" },
{ (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF",
(const guchar *)"GIF89a",
6,
"image/gif" },
/* WEBP. */
{ (const guchar *)"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF",
(const guchar *)"RIFF\x00\x00\x00\x00WEBPVP",
14,
"image/webp" },
/* PNG. */
{ (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
(const guchar *)"\x89PNG\x0D\x0A\x1A\x0A",
8,
"image/png" },
/* JPEG. */
{ (const guchar *)"\xFF\xFF\xFF",
(const guchar *)"\xFF\xD8\xFF",
3,
"image/jpeg" },
};
/* Runs sniff_media() over image_types_table.
 *
 * Returns a newly allocated image MIME type string, or NULL when the start
 * of @buffer matches none of the known image signatures.
 */
static char*
sniff_images (SoupContentSniffer *sniffer, SoupBuffer *buffer)
{
	int n_image_types = G_N_ELEMENTS (image_types_table);

	return sniff_media (sniffer, buffer, image_types_table, n_image_types);
}
/* This table is based on the MIMESNIFF spec;
 * See 6.2 Matching an audio or video type pattern
 *
 * Each row is { mask, pattern, pattern_length, sniffed_type } and is
 * evaluated by sniff_media(): a row matches when
 * (mask[k] & data[k]) == pattern[k] for every k below pattern_length.
 * Bytes whose mask is \x00 must have a \x00 pattern byte, otherwise the row
 * can never match.
 */
static SoupContentSnifferMediaPattern audio_video_types_table[] = {
	/* WebM (EBML header). */
	{ (const guchar *)"\xFF\xFF\xFF\xFF",
	  (const guchar *)"\x1A\x45\xDF\xA3",
	  4,
	  "video/webm" },
	/* Sun/NeXT audio. */
	{ (const guchar *)"\xFF\xFF\xFF\xFF",
	  (const guchar *)".snd",
	  4,
	  "audio/basic" },
	/* AIFF: "FORM", four ignored bytes, "AIFF". */
	{ (const guchar *)"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
	  (const guchar *)"FORM\0\0\0\0AIFF",
	  12,
	  "audio/aiff" },
	/* MP3 with an ID3v2 tag. */
	{ (const guchar *)"\xFF\xFF\xFF",
	  (const guchar *)"ID3",
	  3,
	  "audio/mpeg" },
	/* Ogg. */
	{ (const guchar *)"\xFF\xFF\xFF\xFF\xFF",
	  (const guchar *)"OggS\0",
	  5,
	  "application/ogg" },
	/* MIDI. */
	{ (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
	  (const guchar *)"MThd\x00\x00\x00\x06",
	  8,
	  "audio/midi" },
	/* AVI: "RIFF", four ignored bytes, "AVI ".
	 *
	 * The literal is split before "AVI " on purpose: a hex escape
	 * consumes every following hex digit, so "\x00AVI " would be read
	 * as the single byte 0x0A followed by "VI ", and the row would
	 * never match.
	 */
	{ (const guchar *)"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
	  (const guchar *)"RIFF\x00\x00\x00\x00" "AVI ",
	  12,
	  "video/avi" },
	/* WAVE: "RIFF", four ignored bytes, "WAVE". */
	{ (const guchar *)"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
	  (const guchar *)"RIFF\x00\x00\x00\x00WAVE",
	  12,
	  "audio/wave" },
};
/* Checks whether the start of @buffer looks like an MP4 file.
 *
 * At most the first 512 bytes are examined. The data must begin with an
 * ISO base media "ftyp" box: a 32-bit big-endian box size (which must be a
 * multiple of 4 and must fit in the examined data), the bytes "ftyp", and
 * a major brand starting with "mp4". The function then returns TRUE when
 * one of the 4-byte brand slots from offset 16 up to the box size starts
 * with "mp4", and FALSE otherwise.
 *
 * The box size is assembled byte by byte, so nothing is read before the
 * length has been validated and no alignment or host byte order
 * assumptions are made. All comparisons are bounded with memcmp() because
 * the buffer is not guaranteed to be NUL-terminated.
 *
 * NOTE(review): the current WHATWG MIME Sniffing text appears to accept a
 * major brand of "mp4" on its own, without requiring a compatible brand;
 * the stricter pre-existing behavior is kept here -- confirm which is
 * intended.
 */
static gboolean
sniff_mp4 (SoupContentSniffer *sniffer, SoupBuffer *buffer)
{
	const guchar *resource = (const guchar *)buffer->data;
	guint resource_length = MIN (512, buffer->length);
	guint32 box_size;
	guint i;

	/* Size (4) + "ftyp" (4) + major brand (4) must all be present. */
	if (resource_length < 12)
		return FALSE;

	box_size = ((guint32)resource[0] << 24) |
		   ((guint32)resource[1] << 16) |
		   ((guint32)resource[2] << 8) |
		    (guint32)resource[3];

	if (resource_length < box_size || box_size % 4 != 0)
		return FALSE;

	if (memcmp (resource + 4, "ftyp", 4) != 0)
		return FALSE;

	if (memcmp (resource + 8, "mp4", 3) != 0)
		return FALSE;

	/* Compatible brands; each slot is 4 bytes, of which 3 are compared. */
	for (i = 16; i < box_size && i + 3 <= resource_length; i = i + 4) {
		if (memcmp (resource + i, "mp4", 3) == 0)
			return TRUE;
	}

	return FALSE;
}
/* Sniffs @buffer for an audio or video type.
 *
 * The signature table is tried first; if nothing matches there, the MP4
 * check is applied. Returns a newly allocated MIME type string, or NULL
 * when neither step recognizes the data.
 */
static char*
sniff_audio_video (SoupContentSniffer *sniffer, SoupBuffer *buffer)
{
	char *result = sniff_media (sniffer,
				    buffer,
				    audio_video_types_table,
				    G_N_ELEMENTS (audio_video_types_table));

	if (result == NULL && sniff_mp4 (sniffer, buffer))
		result = g_strdup ("video/mp4");

	return result;
}
/* This table is based on the MIMESNIFF spec;
* See 7.1 Identifying a resource with an unknown MIME type
*
* Row type for types_table, consumed by sniff_unknown().
*/
typedef struct {
/* @has_ws is TRUE if @pattern contains "generic" whitespace */
gboolean has_ws;
/* @has_tag_termination is TRUE if we should check for a tag-terminating
* byte (0x20 " " or 0x3E ">") after the pattern match.
*/
gboolean has_tag_termination;
/* Byte mask ANDed with the data before comparing against @pattern. */
const guchar *mask;
/* Expected bytes after masking. */
const guchar *pattern;
/* For rows with @has_ws FALSE, the number of bytes compared. For rows
* with @has_ws TRUE, sniff_unknown() walks pattern indices 0 through
* @pattern_length inclusive, so this is the index of the last pattern byte.
*/
guint pattern_length;
/* MIME type reported when the row matches. */
const char *sniffed_type;
/* Rows with @scriptable TRUE are skipped by sniff_unknown() when it is
* called with sniff_scriptable set to FALSE.
*/
gboolean scriptable;
} SoupContentSnifferPattern;
/* When has_ws is TRUE, spaces in the pattern will indicate where insignificant space
* is allowed. Those spaces are marked with \x00 on the mask.
*
* Row layout: { has_ws, has_tag_termination, mask, pattern, pattern_length,
* sniffed_type, scriptable }.
*
* In has_ws rows, pattern_length is the index of the last pattern byte (one
* less than the number of bytes in the pattern string), because
* sniff_unknown() iterates while index_pattern <= pattern_length. In the
* other rows it is the plain byte count.
*
* The \xDF mask bytes clear bit 0x20, which makes the comparison of ASCII
* letters case-insensitive against the upper-case pattern.
*
* NOTE(review): sniff_unknown() treats every ' ' in a has_ws pattern as
* optional whitespace regardless of the mask byte, so the space inside
* "<!DOCTYPE HTML" (mask \xFF) is also matched loosely -- confirm this is
* intended.
*/
static SoupContentSnifferPattern types_table[] = {
/* Scriptable types. */
{ TRUE, TRUE,
(const guchar *)"\x00\xFF\xFF\xDF\xDF\xDF\xDF\xDF\xDF\xDF\xFF\xDF\xDF\xDF\xDF",
(const guchar *)" <!DOCTYPE HTML",
14,
"text/html",
TRUE },
{ TRUE, TRUE,
(const guchar *)"\x00\xFF\xDF\xDF\xDF\xDF",
(const guchar *)" <HTML",
5,
"text/html",
TRUE },
{ TRUE, TRUE,
(const guchar *)"\x00\xFF\xDF\xDF\xDF\xDF",
(const guchar *)" <HEAD",
5,
"text/html",
TRUE },
{ TRUE, TRUE,
(const guchar *)"\x00\xFF\xDF\xDF\xDF\xDF\xDF\xDF",
(const guchar *)" <SCRIPT",
7,
"text/html",
TRUE },
{ TRUE, TRUE,
(const guchar *)"\x00\xFF\xDF\xDF\xDF\xDF\xDF\xDF",
(const guchar *)" <IFRAME",
7,
"text/html",
TRUE },
{ TRUE, TRUE,
(const guchar *)"\x00\xFF\xDF\xFF",
(const guchar *)" <H1",
3,
"text/html",
TRUE },
{ TRUE, TRUE,
(const guchar *)"\x00\xFF\xDF\xDF\xDF",
(const guchar *)" <DIV",
4,
"text/html",
TRUE },
{ TRUE, TRUE,
(const guchar *)"\x00\xFF\xDF\xDF\xDF\xDF",
(const guchar *)" <FONT",
5,
"text/html",
TRUE },
{ TRUE, TRUE,
(const guchar *)"\x00\xFF\xDF\xDF\xDF\xDF\xDF",
(const guchar *)" <TABLE",
6,
"text/html",
TRUE },
{ TRUE, TRUE,
(const guchar *)"\x00\xFF\xDF",
(const guchar *)" <A",
2,
"text/html",
TRUE },
{ TRUE, TRUE,
(const guchar *)"\x00\xFF\xDF\xDF\xDF\xDF\xDF",
(const guchar *)" <STYLE",
6,
"text/html",
TRUE },
{ TRUE, TRUE,
(const guchar *)"\x00\xFF\xDF\xDF\xDF\xDF\xDF",
(const guchar *)" <TITLE",
6,
"text/html",
TRUE },
{ TRUE, TRUE,
(const guchar *)"\x00\xFF\xDF",
(const guchar *)" <B",
2,
"text/html",
TRUE },
{ TRUE, TRUE,
(const guchar *)"\x00\xFF\xDF\xDF\xDF\xDF",
(const guchar *)" <BODY",
5,
"text/html",
TRUE },
{ TRUE, TRUE,
(const guchar *)"\x00\xFF\xDF\xDF",
(const guchar *)" <BR",
3,
"text/html",
TRUE },
{ TRUE, TRUE,
(const guchar *)"\x00\xFF\xDF",
(const guchar *)" <P",
2,
"text/html",
TRUE },
{ TRUE, TRUE,
(const guchar *)"\x00\xFF\xFF\xFF\xFF",
(const guchar *)" <!--",
4,
"text/html",
TRUE },
{ TRUE, FALSE,
(const guchar *)"\x00\xFF\xFF\xFF\xFF\xFF",
(const guchar *)" <?xml",
5,
"text/xml",
TRUE },
{ FALSE, FALSE,
(const guchar *)"\xFF\xFF\xFF\xFF\xFF",
(const guchar *)"%PDF-",
5,
"application/pdf",
TRUE },
/* Non-scriptable types. */
{ FALSE, FALSE,
(const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
(const guchar *)"%!PS-Adobe-",
11,
"application/postscript",
FALSE },
{ FALSE, FALSE, /* UTF-16BE BOM */
(const guchar *)"\xFF\xFF\x00\x00",
(const guchar *)"\xFE\xFF\x00\x00",
4,
"text/plain",
FALSE },
{ FALSE, FALSE, /* UTF-16LE BOM */
(const guchar *)"\xFF\xFF\x00\x00",
(const guchar *)"\xFF\xFE\x00\x00",
4,
"text/plain",
FALSE },
{ FALSE, FALSE, /* UTF-8 BOM */
(const guchar *)"\xFF\xFF\xFF\x00",
(const guchar *)"\xEF\xBB\xBF\x00",
4,
"text/plain",
FALSE },
};
/* Whether a given byte looks like it might be part of binary content.
* Source: HTML5 spec; borrowed from the Chromium mime sniffer code,
* which is BSD-licensed
*
* Indexed by byte value (0x00 - 0xFF); a non-zero entry marks the byte as
* binary-looking. As laid out below, the flagged bytes are 0x00-0x08, 0x0B,
* 0x0E-0x1A and 0x1C-0x1F; every byte from 0x20 upwards is treated as text.
*/
static char byte_looks_binary[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, /* 0x00 - 0x0F */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, /* 0x10 - 0x1F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 - 0x2F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30 - 0x3F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 - 0x4F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50 - 0x5F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 - 0x6F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70 - 0x7F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80 - 0x8F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90 - 0x9F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xA0 - 0xAF */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xB0 - 0xBF */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xC0 - 0xCF */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xD0 - 0xDF */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xE0 - 0xEF */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xF0 - 0xFF */
};
/* HTML5: 2.7.4 Content-Type sniffing: unknown type
 *
 * Determines a MIME type for @buffer when the server supplied none (or an
 * "unknown" placeholder). At most the first 512 bytes are examined.
 *
 * Steps, in order:
 *  1. types_table is scanned; rows flagged scriptable are skipped when
 *     @sniff_scriptable is FALSE.
 *  2. Image signatures (sniff_images).
 *  3. Audio/video signatures (sniff_audio_video).
 *  4. If any examined byte is flagged in byte_looks_binary the result is
 *     "application/octet-stream", otherwise "text/plain".
 *
 * Returns a newly allocated string; never NULL.
 */
static char*
sniff_unknown (SoupContentSniffer *sniffer, SoupBuffer *buffer,
	       gboolean sniff_scriptable)
{
	char *sniffed_type = NULL;
	const guchar *resource = (const guchar *)buffer->data;
	guint resource_length = MIN (512, buffer->length);
	guint i;

	for (i = 0; i < G_N_ELEMENTS (types_table); i++) {
		SoupContentSnifferPattern *type_row = &(types_table[i]);

		if (!sniff_scriptable && type_row->scriptable)
			continue;

		if (type_row->has_ws) {
			guint index_stream = 0;
			guint index_pattern = 0;
			gboolean skip_row = FALSE;

			/* For has_ws rows pattern_length is the index of the
			 * last pattern byte, hence the inclusive bound.
			 */
			while ((index_stream < resource_length) &&
			       (index_pattern <= type_row->pattern_length)) {
				/* Skip insignificant white space ("WS" in the spec) */
				if (type_row->pattern[index_pattern] == ' ') {
					if (resource[index_stream] == '\x09' ||
					    resource[index_stream] == '\x0a' ||
					    resource[index_stream] == '\x0c' ||
					    resource[index_stream] == '\x0d' ||
					    resource[index_stream] == '\x20')
						index_stream++;
					else
						index_pattern++;
				} else {
					if ((type_row->mask[index_pattern] & resource[index_stream]) != type_row->pattern[index_pattern]) {
						skip_row = TRUE;
						break;
					}
					index_pattern++;
					index_stream++;
				}
			}

			if (skip_row)
				continue;

			/* The whole pattern was consumed. */
			if (index_pattern > type_row->pattern_length) {
				if (type_row->has_tag_termination) {
					/* The tag-terminating byte is part
					 * of the signature: if the data
					 * ends right after the tag name
					 * there is nothing to inspect, so
					 * the row does not match. Checking
					 * this first also keeps the read
					 * below inside the sniffed window.
					 */
					if (index_stream >= resource_length)
						continue;

					if (resource[index_stream] != '\x20' &&
					    resource[index_stream] != '\x3E')
						continue;
				}

				return g_strdup (type_row->sniffed_type);
			}
		} else {
			guint j;

			if (resource_length < type_row->pattern_length)
				continue;

			for (j = 0; j < type_row->pattern_length; j++) {
				if ((type_row->mask[j] & resource[j]) != type_row->pattern[j])
					break;
			}

			/* This means our comparison above matched completely */
			if (j == type_row->pattern_length)
				return g_strdup (type_row->sniffed_type);
		}
	}

	sniffed_type = sniff_images (sniffer, buffer);
	if (sniffed_type != NULL)
		return sniffed_type;

	sniffed_type = sniff_audio_video (sniffer, buffer);
	if (sniffed_type != NULL)
		return sniffed_type;

	for (i = 0; i < resource_length; i++) {
		if (byte_looks_binary[resource[i]])
			return g_strdup ("application/octet-stream");
	}

	return g_strdup ("text/plain");
}
/* MIMESNIFF: 7.2 Sniffing a mislabeled binary resource
 *
 * Decides whether data labelled as plain text really is text. At most the
 * first 512 bytes of @buffer are examined. Returns a newly allocated MIME
 * type string; never NULL.
 */
static char*
sniff_text_or_binary (SoupContentSniffer *sniffer, SoupBuffer *buffer)
{
	const guchar *data = (const guchar *)buffer->data;
	int data_length = MIN (512, buffer->length);
	int idx;

	/* 2. Detecting UTF-16BE, UTF-16LE BOMs means it's text/plain */
	if (data_length >= 2) {
		gboolean utf16_be_bom = (data[0] == 0xFE && data[1] == 0xFF);
		gboolean utf16_le_bom = (data[0] == 0xFF && data[1] == 0xFE);

		if (utf16_be_bom || utf16_le_bom)
			return g_strdup ("text/plain");
	}

	/* 3. UTF-8 BOM. */
	if (data_length >= 3 &&
	    data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF)
		return g_strdup ("text/plain");

	/* 4. Look to see if any of the first n bytes looks binary */
	for (idx = 0; idx < data_length; idx++) {
		if (!byte_looks_binary[data[idx]])
			continue;

		/* 5. Execute 7.1 Identifying a resource with an unknown MIME type.
		 * TODO: sniff-scriptable needs to be unset.
		 */
		return sniff_unknown (sniffer, buffer, TRUE);
	}

	/* Nothing binary-looking was found. */
	return g_strdup ("text/plain");
}
/* Advances *@pos past any run of tab (0x09), line feed (0x0A), carriage
 * return (0x0D) and space (0x20) bytes in @resource.
 *
 * @resource_length is the number of readable bytes in @resource; no byte
 * at or beyond that index is ever read.
 *
 * Returns TRUE when *@pos has reached the end of the data (so there is
 * nothing left for the caller to inspect), or FALSE when *@pos indexes a
 * readable, non-whitespace byte.
 */
static gboolean
skip_insignificant_space (const char *resource, int *pos, int resource_length)
{
	while (*pos < resource_length &&
	       ((resource[*pos] == '\x09') ||
		(resource[*pos] == '\x20') ||
		(resource[*pos] == '\x0A') ||
		(resource[*pos] == '\x0D')))
		*pos = *pos + 1;

	return *pos >= resource_length;
}
static char*
sniff_feed_or_html (SoupContentSniffer *sniffer, SoupBuffer *buffer)
{
const char *resource = (const char *)buffer->data;
int resource_length = MIN (512, buffer->length);
int pos = 0;
if (resource_length < 3)
goto text_html;
/* Skip a leading UTF-8 BOM */
if (resource[0] == 0xEF && resource[1] == 0xBB && resource[2] == 0xBF)
pos = 3;
look_for_tag:
if (pos > resource_length)
goto text_html;
if (skip_insignificant_space (resource, &pos, resource_length))
goto text_html;
if (resource[pos] != '<')
return g_strdup ("text/html");
pos++;
if ((pos + 2) > resource_length)
goto text_html;
/* Skip comments. */
if (g_str_has_prefix (resource + pos, "!--")) {
pos = pos + 3;
if ((pos + 2) > resource_length)
goto text_html;
while (!g_str_has_prefix (resource + pos, "-->")) {
pos++;
if ((pos + 2) > resource_length)
goto text_html;
}
pos = pos + 3;
goto look_for_tag;
}
if (pos > resource_length)
goto text_html;
if (resource[pos] == '!') {
do {
pos++;
if (pos > resource_length)
goto text_html;
} while (resource[pos] != '>');
pos++;
goto look_for_tag;
} else if (resource[pos] == '?') {
do {
pos++;
if ((pos + 1) > resource_length)
goto text_html;
} while (!g_str_has_prefix (resource + pos, "?>"));
pos = pos + 2;
goto look_for_tag;
}
if ((pos + 3) > resource_length)
goto text_html;
if (g_str_has_prefix (resource + pos, "rss"))
return g_strdup ("application/rss+xml");
if ((pos + 4) > resource_length)
goto text_html;
if (g_str_has_prefix (resource + pos, "feed"))
return g_strdup ("application/atom+xml");
if ((pos + 7) > resource_length)
goto text_html;
if (g_str_has_prefix (resource + pos, "rdf:RDF")) {
pos = pos + 7;
if (skip_insignificant_space (resource, &pos, resource_length))
goto text_html;
if ((pos + 32) > resource_length)
goto text_html;
if (g_str_has_prefix (resource + pos, "xmlns=\"http://purl.org/rss/1.0/\"")) {
pos = pos + 32;
if (skip_insignificant_space (resource, &pos, resource_length))
goto text_html;
if ((pos + 55) > resource_length)
goto text_html;
if (g_str_has_prefix (resource + pos, "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\""))
return g_strdup ("application/rss+xml");
}
if ((pos + 55) > resource_length)
goto text_html;
if (g_str_has_prefix (resource + pos, "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"")) {
pos = pos + 55;
if (skip_insignificant_space (resource, &pos, resource_length))
goto text_html;
if ((pos + 32) > resource_length)
goto text_html;
if (g_str_has_prefix (resource + pos, "xmlns=\"http://purl.org/rss/1.0/\""))
return g_strdup ("application/rss+xml");
}
}
text_html:
return g_strdup ("text/html");
}
/* Default implementation of the SoupContentSniffer sniff virtual method.
 *
 * Combines the Content-Type declared in @msg's response headers with the
 * data in @buffer to choose a MIME type, following the numbered steps
 * below. @params is passed through to
 * soup_message_headers_get_content_type().
 *
 * Returns a newly allocated MIME type string.
 */
static char *
soup_content_sniffer_real_sniff (SoupContentSniffer *sniffer, SoupMessage *msg,
				 SoupBuffer *buffer, GHashTable **params)
{
	const char *content_type;
	const char *x_content_type_options;
	char *sniffed_type = NULL;
	gboolean no_sniff = FALSE;

	content_type = soup_message_headers_get_content_type (msg->response_headers, params);

	/* MIMESNIFF: 7 Determining the sniffed MIME type of a resource. */

	x_content_type_options = soup_message_headers_get_one (msg->response_headers, "X-Content-Type-Options");
	if (!g_strcmp0 (x_content_type_options, "nosniff"))
		no_sniff = TRUE;

	/* 1. Unknown/undefined supplied type with sniff-scriptable = !nosniff. */
	if ((content_type == NULL) ||
	    !g_ascii_strcasecmp (content_type, "unknown/unknown") ||
	    !g_ascii_strcasecmp (content_type, "application/unknown") ||
	    !g_ascii_strcasecmp (content_type, "*/*"))
		return sniff_unknown (sniffer, buffer, !no_sniff);

	/* From here on content_type is known to be non-NULL. */

	/* 2. If nosniff is specified in X-Content-Type-Options use the supplied MIME type. */
	if (no_sniff)
		return g_strdup (content_type);

	/* 3. check-for-apache-bug. This is also the only place text/plain
	 * needs handling: every text/plain response returns from here.
	 */
	if (g_str_equal (content_type, "text/plain") ||
	    g_str_equal (content_type, "text/plain; charset=ISO-8859-1") ||
	    g_str_equal (content_type, "text/plain; charset=iso-8859-1") ||
	    g_str_equal (content_type, "text/plain; charset=UTF-8"))
		return sniff_text_or_binary (sniffer, buffer);

	/* 4. XML types sent by the server are always used. */
	if (g_str_has_suffix (content_type, "+xml") ||
	    !g_ascii_strcasecmp (content_type, "text/xml") ||
	    !g_ascii_strcasecmp (content_type, "application/xml"))
		return g_strdup (content_type);

	/* 5. Distinguish feed from HTML. */
	if (!g_ascii_strcasecmp (content_type, "text/html"))
		return sniff_feed_or_html (sniffer, buffer);

	/* 6. Image types: prefer a sniffed image type, fall back to the
	 * declared one.
	 */
	if (!g_ascii_strncasecmp (content_type, "image/", 6)) {
		sniffed_type = sniff_images (sniffer, buffer);
		if (sniffed_type != NULL)
			return sniffed_type;
		return g_strdup (content_type);
	}

	/* 7. Audio and video types: same policy as for images. */
	if (!g_ascii_strncasecmp (content_type, "audio/", 6) ||
	    !g_ascii_strncasecmp (content_type, "video/", 6) ||
	    !g_ascii_strcasecmp (content_type, "application/ogg")) {
		sniffed_type = sniff_audio_video (sniffer, buffer);
		if (sniffed_type != NULL)
			return sniffed_type;
		return g_strdup (content_type);
	}

	/* Anything else: trust the declared type. */
	return g_strdup (content_type);
}
/* Default implementation of the get_buffer_size virtual method: the
 * built-in sniffing routines look at no more than 512 bytes.
 */
static gsize
soup_content_sniffer_real_get_buffer_size (SoupContentSniffer *sniffer)
{
	const gsize sniff_window = 512;

	return sniff_window;
}
/* "got-headers" handler: stores in @msg's private data the number of
 * bytes that @sniffer reports it needs for sniffing.
 */
static void
soup_content_sniffer_got_headers_cb (SoupMessage *msg, SoupContentSniffer *sniffer)
{
	SoupMessagePrivate *msg_priv = SOUP_MESSAGE_GET_PRIVATE (msg);
	gsize wanted_bytes = soup_content_sniffer_get_buffer_size (sniffer);

	msg_priv->bytes_for_sniffing = wanted_bytes;
}
/* SoupSessionFeature request_queued implementation.
 *
 * Stores a new reference to this sniffer in @msg's private data and
 * connects soup_content_sniffer_got_headers_cb() to @msg's "got-headers"
 * signal, with @feature as user data.
 */
static void
soup_content_sniffer_request_queued (SoupSessionFeature *feature,
				     SoupSession *session,
				     SoupMessage *msg)
{
	SoupMessagePrivate *msg_priv = SOUP_MESSAGE_GET_PRIVATE (msg);
	SoupContentSniffer *self = SOUP_CONTENT_SNIFFER (feature);

	msg_priv->sniffer = g_object_ref (self);

	g_signal_connect (msg, "got-headers",
			  G_CALLBACK (soup_content_sniffer_got_headers_cb),
			  feature);
}
/* SoupSessionFeature request_unqueued implementation.
 *
 * Releases the sniffer reference held in @msg's private data, clears the
 * pointer, and disconnects the "got-headers" handler that was installed
 * with @feature as user data.
 */
static void
soup_content_sniffer_request_unqueued (SoupSessionFeature *feature,
				       SoupSession *session,
				       SoupMessage *msg)
{
	SoupMessagePrivate *msg_priv = SOUP_MESSAGE_GET_PRIVATE (msg);

	g_object_unref (msg_priv->sniffer);
	msg_priv->sniffer = NULL;

	g_signal_handlers_disconnect_by_func (msg,
					      soup_content_sniffer_got_headers_cb,
					      feature);
}
/* Class initializer: installs the default implementations of the
 * get_buffer_size and sniff virtual methods.
 */
static void
soup_content_sniffer_class_init (SoupContentSnifferClass *content_sniffer_class)
{
	content_sniffer_class->get_buffer_size = soup_content_sniffer_real_get_buffer_size;
	content_sniffer_class->sniff = soup_content_sniffer_real_sniff;
}
/* Interface initializer for SoupSessionFeature: hooks this type into the
 * request_queued / request_unqueued notifications.
 */
static void
soup_content_sniffer_session_feature_init (SoupSessionFeatureInterface *feature_interface,
					   gpointer interface_data)
{
	feature_interface->request_unqueued = soup_content_sniffer_request_unqueued;
	feature_interface->request_queued = soup_content_sniffer_request_queued;
}
/**
 * soup_content_sniffer_new:
 *
 * Creates a new #SoupContentSniffer.
 *
 * Returns: a new #SoupContentSniffer
 *
 * Since: 2.28
 **/
SoupContentSniffer *
soup_content_sniffer_new (void)
{
	SoupContentSniffer *new_sniffer;

	/* No construct-time properties are set. */
	new_sniffer = g_object_new (SOUP_TYPE_CONTENT_SNIFFER, NULL);

	return new_sniffer;
}
/**
 * soup_content_sniffer_sniff:
 * @sniffer: a #SoupContentSniffer
 * @msg: the message to sniff
 * @buffer: a buffer containing the start of @msg's response body
 * @params: (element-type utf8 utf8) (out) (transfer full) (allow-none): return
 *   location for Content-Type parameters (eg, "charset"), or %NULL
 *
 * Sniffs @buffer to determine its Content-Type. The result may also
 * be influenced by the Content-Type declared in @msg's response
 * headers.
 *
 * The work is delegated to the sniff virtual method of @sniffer's class.
 *
 * Return value: the sniffed Content-Type of @buffer; this will never be %NULL,
 *   but may be "application/octet-stream".
 *
 * Since: 2.28
 */
char *
soup_content_sniffer_sniff (SoupContentSniffer *sniffer,
			    SoupMessage *msg, SoupBuffer *buffer,
			    GHashTable **params)
{
	SoupContentSnifferClass *sniffer_class;

	g_return_val_if_fail (SOUP_IS_CONTENT_SNIFFER (sniffer), NULL);
	g_return_val_if_fail (SOUP_IS_MESSAGE (msg), NULL);
	g_return_val_if_fail (buffer != NULL, NULL);

	sniffer_class = SOUP_CONTENT_SNIFFER_GET_CLASS (sniffer);

	return sniffer_class->sniff (sniffer, msg, buffer, params);
}
/**
 * soup_content_sniffer_get_buffer_size:
 * @sniffer: a #SoupContentSniffer
 *
 * Gets the number of bytes @sniffer needs in order to properly sniff
 * a buffer.
 *
 * The value comes from the get_buffer_size virtual method of @sniffer's
 * class.
 *
 * Return value: the number of bytes to sniff
 *
 * Since: 2.28
 */
gsize
soup_content_sniffer_get_buffer_size (SoupContentSniffer *sniffer)
{
	SoupContentSnifferClass *sniffer_class;

	g_return_val_if_fail (SOUP_IS_CONTENT_SNIFFER (sniffer), 0);

	sniffer_class = SOUP_CONTENT_SNIFFER_GET_CLASS (sniffer);

	return sniffer_class->get_buffer_size (sniffer);
}