|
Packit Service |
ca3877 |
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
|
|
Packit Service |
ca3877 |
/*
|
|
Packit Service |
ca3877 |
* soup-content-sniffer.c
|
|
Packit Service |
ca3877 |
*
|
|
Packit Service |
ca3877 |
* Copyright (C) 2009, 2013 Gustavo Noronha Silva.
|
|
Packit Service |
ca3877 |
*
|
|
Packit Service |
ca3877 |
* This code implements the following specification:
|
|
Packit Service |
ca3877 |
*
|
|
Packit Service |
ca3877 |
* http://mimesniff.spec.whatwg.org/ as of 11 June 2013
|
|
Packit Service |
ca3877 |
*/
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
#ifdef HAVE_CONFIG_H
|
|
Packit Service |
ca3877 |
#include <config.h>
|
|
Packit Service |
ca3877 |
#endif
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
#include <string.h>
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
#include "soup-content-sniffer.h"
|
|
Packit Service |
ca3877 |
#include "soup.h"
|
|
Packit Service |
ca3877 |
#include "soup-content-processor.h"
|
|
Packit Service |
ca3877 |
#include "soup-content-sniffer-stream.h"
|
|
Packit Service |
ca3877 |
#include "soup-message-private.h"
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/**
|
|
Packit Service |
ca3877 |
* SECTION:soup-content-sniffer
|
|
Packit Service |
ca3877 |
* @short_description: Content sniffing for SoupSession
|
|
Packit Service |
ca3877 |
*
|
|
Packit Service |
ca3877 |
* A #SoupContentSniffer tries to detect the actual content type of
|
|
Packit Service |
ca3877 |
* the files that are being downloaded by looking at some of the data
|
|
Packit Service |
ca3877 |
* before the #SoupMessage emits its #SoupMessage::got-headers signal.
|
|
Packit Service |
ca3877 |
* #SoupContentSniffer implements #SoupSessionFeature, so you can add
|
|
Packit Service |
ca3877 |
* content sniffing to a session with soup_session_add_feature() or
|
|
Packit Service |
ca3877 |
* soup_session_add_feature_by_type().
|
|
Packit Service |
ca3877 |
*
|
|
Packit Service |
ca3877 |
* Since: 2.28
|
|
Packit Service |
ca3877 |
**/
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
static void soup_content_sniffer_session_feature_init (SoupSessionFeatureInterface *feature_interface, gpointer interface_data);
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
static SoupContentProcessorInterface *soup_content_sniffer_default_content_processor_interface;
|
|
Packit Service |
ca3877 |
static void soup_content_sniffer_content_processor_init (SoupContentProcessorInterface *interface, gpointer interface_data);
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
G_DEFINE_TYPE_WITH_CODE (SoupContentSniffer, soup_content_sniffer, G_TYPE_OBJECT,
|
|
Packit Service |
ca3877 |
G_IMPLEMENT_INTERFACE (SOUP_TYPE_SESSION_FEATURE,
|
|
Packit Service |
ca3877 |
soup_content_sniffer_session_feature_init)
|
|
Packit Service |
ca3877 |
G_IMPLEMENT_INTERFACE (SOUP_TYPE_CONTENT_PROCESSOR,
|
|
Packit Service |
ca3877 |
soup_content_sniffer_content_processor_init))
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
static GInputStream *
|
|
Packit Service |
ca3877 |
soup_content_sniffer_content_processor_wrap_input (SoupContentProcessor *processor,
|
|
Packit Service |
ca3877 |
GInputStream *base_stream,
|
|
Packit Service |
ca3877 |
SoupMessage *msg,
|
|
Packit Service |
ca3877 |
GError **error)
|
|
Packit Service |
ca3877 |
{
|
|
Packit Service |
ca3877 |
return g_object_new (SOUP_TYPE_CONTENT_SNIFFER_STREAM,
|
|
Packit Service |
ca3877 |
"base-stream", base_stream,
|
|
Packit Service |
ca3877 |
"message", msg,
|
|
Packit Service |
ca3877 |
"sniffer", SOUP_CONTENT_SNIFFER (processor),
|
|
Packit Service |
ca3877 |
NULL);
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
static void
|
|
Packit Service |
ca3877 |
soup_content_sniffer_content_processor_init (SoupContentProcessorInterface *processor_interface,
|
|
Packit Service |
ca3877 |
gpointer interface_data)
|
|
Packit Service |
ca3877 |
{
|
|
Packit Service |
ca3877 |
soup_content_sniffer_default_content_processor_interface =
|
|
Packit Service |
ca3877 |
g_type_default_interface_peek (SOUP_TYPE_CONTENT_PROCESSOR);
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
processor_interface->processing_stage = SOUP_STAGE_BODY_DATA;
|
|
Packit Service |
ca3877 |
processor_interface->wrap_input = soup_content_sniffer_content_processor_wrap_input;
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
static void
|
|
Packit Service |
ca3877 |
soup_content_sniffer_init (SoupContentSniffer *content_sniffer)
|
|
Packit Service |
ca3877 |
{
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
typedef struct {
|
|
Packit Service |
ca3877 |
const guchar *mask;
|
|
Packit Service |
ca3877 |
const guchar *pattern;
|
|
Packit Service |
ca3877 |
guint pattern_length;
|
|
Packit Service |
ca3877 |
const char *sniffed_type;
|
|
Packit Service |
ca3877 |
} SoupContentSnifferMediaPattern;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
static char*
|
|
Packit Service |
ca3877 |
sniff_media (SoupContentSniffer *sniffer,
|
|
Packit Service |
ca3877 |
SoupBuffer *buffer,
|
|
Packit Service |
ca3877 |
SoupContentSnifferMediaPattern table[],
|
|
Packit Service |
ca3877 |
int table_length)
|
|
Packit Service |
ca3877 |
{
|
|
Packit Service |
ca3877 |
const guchar *resource = (const guchar *)buffer->data;
|
|
Packit Service |
ca3877 |
guint resource_length = MIN (512, buffer->length);
|
|
Packit Service |
ca3877 |
int i;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
for (i = 0; i < table_length; i++) {
|
|
Packit Service |
ca3877 |
SoupContentSnifferMediaPattern *type_row = &(table[i]);
|
|
Packit Service |
ca3877 |
guint j;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (resource_length < type_row->pattern_length)
|
|
Packit Service |
ca3877 |
continue;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
for (j = 0; j < type_row->pattern_length; j++) {
|
|
Packit Service |
ca3877 |
if ((type_row->mask[j] & resource[j]) != type_row->pattern[j])
|
|
Packit Service |
ca3877 |
break;
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* This means our comparison above matched completely */
|
|
Packit Service |
ca3877 |
if (j == type_row->pattern_length)
|
|
Packit Service |
ca3877 |
return g_strdup (type_row->sniffed_type);
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
return NULL;
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* This table is based on the MIMESNIFF spec;
|
|
Packit Service |
ca3877 |
* See 6.1 Matching an image type pattern
|
|
Packit Service |
ca3877 |
*/
|
|
Packit Service |
ca3877 |
static SoupContentSnifferMediaPattern image_types_table[] = {
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* Windows icon signature. */
|
|
Packit Service |
ca3877 |
{ (const guchar *)"\xFF\xFF\xFF\xFF",
|
|
Packit Service |
ca3877 |
(const guchar *)"\x00\x00\x01\x00",
|
|
Packit Service |
ca3877 |
4,
|
|
Packit Service |
ca3877 |
"image/x-icon" },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* Windows cursor signature. */
|
|
Packit Service |
ca3877 |
{ (const guchar *)"\xFF\xFF\xFF\xFF",
|
|
Packit Service |
ca3877 |
(const guchar *)"\x00\x00\x02\x00",
|
|
Packit Service |
ca3877 |
4,
|
|
Packit Service |
ca3877 |
"image/x-icon" },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* BMP. */
|
|
Packit Service |
ca3877 |
{ (const guchar *)"\xFF\xFF",
|
|
Packit Service |
ca3877 |
(const guchar *)"BM",
|
|
Packit Service |
ca3877 |
2,
|
|
Packit Service |
ca3877 |
"image/bmp" },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* GIFs. */
|
|
Packit Service |
ca3877 |
{ (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF",
|
|
Packit Service |
ca3877 |
(const guchar *)"GIF87a",
|
|
Packit Service |
ca3877 |
6,
|
|
Packit Service |
ca3877 |
"image/gif" },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF",
|
|
Packit Service |
ca3877 |
(const guchar *)"GIF89a",
|
|
Packit Service |
ca3877 |
6,
|
|
Packit Service |
ca3877 |
"image/gif" },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* WEBP. */
|
|
Packit Service |
ca3877 |
{ (const guchar *)"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF",
|
|
Packit Service |
ca3877 |
(const guchar *)"RIFF\x00\x00\x00\x00WEBPVP",
|
|
Packit Service |
ca3877 |
14,
|
|
Packit Service |
ca3877 |
"image/webp" },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* PNG. */
|
|
Packit Service |
ca3877 |
{ (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
|
|
Packit Service |
ca3877 |
(const guchar *)"\x89PNG\x0D\x0A\x1A\x0A",
|
|
Packit Service |
ca3877 |
8,
|
|
Packit Service |
ca3877 |
"image/png" },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* JPEG. */
|
|
Packit Service |
ca3877 |
{ (const guchar *)"\xFF\xFF\xFF",
|
|
Packit Service |
ca3877 |
(const guchar *)"\xFF\xD8\xFF",
|
|
Packit Service |
ca3877 |
3,
|
|
Packit Service |
ca3877 |
"image/jpeg" },
|
|
Packit Service |
ca3877 |
};
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
static char*
|
|
Packit Service |
ca3877 |
sniff_images (SoupContentSniffer *sniffer, SoupBuffer *buffer)
|
|
Packit Service |
ca3877 |
{
|
|
Packit Service |
ca3877 |
return sniff_media (sniffer,
|
|
Packit Service |
ca3877 |
buffer,
|
|
Packit Service |
ca3877 |
image_types_table,
|
|
Packit Service |
ca3877 |
G_N_ELEMENTS (image_types_table));
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* This table is based on the MIMESNIFF spec;
|
|
Packit Service |
ca3877 |
* See 6.2 Matching an audio or video type pattern
|
|
Packit Service |
ca3877 |
*/
|
|
Packit Service |
ca3877 |
static SoupContentSnifferMediaPattern audio_video_types_table[] = {
|
|
Packit Service |
ca3877 |
{ (const guchar *)"\xFF\xFF\xFF\xFF",
|
|
Packit Service |
ca3877 |
(const guchar *)"\x1A\x45\xDF\xA3",
|
|
Packit Service |
ca3877 |
4,
|
|
Packit Service |
ca3877 |
"video/webm" },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ (const guchar *)"\xFF\xFF\xFF\xFF",
|
|
Packit Service |
ca3877 |
(const guchar *)".snd",
|
|
Packit Service |
ca3877 |
4,
|
|
Packit Service |
ca3877 |
"audio/basic" },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ (const guchar *)"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
|
|
Packit Service |
ca3877 |
(const guchar *)"FORM\0\0\0\0AIFF",
|
|
Packit Service |
ca3877 |
12,
|
|
Packit Service |
ca3877 |
"audio/aiff" },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ (const guchar *)"\xFF\xFF\xFF",
|
|
Packit Service |
ca3877 |
(const guchar *)"ID3",
|
|
Packit Service |
ca3877 |
3,
|
|
Packit Service |
ca3877 |
"audio/mpeg" },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ (const guchar *)"\xFF\xFF\xFF\xFF\xFF",
|
|
Packit Service |
ca3877 |
(const guchar *)"OggS\0",
|
|
Packit Service |
ca3877 |
5,
|
|
Packit Service |
ca3877 |
"application/ogg" },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
|
|
Packit Service |
ca3877 |
(const guchar *)"MThd\x00\x00\x00\x06",
|
|
Packit Service |
ca3877 |
8,
|
|
Packit Service |
ca3877 |
"audio/midi" },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ (const guchar *)"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
|
|
Packit Service |
ca3877 |
(const guchar *)"RIFF\x00\x00\x00\x00AVI ",
|
|
Packit Service |
ca3877 |
12,
|
|
Packit Service |
ca3877 |
"video/avi" },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ (const guchar *)"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
|
|
Packit Service |
ca3877 |
(const guchar *)"RIFF\x00\x00\x00\x00WAVE",
|
|
Packit Service |
ca3877 |
12,
|
|
Packit Service |
ca3877 |
"audio/wave" },
|
|
Packit Service |
ca3877 |
};
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
static gboolean
|
|
Packit Service |
ca3877 |
sniff_mp4 (SoupContentSniffer *sniffer, SoupBuffer *buffer)
|
|
Packit Service |
ca3877 |
{
|
|
Packit Service |
ca3877 |
const char *resource = (const char *)buffer->data;
|
|
Packit Service |
ca3877 |
guint resource_length = MIN (512, buffer->length);
|
|
Packit Service |
ca3877 |
guint32 box_size = *((guint32*)resource);
|
|
Packit Service |
ca3877 |
guint i;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
|
Packit Service |
ca3877 |
box_size = ((box_size >> 24) |
|
|
Packit Service |
ca3877 |
((box_size << 8) & 0x00FF0000) |
|
|
Packit Service |
ca3877 |
((box_size >> 8) & 0x0000FF00) |
|
|
Packit Service |
ca3877 |
(box_size << 24));
|
|
Packit Service |
ca3877 |
#endif
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (resource_length < 12 || resource_length < box_size || box_size % 4 != 0)
|
|
Packit Service |
ca3877 |
return FALSE;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (!g_str_has_prefix (resource + 4, "ftyp"))
|
|
Packit Service |
ca3877 |
return FALSE;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (!g_str_has_prefix (resource + 8, "mp4"))
|
|
Packit Service |
ca3877 |
return FALSE;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
for (i = 16; i < box_size && i < resource_length; i = i + 4) {
|
|
Packit Service |
ca3877 |
if (g_str_has_prefix (resource + i, "mp4"))
|
|
Packit Service |
ca3877 |
return TRUE;
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
return FALSE;
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
static char*
|
|
Packit Service |
ca3877 |
sniff_audio_video (SoupContentSniffer *sniffer, SoupBuffer *buffer)
|
|
Packit Service |
ca3877 |
{
|
|
Packit Service |
ca3877 |
char *sniffed_type;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
sniffed_type = sniff_media (sniffer,
|
|
Packit Service |
ca3877 |
buffer,
|
|
Packit Service |
ca3877 |
audio_video_types_table,
|
|
Packit Service |
ca3877 |
G_N_ELEMENTS (audio_video_types_table));
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (sniffed_type != NULL)
|
|
Packit Service |
ca3877 |
return sniffed_type;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (sniff_mp4 (sniffer, buffer))
|
|
Packit Service |
ca3877 |
return g_strdup ("video/mp4");
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
return NULL;
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* This table is based on the MIMESNIFF spec;
|
|
Packit Service |
ca3877 |
* See 7.1 Identifying a resource with an unknown MIME type
|
|
Packit Service |
ca3877 |
*/
|
|
Packit Service |
ca3877 |
typedef struct {
|
|
Packit Service |
ca3877 |
/* @has_ws is TRUE if @pattern contains "generic" whitespace */
|
|
Packit Service |
ca3877 |
gboolean has_ws;
|
|
Packit Service |
ca3877 |
/* @has_tag_termination is TRUE if we should check for a tag-terminating
|
|
Packit Service |
ca3877 |
* byte (0x20 " " or 0x3E ">") after the pattern match.
|
|
Packit Service |
ca3877 |
*/
|
|
Packit Service |
ca3877 |
gboolean has_tag_termination;
|
|
Packit Service |
ca3877 |
const guchar *mask;
|
|
Packit Service |
ca3877 |
const guchar *pattern;
|
|
Packit Service |
ca3877 |
guint pattern_length;
|
|
Packit Service |
ca3877 |
const char *sniffed_type;
|
|
Packit Service |
ca3877 |
gboolean scriptable;
|
|
Packit Service |
ca3877 |
} SoupContentSnifferPattern;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* When has_ws is TRUE, spaces in the pattern will indicate where insignificant space
|
|
Packit Service |
ca3877 |
* is allowed. Those spaces are marked with \x00 on the mask.
|
|
Packit Service |
ca3877 |
*/
|
|
Packit Service |
ca3877 |
static SoupContentSnifferPattern types_table[] = {
|
|
Packit Service |
ca3877 |
/* Scriptable types. */
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ TRUE, TRUE,
|
|
Packit Service |
ca3877 |
(const guchar *)"\x00\xFF\xFF\xDF\xDF\xDF\xDF\xDF\xDF\xDF\xFF\xDF\xDF\xDF\xDF",
|
|
Packit Service |
ca3877 |
(const guchar *)"
|
|
Packit Service |
ca3877 |
14,
|
|
Packit Service |
ca3877 |
"text/html",
|
|
Packit Service |
ca3877 |
TRUE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ TRUE, TRUE,
|
|
Packit Service |
ca3877 |
(const guchar *)"\x00\xFF\xDF\xDF\xDF\xDF",
|
|
Packit Service |
ca3877 |
(const guchar *)"
|
|
Packit Service |
ca3877 |
5,
|
|
Packit Service |
ca3877 |
"text/html",
|
|
Packit Service |
ca3877 |
TRUE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ TRUE, TRUE,
|
|
Packit Service |
ca3877 |
(const guchar *)"\x00\xFF\xDF\xDF\xDF\xDF",
|
|
Packit Service |
ca3877 |
(const guchar *)"
|
|
Packit Service |
ca3877 |
5,
|
|
Packit Service |
ca3877 |
"text/html",
|
|
Packit Service |
ca3877 |
TRUE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ TRUE, TRUE,
|
|
Packit Service |
ca3877 |
(const guchar *)"\x00\xFF\xDF\xDF\xDF\xDF\xDF\xDF",
|
|
Packit Service |
ca3877 |
(const guchar *)"
|
|
Packit Service |
ca3877 |
7,
|
|
Packit Service |
ca3877 |
"text/html",
|
|
Packit Service |
ca3877 |
TRUE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ TRUE, TRUE,
|
|
Packit Service |
ca3877 |
(const guchar *)"\x00\xFF\xDF\xDF\xDF\xDF\xDF\xDF",
|
|
Packit Service |
ca3877 |
(const guchar *)"
|
|
Packit Service |
ca3877 |
7,
|
|
Packit Service |
ca3877 |
"text/html",
|
|
Packit Service |
ca3877 |
TRUE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ TRUE, TRUE,
|
|
Packit Service |
ca3877 |
(const guchar *)"\x00\xFF\xDF\xFF",
|
|
Packit Service |
ca3877 |
(const guchar *)"
|
|
Packit Service |
ca3877 |
3,
|
|
Packit Service |
ca3877 |
"text/html",
|
|
Packit Service |
ca3877 |
TRUE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ TRUE, TRUE,
|
|
Packit Service |
ca3877 |
(const guchar *)"\x00\xFF\xDF\xDF\xDF",
|
|
Packit Service |
ca3877 |
(const guchar *)"
|
|
Packit Service |
ca3877 |
4,
|
|
Packit Service |
ca3877 |
"text/html",
|
|
Packit Service |
ca3877 |
TRUE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ TRUE, TRUE,
|
|
Packit Service |
ca3877 |
(const guchar *)"\x00\xFF\xDF\xDF\xDF\xDF",
|
|
Packit Service |
ca3877 |
(const guchar *)"
|
|
Packit Service |
ca3877 |
5,
|
|
Packit Service |
ca3877 |
"text/html",
|
|
Packit Service |
ca3877 |
TRUE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ TRUE, TRUE,
|
|
Packit Service |
ca3877 |
(const guchar *)"\x00\xFF\xDF\xDF\xDF\xDF\xDF",
|
|
Packit Service |
ca3877 |
(const guchar *)"
|
|
Packit Service |
ca3877 |
6,
|
|
Packit Service |
ca3877 |
"text/html",
|
|
Packit Service |
ca3877 |
TRUE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ TRUE, TRUE,
|
|
Packit Service |
ca3877 |
(const guchar *)"\x00\xFF\xDF",
|
|
Packit Service |
ca3877 |
(const guchar *)"
|
|
Packit Service |
ca3877 |
2,
|
|
Packit Service |
ca3877 |
"text/html",
|
|
Packit Service |
ca3877 |
TRUE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ TRUE, TRUE,
|
|
Packit Service |
ca3877 |
(const guchar *)"\x00\xFF\xDF\xDF\xDF\xDF\xDF",
|
|
Packit Service |
ca3877 |
(const guchar *)"
|
|
Packit Service |
ca3877 |
6,
|
|
Packit Service |
ca3877 |
"text/html",
|
|
Packit Service |
ca3877 |
TRUE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ TRUE, TRUE,
|
|
Packit Service |
ca3877 |
(const guchar *)"\x00\xFF\xDF\xDF\xDF\xDF\xDF",
|
|
Packit Service |
ca3877 |
(const guchar *)"
|
|
Packit Service |
ca3877 |
6,
|
|
Packit Service |
ca3877 |
"text/html",
|
|
Packit Service |
ca3877 |
TRUE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ TRUE, TRUE,
|
|
Packit Service |
ca3877 |
(const guchar *)"\x00\xFF\xDF",
|
|
Packit Service |
ca3877 |
(const guchar *)"
|
|
Packit Service |
ca3877 |
2,
|
|
Packit Service |
ca3877 |
"text/html",
|
|
Packit Service |
ca3877 |
TRUE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ TRUE, TRUE,
|
|
Packit Service |
ca3877 |
(const guchar *)"\x00\xFF\xDF\xDF\xDF\xDF",
|
|
Packit Service |
ca3877 |
(const guchar *)"
|
|
Packit Service |
ca3877 |
5,
|
|
Packit Service |
ca3877 |
"text/html",
|
|
Packit Service |
ca3877 |
TRUE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ TRUE, TRUE,
|
|
Packit Service |
ca3877 |
(const guchar *)"\x00\xFF\xDF\xDF",
|
|
Packit Service |
ca3877 |
(const guchar *)"
|
|
Packit Service |
ca3877 |
3,
|
|
Packit Service |
ca3877 |
"text/html",
|
|
Packit Service |
ca3877 |
TRUE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ TRUE, TRUE,
|
|
Packit Service |
ca3877 |
(const guchar *)"\x00\xFF\xDF",
|
|
Packit Service |
ca3877 |
(const guchar *)"
|
|
Packit Service |
ca3877 |
2,
|
|
Packit Service |
ca3877 |
"text/html",
|
|
Packit Service |
ca3877 |
TRUE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ TRUE, TRUE,
|
|
Packit Service |
ca3877 |
(const guchar *)"\x00\xFF\xFF\xFF\xFF",
|
|
Packit Service |
ca3877 |
(const guchar *)"
|
|
Packit Service |
ca3877 |
4,
|
|
Packit Service |
ca3877 |
"text/html",
|
|
Packit Service |
ca3877 |
TRUE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ TRUE, FALSE,
|
|
Packit Service |
ca3877 |
(const guchar *)"\x00\xFF\xFF\xFF\xFF\xFF",
|
|
Packit Service |
ca3877 |
(const guchar *)"
|
|
Packit Service |
ca3877 |
5,
|
|
Packit Service |
ca3877 |
"text/xml",
|
|
Packit Service |
ca3877 |
TRUE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ FALSE, FALSE,
|
|
Packit Service |
ca3877 |
(const guchar *)"\xFF\xFF\xFF\xFF\xFF",
|
|
Packit Service |
ca3877 |
(const guchar *)"%PDF-",
|
|
Packit Service |
ca3877 |
5,
|
|
Packit Service |
ca3877 |
"application/pdf",
|
|
Packit Service |
ca3877 |
TRUE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* Non-scriptable types. */
|
|
Packit Service |
ca3877 |
{ FALSE, FALSE,
|
|
Packit Service |
ca3877 |
(const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
|
|
Packit Service |
ca3877 |
(const guchar *)"%!PS-Adobe-",
|
|
Packit Service |
ca3877 |
11,
|
|
Packit Service |
ca3877 |
"application/postscript",
|
|
Packit Service |
ca3877 |
FALSE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ FALSE, FALSE, /* UTF-16BE BOM */
|
|
Packit Service |
ca3877 |
(const guchar *)"\xFF\xFF\x00\x00",
|
|
Packit Service |
ca3877 |
(const guchar *)"\xFE\xFF\x00\x00",
|
|
Packit Service |
ca3877 |
4,
|
|
Packit Service |
ca3877 |
"text/plain",
|
|
Packit Service |
ca3877 |
FALSE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ FALSE, FALSE, /* UTF-16LE BOM */
|
|
Packit Service |
ca3877 |
(const guchar *)"\xFF\xFF\x00\x00",
|
|
Packit Service |
ca3877 |
(const guchar *)"\xFF\xFE\x00\x00",
|
|
Packit Service |
ca3877 |
4,
|
|
Packit Service |
ca3877 |
"text/plain",
|
|
Packit Service |
ca3877 |
FALSE },
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
{ FALSE, FALSE, /* UTF-8 BOM */
|
|
Packit Service |
ca3877 |
(const guchar *)"\xFF\xFF\xFF\x00",
|
|
Packit Service |
ca3877 |
(const guchar *)"\xEF\xBB\xBF\x00",
|
|
Packit Service |
ca3877 |
4,
|
|
Packit Service |
ca3877 |
"text/plain",
|
|
Packit Service |
ca3877 |
FALSE },
|
|
Packit Service |
ca3877 |
};
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* Lookup table indexed by byte value: non-zero when that byte looks like
 * it might be part of binary content.
 * Source: HTML5 spec; borrowed from the Chromium mime sniffer code,
 * which is BSD-licensed
 *
 * Only 0x00-0x08, 0x0B, 0x0E-0x1A and 0x1C-0x1F are flagged.
 */
static char byte_looks_binary[] = {
	/* 0x00 - 0x0F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1,
	/* 0x10 - 0x1F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
	/* 0x20 - 0x2F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x30 - 0x3F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x40 - 0x4F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50 - 0x5F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x60 - 0x6F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70 - 0x7F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80 - 0x8F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 - 0xAF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 - 0xBF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 - 0xCF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 - 0xDF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 - 0xEF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 - 0xFF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* HTML5: 2.7.4 Content-Type sniffing: unknown type */
|
|
Packit Service |
ca3877 |
static char*
|
|
Packit Service |
ca3877 |
sniff_unknown (SoupContentSniffer *sniffer, SoupBuffer *buffer,
|
|
Packit Service |
ca3877 |
gboolean sniff_scriptable)
|
|
Packit Service |
ca3877 |
{
|
|
Packit Service |
ca3877 |
char *sniffed_type = NULL;
|
|
Packit Service |
ca3877 |
const guchar *resource = (const guchar *)buffer->data;
|
|
Packit Service |
ca3877 |
guint resource_length = MIN (512, buffer->length);
|
|
Packit Service |
ca3877 |
guint i;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
for (i = 0; i < G_N_ELEMENTS (types_table); i++) {
|
|
Packit Service |
ca3877 |
SoupContentSnifferPattern *type_row = &(types_table[i]);
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (!sniff_scriptable && type_row->scriptable)
|
|
Packit Service |
ca3877 |
continue;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (type_row->has_ws) {
|
|
Packit Service |
ca3877 |
guint index_stream = 0;
|
|
Packit Service |
ca3877 |
guint index_pattern = 0;
|
|
Packit Service |
ca3877 |
gboolean skip_row = FALSE;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
while ((index_stream < resource_length) &&
|
|
Packit Service |
ca3877 |
(index_pattern <= type_row->pattern_length)) {
|
|
Packit Service |
ca3877 |
/* Skip insignificant white space ("WS" in the spec) */
|
|
Packit Service |
ca3877 |
if (type_row->pattern[index_pattern] == ' ') {
|
|
Packit Service |
ca3877 |
if (resource[index_stream] == '\x09' ||
|
|
Packit Service |
ca3877 |
resource[index_stream] == '\x0a' ||
|
|
Packit Service |
ca3877 |
resource[index_stream] == '\x0c' ||
|
|
Packit Service |
ca3877 |
resource[index_stream] == '\x0d' ||
|
|
Packit Service |
ca3877 |
resource[index_stream] == '\x20')
|
|
Packit Service |
ca3877 |
index_stream++;
|
|
Packit Service |
ca3877 |
else
|
|
Packit Service |
ca3877 |
index_pattern++;
|
|
Packit Service |
ca3877 |
} else {
|
|
Packit Service |
ca3877 |
if ((type_row->mask[index_pattern] & resource[index_stream]) != type_row->pattern[index_pattern]) {
|
|
Packit Service |
ca3877 |
skip_row = TRUE;
|
|
Packit Service |
ca3877 |
break;
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
index_pattern++;
|
|
Packit Service |
ca3877 |
index_stream++;
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (skip_row)
|
|
Packit Service |
ca3877 |
continue;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (index_pattern > type_row->pattern_length) {
|
|
Packit Service |
ca3877 |
if (type_row->has_tag_termination &&
|
|
Packit Service |
ca3877 |
resource[index_stream] != '\x20' &&
|
|
Packit Service |
ca3877 |
resource[index_stream] != '\x3E')
|
|
Packit Service |
ca3877 |
continue;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
return g_strdup (type_row->sniffed_type);
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
} else {
|
|
Packit Service |
ca3877 |
guint j;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (resource_length < type_row->pattern_length)
|
|
Packit Service |
ca3877 |
continue;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
for (j = 0; j < type_row->pattern_length; j++) {
|
|
Packit Service |
ca3877 |
if ((type_row->mask[j] & resource[j]) != type_row->pattern[j])
|
|
Packit Service |
ca3877 |
break;
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* This means our comparison above matched completely */
|
|
Packit Service |
ca3877 |
if (j == type_row->pattern_length)
|
|
Packit Service |
ca3877 |
return g_strdup (type_row->sniffed_type);
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
sniffed_type = sniff_images (sniffer, buffer);
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (sniffed_type != NULL)
|
|
Packit Service |
ca3877 |
return sniffed_type;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
sniffed_type = sniff_audio_video (sniffer, buffer);
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (sniffed_type != NULL)
|
|
Packit Service |
ca3877 |
return sniffed_type;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
for (i = 0; i < resource_length; i++) {
|
|
Packit Service |
ca3877 |
if (byte_looks_binary[resource[i]])
|
|
Packit Service |
ca3877 |
return g_strdup ("application/octet-stream");
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
return g_strdup ("text/plain");
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* MIMESNIFF: 7.2 Sniffing a mislabeled binary resource */
|
|
Packit Service |
ca3877 |
static char*
|
|
Packit Service |
ca3877 |
sniff_text_or_binary (SoupContentSniffer *sniffer, SoupBuffer *buffer)
|
|
Packit Service |
ca3877 |
{
|
|
Packit Service |
ca3877 |
const guchar *resource = (const guchar *)buffer->data;
|
|
Packit Service |
ca3877 |
int resource_length = MIN (512, buffer->length);
|
|
Packit Service |
ca3877 |
gboolean looks_binary = FALSE;
|
|
Packit Service |
ca3877 |
int i;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* 2. Detecting UTF-16BE, UTF-16LE BOMs means it's text/plain */
|
|
Packit Service |
ca3877 |
if (resource_length >= 2) {
|
|
Packit Service |
ca3877 |
if ((resource[0] == 0xFE && resource[1] == 0xFF) ||
|
|
Packit Service |
ca3877 |
(resource[0] == 0xFF && resource[1] == 0xFE))
|
|
Packit Service |
ca3877 |
return g_strdup ("text/plain");
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* 3. UTF-8 BOM. */
|
|
Packit Service |
ca3877 |
if (resource_length >= 3) {
|
|
Packit Service |
ca3877 |
if (resource[0] == 0xEF && resource[1] == 0xBB && resource[2] == 0xBF)
|
|
Packit Service |
ca3877 |
return g_strdup ("text/plain");
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* 4. Look to see if any of the first n bytes looks binary */
|
|
Packit Service |
ca3877 |
for (i = 0; i < resource_length; i++) {
|
|
Packit Service |
ca3877 |
if (byte_looks_binary[resource[i]]) {
|
|
Packit Service |
ca3877 |
looks_binary = TRUE;
|
|
Packit Service |
ca3877 |
break;
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (!looks_binary)
|
|
Packit Service |
ca3877 |
return g_strdup ("text/plain");
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* 5. Execute 7.1 Identifying a resource with an unknown MIME type.
|
|
Packit Service |
ca3877 |
* TODO: sniff-scriptable needs to be unset.
|
|
Packit Service |
ca3877 |
*/
|
|
Packit Service |
ca3877 |
return sniff_unknown (sniffer, buffer, TRUE);
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
static gboolean
|
|
Packit Service |
ca3877 |
skip_insignificant_space (const char *resource, int *pos, int resource_length)
|
|
Packit Service |
ca3877 |
{
|
|
Packit Service |
ca3877 |
while ((resource[*pos] == '\x09') ||
|
|
Packit Service |
ca3877 |
(resource[*pos] == '\x20') ||
|
|
Packit Service |
ca3877 |
(resource[*pos] == '\x0A') ||
|
|
Packit Service |
ca3877 |
(resource[*pos] == '\x0D')) {
|
|
Packit Service |
ca3877 |
*pos = *pos + 1;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (*pos > resource_length)
|
|
Packit Service |
ca3877 |
return TRUE;
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
return FALSE;
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
static char*
|
|
Packit Service |
ca3877 |
sniff_feed_or_html (SoupContentSniffer *sniffer, SoupBuffer *buffer)
|
|
Packit Service |
ca3877 |
{
|
|
Packit Service |
ca3877 |
const char *resource = (const char *)buffer->data;
|
|
Packit Service |
ca3877 |
int resource_length = MIN (512, buffer->length);
|
|
Packit Service |
ca3877 |
int pos = 0;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (resource_length < 3)
|
|
Packit Service |
ca3877 |
goto text_html;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* Skip a leading UTF-8 BOM */
|
|
Packit Service |
ca3877 |
if (resource[0] == 0xEF && resource[1] == 0xBB && resource[2] == 0xBF)
|
|
Packit Service |
ca3877 |
pos = 3;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
look_for_tag:
|
|
Packit Service |
ca3877 |
if (pos > resource_length)
|
|
Packit Service |
ca3877 |
goto text_html;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (skip_insignificant_space (resource, &pos, resource_length))
|
|
Packit Service |
ca3877 |
goto text_html;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (resource[pos] != '<')
|
|
Packit Service |
ca3877 |
return g_strdup ("text/html");
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
pos++;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if ((pos + 2) > resource_length)
|
|
Packit Service |
ca3877 |
goto text_html;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* Skip comments. */
|
|
Packit Service |
ca3877 |
if (g_str_has_prefix (resource + pos, "!--")) {
|
|
Packit Service |
ca3877 |
pos = pos + 3;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if ((pos + 2) > resource_length)
|
|
Packit Service |
ca3877 |
goto text_html;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
while (!g_str_has_prefix (resource + pos, "-->")) {
|
|
Packit Service |
ca3877 |
pos++;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if ((pos + 2) > resource_length)
|
|
Packit Service |
ca3877 |
goto text_html;
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
pos = pos + 3;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
goto look_for_tag;
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (pos > resource_length)
|
|
Packit Service |
ca3877 |
goto text_html;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (resource[pos] == '!') {
|
|
Packit Service |
ca3877 |
do {
|
|
Packit Service |
ca3877 |
pos++;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (pos > resource_length)
|
|
Packit Service |
ca3877 |
goto text_html;
|
|
Packit Service |
ca3877 |
} while (resource[pos] != '>');
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
pos++;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
goto look_for_tag;
|
|
Packit Service |
ca3877 |
} else if (resource[pos] == '?') {
|
|
Packit Service |
ca3877 |
do {
|
|
Packit Service |
ca3877 |
pos++;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if ((pos + 1) > resource_length)
|
|
Packit Service |
ca3877 |
goto text_html;
|
|
Packit Service |
ca3877 |
} while (!g_str_has_prefix (resource + pos, "?>"));
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
pos = pos + 2;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
goto look_for_tag;
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if ((pos + 3) > resource_length)
|
|
Packit Service |
ca3877 |
goto text_html;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (g_str_has_prefix (resource + pos, "rss"))
|
|
Packit Service |
ca3877 |
return g_strdup ("application/rss+xml");
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if ((pos + 4) > resource_length)
|
|
Packit Service |
ca3877 |
goto text_html;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (g_str_has_prefix (resource + pos, "feed"))
|
|
Packit Service |
ca3877 |
return g_strdup ("application/atom+xml");
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if ((pos + 7) > resource_length)
|
|
Packit Service |
ca3877 |
goto text_html;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (g_str_has_prefix (resource + pos, "rdf:RDF")) {
|
|
Packit Service |
ca3877 |
pos = pos + 7;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (skip_insignificant_space (resource, &pos, resource_length))
|
|
Packit Service |
ca3877 |
goto text_html;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if ((pos + 32) > resource_length)
|
|
Packit Service |
ca3877 |
goto text_html;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (g_str_has_prefix (resource + pos, "xmlns=\"http://purl.org/rss/1.0/\"")) {
|
|
Packit Service |
ca3877 |
pos = pos + 32;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (skip_insignificant_space (resource, &pos, resource_length))
|
|
Packit Service |
ca3877 |
goto text_html;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if ((pos + 55) > resource_length)
|
|
Packit Service |
ca3877 |
goto text_html;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (g_str_has_prefix (resource + pos, "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\""))
|
|
Packit Service |
ca3877 |
return g_strdup ("application/rss+xml");
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if ((pos + 55) > resource_length)
|
|
Packit Service |
ca3877 |
goto text_html;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (g_str_has_prefix (resource + pos, "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"")) {
|
|
Packit Service |
ca3877 |
pos = pos + 55;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (skip_insignificant_space (resource, &pos, resource_length))
|
|
Packit Service |
ca3877 |
goto text_html;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if ((pos + 32) > resource_length)
|
|
Packit Service |
ca3877 |
goto text_html;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
if (g_str_has_prefix (resource + pos, "xmlns=\"http://purl.org/rss/1.0/\""))
|
|
Packit Service |
ca3877 |
return g_strdup ("application/rss+xml");
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
text_html:
|
|
Packit Service |
ca3877 |
return g_strdup ("text/html");
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
static char *
|
|
Packit Service |
ca3877 |
soup_content_sniffer_real_sniff (SoupContentSniffer *sniffer, SoupMessage *msg,
|
|
Packit Service |
ca3877 |
SoupBuffer *buffer, GHashTable **params)
|
|
Packit Service |
ca3877 |
{
|
|
Packit Service |
ca3877 |
const char *content_type;
|
|
Packit Service |
ca3877 |
const char *x_content_type_options;
|
|
Packit Service |
ca3877 |
char *sniffed_type = NULL;
|
|
Packit Service |
ca3877 |
gboolean no_sniff = FALSE;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
content_type = soup_message_headers_get_content_type (msg->response_headers, params);
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* MIMESNIFF: 7 Determining the sniffed MIME type of a resource. */
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
x_content_type_options = soup_message_headers_get_one (msg->response_headers, "X-Content-Type-Options");
|
|
Packit Service |
ca3877 |
if (!g_strcmp0 (x_content_type_options, "nosniff"))
|
|
Packit Service |
ca3877 |
no_sniff = TRUE;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* 1. Unknown/undefined supplied type with sniff-scritable = !nosniff. */
|
|
Packit Service |
ca3877 |
if ((content_type == NULL) ||
|
|
Packit Service |
ca3877 |
!g_ascii_strcasecmp (content_type, "unknown/unknown") ||
|
|
Packit Service |
ca3877 |
!g_ascii_strcasecmp (content_type, "application/unknown") ||
|
|
Packit Service |
ca3877 |
!g_ascii_strcasecmp (content_type, "*/*"))
|
|
Packit Service |
ca3877 |
return sniff_unknown (sniffer, buffer, !no_sniff);
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* 2. If nosniff is specified in X-Content-Type-Options use the supplied MIME type. */
|
|
Packit Service |
ca3877 |
if (no_sniff)
|
|
Packit Service |
ca3877 |
return g_strdup (content_type);
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* 3. check-for-apache-bug */
|
|
Packit Service |
ca3877 |
if ((content_type != NULL) &&
|
|
Packit Service |
ca3877 |
(g_str_equal (content_type, "text/plain") ||
|
|
Packit Service |
ca3877 |
g_str_equal (content_type, "text/plain; charset=ISO-8859-1") ||
|
|
Packit Service |
ca3877 |
g_str_equal (content_type, "text/plain; charset=iso-8859-1") ||
|
|
Packit Service |
ca3877 |
g_str_equal (content_type, "text/plain; charset=UTF-8")))
|
|
Packit Service |
ca3877 |
return sniff_text_or_binary (sniffer, buffer);
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* 4. XML types sent by the server are always used. */
|
|
Packit Service |
ca3877 |
if (g_str_has_suffix (content_type, "+xml") ||
|
|
Packit Service |
ca3877 |
!g_ascii_strcasecmp (content_type, "text/xml") ||
|
|
Packit Service |
ca3877 |
!g_ascii_strcasecmp (content_type, "application/xml"))
|
|
Packit Service |
ca3877 |
return g_strdup (content_type);
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* 5. Distinguish feed from HTML. */
|
|
Packit Service |
ca3877 |
if (!g_ascii_strcasecmp (content_type, "text/html"))
|
|
Packit Service |
ca3877 |
return sniff_feed_or_html (sniffer, buffer);
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* 6. Image types.
|
|
Packit Service |
ca3877 |
*/
|
|
Packit Service |
ca3877 |
if (!g_ascii_strncasecmp (content_type, "image/", 6)) {
|
|
Packit Service |
ca3877 |
sniffed_type = sniff_images (sniffer, buffer);
|
|
Packit Service |
ca3877 |
if (sniffed_type != NULL)
|
|
Packit Service |
ca3877 |
return sniffed_type;
|
|
Packit Service |
ca3877 |
return g_strdup (content_type);
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* 7. Audio and video types. */
|
|
Packit Service |
ca3877 |
if (!g_ascii_strncasecmp (content_type, "audio/", 6) ||
|
|
Packit Service |
ca3877 |
!g_ascii_strncasecmp (content_type, "video/", 6) ||
|
|
Packit Service |
ca3877 |
!g_ascii_strcasecmp (content_type, "application/ogg")) {
|
|
Packit Service |
ca3877 |
sniffed_type = sniff_audio_video (sniffer, buffer);
|
|
Packit Service |
ca3877 |
if (sniffed_type != NULL)
|
|
Packit Service |
ca3877 |
return sniffed_type;
|
|
Packit Service |
ca3877 |
return g_strdup (content_type);
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/* If we got text/plain, use text_or_binary */
|
|
Packit Service |
ca3877 |
if (g_str_equal (content_type, "text/plain")) {
|
|
Packit Service |
ca3877 |
return sniff_text_or_binary (sniffer, buffer);
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
return g_strdup (content_type);
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
static gsize
|
|
Packit Service |
ca3877 |
soup_content_sniffer_real_get_buffer_size (SoupContentSniffer *sniffer)
|
|
Packit Service |
ca3877 |
{
|
|
Packit Service |
ca3877 |
return 512;
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
static void
|
|
Packit Service |
ca3877 |
soup_content_sniffer_got_headers_cb (SoupMessage *msg, SoupContentSniffer *sniffer)
|
|
Packit Service |
ca3877 |
{
|
|
Packit Service |
ca3877 |
SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg);
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
priv->bytes_for_sniffing = soup_content_sniffer_get_buffer_size (sniffer);
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
static void
|
|
Packit Service |
ca3877 |
soup_content_sniffer_request_queued (SoupSessionFeature *feature,
|
|
Packit Service |
ca3877 |
SoupSession *session,
|
|
Packit Service |
ca3877 |
SoupMessage *msg)
|
|
Packit Service |
ca3877 |
{
|
|
Packit Service |
ca3877 |
SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg);
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
priv->sniffer = g_object_ref (SOUP_CONTENT_SNIFFER (feature));
|
|
Packit Service |
ca3877 |
g_signal_connect (msg, "got-headers",
|
|
Packit Service |
ca3877 |
G_CALLBACK (soup_content_sniffer_got_headers_cb),
|
|
Packit Service |
ca3877 |
feature);
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
static void
|
|
Packit Service |
ca3877 |
soup_content_sniffer_request_unqueued (SoupSessionFeature *feature,
|
|
Packit Service |
ca3877 |
SoupSession *session,
|
|
Packit Service |
ca3877 |
SoupMessage *msg)
|
|
Packit Service |
ca3877 |
{
|
|
Packit Service |
ca3877 |
SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg);
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
g_object_unref (priv->sniffer);
|
|
Packit Service |
ca3877 |
priv->sniffer = NULL;
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
g_signal_handlers_disconnect_by_func (msg, soup_content_sniffer_got_headers_cb, feature);
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
static void
|
|
Packit Service |
ca3877 |
soup_content_sniffer_class_init (SoupContentSnifferClass *content_sniffer_class)
|
|
Packit Service |
ca3877 |
{
|
|
Packit Service |
ca3877 |
content_sniffer_class->sniff = soup_content_sniffer_real_sniff;
|
|
Packit Service |
ca3877 |
content_sniffer_class->get_buffer_size = soup_content_sniffer_real_get_buffer_size;
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
static void
|
|
Packit Service |
ca3877 |
soup_content_sniffer_session_feature_init (SoupSessionFeatureInterface *feature_interface,
|
|
Packit Service |
ca3877 |
gpointer interface_data)
|
|
Packit Service |
ca3877 |
{
|
|
Packit Service |
ca3877 |
feature_interface->request_queued = soup_content_sniffer_request_queued;
|
|
Packit Service |
ca3877 |
feature_interface->request_unqueued = soup_content_sniffer_request_unqueued;
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/**
|
|
Packit Service |
ca3877 |
* soup_content_sniffer_new:
|
|
Packit Service |
ca3877 |
*
|
|
Packit Service |
ca3877 |
* Creates a new #SoupContentSniffer.
|
|
Packit Service |
ca3877 |
*
|
|
Packit Service |
ca3877 |
* Returns: a new #SoupContentSniffer
|
|
Packit Service |
ca3877 |
*
|
|
Packit Service |
ca3877 |
* Since: 2.28
|
|
Packit Service |
ca3877 |
**/
|
|
Packit Service |
ca3877 |
SoupContentSniffer *
|
|
Packit Service |
ca3877 |
soup_content_sniffer_new (void)
|
|
Packit Service |
ca3877 |
{
|
|
Packit Service |
ca3877 |
return g_object_new (SOUP_TYPE_CONTENT_SNIFFER, NULL);
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/**
|
|
Packit Service |
ca3877 |
* soup_content_sniffer_sniff:
|
|
Packit Service |
ca3877 |
* @sniffer: a #SoupContentSniffer
|
|
Packit Service |
ca3877 |
* @msg: the message to sniff
|
|
Packit Service |
ca3877 |
* @buffer: a buffer containing the start of @msg's response body
|
|
Packit Service |
ca3877 |
* @params: (element-type utf8 utf8) (out) (transfer full) (allow-none): return
|
|
Packit Service |
ca3877 |
* location for Content-Type parameters (eg, "charset"), or %NULL
|
|
Packit Service |
ca3877 |
*
|
|
Packit Service |
ca3877 |
* Sniffs @buffer to determine its Content-Type. The result may also
|
|
Packit Service |
ca3877 |
* be influenced by the Content-Type declared in @msg's response
|
|
Packit Service |
ca3877 |
* headers.
|
|
Packit Service |
ca3877 |
*
|
|
Packit Service |
ca3877 |
* Return value: the sniffed Content-Type of @buffer; this will never be %NULL,
|
|
Packit Service |
ca3877 |
* but may be "application/octet-stream".
|
|
Packit Service |
ca3877 |
*
|
|
Packit Service |
ca3877 |
* Since: 2.28
|
|
Packit Service |
ca3877 |
*/
|
|
Packit Service |
ca3877 |
char *
|
|
Packit Service |
ca3877 |
soup_content_sniffer_sniff (SoupContentSniffer *sniffer,
|
|
Packit Service |
ca3877 |
SoupMessage *msg, SoupBuffer *buffer,
|
|
Packit Service |
ca3877 |
GHashTable **params)
|
|
Packit Service |
ca3877 |
{
|
|
Packit Service |
ca3877 |
g_return_val_if_fail (SOUP_IS_CONTENT_SNIFFER (sniffer), NULL);
|
|
Packit Service |
ca3877 |
g_return_val_if_fail (SOUP_IS_MESSAGE (msg), NULL);
|
|
Packit Service |
ca3877 |
g_return_val_if_fail (buffer != NULL, NULL);
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
return SOUP_CONTENT_SNIFFER_GET_CLASS (sniffer)->sniff (sniffer, msg, buffer, params);
|
|
Packit Service |
ca3877 |
}
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
/**
|
|
Packit Service |
ca3877 |
* soup_content_sniffer_get_buffer_size:
|
|
Packit Service |
ca3877 |
* @sniffer: a #SoupContentSniffer
|
|
Packit Service |
ca3877 |
*
|
|
Packit Service |
ca3877 |
* Gets the number of bytes @sniffer needs in order to properly sniff
|
|
Packit Service |
ca3877 |
* a buffer.
|
|
Packit Service |
ca3877 |
*
|
|
Packit Service |
ca3877 |
* Return value: the number of bytes to sniff
|
|
Packit Service |
ca3877 |
*
|
|
Packit Service |
ca3877 |
* Since: 2.28
|
|
Packit Service |
ca3877 |
*/
|
|
Packit Service |
ca3877 |
gsize
|
|
Packit Service |
ca3877 |
soup_content_sniffer_get_buffer_size (SoupContentSniffer *sniffer)
|
|
Packit Service |
ca3877 |
{
|
|
Packit Service |
ca3877 |
g_return_val_if_fail (SOUP_IS_CONTENT_SNIFFER (sniffer), 0);
|
|
Packit Service |
ca3877 |
|
|
Packit Service |
ca3877 |
return SOUP_CONTENT_SNIFFER_GET_CLASS (sniffer)->get_buffer_size (sniffer);
|
|
Packit Service |
ca3877 |
}
|