|
Packit |
3adb1e |
/* ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 */
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
#include <stdlib.h>
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
#include <apr.h>
|
|
Packit |
3adb1e |
#include <apr_uri.h>
|
|
Packit |
3adb1e |
#include <apr_strings.h>
|
|
Packit |
3adb1e |
#include <apr_atomic.h>
|
|
Packit |
3adb1e |
#include <apr_base64.h>
|
|
Packit |
3adb1e |
#include <apr_getopt.h>
|
|
Packit |
3adb1e |
#include <apr_xml.h>
|
|
Packit |
3adb1e |
#include <apr_thread_proc.h>
|
|
Packit |
3adb1e |
#include <apr_thread_mutex.h>
|
|
Packit |
3adb1e |
#include <apr_thread_cond.h>
|
|
Packit |
3adb1e |
#include <apr_version.h>
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
#include "serf.h"
|
|
Packit |
3adb1e |
#include "serf_bucket_util.h"
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/*#define SERF_VERBOSE*/
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
#if !APR_HAS_THREADS
|
|
Packit |
3adb1e |
#error serf spider needs threads.
|
|
Packit |
3adb1e |
#endif
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* This is a rough-sketch example of how a multi-threaded spider could be
 * constructed using serf.
 *
 * A network thread will read in a URL and feed it into an expat parser.
 * After the entire response is read, the XML structure and the path are
 * passed to a set of parser threads.  These threads will scan the document
 * for HTML hrefs and queue up any links that they find.
 *
 * It does try to stay on the same server as it only uses one connection.
 *
 * Because we feed the responses into an XML parser, the documents must be
 * well-formed XHTML.
 *
 * There is no duplicate link detection.  You've been warned.
 */
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* The structure passed to the parser thread after we've read the entire
|
|
Packit |
3adb1e |
* response.
|
|
Packit |
3adb1e |
*/
|
|
Packit |
3adb1e |
typedef struct {
|
|
Packit |
3adb1e |
apr_xml_doc *doc;
|
|
Packit |
3adb1e |
char *path;
|
|
Packit |
3adb1e |
apr_pool_t *pool;
|
|
Packit |
3adb1e |
} doc_path_t;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
typedef struct {
|
|
Packit |
3adb1e |
const char *authn;
|
|
Packit |
3adb1e |
int using_ssl;
|
|
Packit |
3adb1e |
serf_ssl_context_t *ssl_ctx;
|
|
Packit |
3adb1e |
serf_bucket_alloc_t *bkt_alloc;
|
|
Packit |
3adb1e |
} app_baton_t;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
static void closed_connection(serf_connection_t *conn,
|
|
Packit |
3adb1e |
void *closed_baton,
|
|
Packit |
3adb1e |
apr_status_t why,
|
|
Packit |
3adb1e |
apr_pool_t *pool)
|
|
Packit |
3adb1e |
{
|
|
Packit |
3adb1e |
if (why) {
|
|
Packit |
3adb1e |
abort();
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
static apr_status_t conn_setup(apr_socket_t *skt,
|
|
Packit |
3adb1e |
serf_bucket_t **input_bkt,
|
|
Packit |
3adb1e |
serf_bucket_t **output_bkt,
|
|
Packit |
3adb1e |
void *setup_baton,
|
|
Packit |
3adb1e |
apr_pool_t *pool)
|
|
Packit |
3adb1e |
{
|
|
Packit |
3adb1e |
serf_bucket_t *c;
|
|
Packit |
3adb1e |
app_baton_t *ctx = setup_baton;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
c = serf_bucket_socket_create(skt, ctx->bkt_alloc);
|
|
Packit |
3adb1e |
if (ctx->using_ssl) {
|
|
Packit |
3adb1e |
c = serf_bucket_ssl_decrypt_create(c, ctx->ssl_ctx, ctx->bkt_alloc);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
*input_bkt = c;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
return APR_SUCCESS;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
static serf_bucket_t* accept_response(serf_request_t *request,
|
|
Packit |
3adb1e |
serf_bucket_t *stream,
|
|
Packit |
3adb1e |
void *acceptor_baton,
|
|
Packit |
3adb1e |
apr_pool_t *pool)
|
|
Packit |
3adb1e |
{
|
|
Packit |
3adb1e |
serf_bucket_t *c;
|
|
Packit |
3adb1e |
serf_bucket_alloc_t *bkt_alloc;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* get the per-request bucket allocator */
|
|
Packit |
3adb1e |
bkt_alloc = serf_request_get_alloc(request);
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* Create a barrier so the response doesn't eat us! */
|
|
Packit |
3adb1e |
c = serf_bucket_barrier_create(stream, bkt_alloc);
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
return serf_bucket_response_create(c, bkt_alloc);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
typedef struct {
|
|
Packit |
3adb1e |
serf_bucket_alloc_t *allocator;
|
|
Packit |
3adb1e |
#if APR_MAJOR_VERSION > 0
|
|
Packit |
3adb1e |
apr_uint32_t *requests_outstanding;
|
|
Packit |
3adb1e |
#else
|
|
Packit |
3adb1e |
apr_atomic_t *requests_outstanding;
|
|
Packit |
3adb1e |
#endif
|
|
Packit |
3adb1e |
serf_bucket_alloc_t *doc_queue_alloc;
|
|
Packit |
3adb1e |
apr_array_header_t *doc_queue;
|
|
Packit |
3adb1e |
apr_thread_cond_t *doc_queue_condvar;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
const char *hostinfo;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* includes: path, query, fragment. */
|
|
Packit |
3adb1e |
char *full_path;
|
|
Packit |
3adb1e |
apr_size_t full_path_len;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
char *path;
|
|
Packit |
3adb1e |
apr_size_t path_len;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
char *query;
|
|
Packit |
3adb1e |
apr_size_t query_len;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
char *fragment;
|
|
Packit |
3adb1e |
apr_size_t fragment_len;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
apr_xml_parser *parser;
|
|
Packit |
3adb1e |
apr_pool_t *parser_pool;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
int hdr_read;
|
|
Packit |
3adb1e |
int is_html;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
serf_response_acceptor_t acceptor;
|
|
Packit |
3adb1e |
void *acceptor_baton;
|
|
Packit |
3adb1e |
serf_response_handler_t handler;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
app_baton_t *app_ctx;
|
|
Packit |
3adb1e |
} handler_baton_t;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* APR 0.9 compatibility: map the 32-bit atomic names used in this file onto
 * the unsuffixed names that APR 0.9 provides.
 */
#if APR_MAJOR_VERSION < 1
#define apr_atomic_inc32  apr_atomic_inc
#define apr_atomic_dec32  apr_atomic_dec
#define apr_atomic_read32 apr_atomic_read
#define apr_atomic_set32  apr_atomic_set
#endif
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
static apr_status_t handle_response(serf_request_t *request,
|
|
Packit |
3adb1e |
serf_bucket_t *response,
|
|
Packit |
3adb1e |
void *handler_baton,
|
|
Packit |
3adb1e |
apr_pool_t *pool)
|
|
Packit |
3adb1e |
{
|
|
Packit |
3adb1e |
const char *data;
|
|
Packit |
3adb1e |
apr_size_t len;
|
|
Packit |
3adb1e |
serf_status_line sl;
|
|
Packit |
3adb1e |
apr_status_t status;
|
|
Packit |
3adb1e |
handler_baton_t *ctx = handler_baton;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
if (!response) {
|
|
Packit |
3adb1e |
/* Oh no! We've been cancelled! */
|
|
Packit |
3adb1e |
abort();
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
status = serf_bucket_response_status(response, &sl);
|
|
Packit |
3adb1e |
if (status) {
|
|
Packit |
3adb1e |
if (APR_STATUS_IS_EAGAIN(status)) {
|
|
Packit |
3adb1e |
return APR_SUCCESS;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
abort();
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
while (1) {
|
|
Packit |
3adb1e |
status = serf_bucket_read(response, 2048, &data, &len;;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
if (SERF_BUCKET_READ_ERROR(status))
|
|
Packit |
3adb1e |
return status;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/*fwrite(data, 1, len, stdout);*/
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
if (!ctx->hdr_read) {
|
|
Packit |
3adb1e |
serf_bucket_t *hdrs;
|
|
Packit |
3adb1e |
const char *val;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
printf("Processing %s\n", ctx->path);
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
hdrs = serf_bucket_response_get_headers(response);
|
|
Packit |
3adb1e |
val = serf_bucket_headers_get(hdrs, "Content-Type");
|
|
Packit |
3adb1e |
/* FIXME: This check isn't quite right because Content-Type could
|
|
Packit |
3adb1e |
* be decorated; ideally strcasestr would be correct.
|
|
Packit |
3adb1e |
*/
|
|
Packit |
3adb1e |
if (val && strcasecmp(val, "text/html") == 0) {
|
|
Packit |
3adb1e |
ctx->is_html = 1;
|
|
Packit |
3adb1e |
apr_pool_create(&ctx->parser_pool, NULL);
|
|
Packit |
3adb1e |
ctx->parser = apr_xml_parser_create(ctx->parser_pool);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
else {
|
|
Packit |
3adb1e |
ctx->is_html = 0;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
ctx->hdr_read = 1;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
if (ctx->is_html) {
|
|
Packit |
3adb1e |
apr_status_t xs;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
xs = apr_xml_parser_feed(ctx->parser, data, len);
|
|
Packit |
3adb1e |
/* Uh-oh. */
|
|
Packit |
3adb1e |
if (xs) {
|
|
Packit |
3adb1e |
#ifdef SERF_VERBOSE
|
|
Packit |
3adb1e |
printf("XML parser error (feed): %d\n", xs);
|
|
Packit |
3adb1e |
#endif
|
|
Packit |
3adb1e |
ctx->is_html = 0;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* are we done yet? */
|
|
Packit |
3adb1e |
if (APR_STATUS_IS_EOF(status)) {
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
if (ctx->is_html) {
|
|
Packit |
3adb1e |
apr_xml_doc *xmld;
|
|
Packit |
3adb1e |
apr_status_t xs;
|
|
Packit |
3adb1e |
doc_path_t *dup;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
xs = apr_xml_parser_done(ctx->parser, &xmld);
|
|
Packit |
3adb1e |
if (xs) {
|
|
Packit |
3adb1e |
#ifdef SERF_VERBOSE
|
|
Packit |
3adb1e |
printf("XML parser error (done): %d\n", xs);
|
|
Packit |
3adb1e |
#endif
|
|
Packit |
3adb1e |
return xs;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
dup = (doc_path_t*)
|
|
Packit |
3adb1e |
serf_bucket_mem_alloc(ctx->doc_queue_alloc,
|
|
Packit |
3adb1e |
sizeof(doc_path_t));
|
|
Packit |
3adb1e |
dup->doc = xmld;
|
|
Packit |
3adb1e |
dup->path = (char*)serf_bucket_mem_alloc(ctx->doc_queue_alloc,
|
|
Packit |
3adb1e |
ctx->path_len);
|
|
Packit |
3adb1e |
memcpy(dup->path, ctx->path, ctx->path_len);
|
|
Packit |
3adb1e |
dup->pool = ctx->parser_pool;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
*(doc_path_t **)apr_array_push(ctx->doc_queue) = dup;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
apr_thread_cond_signal(ctx->doc_queue_condvar);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
apr_atomic_dec32(ctx->requests_outstanding);
|
|
Packit |
3adb1e |
serf_bucket_mem_free(ctx->allocator, ctx->path);
|
|
Packit |
3adb1e |
if (ctx->query) {
|
|
Packit |
3adb1e |
serf_bucket_mem_free(ctx->allocator, ctx->query);
|
|
Packit |
3adb1e |
serf_bucket_mem_free(ctx->allocator, ctx->full_path);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
if (ctx->fragment) {
|
|
Packit |
3adb1e |
serf_bucket_mem_free(ctx->allocator, ctx->fragment);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
serf_bucket_mem_free(ctx->allocator, ctx);
|
|
Packit |
3adb1e |
return APR_EOF;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* have we drained the response so far? */
|
|
Packit |
3adb1e |
if (APR_STATUS_IS_EAGAIN(status))
|
|
Packit |
3adb1e |
return APR_SUCCESS;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* loop to read some more. */
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
/* NOTREACHED */
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
typedef struct {
|
|
Packit |
3adb1e |
apr_uint32_t *requests_outstanding;
|
|
Packit |
3adb1e |
serf_connection_t *connection;
|
|
Packit |
3adb1e |
apr_array_header_t *doc_queue;
|
|
Packit |
3adb1e |
serf_bucket_alloc_t *doc_queue_alloc;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
apr_thread_cond_t *condvar;
|
|
Packit |
3adb1e |
apr_thread_mutex_t *mutex;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* Master host: for now, we'll stick to one host. */
|
|
Packit |
3adb1e |
const char *hostinfo;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
app_baton_t *app_ctx;
|
|
Packit |
3adb1e |
} parser_baton_t;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
static apr_status_t setup_request(serf_request_t *request,
|
|
Packit |
3adb1e |
void *setup_baton,
|
|
Packit |
3adb1e |
serf_bucket_t **req_bkt,
|
|
Packit |
3adb1e |
serf_response_acceptor_t *acceptor,
|
|
Packit |
3adb1e |
void **acceptor_baton,
|
|
Packit |
3adb1e |
serf_response_handler_t *handler,
|
|
Packit |
3adb1e |
void **handler_baton,
|
|
Packit |
3adb1e |
apr_pool_t *pool)
|
|
Packit |
3adb1e |
{
|
|
Packit |
3adb1e |
handler_baton_t *ctx = setup_baton;
|
|
Packit |
3adb1e |
serf_bucket_t *hdrs_bkt;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
*req_bkt = serf_bucket_request_create("GET", ctx->full_path, NULL,
|
|
Packit |
3adb1e |
serf_request_get_alloc(request));
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
hdrs_bkt = serf_bucket_request_get_headers(*req_bkt);
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* FIXME: Shouldn't we be able to figure out the host ourselves? */
|
|
Packit |
3adb1e |
serf_bucket_headers_setn(hdrs_bkt, "Host", ctx->hostinfo);
|
|
Packit |
3adb1e |
serf_bucket_headers_setn(hdrs_bkt, "User-Agent",
|
|
Packit |
3adb1e |
"Serf/" SERF_VERSION_STRING);
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* Shouldn't serf do this for us? */
|
|
Packit |
3adb1e |
serf_bucket_headers_setn(hdrs_bkt, "Accept-Encoding", "gzip");
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
if (ctx->app_ctx->authn != NULL) {
|
|
Packit |
3adb1e |
serf_bucket_headers_setn(hdrs_bkt, "Authorization",
|
|
Packit |
3adb1e |
ctx->app_ctx->authn);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
if (ctx->app_ctx->using_ssl) {
|
|
Packit |
3adb1e |
serf_bucket_alloc_t *req_alloc;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
req_alloc = serf_request_get_alloc(request);
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
if (ctx->app_ctx->ssl_ctx == NULL) {
|
|
Packit |
3adb1e |
*req_bkt = serf_bucket_ssl_encrypt_create(*req_bkt, NULL,
|
|
Packit |
3adb1e |
ctx->app_ctx->bkt_alloc);
|
|
Packit |
3adb1e |
ctx->app_ctx->ssl_ctx =
|
|
Packit |
3adb1e |
serf_bucket_ssl_encrypt_context_get(*req_bkt);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
else {
|
|
Packit |
3adb1e |
*req_bkt =
|
|
Packit |
3adb1e |
serf_bucket_ssl_encrypt_create(*req_bkt, ctx->app_ctx->ssl_ctx,
|
|
Packit |
3adb1e |
ctx->app_ctx->bkt_alloc);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
#ifdef SERF_VERBOSE
|
|
Packit |
3adb1e |
printf("Url requesting: %s\n", ctx->full_path);
|
|
Packit |
3adb1e |
#endif
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
*acceptor = ctx->acceptor;
|
|
Packit |
3adb1e |
*acceptor_baton = ctx->acceptor_baton;
|
|
Packit |
3adb1e |
*handler = ctx->handler;
|
|
Packit |
3adb1e |
*handler_baton = ctx;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
return APR_SUCCESS;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
static apr_status_t create_request(const char *hostinfo,
|
|
Packit |
3adb1e |
const char *path,
|
|
Packit |
3adb1e |
const char *query,
|
|
Packit |
3adb1e |
const char *fragment,
|
|
Packit |
3adb1e |
parser_baton_t *ctx,
|
|
Packit |
3adb1e |
apr_pool_t *tmppool)
|
|
Packit |
3adb1e |
{
|
|
Packit |
3adb1e |
handler_baton_t *new_ctx;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
if (hostinfo) {
|
|
Packit |
3adb1e |
/* Yes, this is a pointer comparison; not a string comparison. */
|
|
Packit |
3adb1e |
if (hostinfo != ctx->hostinfo) {
|
|
Packit |
3adb1e |
/* Not on the same host; ignore */
|
|
Packit |
3adb1e |
return APR_SUCCESS;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
new_ctx = (handler_baton_t*)serf_bucket_mem_alloc(ctx->app_ctx->bkt_alloc,
|
|
Packit |
3adb1e |
sizeof(handler_baton_t));
|
|
Packit |
3adb1e |
new_ctx->allocator = ctx->app_ctx->bkt_alloc;
|
|
Packit |
3adb1e |
new_ctx->requests_outstanding = ctx->requests_outstanding;
|
|
Packit |
3adb1e |
new_ctx->app_ctx = ctx->app_ctx;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* See above: this example restricts ourselves to the same vhost. */
|
|
Packit |
3adb1e |
new_ctx->hostinfo = ctx->hostinfo;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* we need to copy it so it falls under the request's scope. */
|
|
Packit |
3adb1e |
new_ctx->path_len = strlen(path);
|
|
Packit |
3adb1e |
new_ctx->path = (char*)serf_bucket_mem_alloc(ctx->app_ctx->bkt_alloc,
|
|
Packit |
3adb1e |
new_ctx->path_len + 1);
|
|
Packit |
3adb1e |
memcpy(new_ctx->path, path, new_ctx->path_len + 1);
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* we need to copy it so it falls under the request's scope. */
|
|
Packit |
3adb1e |
if (query) {
|
|
Packit |
3adb1e |
new_ctx->query_len = strlen(query);
|
|
Packit |
3adb1e |
new_ctx->query = (char*)serf_bucket_mem_alloc(ctx->app_ctx->bkt_alloc,
|
|
Packit |
3adb1e |
new_ctx->query_len + 1);
|
|
Packit |
3adb1e |
memcpy(new_ctx->query, query, new_ctx->query_len + 1);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
else {
|
|
Packit |
3adb1e |
new_ctx->query = NULL;
|
|
Packit |
3adb1e |
new_ctx->query_len = 0;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* we need to copy it so it falls under the request's scope. */
|
|
Packit |
3adb1e |
if (fragment) {
|
|
Packit |
3adb1e |
new_ctx->fragment_len = strlen(fragment);
|
|
Packit |
3adb1e |
new_ctx->fragment =
|
|
Packit |
3adb1e |
(char*)serf_bucket_mem_alloc(ctx->app_ctx->bkt_alloc,
|
|
Packit |
3adb1e |
new_ctx->fragment_len + 1);
|
|
Packit |
3adb1e |
memcpy(new_ctx->fragment, fragment, new_ctx->fragment_len + 1);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
else {
|
|
Packit |
3adb1e |
new_ctx->fragment = NULL;
|
|
Packit |
3adb1e |
new_ctx->fragment_len = 0;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
if (!new_ctx->query) {
|
|
Packit |
3adb1e |
new_ctx->full_path = new_ctx->path;
|
|
Packit |
3adb1e |
new_ctx->full_path_len = new_ctx->path_len;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
else {
|
|
Packit |
3adb1e |
new_ctx->full_path_len = new_ctx->path_len + new_ctx->query_len;
|
|
Packit |
3adb1e |
new_ctx->full_path =
|
|
Packit |
3adb1e |
(char*)serf_bucket_mem_alloc(ctx->app_ctx->bkt_alloc,
|
|
Packit |
3adb1e |
new_ctx->full_path_len + 1);
|
|
Packit |
3adb1e |
memcpy(new_ctx->full_path, new_ctx->path, new_ctx->path_len);
|
|
Packit |
3adb1e |
memcpy(new_ctx->full_path + new_ctx->path_len, new_ctx->query,
|
|
Packit |
3adb1e |
new_ctx->query_len + 1);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
new_ctx->hdr_read = 0;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
new_ctx->doc_queue_condvar = ctx->condvar;
|
|
Packit |
3adb1e |
new_ctx->doc_queue = ctx->doc_queue;
|
|
Packit |
3adb1e |
new_ctx->doc_queue_alloc = ctx->doc_queue_alloc;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
new_ctx->acceptor = accept_response;
|
|
Packit |
3adb1e |
new_ctx->acceptor_baton = &ctx->app_ctx;
|
|
Packit |
3adb1e |
new_ctx->handler = handle_response;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
apr_atomic_inc32(ctx->requests_outstanding);
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
serf_connection_request_create(ctx->connection, setup_request, new_ctx);
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
return APR_SUCCESS;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
static apr_status_t put_req(const char *c, const char *orig_path,
|
|
Packit |
3adb1e |
parser_baton_t *ctx, apr_pool_t *pool)
|
|
Packit |
3adb1e |
{
|
|
Packit |
3adb1e |
apr_status_t status;
|
|
Packit |
3adb1e |
apr_uri_t url;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* Build url */
|
|
Packit |
3adb1e |
#ifdef SERF_VERBOSE
|
|
Packit |
3adb1e |
printf("Url discovered: %s\n", c);
|
|
Packit |
3adb1e |
#endif
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
status = apr_uri_parse(pool, c, &url;;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* We got something that was minimally useful. */
|
|
Packit |
3adb1e |
if (status == 0 && url.path) {
|
|
Packit |
3adb1e |
const char *path, *query, *fragment;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* This is likely a relative URL. So, merge and hope for the
|
|
Packit |
3adb1e |
* best.
|
|
Packit |
3adb1e |
*/
|
|
Packit |
3adb1e |
if (!url.hostinfo && url.path[0] != '/') {
|
|
Packit |
3adb1e |
struct iovec vec[2];
|
|
Packit |
3adb1e |
char *c;
|
|
Packit |
3adb1e |
apr_size_t nbytes;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
c = strrchr(orig_path, '/');
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* assert c */
|
|
Packit |
3adb1e |
if (!c) {
|
|
Packit |
3adb1e |
return APR_EGENERAL;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
vec[0].iov_base = (char*)orig_path;
|
|
Packit |
3adb1e |
vec[0].iov_len = c - orig_path + 1;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* If the HTML is cute and gives us ./foo - skip the ./ */
|
|
Packit |
3adb1e |
if (url.path[0] == '.' && url.path[1] == '/') {
|
|
Packit |
3adb1e |
vec[1].iov_base = url.path + 2;
|
|
Packit |
3adb1e |
vec[1].iov_len = strlen(url.path + 2);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
else if (url.path[0] == '.' && url.path[1] == '.') {
|
|
Packit |
3adb1e |
/* FIXME We could be cute and consolidate the path; we're a
|
|
Packit |
3adb1e |
* toy example. So no.
|
|
Packit |
3adb1e |
*/
|
|
Packit |
3adb1e |
vec[1].iov_base = url.path;
|
|
Packit |
3adb1e |
vec[1].iov_len = strlen(url.path);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
else {
|
|
Packit |
3adb1e |
vec[1].iov_base = url.path;
|
|
Packit |
3adb1e |
vec[1].iov_len = strlen(url.path);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
path = apr_pstrcatv(pool, vec, 2, &nbytes);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
else {
|
|
Packit |
3adb1e |
path = url.path;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
query = url.query;
|
|
Packit |
3adb1e |
fragment = url.fragment;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
return create_request(url.hostinfo, path, query, fragment, ctx, pool);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
return APR_SUCCESS;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
static apr_status_t find_href(apr_xml_elem *e, const char *orig_path,
|
|
Packit |
3adb1e |
parser_baton_t *ctx, apr_pool_t *pool)
|
|
Packit |
3adb1e |
{
|
|
Packit |
3adb1e |
apr_status_t status;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
do {
|
|
Packit |
3adb1e |
/* print */
|
|
Packit |
3adb1e |
if (e->name[0] == 'a' && e->name[1] == '\0') {
|
|
Packit |
3adb1e |
apr_xml_attr *a;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
a = e->attr;
|
|
Packit |
3adb1e |
while (a) {
|
|
Packit |
3adb1e |
if (strcasecmp(a->name, "href") == 0) {
|
|
Packit |
3adb1e |
break;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
a = a->next;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
if (a) {
|
|
Packit |
3adb1e |
status = put_req(a->value, orig_path, ctx, pool);
|
|
Packit |
3adb1e |
if (status) {
|
|
Packit |
3adb1e |
return status;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
if (e->first_child) {
|
|
Packit |
3adb1e |
status = find_href(e->first_child, orig_path, ctx, pool);
|
|
Packit |
3adb1e |
if (status) {
|
|
Packit |
3adb1e |
return status;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
e = e->next;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
while (e);
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
return APR_SUCCESS;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
static apr_status_t find_href_doc(apr_xml_doc *doc, const char *path,
|
|
Packit |
3adb1e |
parser_baton_t *ctx,
|
|
Packit |
3adb1e |
apr_pool_t *pool)
|
|
Packit |
3adb1e |
{
|
|
Packit |
3adb1e |
return find_href(doc->root, path, ctx, pool);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* Parser thread entry point.
 *
 * data is the parser_baton_t.  The thread repeatedly takes one queued
 * document, scans it for links (which queues new requests) and frees the
 * queue entry together with the pool holding the document.  It leaves the
 * loop once the outstanding request counter reads zero after an iteration.
 * Always returns NULL.
 */
static void * APR_THREAD_FUNC parser_thread(apr_thread_t *thread, void *data)
{
    apr_status_t status;
    apr_pool_t *pool, *subpool;
    parser_baton_t *ctx = data;

    pool = apr_thread_pool_get(thread);

    apr_pool_create(&subpool, pool);

    while (1) {
        doc_path_t *item = NULL;

        /* Scratch memory from the previous document is no longer needed. */
        apr_pool_clear(subpool);

        /* Grab it. */
        apr_thread_mutex_lock(ctx->mutex);

        /* Sleep only when there is nothing to do.  Documents queued while
         * we were busy were signalled when nobody was waiting; sleeping
         * unconditionally would leave them stranded until some later
         * signal arrives.
         */
        if (!ctx->doc_queue->nelts) {
            apr_thread_cond_wait(ctx->condvar, ctx->mutex);
        }

        /* Fetch the doc off the list.  The wake-up may have been the
         * shutdown signal, so the queue can still be empty here.
         */
        if (ctx->doc_queue->nelts) {
            item = *(doc_path_t**)(apr_array_pop(ctx->doc_queue));
        }

        /* Don't need the mutex now. */
        apr_thread_mutex_unlock(ctx->mutex);

        /* Parse the doc/url pair. */
        if (item) {
            status = find_href_doc(item->doc, item->path, ctx, subpool);
            if (status) {
                printf("Error finding hrefs: %d %s\n", status, item->path);
            }
            /* Free the doc pair and its pool. */
            apr_pool_destroy(item->pool);
            serf_bucket_mem_free(ctx->doc_queue_alloc, item->path);
            serf_bucket_mem_free(ctx->doc_queue_alloc, item);
        }

        /* Hey are we done? */
        if (!apr_atomic_read32(ctx->requests_outstanding)) {
            break;
        }
    }

    return NULL;
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* Print the command-line synopsis to stdout.  Only the options that the
 * option parser in main() accepts ("a:hv") are listed.  pool is unused.
 */
static void print_usage(apr_pool_t *pool)
{
    (void)pool;

    puts("serf_spider [options] URL");
    puts("-h\tDisplay this help");
    puts("-v\tDisplay version");
    puts("-a <user:password> Present Basic authentication credentials");
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
int main(int argc, const char **argv)
|
|
Packit |
3adb1e |
{
|
|
Packit |
3adb1e |
apr_status_t status;
|
|
Packit |
3adb1e |
apr_pool_t *pool;
|
|
Packit |
3adb1e |
apr_sockaddr_t *address;
|
|
Packit |
3adb1e |
serf_context_t *context;
|
|
Packit |
3adb1e |
serf_connection_t *connection;
|
|
Packit |
3adb1e |
app_baton_t app_ctx;
|
|
Packit |
3adb1e |
handler_baton_t *handler_ctx;
|
|
Packit |
3adb1e |
apr_uri_t url;
|
|
Packit |
3adb1e |
const char *raw_url, *method;
|
|
Packit |
3adb1e |
int count;
|
|
Packit |
3adb1e |
apr_getopt_t *opt;
|
|
Packit |
3adb1e |
char opt_c;
|
|
Packit |
3adb1e |
char *authn = NULL;
|
|
Packit |
3adb1e |
const char *opt_arg;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* For the parser threads */
|
|
Packit |
3adb1e |
apr_thread_t *thread[3];
|
|
Packit |
3adb1e |
apr_threadattr_t *tattr;
|
|
Packit |
3adb1e |
apr_status_t parser_status;
|
|
Packit |
3adb1e |
parser_baton_t *parser_ctx;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
apr_initialize();
|
|
Packit |
3adb1e |
atexit(apr_terminate);
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
apr_pool_create(&pool, NULL);
|
|
Packit |
3adb1e |
apr_atomic_init(pool);
|
|
Packit |
3adb1e |
/* serf_initialize(); */
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* Default to one round of fetching. */
|
|
Packit |
3adb1e |
count = 1;
|
|
Packit |
3adb1e |
/* Default to GET. */
|
|
Packit |
3adb1e |
method = "GET";
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
apr_getopt_init(&opt, pool, argc, argv);
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
while ((status = apr_getopt(opt, "a:hv", &opt_c, &opt_arg)) ==
|
|
Packit |
3adb1e |
APR_SUCCESS) {
|
|
Packit |
3adb1e |
int srclen, enclen;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
switch (opt_c) {
|
|
Packit |
3adb1e |
case 'a':
|
|
Packit |
3adb1e |
srclen = strlen(opt_arg);
|
|
Packit |
3adb1e |
enclen = apr_base64_encode_len(srclen);
|
|
Packit |
3adb1e |
authn = apr_palloc(pool, enclen + 6);
|
|
Packit |
3adb1e |
strcpy(authn, "Basic ");
|
|
Packit |
3adb1e |
(void) apr_base64_encode(&authn[6], opt_arg, srclen);
|
|
Packit |
3adb1e |
break;
|
|
Packit |
3adb1e |
case 'h':
|
|
Packit |
3adb1e |
print_usage(pool);
|
|
Packit |
3adb1e |
exit(0);
|
|
Packit |
3adb1e |
break;
|
|
Packit |
3adb1e |
case 'v':
|
|
Packit |
3adb1e |
puts("Serf version: " SERF_VERSION_STRING);
|
|
Packit |
3adb1e |
exit(0);
|
|
Packit |
3adb1e |
default:
|
|
Packit |
3adb1e |
break;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
if (opt->ind != opt->argc - 1) {
|
|
Packit |
3adb1e |
print_usage(pool);
|
|
Packit |
3adb1e |
exit(-1);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
raw_url = argv[opt->ind];
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
apr_uri_parse(pool, raw_url, &url;;
|
|
Packit |
3adb1e |
if (!url.port) {
|
|
Packit |
3adb1e |
url.port = apr_uri_port_of_scheme(url.scheme);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
if (!url.path) {
|
|
Packit |
3adb1e |
url.path = "/";
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
if (strcasecmp(url.scheme, "https") == 0) {
|
|
Packit |
3adb1e |
app_ctx.using_ssl = 1;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
else {
|
|
Packit |
3adb1e |
app_ctx.using_ssl = 0;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
status = apr_sockaddr_info_get(&address,
|
|
Packit |
3adb1e |
url.hostname, APR_UNSPEC, url.port, 0,
|
|
Packit |
3adb1e |
pool);
|
|
Packit |
3adb1e |
if (status) {
|
|
Packit |
3adb1e |
printf("Error creating address: %d\n", status);
|
|
Packit |
3adb1e |
exit(1);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
context = serf_context_create(pool);
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* ### Connection or Context should have an allocator? */
|
|
Packit |
3adb1e |
app_ctx.bkt_alloc = serf_bucket_allocator_create(pool, NULL, NULL);
|
|
Packit |
3adb1e |
app_ctx.ssl_ctx = NULL;
|
|
Packit |
3adb1e |
app_ctx.authn = authn;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
connection = serf_connection_create(context, address,
|
|
Packit |
3adb1e |
conn_setup, &app_ctx,
|
|
Packit |
3adb1e |
closed_connection, &app_ctx,
|
|
Packit |
3adb1e |
pool);
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
handler_ctx = (handler_baton_t*)serf_bucket_mem_alloc(app_ctx.bkt_alloc,
|
|
Packit |
3adb1e |
sizeof(handler_baton_t));
|
|
Packit |
3adb1e |
handler_ctx->allocator = app_ctx.bkt_alloc;
|
|
Packit |
3adb1e |
handler_ctx->doc_queue = apr_array_make(pool, 1, sizeof(doc_path_t*));
|
|
Packit |
3adb1e |
handler_ctx->doc_queue_alloc = app_ctx.bkt_alloc;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
handler_ctx->requests_outstanding =
|
|
Packit |
3adb1e |
(apr_uint32_t*)serf_bucket_mem_alloc(app_ctx.bkt_alloc,
|
|
Packit |
3adb1e |
sizeof(apr_uint32_t));
|
|
Packit |
3adb1e |
apr_atomic_set32(handler_ctx->requests_outstanding, 0);
|
|
Packit |
3adb1e |
handler_ctx->hdr_read = 0;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
parser_ctx = (void*)serf_bucket_mem_alloc(app_ctx.bkt_alloc,
|
|
Packit |
3adb1e |
sizeof(parser_baton_t));
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
parser_ctx->requests_outstanding = handler_ctx->requests_outstanding;
|
|
Packit |
3adb1e |
parser_ctx->connection = connection;
|
|
Packit |
3adb1e |
parser_ctx->app_ctx = &app_ctx;
|
|
Packit |
3adb1e |
parser_ctx->doc_queue = handler_ctx->doc_queue;
|
|
Packit |
3adb1e |
parser_ctx->doc_queue_alloc = handler_ctx->doc_queue_alloc;
|
|
Packit |
3adb1e |
/* Restrict ourselves to this host. */
|
|
Packit |
3adb1e |
parser_ctx->hostinfo = url.hostinfo;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
status = apr_thread_mutex_create(&parser_ctx->mutex,
|
|
Packit |
3adb1e |
APR_THREAD_MUTEX_DEFAULT, pool);
|
|
Packit |
3adb1e |
if (status) {
|
|
Packit |
3adb1e |
printf("Couldn't create mutex %d\n", status);
|
|
Packit |
3adb1e |
return status;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
status = apr_thread_cond_create(&parser_ctx->condvar, pool);
|
|
Packit |
3adb1e |
if (status) {
|
|
Packit |
3adb1e |
printf("Couldn't create condvar: %d\n", status);
|
|
Packit |
3adb1e |
return status;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* Let the handler now which condvar to use. */
|
|
Packit |
3adb1e |
handler_ctx->doc_queue_condvar = parser_ctx->condvar;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
apr_threadattr_create(&tattr, pool);
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* Start the parser thread. */
|
|
Packit |
3adb1e |
apr_thread_create(&thread[0], tattr, parser_thread, parser_ctx, pool);
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* Deliver the first request. */
|
|
Packit |
3adb1e |
create_request(url.hostinfo, url.path, NULL, NULL, parser_ctx, pool);
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* Go run our normal thread. */
|
|
Packit |
3adb1e |
while (1) {
|
|
Packit |
3adb1e |
int tries = 0;
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
status = serf_context_run(context, SERF_DURATION_FOREVER, pool);
|
|
Packit |
3adb1e |
if (APR_STATUS_IS_TIMEUP(status))
|
|
Packit |
3adb1e |
continue;
|
|
Packit |
3adb1e |
if (status) {
|
|
Packit |
3adb1e |
char buf[200];
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
printf("Error running context: (%d) %s\n", status,
|
|
Packit |
3adb1e |
apr_strerror(status, buf, sizeof(buf)));
|
|
Packit |
3adb1e |
exit(1);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* We run this check to allow our parser threads to add more
|
|
Packit |
3adb1e |
* requests to our queue.
|
|
Packit |
3adb1e |
*/
|
|
Packit |
3adb1e |
for (tries = 0; tries < 3; tries++) {
|
|
Packit |
3adb1e |
if (!apr_atomic_read32(handler_ctx->requests_outstanding)) {
|
|
Packit |
3adb1e |
#ifdef SERF_VERBOSE
|
|
Packit |
3adb1e |
printf("Waiting...");
|
|
Packit |
3adb1e |
#endif
|
|
Packit |
3adb1e |
apr_sleep(100000);
|
|
Packit |
3adb1e |
#ifdef SERF_VERBOSE
|
|
Packit |
3adb1e |
printf("Done\n");
|
|
Packit |
3adb1e |
#endif
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
else {
|
|
Packit |
3adb1e |
break;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
if (tries >= 3) {
|
|
Packit |
3adb1e |
break;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
/* Debugging purposes only! */
|
|
Packit |
3adb1e |
serf_debug__closed_conn(app_ctx.bkt_alloc);
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
printf("Quitting...\n");
|
|
Packit |
3adb1e |
serf_connection_close(connection);
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
/* wake up the parser via condvar signal */
|
|
Packit |
3adb1e |
apr_thread_cond_signal(parser_ctx->condvar);
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
status = apr_thread_join(&parser_status, thread[0]);
|
|
Packit |
3adb1e |
if (status) {
|
|
Packit |
3adb1e |
printf("Error joining thread: %d\n", status);
|
|
Packit |
3adb1e |
return status;
|
|
Packit |
3adb1e |
}
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
serf_bucket_mem_free(app_ctx.bkt_alloc, handler_ctx->requests_outstanding);
|
|
Packit |
3adb1e |
serf_bucket_mem_free(app_ctx.bkt_alloc, parser_ctx);
|
|
Packit |
3adb1e |
|
|
Packit |
3adb1e |
apr_pool_destroy(pool);
|
|
Packit |
3adb1e |
return 0;
|
|
Packit |
3adb1e |
}
|