/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */


/*
 * Author: Wan-Teh Chang
 *
 * Given an HTTP URL, httpget uses the GET method to fetch the file.
 * The fetched file is written to stdout by default, or can be
 * saved in an output file.
 *
 * This is a single-threaded program.
 */

#include "prio.h"
#include "prnetdb.h"
#include "prlog.h"
#include "prerror.h"
#include "prprf.h"
#include "prinit.h"

#include <stdio.h>
#include <string.h>
#include <stdlib.h>  /* for atoi */

#define FCOPY_BUFFER_SIZE (16 * 1024)
#define INPUT_BUFFER_SIZE 1024
#define LINE_SIZE 512
#define HOST_SIZE 256
#define PORT_SIZE 32
#define PATH_SIZE 512

/*
 * Look-ahead storage shared by ReadLine and DrainInputBuffer.
 *
 * Bytes read from the socket but not yet handed to a caller live in the
 * half-open range [inputHead, inputTail).  When the two pointers are
 * equal there is nothing buffered.
 */

static char inputBuf[INPUT_BUFFER_SIZE];

/* One past the last element of inputBuf. */
static char *inputBufEnd = inputBuf + sizeof inputBuf;

/* First buffered byte that has not been consumed yet. */
static char *inputHead = inputBuf;

/* One past the last buffered byte. */
static char *inputTail = inputBuf;

/* Becomes PR_TRUE once ReadLine sees PR_Read return 0. */
static PRBool endOfStream = PR_FALSE;

/*
 * ReadLine --
 *
 * Copies one line of text from fd into buf, refilling inputBuf from fd
 * whenever it runs dry.  A line ends at LF; a CR immediately before the
 * LF is dropped, so the stored terminator is always a single '\n'.
 * The result is always null-terminated.  At most bufSize - 1 bytes are
 * stored, so an over-long line is returned in pieces.  Bytes read past
 * the end of the line stay in inputBuf for the next ReadLine or
 * DrainInputBuffer call.
 *
 * Returns the number of bytes stored in buf (not counting the null
 * byte), 0 at end of stream, or -1 if PR_Read fails.
 */

PRInt32 ReadLine(PRFileDesc *fd, char *buf, PRUint32 bufSize)
{
    char *out = buf;
    char *outLimit = buf + bufSize;  /* one past the end of buf */
    char *pendingCR = NULL;          /* where the most recent '\r' was stored */
    PRBool gotLine = PR_FALSE;
    PRInt32 got;

    for (;;) {
        PR_ASSERT(inputBuf <= inputHead && inputHead <= inputTail
                && inputTail <= inputBufEnd);

        /* Move buffered bytes into buf until a line ends or space runs out. */
        while (!gotLine && inputHead != inputTail && out < outLimit - 1) {
            char c = *inputHead++;

            if (c == '\r') {
                pendingCR = out;
            } else if (c == '\n') {
                gotLine = PR_TRUE;
                if (pendingCR == out - 1) {
                    /* Step back so the '\n' overwrites the preceding '\r'. */
                    out--;
                }
            }
            *out++ = c;
        }

        if (gotLine || out == outLimit - 1 || endOfStream) {
            break;
        }

        /* Only an empty input buffer can bring us here. */
        PR_ASSERT(inputHead == inputTail);

        got = PR_Read(fd, inputBuf, sizeof(inputBuf));
        if (got == -1) {
            *out = '\0';
            return -1;
        }
        if (got == 0) {
            /* Remember the EOF so later calls do not read again. */
            endOfStream = PR_TRUE;
            break;
        }
        inputHead = inputBuf;
        inputTail = inputBuf + got;
    }

    *out = '\0';
    return out - buf;
}

/*
 * DrainInputBuffer --
 *
 * Moves up to bufSize bytes of data left over in inputBuf (the range
 * [inputHead, inputTail)) into buf and advances inputHead past them.
 *
 * Returns the number of bytes copied.  When inputBuf is empty, returns
 * -1 if the end of the stream has already been seen and 0 otherwise.
 * 0 is also returned when bufSize is 0.
 */
PRInt32 DrainInputBuffer(char *buf, PRUint32 bufSize)
{
    /*
     * inputHead <= inputTail and both point into inputBuf, so the
     * difference is a small non-negative number.  Keeping it unsigned
     * means a bufSize of 2^31 or more is compared correctly instead of
     * turning negative through a cast to PRInt32.
     */
    PRUint32 avail = (PRUint32) (inputTail - inputHead);

    if (avail == 0) {
        return endOfStream ? -1 : 0;
    }
    if (bufSize < avail) {
        avail = bufSize;
    }
    memcpy(buf, inputHead, avail);
    inputHead += avail;
    return (PRInt32) avail;
}

/*
 * FetchFile --
 *
 * Copies the rest of the stream to out: first whatever is still held in
 * the input buffer (via DrainInputBuffer), then everything PR_Read
 * delivers from in until it returns 0.
 *
 * Returns PR_SUCCESS when the end of the stream is reached, PR_FAILURE
 * (after printing a message to stderr) on a short write or a read error.
 */
PRStatus FetchFile(PRFileDesc *in, PRFileDesc *out)
{
    char chunk[FCOPY_BUFFER_SIZE];
    PRInt32 count;

    /* Step 1: flush the bytes left over in the input buffer. */
    for (;;) {
        count = DrainInputBuffer(chunk, sizeof(chunk));
        if (count <= 0) {
            break;
        }
        if (PR_Write(out, chunk, count) != count) {
            fprintf(stderr, "httpget: cannot write to file\n");
            return PR_FAILURE;
        }
    }
    if (count < 0) {
        /* Buffer is empty and the stream has already ended. */
        return PR_SUCCESS;
    }

    /* Step 2: stream the remainder straight from the socket. */
    for (;;) {
        count = PR_Read(in, chunk, sizeof(chunk));
        if (count <= 0) {
            break;
        }
        if (PR_Write(out, chunk, count) != count) {
            fprintf(stderr, "httpget: cannot write to file\n");
            return PR_FAILURE;
        }
    }
    if (count < 0) {
        fprintf(stderr, "httpget: cannot read from socket\n");
        return PR_FAILURE;
    }
    return PR_SUCCESS;
}

/*
 * FastFetchFile --
 *
 * Fetches up to size bytes of the stream into the file out by
 * memory-mapping the file and reading directly into the mapping.
 * Bytes left over in the input buffer are copied first, then the rest
 * is read from in.  main passes the parsed Content-Length value as size.
 *
 * Returns PR_SUCCESS if the data was stored (a stream that ends before
 * size bytes arrive is still treated as success).  Returns PR_FAILURE,
 * after printing a message to stderr, if the file cannot be mapped, the
 * socket read fails, or the mapping cannot be released.  The mapping
 * and the file map are released on every path.
 */
PRStatus FastFetchFile(PRFileDesc *in, PRFileDesc *out, PRUint32 size)
{
    PRInt32 nBytes;
    PRFileMap *outfMap;
    void *addr;
    char *start;
    PRUint32 rem;
    PRUint32 bytesToRead;
    PRStatus result = PR_SUCCESS;
    PRInt64 sz64;

    LL_UI2L(sz64, size);
    outfMap = PR_CreateFileMap(out, sz64, PR_PROT_READWRITE);
    if (outfMap == NULL) {
        /* Checked at run time: PR_ASSERT alone vanishes in release builds. */
        fprintf(stderr, "httpget: cannot create file map: (%d, %d)\n",
                PR_GetError(), PR_GetOSError());
        return PR_FAILURE;
    }
    addr = PR_MemMap(outfMap, LL_ZERO, size);
    if (addr == NULL) {
        fprintf(stderr, "cannot memory-map file: (%d, %d)\n", PR_GetError(),
                PR_GetOSError());

        PR_CloseFileMap(outfMap);
        return PR_FAILURE;
    }

    start = (char *) addr;
    rem = size;

    /* First hand over the bytes left over in the input buffer. */
    while ((nBytes = DrainInputBuffer(start, rem)) > 0) {
        start += nBytes;
        rem -= nBytes;
    }

    /*
     * nBytes < 0 means the buffer is empty and the stream has ended, so
     * there is nothing more to read.  Otherwise read the rest from the
     * socket in pieces of at most FCOPY_BUFFER_SIZE bytes.
     */
    if (nBytes == 0) {
        while (rem > 0) {
            bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE;
            nBytes = PR_Read(in, start, bytesToRead);
            if (nBytes <= 0) {
                break;
            }
            start += nBytes;
            rem -= nBytes;
        }
        if (nBytes < 0) {
            fprintf(stderr, "httpget: cannot read from socket\n");
            result = PR_FAILURE;
        }
    }

    /* Single cleanup path: always release the mapping and the file map. */
    if (PR_MemUnmap(addr, size) != PR_SUCCESS) {
        fprintf(stderr, "httpget: cannot unmap file: (%d, %d)\n",
                PR_GetError(), PR_GetOSError());
        result = PR_FAILURE;
    }
    if (PR_CloseFileMap(outfMap) != PR_SUCCESS) {
        fprintf(stderr, "httpget: cannot close file map: (%d, %d)\n",
                PR_GetError(), PR_GetOSError());
        result = PR_FAILURE;
    }
    return result;
}

/*
 * ParseURL --
 *
 * Splits a URL of the form http://host[:port][/path] into its parts.
 *
 *   url   the null-terminated URL to parse (not modified)
 *   host  receives the host name; hostSize is the size of the buffer
 *   port  receives the port as a decimal string; "80" if the URL has
 *         no port or an empty one ("http://host:/")
 *   path  receives everything from the first '/' after the authority
 *         to the end of the URL; "/" if the URL has no path
 *
 * Each output is null-terminated.  Returns PR_SUCCESS on success.
 * Returns PR_FAILURE, after printing a message to stderr, if the URL
 * does not start with "http://", has an empty host, or a component
 * does not fit in its buffer.
 */
PRStatus ParseURL(char *url, char *host, PRUint32 hostSize,
    char *port, PRUint32 portSize, char *path, PRUint32 pathSize)
{
    char *end;
    char *dst;
    char *hostEnd;
    char *portEnd;
    char *pathEnd;

    if (strncmp(url, "http", 4)) {
        fprintf(stderr, "httpget: the protocol must be http\n");
        return PR_FAILURE;
    }
    if (strncmp(url + 4, "://", 3) || url[7] == '\0') {
        fprintf(stderr, "httpget: malformed URL: %s\n", url);
        return PR_FAILURE;
    }

    /* Host: everything up to ':', '/' or the end of the string. */
    end = url + 7;
    dst = host;
    hostEnd = host + hostSize;
    while (*end && *end != ':' && *end != '/') {
        if (dst == hostEnd - 1) {
            fprintf(stderr, "httpget: host name too long\n");
            return PR_FAILURE;
        }
        *(dst++) = *(end++);
    }
    *dst = '\0';
    if (dst == host) {
        /* "http:///path" or "http://:80/" -- there is no host to contact. */
        fprintf(stderr, "httpget: malformed URL: %s\n", url);
        return PR_FAILURE;
    }

    if (*end == '\0') {
        PR_snprintf(port, portSize, "%d", 80);
        PR_snprintf(path, pathSize, "%s", "/");
        return PR_SUCCESS;
    }

    if (*end == ':') {
        /* Port: everything between ':' and the next '/' (or the end). */
        end++;
        dst = port;
        portEnd = port + portSize;
        while (*end && *end != '/') {
            if (dst == portEnd - 1) {
                fprintf(stderr, "httpget: port number too long\n");
                return PR_FAILURE;
            }
            *(dst++) = *(end++);
        }
        *dst = '\0';
        if (dst == port) {
            /* An empty port means the scheme's default port. */
            PR_snprintf(port, portSize, "%d", 80);
        }
        if (*end == '\0') {
            PR_snprintf(path, pathSize, "%s", "/");
            return PR_SUCCESS;
        }
    } else {
        PR_snprintf(port, portSize, "%d", 80);
    }

    /* Path: the remainder of the URL, starting at the '/'. */
    dst = path;
    pathEnd = path + pathSize;
    while (*end) {
        if (dst == pathEnd - 1) {
            fprintf(stderr, "httpget: file pathname too long\n");
            return PR_FAILURE;
        }
        *(dst++) = *(end++);
    }
    *dst = '\0';
    return PR_SUCCESS;
}

/* Writes the three supported command-line forms to stderr. */
void PrintUsage(void)
{
    static const char usageText[] =
        "usage: httpget url\n"
        "       httpget -o outputfile url\n"
        "       httpget url -o outputfile\n";

    fputs(usageText, stderr);
}

int main(int argc, char **argv)
{
    PRHostEnt hostentry;
    char buf[PR_NETDB_BUF_SIZE];
    PRNetAddr addr;
    PRFileDesc *socket = NULL, *file = NULL;
    PRIntn cmdSize;
    char host[HOST_SIZE];
    char port[PORT_SIZE];
    char path[PATH_SIZE];
    char line[LINE_SIZE];
    int exitStatus = 0;
    PRBool endOfHeader = PR_FALSE;
    char *url;
    char *fileName = NULL;
    PRUint32 fileSize;

    if (argc != 2 && argc != 4) {
	PrintUsage();
	exit(1);
    }

    if (argc == 2) {
	/*
	 * case 1: httpget url
	 */
	url = argv[1];
    } else {
	if (strcmp(argv[1], "-o") == 0) {
	    /*
	     * case 2: httpget -o outputfile url
	     */
	    fileName = argv[2];
	    url = argv[3];
        } else {
	    /*
	     * case 3: httpget url -o outputfile
	     */
	    url = argv[1];
	    if (strcmp(argv[2], "-o") != 0) {
		PrintUsage();
		exit(1);
            }
	    fileName = argv[3];
	}
    }

    if (ParseURL(url, host, sizeof(host), port, sizeof(port),
	    path, sizeof(path)) == PR_FAILURE) {
	exit(1);
    }

    if (PR_GetHostByName(host, buf, sizeof(buf), &hostentry)
	    == PR_FAILURE) {
        fprintf(stderr, "httpget: unknown host name: %s\n", host);
	exit(1);
    }

    addr.inet.family = PR_AF_INET;
    addr.inet.port = PR_htons((short) atoi(port));
    addr.inet.ip = *((PRUint32 *) hostentry.h_addr_list[0]);

    socket = PR_NewTCPSocket();
    if (socket == NULL) {
	fprintf(stderr, "httpget: cannot create new tcp socket\n");
	exit(1);
    }

    if (PR_Connect(socket, &addr, PR_INTERVAL_NO_TIMEOUT) == PR_FAILURE) {
	fprintf(stderr, "httpget: cannot connect to http server\n");
	exitStatus = 1;
	goto done;
    }

    if (fileName == NULL) {
	file = PR_STDOUT;
    } else {
        file = PR_Open(fileName, PR_RDWR | PR_CREATE_FILE | PR_TRUNCATE,
		00777);
        if (file == NULL) {
	    fprintf(stderr, "httpget: cannot open file %s: (%d, %d)\n",
		    fileName, PR_GetError(), PR_GetOSError());
	    exitStatus = 1;
	    goto done;
	}
    }

    cmdSize = PR_snprintf(buf, sizeof(buf), "GET %s HTTP/1.0\r\n\r\n", path);
    PR_ASSERT(cmdSize == (PRIntn) strlen("GET  HTTP/1.0\r\n\r\n")
            + (PRIntn) strlen(path));
    if (PR_Write(socket, buf, cmdSize) != cmdSize) {
	fprintf(stderr, "httpget: cannot write to http server\n");
	exitStatus = 1;
	goto done;
    }

    if (ReadLine(socket, line, sizeof(line)) <= 0) {
	fprintf(stderr, "httpget: cannot read line from http server\n");
	exitStatus = 1;
	goto done;
    }

    /* HTTP response: 200 == OK */
    if (strstr(line, "200") == NULL) {
	fprintf(stderr, "httpget: %s\n", line);
	exitStatus = 1;
	goto done;
    }

    while (ReadLine(socket, line, sizeof(line)) > 0) {
	if (line[0] == '\n') {
	    endOfHeader = PR_TRUE;
	    break;
	}
	if (strncmp(line, "Content-Length", 14) == 0
		|| strncmp(line, "Content-length", 14) == 0) {
	    char *p = line + 14;

	    while (*p == ' ' || *p == '\t') {
		p++;
	    }
	    if (*p != ':') {
		continue;
            }
	    p++;
	    while (*p == ' ' || *p == '\t') {
		p++;
	    }
	    fileSize = 0;
	    while ('0' <= *p && *p <= '9') {
		fileSize = 10 * fileSize + (*p - '0');
		p++;
            }
	}
    }
    if (endOfHeader == PR_FALSE) {
	fprintf(stderr, "httpget: cannot read line from http server\n");
	exitStatus = 1;
	goto done;
    }

    if (fileName == NULL || fileSize == 0) {
        FetchFile(socket, file);
    } else {
	FastFetchFile(socket, file, fileSize);
    }

done:
    if (socket) PR_Close(socket);
    if (file) PR_Close(file);
    PR_Cleanup();
    return exitStatus;
}