% ------------------------------------------- % latex generated by: booktolatex.cgi % from source file : ../htdocs/books/web-dev/web-dev-book.txt % on: 23 August 2017, 8:19am % querystring: books/web-dev/web-dev-book.txt % document-root: /var/www/html % script-name: /cgi-bin/booktolatex.cgi % Server-name: bumble.sourceforge.net % Sed-script: booktolatex.sed % ------------------------------------------- \documentclass[a4paper,12pt]{article} \usepackage[margin=0.4cm,noheadfoot]{geometry} \usepackage{color} %% to use colours, use "xcolor" for more \usepackage{multicol} %% for multiple columns \usepackage{keystroke} %% for keyboard key images \usepackage[toc]{multitoc} %% for multi column table of contents \usepackage{tocloft} %% to customize the table of contents \setcounter{tocdepth}{2} %% only display 2 levels in the contents \setlength{\cftbeforesecskip}{0cm} %% make the toc more compact \usepackage{listings} %% for nice code listings %\lstset{language={}, \lstset{language=, %% define special comment delimiters '##(' and ')' moredelim=[s][\color{grey}\itshape\footnotesize\ttfamily]{~(}{)}, basicstyle=\ttfamily, %% fixed pitch font xleftmargin=1cm, %% margin on the left outside the frames breaklines=true, %% break long code lines breakatwhitespace=false, %% break long code lines anywhere breakindent=10pt, %% reduce the indent from 20pt to 10 postbreak=\mbox{{\color{blue}\small$\Rightarrow$\space}}, %% mark with arrow showstringspaces=false, %% dont show spaces within strings framerule=5pt, %% thickness of the frames rulecolor=\color{lightgrey}, frame=l} %% source code settings \usepackage{graphicx} %% to include images \usepackage{fancybox} %% boxes with rounded corners \usepackage{wrapfig} %% flow text around tables, images \usepackage{tabularx} %% change width of tables \usepackage[table]{xcolor} %% alternate row colour tables \usepackage{booktabs} %% for heavier rules in tables \usepackage[small,compact]{titlesec} %% sections more compact, less space 
\usepackage{enumitem} %% more compact and better lists \setlist{noitemsep} %% reduce list item spacing \usepackage{hyperref} %% make urls into hyperlinks \hypersetup{ %% add "pdftex," if only pdf output is required colorlinks=true, %% set up the colours for the hyperlinks linkcolor=black, %% internal document links black urlcolor=black, %% url links black filecolor=red, citecolor=red, bookmarks=true, pdfpagemode=UseOutlines} % define some colours to use \definecolor{lightgrey}{gray}{0.70} \definecolor{grey}{gray}{0.30} \titleformat{\section}[frame] %% titlesec: create framed section headings {\normalfont} {\filleft \footnotesize \enspace Section \thesection\enspace\enspace} {3pt} {\bfseries\itshape\filright} \title{Web Development using Linux} \author{} \date{27 October 2011, 6:33pm} \setlength{\parindent}{0pt} % \setlength{\parskip}{1ex} % label lists with stars \renewcommand{\labelitemi}{$\star$} \begin{document} \centerline{\Large \bf Web Development using Linux} \medskip \begin{center} {\huge ``}\textit{}{\huge ''} \textsc{} \end{center} % ----------------------------------- % the toc should be 2 columns because of the \multitoc package \tableofcontents \begin{multicols}{2} \begin{lstlisting} This book seeks to provide recipes for developing web sites using the Linux operating system, with an emphasis on command line tools. This book is not about html and css since that is covered in the html-css-book.txt IMAGES IMAGE COMPRESSION .... == tools for image compression .. webpack - .. pngout - .. IMAGE RESIZING .... * Convert a bunch of HTML files from ISO-8859-1 to UTF-8 file >> for x in `find . -name '*.html'` ; do iconv -f ISO-8859-1 -t UTF-8 $x > "$x.utf8"; rm $x; mv "$x.utf8" $x; done FILE TRANSFER == tools for file transfer over the net .. ftp - the old file transfer tool .. rsync - transfer only changed or new files .. sftp - an interactive secure version of ftp .. scp - a non-interactive secure ftp .. 
sitecopy - synchronize a remote site with what is local .. WEB SITE MIRRORING * Download all images from a site >> wget -r -l1 --no-parent -nH -nd -P/tmp -A".gif,.jpg" http://example.com/images WEB SITE UPLOAD use rsync or sitecopy POSTING DATA * Submit data to an HTML form with POST method and save the response >> curl -sd 'rid=value&submit=SUBMIT' > out.html * post with a proxy and authentication >> curl -F name='../htdocs/notes/'$1 -F contents='<'$1 -u user:upass -x prox.net:8080 -U bob:proxpass http://serv.net/save.cgi HTML STUFF HTML LINKS .... * get the links from a page >> lynx -dump -listonly www.server.net/page.html * find urls within an html file (most of them anyway) >> egrep 'https?://([[:alpha:]]([-[:alnum:]]+[[:alnum:]])*\.)+[[:alpha:]]{2,3}(: \d+)?(/([-\w/_\.]*(\?\S+)?)?)?' ENTITIES .... * encode HTML entities >> perl -MHTML::Entities -ne 'print encode_entities($_)' /tmp/subor.txt * or use xmlstarlet to encode entities. BASH AND WEB DEVELOPMENT Using the bash shell to develop websites may be quite efficient, if unconventional. * possibly the simplest way to create a webpage from text >> cat page.txt | (echo '
'; cat -; echo '
') TEMPLATING WITH BASH .... * a simple template technique with bash >> export a=b; echo -e 'one\ntwo\nand ' | (echo 'cat << EE';sed 's//$a/g'; echo 'EE') | bash * use the technique above to substitute the date into the template >> cat template | (echo 'cat << EE';sed 's//$(date)/g'; echo 'EE') | bash FOLDER LISTINGS .... * list only folders >> ls -d */ | (echo '
    '; cat -; echo '
') * make an html directory listing out of the current folder >> echo "echo -e \"{$(echo * | tr ' ' ',')}"\" | bash * list all files and folders, no links >> a=$(echo *); echo 'echo -e "\n
  • "{'${a//" "/,}'}"
  • "' | bash * list only folders, no links >> a=$(echo */); echo 'echo -e "\n
  • "{'${a//" "/,}'}"
  • "' | bash * a for loop method to list only sub-folders as an html list \begin{lstlisting} echo "
      " for d in $(ls -d */); do echo "
    • $d
    • " done echo "
    " \end{lstlisting} * another for loop method to list only sub-folders as an html list \begin{lstlisting} echo "
      " for d in */; do echo "
    • $d
    • " done echo "
    " \end{lstlisting} * list subfolders as html links \begin{lstlisting} echo "
      " for d in $(ls -d */); do echo "
    • $d
    • " done echo "
    " \end{lstlisting} BASH CGI PROGRAMMING While it is most common for Cgi web-scripts to be written in the Perl language, it is also possible to write them using the normal Bash shell scripting language. Whether this is a good idea is completely another question... @@ http://en.wikipedia.org/wiki/Internet_media_type#List_of_common_media_types A list of common "media types" (such as "text/html") which are used in the "Content-Type:" field of the Cgi script. o- use the "2>&1" idiom at the end of script lines to redirect an error message to the "standard output" (which in the case of a Cgi script is the web-browser of the script visitor). This allows you, the developer to see what is going wrong with your bash cgi script. - using "here" documents with a bash cgi script is a simple way to produce content. - THE BASH CGI GOTCHAS .... o- the content-type line has to be before /anything/ or else nothing is printed. * there must be an empty line after the "content-type" line. \begin{lstlisting} echo "Content-Type: text/html" echo "...the query string is $QUERY_STRING" 2>&1 \end{lstlisting} * a bash cgi script indicating that the character set is "utf8" \begin{lstlisting} #!/bin/bash echo "Content-Type: text/html; charset=utf-8" echo echo "A Bash UTF8 Cgi Script!" 2>&1 \end{lstlisting} * show error messages in the browser generated by a cgi script line >> ech "this is a mistake" 2>&1 * show error messages in the browser with output redirection >> ech "this is a mistake" 2>&1 >save.txt ##(note that the 2>&1 should come before the file redirection) * a cgi script which display several environment variables \begin{lstlisting} #!/bin/bash cat << ENDxxx Content-Type: text/html; charset=utf-8 A bash cgi script
    DOCUMENT_ROOT
    $DOCUMENT_ROOT
    REMOTE_ADDRESS
    $REMOTE_ADDRESS
    SCRIPT_NAME
    $SCRIPT_NAME
    ENDxxx \end{lstlisting} GETTING AND DECODING FORM DATA .... @@ http://oinkzwurgl.org/bash_cgi bash functions for decoding cgi form data @@ http://www.fpx.de/fp/Software/ProcCGIsh.html a bash script and c program for decoding cgi form data. The c program needs to be compiled. Data sent from an html form to a web server can be sent in 2 different ways; in the querystring itself (the 'GET' method) and in the HTTP headers (the 'POST' method). If the data is 'posted' then the cgi script will receive the data on the standard input Data sent from an html form has to be url decoded. * data "posted" from an html form can be read from the standard input >> read postdata ##(the content length should first be checked) >> postdata=$(> file=$(perl -e "use CGI qw(:standard); print param('file')) ##(the CGI module takes care of url-decoding the form data) == cgi environment variables and meanings .. DOCUMENT_ROOT, The root directory of your server .. HTTP_COOKIE, The visitor's cookie, if one is set .. HTTP_HOST, The hostname of your server .. HTTP_REFERER, The URL of the page that called your script .. HTTP_USER_AGENT, The browser type of the visitor .. HTTPS, "on" if the script is being called through a secure server .. PATH, The system path your server is running under .. QUERY_STRING, The query string (see GET, below) .. REMOTE_ADDR, The IP address of the visitor .. REMOTE_HOST, The hostname of the visitor (if server has reverse-name-lookups on; otherwise this is the IP address again) .. REMOTE_PORT, The port the visitor is connected to on the web server .. REMOTE_USER, The visitor's username (for .htaccess-protected pages) .. REQUEST_METHOD, either "GET" or "POST" .. REQUEST_URI, The interpreted pathname of the requested document or CGI (relative to the document root) .. SCRIPT_FILENAME, The full pathname of the current CGI .. SCRIPT_NAME, The interpreted pathname of the current CGI (relative to the document root) .. 
SERVER_ADMIN, The email address for your server's webmaster .. SERVER_NAME, Your server's fully qualified domain name (e.g. www.cgi101.com) .. SERVER_PORT, The port number your server is listening on .. SERVER_SOFTWARE, The server software you're using (such as Apache 1.3) .. * some environment variables and example values \begin{lstlisting} SERVER_SOFTWARE = Apache/2.0.54 (Fedora) SERVER_NAME = www.comp.leeds.ac.uk GATEWAY_INTERFACE = CGI/1.1 SERVER_PROTOCOL = HTTP/1.1 SERVER_PORT = 80 REQUEST_METHOD = GET HTTP_ACCEPT = 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5' PATH_INFO = PATH_TRANSLATED = SCRIPT_NAME = /cgi-bin/Perl/environment-example QUERY_STRING = REMOTE_HOST = REMOTE_ADDR = 200.93.167.25 REMOTE_USER = CONTENT_TYPE = CONTENT_LENGTH = \end{lstlisting} == cgi Content-Type: values .. .. text/html, for html content .. text/plain, for plain text .. text/plain; charset=utf-8, plain text in the utf8 encoding .. text/xml, xml text .. XHTML == tools .. xmlstarlet - queries and edits xml from the command line .. CURL STUFF * getting a page via an authenticating proxy server >> curl -x proxy.utas.edu.au:8080 -U bobj http://www.server.net * getting a page via an authenticating proxy server as user 'bobj' with password 'asecret' >> curl -x proxy.net:8080 -U bobj:asecret http://www.server.net Supplying the password in this manner is possibly not a good idea from a security point of view * download a text file through a proxy and edit it with vim \begin{lstlisting} function edn { curl -x proxy.org.au:8080 -U bob:pass www.serv.net/a.txt -o ~/notes.txt vim ~/notes.txt } \end{lstlisting} * upload a file to a webserver with a cgi script and http authentication \begin{lstlisting} function up { [ -z "$1" ] && echo 'no parameter' && return 1; curl -F name='../htdocs/notes/'$1 -F contents='<'$1 -u user:upass -x prox.net:8080 -U bob:proxpass http://serv.net/save.cgi } \end{lstlisting} PERL TRICKS == useful modules .. 
www::mechanize .. lwp .. * a perl mechanize example \begin{lstlisting} # navigate to the main page $mech->get('http://www.somesite.com/'); # follow a link that contains the text 'download this' $mech->follow_link( text_regex => qr/download this/i ); # submit a POST form, to log into the site $mech->submit_form( with_fields => { username => 'mungo', password => 'lost-and-alone', } ); # save the results as a file $mech->save_content('somefile.zip'); \end{lstlisting} PHP * Testing php configuration >> php -r "phpinfo\(\);" * get the urls from a webpage >> $browser->getUrls() CRAWLING .. ScriptableBrowser (simpletest) LOAD TESTING == tools .. funkload - web testing .. STUFF USED IN GOOGLE CHROME bsdiff, bspatch, bzip2, dtoa, hunspell, ICU, JSCRE, libjpeg, libpng, libxml, libxslt, LZMA SDK, modp_b64, Mozilla interface to Java Plugin APIs, npapi, nspr, nss, Pthreads for win32, sqlite, tlslite, V8 assembler, WebKit, WTL, zlib FORUM SITES @@ ask.metafilter.com visited by knowledgable people @@ SERVING PAGES * share the current directory tree (via http) at http://$HOSTNAME:8000/ >> python -m SimpleHTTPServer * create a webserver to share all files in /tmp/mydocs on port 8081 >> wbox servermode webroot /tmp/mydocs * create a webserver to share all files in /tmp/mydocs on port 8080 >> wbox servermode serverport 8080 webroot /tmp/mydocs * Sharing file through http 80 port >> nc -w 5 -v -l -p 80 < file.ext CGI SERVERS * a simple cgi server >> python -m CGIHTTPServer 8080 * some kind of perl cgi server >> HTTP::Simple::PSGI SMALL WEBSERVERS @@ http://ask.metafilter.com/65481/Help-me-find-a-cool-little-unix-http-utility-I-cant-remember some good things about mini web servers @@ http://hping.org/wbox/ site for wbox == small quick and easy webservers .. wbox - http server .. thttpd - small web server .. mini_httpd - same author as thttpd but smaller .. webfs - serves a file system from the web .. busybox httpd command - small webservery thing .. WBOX developed from 2007 - 2009 ... 
* show how long each part of a webpage takes to generate >> wbox nowhere.net/page.html timesplit 1 * show the http header information for a page >> wbox www.google.it/notexistingpage.html 1 showhdr MAC OSX Installed by default are php, curl, TEMPLATING @@ http://www.perl.com/pub/a/2001/08/21/templating.html an article about using templating systems with perl == tools .. Template Toolkit - almost active development, perl, and python .. HTML::Mason - ? callback style, active as of 2010 .. Embperl - embedd perl into webpages, stopped 2006 .. HTML::Template - perl template module .. Text::Template - a general purpose templater .. Apache::ASP - use asp with apache, stopped 2004 .. CGI::FastTemplate - another one .. @@ http://template-toolkit.org/ the site for the template toolkit TEMPLATE TOOLKIT * example statement using dot notation. >> How are things in [% customer.address.city %]? * a for loop [% FOREACH list %] [% name %] [% END %] \end{multicols} \section{Html Template} \emph{ Example loop with html::template } ----- $<$TMPL\_LOOP list$>$ $<$a href=``$<$TMPL\_VAR url$>$''$>$$<$b$>$$<$TMPL\_VAR name$>$$<$/b$>$$<$/A$>$ $<$/TMPL\_LOOP$>$ ,,, \subsection{Mason} Seems to be in active development. It can be run without a webserver \begin{description}[labelindent=1cm, leftmargin=2cm, style=nextline] \item[\url{http://www.masonhq.com/}] the official site \end{description} \emph{ Install mason using apt-get } \begin{lstlisting} apt-get install libmason-perl ~(??? 
unchecked) \end{lstlisting} \section{Perl Stuff} \arrayrulecolor{gray} \begin{center} \begin{tabular}{ |rl| } \multicolumn{2}{c}{\textbf{ some interesting perl web modules }} \\ \hline \texttt{ Dancer } & Perl web apps with good examples \\ \texttt{ Web::Simple } & Simple web apps \\ \texttt{ CGI::Application } \\ \texttt{ Catalyst } & Big web apps \\ \texttt{ Mason } \\ \texttt{ Mojolicious } \\ \hline \end{tabular} \end{center} \subsection{Web Simple} \begin{description}[labelindent=1cm, leftmargin=2cm, style=nextline] \item[\url{http://search.cpan.org/~mstrout/Web-Simple-0.002/lib/Web/Simple.pm}] the documentation for web simple \end{description} Developed in 2009. Can create a web application without a webserver \begin{description}[labelindent=1cm, leftmargin=2cm, style=nextline] \item[\url{hobbs}] at stackoverflow.com knowledgeable perl web person \end{description} \section{Cpan Tool Crash Course} \emph{ Start a cpan shell to install mason } \begin{lstlisting} perl -MCPAN -e 'shell' \end{lstlisting} \arrayrulecolor{gray} \begin{center} \begin{tabular}{ |rl| } \multicolumn{2}{c}{\textbf{ simple cpan shell }} \\ \hline \texttt{ h } & Show help \\ \texttt{ get } & Get the source for a module \\ \texttt{ make } & Compile ? the module \\ \texttt{ test } & Test a module \\ \texttt{ install } & Do all of get, make, test \\ \texttt{ clean } & Get rid of a badly installed module \\ \texttt{ look } & See what's happening \\ \texttt{ readme } & See what's going on \\ \hline \end{tabular} \end{center} \begin{description}[labelindent=1cm, leftmargin=2cm, style=nextline] \item[\url{http://www.livejournal.com/doc/server/lj.install.perl_setup.modules.html}] a list of potentially useful modules used with livejournal \end{description} \emph{ Upgrade cpan but could cause problems ??? 
} \begin{lstlisting} perl -MCPAN -e shell cpan> install Bundle::CPAN cpan> reload cpan \end{lstlisting} \section{Getting Web Pages From The Command Line} \emph{ Http get of a web page via proxy server with login credentials } \begin{lstlisting} curl -U username[:password] -x proxy:proxyport webpage \end{lstlisting} \emph{ Use netcat to get a webpage } \begin{lstlisting} printf 'GET / HTTP/1.0\r\n\r\n' | nc -v www.somewebsite.com 80 \end{lstlisting} \emph{ Get a webpage with the console php tool } \begin{lstlisting} php -r "file('http://metafilter.com/');" \end{lstlisting} \emph{ Get a webpage with perl } \begin{lstlisting} perl -MHTTP::Client -e 'print HTTP::Client->new()->get("http://localhost/path")' \end{lstlisting} \emph{ A simpler way to do the same } \begin{lstlisting} perl -MLWP::Simple -e 'get("http://www.metafilter.com/")' \end{lstlisting} \section{Load Testing} \emph{ Perform 4 queries per second (with 4 processes) on the local webserver } \begin{lstlisting} wbox http://localhost clients 4 \end{lstlisting} \section{Blog Engines} \arrayrulecolor{gray} \begin{center} \begin{tabular}{ |rl| } \multicolumn{2}{c}{\textbf{ blog engines }} \\ \hline \texttt{ serendipity } & A blog engine with database backend \\ \hline \end{tabular} \end{center} \section{Notes} Google uses Python \begin{description}[labelindent=1cm, leftmargin=2cm, style=nextline] \item[\url{http://unixmages.com/}] an unrelated but interesting site \end{description} \end{document}