#!/usr/local/plan9/bin/rc
# werc.rc - request driver: loads helper libraries and site configuration,
# then serves one request described by CGI-style environment variables
# ($SERVER_NAME, $REQUEST_URI, $REQUEST_METHOD).
. ./util.rc
. ./handlers.rc
. ./fltr_cache.rc
cd ..

# Bracket expression matching every character that is NOT allowed in a
# request path; it is spliced into the sed scripts below.
forbidden_uri_chars='[^a-zA-Z0-9_+\-\/\.,:]'
difs=$ifs # Used to restore default ifs when needed

# Expected input: ls -F style, $sitedir/path/to/files/
# The first four sed commands in dirfilter, with the original author's labels:
#   s/\*$//          - ls -F marker for executable (+x) files
#   s,/+\./+,/,g     - symlink hack
#   s,^\./,,         - (useless?)
#   /\/[._][^\/]/d   - hidden files
# The remaining commands drop paths containing forbidden characters and
# sitemap.xml, index.md/index.tpl, robots.txt/sitemap.txt and _werc entries.
dirfilter='s/\*$//; s,/+\./+,/,g; s,^\./,,; /\/[._][^\/]/d; /'$forbidden_uri_chars'/d; /\/sitemap\.xml$/d; /\/index\.(md|tpl)$/d; /\/(robots|sitemap)\.txt$/d; /_werc\/?$/d; '
# Strips a trailing .md or .1 extension.
dirclean=' s/\.(md|1)$//; '

# Careful, the proper p9p path might not be set until initrc.local is sourced
path=(. $PLAN9/bin ./bin /bin /usr/bin)

http_content_type='text/html'
ll_add handlers_bar_left nav_tree
werc_root=`{pwd}
sitesdir=sites

# A local init file, when present, replaces the default one entirely.
if(test -f etc/initrc.local)
    . ./etc/initrc.local
if not
    . ./etc/initrc

# werc_exec_request: handle a single request.
#
# Reads:  $SERVER_NAME, $REQUEST_URI, $REQUEST_METHOD (plus $HTTP_USER_AGENT
#         for the debug line), $sitesdir, $werc_root, and whatever the sourced
#         _werc/config files define (e.g. $perm_redir_to,
#         $perm_redir_patterns, $extraHttpHeaders, $siteTitle, $siteSubtitle).
# Sets:   site, base_url, sitedir, master_template, current_date_time,
#         req_path, req_url, local_path, local_file, args, pageTitle,
#         req_paths_list, conf_wd (rc variables are global, so these remain
#         visible to the handlers and templates invoked below).
# Output: HTTP headers, a blank line, then the rendered master template
#         (headers only for HEAD requests).
# NOTE(review): perm_redirect is defined elsewhere; the code below is written
# as if it does not return - confirm.
fn werc_exec_request {
    site=$SERVER_NAME
    base_url=http://$site
    sitedir=$sitesdir/$site
    master_template=`{get_tpl_file master.tpl}
    current_date_time=`{date}

    # Note: $REQUEST_URI is not officially in CGI 1.1, but seems to be de-facto
    # Note: We only urldecode %5F->'_' because some sites (stackoverflow.com?) urlencode it in their links,
    # perhaps we should completely urldecode the whole url.
    #
    # Sanitising steps, in order: drop the query string, decode %5F, strip
    # forbidden characters, collapse runs of '/', collapse runs of '.' (so
    # '..' can never survive), and keep only the first line.
    # Slashes are collapsed AFTER the forbidden characters are stripped, so a
    # request such as /a/;/b cannot leave an empty path component (/a//b).
    req_path=`{echo -n $REQUEST_URI | sed 's/\?.*//; s/%5[Ff]/_/g; s/'^$forbidden_uri_chars^'//g; s!//+!/!g; s/\.\.*/./g; 1q'}
    req_url=$base_url^$req_path
    local_path=$sitedir$req_path
    local_file=''
    # Split the request path into its components.
    ifs='/' { args=`{echo -n $req_path} }

    # Canonicalise .../index to .../
    if(~ $req_path */index)
        perm_redirect `{echo $req_path | sed 's,/index$,/,'}

    # Trailing-slash and trailing-punctuation canonicalisation.
    # (';' and ')' are not in the allowed character set above, so they have
    # already been stripped from $req_path by the time these tests run.)
    if(~ $local_path */) {
        if(test -d $local_path)
            local_path=$local_path^'index'
        # XXX: This redir might step on apps with synthetic dirs.
        if not if(ls `{basename -d $local_path}^* >/dev/null >[2]/dev/null)
            perm_redirect `{echo $req_path|sed 's,/+$,,'}
    }
    if not if(~ $req_path *'.' *',' *';' *':')
        perm_redirect `{echo $req_path | sed 's/[.,;:)]$//'}
    if not if(test -d $local_path)
        perm_redirect $req_path^'/'

    # Default page title derived from the path components:
    # components joined with ' - ', 'a-b' -> 'a b', '_' -> ' '.
    if(! ~ $#args 0)
        ifs=$NEW_LINE { pageTitle=`{ echo $args|sed -e 's/ / - /g' -e 's/([a-z])-([a-z])/\1 \2/g' -e 's/_/ /g' } }

    # Walk from the site root down the request path, sourcing every
    # _werc/config found along the way (outermost first).
    cd $sitedir
    req_paths_list='/' # Note: req_paths_list doesn't include 'synthetic' dirs.
    conf_wd='/' # Used in config files to know where we are in the document tree.
    if(test -f _werc/config)
        . _werc/config
    for(i in $args) {
        conf_wd=$conf_wd^$i
        req_paths_list=($req_paths_list $conf_wd)
        if(test -d $i) {
            conf_wd=$conf_wd'/'
            cd $i
            if(test -f _werc/config)
                . _werc/config
        }
    }
    cd $werc_root

    # Redirects requested by the configuration files sourced above.
    if(~ $#perm_redir_to 1)
        perm_redirect $perm_redir_to
    for(l in $perm_redir_patterns) {
        # $l names a variable holding a (pattern replacement) pair.
        p=$$l
        r=$p(1)
        # If target is absolute, then the pattern is anchored at the start of the path
        if(~ $p(2) http://* https://*)
            r='^'$r
        # Malicious danger! Pattern and replacement are spliced unescaped into
        # the sed script (a '!' in either one breaks it).
        t=`{ echo $req_path | sed 's!'^$r^'!'^$p(2)^'!' }

        # Redirect only when the substitution produced a different, non-empty path.
        if(! ~ $"t '' $req_path)
            perm_redirect $t
    }

    setup_handlers


    # Set Page title
    if(! ~ $local_file '') {
        t=`{get_file_title $local_file}
        if(! ~ $"t '')
            pageTitle=$t
    }

    # XXX Is this never true? because we set pageTitle earlier based on url.
    if(~ $"pageTitle '')
        pageTitle=$"siteTitle' '$"siteSubtitle
    if not
        pageTitle=$"pageTitle' | '$"siteTitle' '$"siteSubtitle

    for(h in $extraHttpHeaders)
        echo $h
    echo 'Content-Type: '^$http_content_type
    echo # End of HTTP headers

    if(! ~ $#debug 0)
        dprint $"SERVER_NAME^$"REQUEST_URI - $"HTTP_USER_AGENT - $"REQUEST_METHOD - $"handler_body_main - $"master_template

    # HEAD requests get the headers only.
    if(~ $REQUEST_METHOD HEAD)
        exit

    template $master_template | awk_buffer
}

# Serve the request; everything written to stderr goes to the debug log.
werc_exec_request >[2]/tmp/wercdebug.log