1 |
package proxy_impl; |
2 |
|
3 |
use overload; |
4 |
|
5 |
#use 5.020; # recommended for $& without performance penalty |
6 |
|
7 |
# regex syntax |
8 |
# . literal dot (like "\." in normaql regexes) |
9 |
# · match one character (like "." in normal regexes) |
10 |
# ** match anything (like ".*" in normal regexes) |
11 |
sub uriregex { |
12 |
overload::constant qr => sub { |
13 |
local $_ = shift; |
14 |
|
15 |
s/\./\\./g; |
16 |
s/·/./g; |
17 |
s/\*\*/.*/g; |
18 |
|
19 |
$_ |
20 |
}; |
21 |
} |
22 |
|
23 |
BEGIN { uriregex } |
24 |
|
25 |
use Apache2::Const -compile => qw( |
26 |
OK DECLINED DONE NOT_FOUND SERVER_ERROR AUTH_REQUIRED FORBIDDEN |
27 |
PROXYREQ_REVERSE |
28 |
); |
29 |
use Apache2::RequestUtil (); |
30 |
use Apache2::RequestRec (); |
31 |
use Apache2::Connection (); |
32 |
use Apache2::Util (); |
33 |
use Apache2::URI (); |
34 |
|
35 |
use APR::Const -compile => qw( |
36 |
FILETYPE_DIR FINFO_NORM |
37 |
); |
38 |
use APR::Error (); |
39 |
use APR::Finfo (); |
40 |
|
41 |
use common::sense; |
42 |
|
43 |
# per-request values, cached here for performance, use them |
44 |
our $req; # Apache2::RquestRec |
45 |
our $host; # $req->hostname |
46 |
our $uri; # $req->uri, already resolved .. and protects against %xx, hopefully |
47 |
our $ip; # $req->connection->client_ip, alternative ->useragent_ip |
48 |
|
49 |
# finish immediately with given status |
50 |
sub status($) { |
51 |
die \(my $status = shift); |
52 |
} |
53 |
|
54 |
#sub escape_path($) { |
55 |
# Apache2::Util::escape_path shift, $req->pool |
56 |
#} |
57 |
|
58 |
# escapes everything not allowed in a url hpath |
59 |
sub pesc($) { |
60 |
local $_ = shift; |
61 |
|
62 |
s/([^a-zA-z0-9\$\-_.+!*'(),;?&=\/])/sprintf "%%%02x", ord $1/ge; |
63 |
|
64 |
$_ |
65 |
} |
66 |
|
67 |
sub err($) { |
68 |
warn "$_[0]"; |
69 |
status 500; |
70 |
} |
71 |
|
72 |
# external redirect with status code |
73 |
sub redirect($$) { |
74 |
my ($status, $location) = @_; |
75 |
|
76 |
# $location =~ s,^(http://[^/:]+)/,$1:34567/,;#d# |
77 |
|
78 |
$req->headers_out->set (Location => $location); |
79 |
status $status; |
80 |
} |
81 |
|
82 |
# permanent external redirect |
83 |
sub rperm($) { |
84 |
redirect 301, shift |
85 |
} |
86 |
|
87 |
# temporary redirect, could be internal, but never is |
88 |
sub rtemp($) { |
89 |
redirect 302, shift |
90 |
} |
91 |
|
92 |
# internal redirect, TODO |
93 |
sub rint($) { |
94 |
&rtemp |
95 |
} |
96 |
|
97 |
# serve some path, do not call directly |
98 |
sub _rpathname($) { |
99 |
my $path = shift; |
100 |
|
101 |
my $finfo = eval { APR::Finfo::stat $path, APR::Const::FINFO_NORM, $req->pool } |
102 |
or status Apache2::Const::NOT_FOUND; |
103 |
|
104 |
# let mod_dir, mod_autoindex and the default-handler handle this |
105 |
$req->filename ($path); |
106 |
$req->finfo ($finfo); |
107 |
# warn "serve <$path,",$req->handler,">\n";#d# |
108 |
status Apache2::Const::OK; |
109 |
} |
110 |
|
111 |
# serve a regular file from disk |
112 |
sub rfile($) { |
113 |
$req->handler ("default-handler"); |
114 |
&_rpathname |
115 |
} |
116 |
|
117 |
# serve a directory from disk |
118 |
sub rdir($) { |
119 |
my $path = shift; |
120 |
|
121 |
if ($req->uri !~ m,/$,) { |
122 |
# redirect dir to dir/ |
123 |
rperm $req->construct_url (pesc $req->uri . "/"); |
124 |
|
125 |
} elsif (-e "$path/index.html") { |
126 |
# mod_dir emulation, display index.html if any |
127 |
$req->content_type ("text/html"); |
128 |
rfile "$path/index.html"; |
129 |
} elsif (-e "$path/index.xhtml") { |
130 |
$req->content_type ("text/html"); # we assume to follow the compatibility guidelines |
131 |
rfile "$path/index.xhtml"; |
132 |
|
133 |
} else { |
134 |
# let mod_autoindex handle it later |
135 |
$req->handler ("httpd/unix-directory"); |
136 |
_rpathname $path; |
137 |
} |
138 |
} |
139 |
|
140 |
# like rpath, but assumes caller already stat'ed |
141 |
sub rpath_nostat($) { |
142 |
-d _ ? &rdir : &rfile |
143 |
} |
144 |
|
145 |
# serve a generic path, can be dir or file |
146 |
sub rpath($) { |
147 |
stat $_[0]; |
148 |
&rpath_nostat |
149 |
} |
150 |
|
151 |
# sets SCRIPT_NAME and PATH_INFO from previous regex match |
152 |
# pathinfo must be specified via a regex match |
153 |
# either |
154 |
# - $1 has matched SCRIPT_NAME |
155 |
# - OR $1 as above, and $2 matches PATH_INFO (for extra checking) |
156 |
# - OR (?<name>...) and (?<path>...) exist and match SCRIPT_NAME and PATH_INFO. |
157 |
sub _get_pathinfo() { |
158 |
my ($name, $path); |
159 |
|
160 |
if (exists $+{name} && exists $+{path}) { |
161 |
$name = $+{name}; |
162 |
$path = $+{path}; |
163 |
} elsif (defined $1) { |
164 |
$name = $1; |
165 |
$path = defined $2 ? $2 : substr "$`$&$'", $+[1]; |
166 |
} else { |
167 |
err "$uri: cannot set pathinfo"; |
168 |
} |
169 |
|
170 |
if ($uri ne "$name$path") { |
171 |
err "$uri: imperfect split into <$name> and <$path>"; |
172 |
} |
173 |
|
174 |
($name, $path) |
175 |
} |
176 |
|
177 |
sub _set_pathinfo() { |
178 |
my ($name, $path) = _get_pathinfo; |
179 |
|
180 |
$req->uri ("$name$path"); |
181 |
$req->path_info ($path); |
182 |
} |
183 |
|
184 |
# run a cgi script, first argument is script, |
185 |
# also sets pathinfo, see _get_pathinfo |
186 |
sub rcgi($) { |
187 |
my ($path) = @_; |
188 |
|
189 |
_set_pathinfo; |
190 |
|
191 |
$req->handler ("cgi-script"); |
192 |
$req->notes->set ("alias-forced-type" => "cgi-script"); # leave a note for mod_cgi, so it ignores missing ExecCGI |
193 |
_rpathname $path; |
194 |
} |
195 |
|
196 |
# simplest reverse proxy, target is target url for this request |
197 |
sub rproxy($;$) { |
198 |
my ($target, $preservehost) = @_; |
199 |
|
200 |
$req->proxyreq (Apache2::Const::PROXYREQ_REVERSE); |
201 |
$req->filename ("proxy:$target"); |
202 |
$req->handler ("proxy-server"); |
203 |
$req->subprocess_env->set ("proxy-sendchunked", 1); |
204 |
#$notes->set ("proxy-nocanon", 1); |
205 |
#$env->set ("proxy-initial-not-pooled", 1); |
206 |
|
207 |
# disable compression, see http://www.apachetutor.org/admin/reverseproxies |
208 |
# $req->headers_in->unset ("accept-encoding"); |
209 |
# alternatively recompress, SetOutputFilter INFLATE;DEFLATE |
210 |
|
211 |
$req->add_config (["ProxyPreserveHost on"], ~0) |
212 |
if $preservehost; |
213 |
|
214 |
$req->headers_out->set ("x-forwarded-for", $req->connection->client_ip); |
215 |
$req->headers_out->set ("x-forwarded-proto", "https"); # unable to find a way to access ap_http_scheme |
216 |
|
217 |
status Apache2::Const::OK; |
218 |
} |
219 |
|
220 |
# host, port |
221 |
# also sets pathinfo, see _get_pathinfo |
222 |
sub rscgi($;$) { |
223 |
my ($target) = @_; |
224 |
|
225 |
_set_pathinfo; |
226 |
rproxy $target; |
227 |
|
228 |
# notes: uds_path |
229 |
} |
230 |
|
231 |
# reverse proxy |
232 |
# path is local root uri |
233 |
# target is target root url |
234 |
# suffix is appended to target url |
235 |
# @lines is extra config lines |
236 |
sub rproxy_html($$$@) { |
237 |
my ($path, $target, $suffix, @lines) = @_; |
238 |
|
239 |
(my $cpath = $path) =~ s/([^A-Za-z0-9\/.\-_])/sprintf "\\x%02x", ord $1/ge; |
240 |
|
241 |
push @lines, ( |
242 |
"ProxyPassReverse /", |
243 |
"ProxyHTMLEnable on", |
244 |
# "ProxyHTMLURLMap $target $cpath", |
245 |
# "ProxyHTMLURLMap / $cpath/", |
246 |
); |
247 |
|
248 |
# warn "PROXY<$path,$target,$suffix>\n";#d# |
249 |
# warn map "$_\n",@lines; |
250 |
|
251 |
$req->add_config (\@lines, ~0, $path); |
252 |
|
253 |
rproxy "$target$suffix"; |
254 |
} |
255 |
|
256 |
my $rules; |
257 |
|
258 |
sub load_rules { |
259 |
open my $fh, "<:raw", Apache2::ServerUtil::server_root . "/rules" |
260 |
or die Apache2::ServerUtil::server_root . "/rules: $!"; |
261 |
local $/; |
262 |
|
263 |
$rules = <$fh>; |
264 |
|
265 |
$rules = eval "use common::sense; BEGIN { uriregex }\nsub {\n#line 0 'rules'\n$rules\n}" |
266 |
or die $@; |
267 |
|
268 |
warn "proxy rules successfully loaded.\n"; |
269 |
|
270 |
Apache2::Const::OK |
271 |
} |
272 |
|
273 |
sub map_to_storage { |
274 |
local $req = shift; |
275 |
local ($host, $uri, $ip) = ($req->hostname, $req->uri, $req->connection->client_ip); |
276 |
|
277 |
eval { |
278 |
# the uri is pre-parsed and "protected" by apache |
279 |
# the hostname is lowercased but otherwise completely unchecked, |
280 |
# so better be safe than sorry |
281 |
$host =~ /^[A-Za-z0-9\-.]+$/ |
282 |
or status 404; |
283 |
|
284 |
# must have at least one dot |
285 |
$host =~ /./ |
286 |
or status 404; |
287 |
|
288 |
local $_ = "$host$uri"; |
289 |
$rules->($req); |
290 |
}; |
291 |
|
292 |
if ($@) { |
293 |
if (SCALAR:: eq ref $@) { |
294 |
return ${$@}; |
295 |
} else { |
296 |
die; |
297 |
} |
298 |
} |
299 |
|
300 |
Apache2::Const::NOT_FOUND |
301 |
} |
302 |
|
303 |
load_rules; |
304 |
|
305 |
warn "proxy loaded and iniitalised.\n"; |
306 |
|
307 |
1 |
308 |
|