1 |
root |
1.1 |
#!/opt/bin/perl |
2 |
|
|
|
3 |
|
|
BEGIN { require "common.pl" } |
4 |
|
|
our ($TXN, %stat, $db_env, $TPB, $db_info, $TODAY); |
5 |
|
|
|
6 |
|
|
use common::sense; |
7 |
|
|
use Compress::LZF; |
8 |
|
|
use BDB; |
9 |
|
|
use JSON::XS; |
10 |
|
|
use HTML::Entities; |
11 |
|
|
use Date::Parse; |
12 |
|
|
|
13 |
|
|
############################################################################# |
14 |
|
|
|
15 |
|
|
our $db_http = table "http"; |
16 |
|
|
our $db_torrent = table "torrent"; |
17 |
|
|
|
18 |
|
|
############################################################################# |
19 |
|
|
|
20 |
|
|
#db_compact $db_http, undef, undef, undef, undef, 0, undef; |
21 |
|
|
|
22 |
|
|
my $c = $db_http->cursor; |
23 |
|
|
my @del; |
24 |
|
|
|
25 |
|
|
#open my $out, "| mbuffer -s4M -m64M | lzma -9 >tpb.json.lzma" |
26 |
|
|
open my $out, "| mbuffer -s4M -m64M >tpb.json" |
27 |
|
|
or die "tpb.json: $!"; |
28 |
|
|
|
29 |
|
|
sub ashtml($) { |
30 |
|
|
local $_ = shift; |
31 |
|
|
|
32 |
|
|
s/^\s+//; |
33 |
|
|
s/\s+$//; |
34 |
|
|
s/\015//g; |
35 |
|
|
|
36 |
|
|
utf8::decode $_; |
37 |
|
|
|
38 |
|
|
$_ |
39 |
|
|
} |
40 |
|
|
|
41 |
|
|
sub astext($) { |
42 |
|
|
local $_ = ashtml shift; |
43 |
|
|
|
44 |
|
|
#s%<a href="([^"]+).*?>.*?</a>%URL<$1>%g; |
45 |
|
|
#s%<br\s*/?>%\n%g; |
46 |
|
|
|
47 |
|
|
HTML::Entities::decode_entities $_; |
48 |
|
|
utf8::decode $_; |
49 |
|
|
|
50 |
|
|
$_ |
51 |
|
|
} |
52 |
|
|
|
53 |
|
|
sub astime($) { |
54 |
|
|
str2time $_[0] |
55 |
|
|
} |
56 |
|
|
|
57 |
|
|
while () { |
58 |
|
|
db_c_get $c, my $key, my $data, BDB::NEXT; |
59 |
|
|
last if $!; |
60 |
|
|
|
61 |
|
|
syswrite STDOUT, "\r$key "; |
62 |
|
|
|
63 |
|
|
eval { |
64 |
|
|
for (dic_decompress 0, $data) { |
65 |
|
|
my @info; |
66 |
|
|
|
67 |
|
|
m%title="More from this category">(.*?)<%s or die "$_: no category"; |
68 |
|
|
$info[0] = [split / > /, $1]; |
69 |
|
|
|
70 |
|
|
$info[1] = $key+0; |
71 |
|
|
|
72 |
|
|
m%<div id="title">(.*?)</div>%s or die "$_: no title"; |
73 |
|
|
$info[2] = astext $1; |
74 |
|
|
|
75 |
|
|
m% href="magnet:\?xt=urn:btih:([0-9a-f]*)&dn% or die "$_: no magnet"; |
76 |
|
|
$info[3] = $1; |
77 |
|
|
|
78 |
|
|
$info[4] = [m%<a href="/tag/([^"]+)%g]; |
79 |
|
|
|
80 |
|
|
m%<dt>By:</dt>.*?<dd>.*?<a href="/user/([^/]+)/%s |
81 |
|
|
and $info[5] = astext $1; |
82 |
|
|
|
83 |
|
|
m%<dt>Uploaded:</dt>.*?<dd>(.*?)</dd>%s or die "$_: no uploaded"; |
84 |
|
|
$info[6] = astime $1; |
85 |
|
|
|
86 |
|
|
m%<div class="nfo">\s*<pre>(.*?)</pre>%s or die "$_: no nfo"; |
87 |
|
|
$info[7] = astext $1; |
88 |
|
|
|
89 |
|
|
my @comments; |
90 |
|
|
|
91 |
|
|
while ( |
92 |
|
|
m% |
93 |
|
|
<div\sid="comment-\d+"> |
94 |
|
|
<p\sclass="byline"> |
95 |
|
|
\s* |
96 |
|
|
<a\shref="/user/([^/]+)/".*?\sat\s(....-..-..\s..:..\sCET):\s*</p> |
97 |
|
|
<div\sclass="comment">(.*?)</div>\s*</div> |
98 |
|
|
%gsx |
99 |
|
|
) { |
100 |
|
|
my ($user, $date, $comment) = ($1, $2, $3); |
101 |
|
|
push @comments, [$user, astime $date, astext $comment]; |
102 |
|
|
} |
103 |
|
|
|
104 |
|
|
$info[8] = \@comments; |
105 |
|
|
|
106 |
|
|
print $out JSON::XS::encode_json \@info, "\n"; |
107 |
|
|
} |
108 |
|
|
}; |
109 |
|
|
|
110 |
|
|
if ($@) { |
111 |
|
|
print STDOUT "$@"; |
112 |
|
|
push @del, $key; |
113 |
|
|
} |
114 |
|
|
} |
115 |
|
|
|
116 |
|
|
undef $c; |
117 |
|
|
|
118 |
|
|
if (@del) { |
119 |
|
|
for (@del) { |
120 |
|
|
iput $_, [0, 1]; |
121 |
|
|
db_del $db_torrent, undef, $_; |
122 |
|
|
db_del $db_http , undef, $_; |
123 |
|
|
} |
124 |
|
|
} |
125 |
|
|
|