ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/tpb/process
Revision: 1.1
Committed: Sun Sep 27 07:55:20 2015 UTC (8 years, 7 months ago) by root
Branch: MAIN
CVS Tags: HEAD
Log Message:
*** empty log message ***

File Contents

# Content
1 #!/opt/bin/perl
2
3 BEGIN { require "common.pl" }
4 our ($TXN, %stat, $db_env, $TPB, $db_info, $TODAY);
5
6 use common::sense;
7 use Compress::LZF;
8 use BDB;
9 use JSON::XS;
10 use HTML::Entities;
11 use Date::Parse;
12
13 #############################################################################
14
15 our $db_http = table "http";
16 our $db_torrent = table "torrent";
17
18 #############################################################################
19
20 #db_compact $db_http, undef, undef, undef, undef, 0, undef;
21
22 my $c = $db_http->cursor;
23 my @del;
24
25 #open my $out, "| mbuffer -s4M -m64M | lzma -9 >tpb.json.lzma"
26 open my $out, "| mbuffer -s4M -m64M >tpb.json"
27 or die "tpb.json: $!";
28
29 sub ashtml($) {
30 local $_ = shift;
31
32 s/^\s+//;
33 s/\s+$//;
34 s/\015//g;
35
36 utf8::decode $_;
37
38 $_
39 }
40
41 sub astext($) {
42 local $_ = ashtml shift;
43
44 #s%<a href="([^"]+).*?>.*?</a>%URL<$1>%g;
45 #s%<br\s*/?>%\n%g;
46
47 HTML::Entities::decode_entities $_;
48 utf8::decode $_;
49
50 $_
51 }
52
53 sub astime($) {
54 str2time $_[0]
55 }
56
57 while () {
58 db_c_get $c, my $key, my $data, BDB::NEXT;
59 last if $!;
60
61 syswrite STDOUT, "\r$key ";
62
63 eval {
64 for (dic_decompress 0, $data) {
65 my @info;
66
67 m%title="More from this category">(.*?)<%s or die "$_: no category";
68 $info[0] = [split / &gt; /, $1];
69
70 $info[1] = $key+0;
71
72 m%<div id="title">(.*?)</div>%s or die "$_: no title";
73 $info[2] = astext $1;
74
75 m% href="magnet:\?xt=urn:btih:([0-9a-f]*)&dn% or die "$_: no magnet";
76 $info[3] = $1;
77
78 $info[4] = [m%<a href="/tag/([^"]+)%g];
79
80 m%<dt>By:</dt>.*?<dd>.*?<a href="/user/([^/]+)/%s
81 and $info[5] = astext $1;
82
83 m%<dt>Uploaded:</dt>.*?<dd>(.*?)</dd>%s or die "$_: no uploaded";
84 $info[6] = astime $1;
85
86 m%<div class="nfo">\s*<pre>(.*?)</pre>%s or die "$_: no nfo";
87 $info[7] = astext $1;
88
89 my @comments;
90
91 while (
92 m%
93 <div\sid="comment-\d+">
94 <p\sclass="byline">
95 \s*
96 <a\shref="/user/([^/]+)/".*?\sat\s(....-..-..\s..:..\sCET):\s*</p>
97 <div\sclass="comment">(.*?)</div>\s*</div>
98 %gsx
99 ) {
100 my ($user, $date, $comment) = ($1, $2, $3);
101 push @comments, [$user, astime $date, astext $comment];
102 }
103
104 $info[8] = \@comments;
105
106 print $out JSON::XS::encode_json \@info, "\n";
107 }
108 };
109
110 if ($@) {
111 print STDOUT "$@";
112 push @del, $key;
113 }
114 }
115
116 undef $c;
117
118 if (@del) {
119 for (@del) {
120 iput $_, [0, 1];
121 db_del $db_torrent, undef, $_;
122 db_del $db_http , undef, $_;
123 }
124 }
125