ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/tpb/process
Revision: 1.1
Committed: Sun Sep 27 07:55:20 2015 UTC (8 years, 8 months ago) by root
Branch: MAIN
CVS Tags: HEAD
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #!/opt/bin/perl
2    
3     BEGIN { require "common.pl" }
4     our ($TXN, %stat, $db_env, $TPB, $db_info, $TODAY);
5    
6     use common::sense;
7     use Compress::LZF;
8     use BDB;
9     use JSON::XS;
10     use HTML::Entities;
11     use Date::Parse;
12    
13     #############################################################################
14    
15     our $db_http = table "http";
16     our $db_torrent = table "torrent";
17    
18     #############################################################################
19    
20     #db_compact $db_http, undef, undef, undef, undef, 0, undef;
21    
22     my $c = $db_http->cursor;
23     my @del;
24    
25     #open my $out, "| mbuffer -s4M -m64M | lzma -9 >tpb.json.lzma"
26     open my $out, "| mbuffer -s4M -m64M >tpb.json"
27     or die "tpb.json: $!";
28    
29     sub ashtml($) {
30     local $_ = shift;
31    
32     s/^\s+//;
33     s/\s+$//;
34     s/\015//g;
35    
36     utf8::decode $_;
37    
38     $_
39     }
40    
41     sub astext($) {
42     local $_ = ashtml shift;
43    
44     #s%<a href="([^"]+).*?>.*?</a>%URL<$1>%g;
45     #s%<br\s*/?>%\n%g;
46    
47     HTML::Entities::decode_entities $_;
48     utf8::decode $_;
49    
50     $_
51     }
52    
53     sub astime($) {
54     str2time $_[0]
55     }
56    
57     while () {
58     db_c_get $c, my $key, my $data, BDB::NEXT;
59     last if $!;
60    
61     syswrite STDOUT, "\r$key ";
62    
63     eval {
64     for (dic_decompress 0, $data) {
65     my @info;
66    
67     m%title="More from this category">(.*?)<%s or die "$_: no category";
68     $info[0] = [split / &gt; /, $1];
69    
70     $info[1] = $key+0;
71    
72     m%<div id="title">(.*?)</div>%s or die "$_: no title";
73     $info[2] = astext $1;
74    
75     m% href="magnet:\?xt=urn:btih:([0-9a-f]*)&dn% or die "$_: no magnet";
76     $info[3] = $1;
77    
78     $info[4] = [m%<a href="/tag/([^"]+)%g];
79    
80     m%<dt>By:</dt>.*?<dd>.*?<a href="/user/([^/]+)/%s
81     and $info[5] = astext $1;
82    
83     m%<dt>Uploaded:</dt>.*?<dd>(.*?)</dd>%s or die "$_: no uploaded";
84     $info[6] = astime $1;
85    
86     m%<div class="nfo">\s*<pre>(.*?)</pre>%s or die "$_: no nfo";
87     $info[7] = astext $1;
88    
89     my @comments;
90    
91     while (
92     m%
93     <div\sid="comment-\d+">
94     <p\sclass="byline">
95     \s*
96     <a\shref="/user/([^/]+)/".*?\sat\s(....-..-..\s..:..\sCET):\s*</p>
97     <div\sclass="comment">(.*?)</div>\s*</div>
98     %gsx
99     ) {
100     my ($user, $date, $comment) = ($1, $2, $3);
101     push @comments, [$user, astime $date, astext $comment];
102     }
103    
104     $info[8] = \@comments;
105    
106     print $out JSON::XS::encode_json \@info, "\n";
107     }
108     };
109    
110     if ($@) {
111     print STDOUT "$@";
112     push @del, $key;
113     }
114     }
115    
116     undef $c;
117    
118     if (@del) {
119     for (@del) {
120     iput $_, [0, 1];
121     db_del $db_torrent, undef, $_;
122     db_del $db_http , undef, $_;
123     }
124     }
125