… | |
… | |
8 | |
8 | |
9 | perl -MFaster ... |
9 | perl -MFaster ... |
10 | |
10 | |
11 | =head1 DESCRIPTION |
11 | =head1 DESCRIPTION |
12 | |
12 | |
13 | This module implements a very simple-minded JIT. It works by more or less |
13 | This module implements a very simple-minded "JIT" (or actually AIT, ahead |
14 | translating every function it sees into a C program, compiling it and then |
14 | of time compiler). It works by more or less translating every function it |
15 | replacing the function by the compiled code. |
15 | sees into a C program, compiling it and then replacing the function by the |
|
|
16 | compiled code. |
16 | |
17 | |
17 | As a result, startup times are immense, as every function might lead to a |
18 | As a result, startup times are immense, as every function might lead to a |
18 | full-blown compilation. |
19 | full-blown compilation. |
19 | |
20 | |
20 | The speed improvements are also not great, you can expect 20% or so on |
21 | The speed improvements are also not great, you can expect 20% or so on |
21 | average, for code that runs very often. |
22 | average, for code that runs very often. The reason for this is that data |
|
|
23 | handling is mostly being done by the same old code, it just gets called |
|
|
24 | a bit faster. Regexes and string operations won't get faster. Airhtmetic |
|
|
25 | doresn't become any faster. Just the operands and other stuff is put on |
|
|
26 | the stack faster, and the opcodes themselves have a bit less overhead. |
22 | |
27 | |
23 | Faster is in the early stages of development. Due to its design its |
28 | Faster is in the early stages of development. Due to its design its |
24 | relatively safe to use (it will either work or simply slowdown the program |
29 | relatively safe to use (it will either work or simply slowdown the program |
25 | immensely, but rarely cause bugs). |
30 | immensely, but rarely cause bugs). |
26 | |
31 | |
27 | Usage is very easy, just C<use Faster> and every function called from then |
32 | Usage is very easy, just C<use Faster> and every function called from then |
28 | on will be compiled. |
33 | on will be compiled. |
29 | |
34 | |
30 | Right now, Faster will leave lots of F<*.c>, F<*.o> and F<*.so> files in |
35 | Right now, Faster can leave lots of F<*.c> and F<*.so> files in your |
31 | F</tmp>, and it will even create those temporary files in an insecure |
36 | F<$FASTER_CACHEDIR> (by default F<$HOME/.perl-faster-cache>), and it will |
32 | manner, so watch out. |
37 | even create those temporary files in an insecure manner, so watch out. |
33 | |
38 | |
34 | =over 4 |
39 | =over 4 |
35 | |
40 | |
36 | =cut |
41 | =cut |
37 | |
42 | |
… | |
… | |
40 | no warnings; |
45 | no warnings; |
41 | |
46 | |
42 | use strict; |
47 | use strict; |
43 | use Config; |
48 | use Config; |
44 | use B (); |
49 | use B (); |
45 | #use Digest::MD5 (); |
|
|
46 | use DynaLoader (); |
50 | use DynaLoader (); |
47 | use File::Temp (); |
51 | use Digest::MD5 (); |
|
|
52 | use Storable (); |
|
|
53 | use Fcntl (); |
48 | |
54 | |
49 | BEGIN { |
55 | BEGIN { |
50 | our $VERSION = '0.01'; |
56 | our $VERSION = '0.01'; |
51 | |
57 | |
52 | require XSLoader; |
58 | require XSLoader; |
53 | XSLoader::load __PACKAGE__, $VERSION; |
59 | XSLoader::load __PACKAGE__, $VERSION; |
54 | } |
60 | } |
|
|
61 | |
|
|
62 | my $CACHEDIR = |
|
|
63 | $ENV{FASTER_CACHE} |
|
|
64 | || (exists $ENV{HOME} && "$ENV{HOME}/.perl-faster-cache") |
|
|
65 | || do { |
|
|
66 | require File::Temp; |
|
|
67 | File::Temp::tempdir (CLEANUP => 1) |
|
|
68 | }; |
55 | |
69 | |
56 | my $COMPILE = "$Config{cc} -c -I$Config{archlibexp}/CORE $Config{optimize} $Config{ccflags} $Config{cccdlflags}"; |
70 | my $COMPILE = "$Config{cc} -c -I$Config{archlibexp}/CORE $Config{optimize} $Config{ccflags} $Config{cccdlflags}"; |
57 | my $LINK = "$Config{ld} $Config{ldflags} $Config{lddlflags} $Config{ccdlflags}"; |
71 | my $LINK = "$Config{ld} $Config{ldflags} $Config{lddlflags} $Config{ccdlflags}"; |
58 | my $LIBS = "$Config{libs}"; |
72 | my $LIBS = "$Config{libs}"; |
59 | my $_o = $Config{_o}; |
73 | my $_o = $Config{_o}; |
… | |
… | |
61 | |
75 | |
62 | # we don't need no steenking PIC on x86 |
76 | # we don't need no steenking PIC on x86 |
63 | $COMPILE =~ s/-f(?:PIC|pic)//g |
77 | $COMPILE =~ s/-f(?:PIC|pic)//g |
64 | if $Config{archname} =~ /^(i[3456]86)-/; |
78 | if $Config{archname} =~ /^(i[3456]86)-/; |
65 | |
79 | |
66 | my $opt_assert = $ENV{FASTER_DEBUG}; |
80 | my $opt_assert = $ENV{FASTER_DEBUG} > 1; |
67 | my $verbose = $ENV{FASTER_VERBOSE}+0; |
81 | my $verbose = $ENV{FASTER_VERBOSE}+0; |
|
|
82 | |
|
|
83 | warn "Faster: CACHEDIR is $CACHEDIR\n" if $verbose > 2; |
68 | |
84 | |
69 | our $source; |
85 | our $source; |
70 | |
86 | |
71 | our @ops; |
87 | our @ops; |
72 | our $insn; |
88 | our $insn; |
… | |
… | |
133 | # ops that do not need an ASYNC_CHECK |
149 | # ops that do not need an ASYNC_CHECK |
134 | my %f_noasync = map +($_ => undef), qw( |
150 | my %f_noasync = map +($_ => undef), qw( |
135 | mapstart grepstart match entereval |
151 | mapstart grepstart match entereval |
136 | enteriter entersub leaveloop |
152 | enteriter entersub leaveloop |
137 | |
153 | |
138 | pushmark nextstate |
154 | pushmark nextstate caller |
139 | |
155 | |
140 | const stub unstack |
156 | const stub unstack |
141 | last next redo seq |
157 | last next redo goto seq |
142 | padsv padav padhv padany |
158 | padsv padav padhv padany |
143 | aassign sassign orassign |
159 | aassign sassign orassign |
144 | rv2av rv2cv rv2gv rv2hv refgen |
160 | rv2av rv2cv rv2gv rv2hv refgen |
145 | gv gvsv |
161 | gv gvsv |
146 | add subtract multiply divide |
162 | add subtract multiply divide |
147 | complement cond_expr and or not |
163 | complement cond_expr and or not |
|
|
164 | bit_and bit_or bit_xor |
148 | defined |
165 | defined |
149 | method method_named bless |
166 | method method_named bless |
150 | preinc postinc predec postdec |
167 | preinc postinc predec postdec |
151 | aelem aelemfast helem delete exists |
168 | aelem aelemfast helem delete exists |
152 | pushre subst list join split concat |
169 | pushre subst list lslice join split concat |
153 | length substr stringify ord |
170 | length substr stringify ord |
154 | push pop shift unshift |
171 | push pop shift unshift |
155 | eq ne gt lt ge le |
172 | eq ne gt lt ge le |
156 | regcomp regcreset regcmaybe |
173 | regcomp regcreset regcmaybe |
157 | ); |
174 | ); |
… | |
… | |
213 | |
230 | |
214 | out_next; |
231 | out_next; |
215 | } |
232 | } |
216 | |
233 | |
217 | sub op_pushmark { |
234 | sub op_pushmark { |
218 | $source .= " PUSHMARK (PL_stack_sp);\n"; |
235 | $source .= " faster_PUSHMARK (PL_stack_sp);\n"; |
219 | |
236 | |
220 | out_next; |
237 | out_next; |
221 | } |
238 | } |
222 | |
239 | |
223 | if ($Config{useithreads} ne "define") { |
240 | if ($Config{useithreads} ne "define") { |
… | |
… | |
352 | PUSHs (sv); |
369 | PUSHs (sv); |
353 | PUTBACK; |
370 | PUTBACK; |
354 | EOF |
371 | EOF |
355 | |
372 | |
356 | if (($flags & B::OPf_MOD) && ($op->private & B::OPpDEREF)) { |
373 | if (($flags & B::OPf_MOD) && ($op->private & B::OPpDEREF)) { |
357 | $source .= " vivify_ref (sv, " . $op->private . " & OPpDEREF);\n"; |
374 | $source .= " if (!SvROK (sv)) vivify_ref (sv, " . $op->private . " & OPpDEREF);\n"; |
358 | } |
375 | } |
359 | $source .= " }\n"; |
376 | $source .= " }\n"; |
360 | |
377 | |
361 | out_next; |
378 | out_next; |
362 | } |
379 | } |
… | |
… | |
503 | local %op_regcomp; |
520 | local %op_regcomp; |
504 | |
521 | |
505 | my %opsseen; |
522 | my %opsseen; |
506 | my @todo = $cv->START; |
523 | my @todo = $cv->START; |
507 | my %op_target; |
524 | my %op_target; |
|
|
525 | my $numpushmark; |
508 | |
526 | |
509 | while (my $op = shift @todo) { |
527 | while (my $op = shift @todo) { |
510 | for (; $$op; $op = $op->next) { |
528 | for (; $$op; $op = $op->next) { |
511 | last if $opsseen{$$op}++; |
529 | last if $opsseen{$$op}++; |
512 | |
530 | |
… | |
… | |
546 | |
564 | |
547 | push @op_loop, \@targ; |
565 | push @op_loop, \@targ; |
548 | push @todo, @targ; |
566 | push @todo, @targ; |
549 | |
567 | |
550 | $op_target{$$_}++ for @targ; |
568 | $op_target{$$_}++ for @targ; |
|
|
569 | |
551 | } elsif ($class eq "COP") { |
570 | } elsif ($class eq "COP") { |
552 | $insn->{bblock}++ if defined $op->label; |
571 | $insn->{bblock}++ if defined $op->label; |
|
|
572 | |
|
|
573 | } else { |
|
|
574 | if ($name eq "pushmark") { |
|
|
575 | $numpushmark++; |
|
|
576 | } |
553 | } |
577 | } |
554 | } |
578 | } |
555 | } |
579 | } |
556 | |
580 | |
557 | $_->{bblock}++ for grep $op_target{${$_->{op}}}, @ops; |
581 | $_->{bblock}++ for grep $op_target{${$_->{op}}}, @ops; |
… | |
… | |
559 | local $source = <<EOF; |
583 | local $source = <<EOF; |
560 | OP *%%%FUNC%%% (pTHX) |
584 | OP *%%%FUNC%%% (pTHX) |
561 | { |
585 | { |
562 | register OP *nextop = (OP *)${$ops[0]->{op}}L; |
586 | register OP *nextop = (OP *)${$ops[0]->{op}}L; |
563 | EOF |
587 | EOF |
|
|
588 | |
|
|
589 | $source .= " faster_PUSHMARK_PREALLOC ($numpushmark);\n" |
|
|
590 | if $numpushmark; |
564 | |
591 | |
565 | while (@ops) { |
592 | while (@ops) { |
566 | $insn = shift @ops; |
593 | $insn = shift @ops; |
567 | |
594 | |
568 | $op = $insn->{op}; |
595 | $op = $insn->{op}; |
… | |
… | |
631 | |
658 | |
632 | $source |
659 | $source |
633 | } |
660 | } |
634 | |
661 | |
635 | my $uid = "aaaaaaa0"; |
662 | my $uid = "aaaaaaa0"; |
|
|
663 | my %so; |
636 | |
664 | |
637 | sub source2ptr { |
665 | sub func2ptr { |
638 | my (@source) = @_; |
666 | my (@func) = @_; |
639 | |
667 | |
640 | my $stem = "/tmp/Faster-$$-" . $uid++; |
668 | #LOCK |
|
|
669 | mkdir $CACHEDIR, 0777; |
|
|
670 | sysopen my $meta_fh, "$CACHEDIR/meta", &Fcntl::O_RDWR | &Fcntl::O_CREAT, 0666 |
|
|
671 | or die "$$CACHEDIR/meta: $!"; |
|
|
672 | binmode $meta_fh, ":raw:perlio"; |
|
|
673 | fcntl_lock fileno $meta_fh |
|
|
674 | or die "$CACHEDIR/meta: $!"; |
641 | |
675 | |
|
|
676 | my $meta = eval { Storable::fd_retrieve $meta_fh } || { version => 1 }; |
|
|
677 | |
|
|
678 | for my $f (@func) { |
|
|
679 | $f->{func} = "F" . Digest::MD5::md5_hex ($f->{source}); |
|
|
680 | $f->{so} = $meta->{$f->{func}}; |
|
|
681 | } |
|
|
682 | |
|
|
683 | if (grep !$_->{so}, @func) { |
|
|
684 | my $stem; |
|
|
685 | |
|
|
686 | do { |
|
|
687 | $stem = "$CACHEDIR/$$-" . $uid++; |
|
|
688 | } while -e "$stem$_so"; |
|
|
689 | |
642 | open FILE, ">:raw", "$stem.c"; |
690 | open my $fh, ">:raw", "$stem.c"; |
643 | print FILE <<EOF; |
691 | print $fh <<EOF; |
644 | #define PERL_NO_GET_CONTEXT |
692 | #define PERL_NO_GET_CONTEXT |
645 | #define PERL_CORE |
693 | #define PERL_CORE |
646 | |
694 | |
647 | #include <assert.h> |
695 | #include <assert.h> |
648 | |
696 | |
649 | #include "EXTERN.h" |
697 | #include "EXTERN.h" |
650 | #include "perl.h" |
698 | #include "perl.h" |
651 | #include "XSUB.h" |
699 | #include "XSUB.h" |
652 | |
700 | |
|
|
701 | #if 1 |
|
|
702 | # define faster_PUSHMARK_PREALLOC(count) while (PL_markstack_ptr + (count) >= PL_markstack_max) markstack_grow () |
|
|
703 | # define faster_PUSHMARK(p) *++PL_markstack_ptr = (p) - PL_stack_base |
|
|
704 | #else |
|
|
705 | # define faster_PUSHMARK_PREALLOC(count) 1 |
|
|
706 | # define faster_PUSHMARK(p) PUSHMARK(p) |
|
|
707 | #endif |
|
|
708 | |
653 | #define RUNOPS_TILL(op) \\ |
709 | #define RUNOPS_TILL(op) \\ |
654 | while (nextop != (op)) \\ |
710 | while (nextop != (op)) \\ |
655 | { \\ |
711 | { \\ |
656 | PERL_ASYNC_CHECK (); \\ |
712 | PERL_ASYNC_CHECK (); \\ |
657 | PL_op = nextop; nextop = (PL_op->op_ppaddr)(aTHX); \\ |
713 | PL_op = nextop; nextop = (PL_op->op_ppaddr)(aTHX); \\ |
658 | } |
|
|
659 | |
|
|
660 | EOF |
|
|
661 | for (@source) { |
|
|
662 | my $func = $uid++; |
|
|
663 | $_ =~ s/%%%FUNC%%%/$func/g; |
|
|
664 | print FILE $_; |
|
|
665 | $_ = $func; |
|
|
666 | } |
714 | } |
667 | |
715 | |
668 | close FILE; |
716 | EOF |
|
|
717 | for my $f (grep !$_->{so}, @func) { |
|
|
718 | next if $f->{so} = $meta->{$f->{func}}; # some cv's alias others |
|
|
719 | |
|
|
720 | warn "compiling $f->{name} to $stem$_so:$f->{func}\n" if $verbose > 1; |
|
|
721 | my $source = $f->{source}; |
|
|
722 | $source =~ s/%%%FUNC%%%/$f->{func}/g; |
|
|
723 | print $fh $source; |
|
|
724 | $meta->{$f->{func}} = $f->{so} = $stem; |
|
|
725 | } |
|
|
726 | |
|
|
727 | close $fh; |
669 | system "$COMPILE -o $stem$_o $stem.c"; |
728 | system "$COMPILE -o $stem$_o $stem.c"; |
670 | #d#unlink "$stem.c"; |
729 | unlink "$stem.c" unless $ENV{FASTER_DEBUG} > 0; |
671 | system "$LINK -o $stem$_so $stem$_o $LIBS"; |
730 | system "$LINK -o $stem$_so $stem$_o $LIBS"; |
672 | unlink "$stem$_o"; |
731 | unlink "$stem$_o"; |
|
|
732 | } |
673 | |
733 | |
|
|
734 | for my $f (@func) { |
|
|
735 | my $stem = $f->{so}; |
|
|
736 | |
674 | my $so = DynaLoader::dl_load_file "$stem$_so" |
737 | my $so = ($so{$stem} ||= DynaLoader::dl_load_file "$stem$_so") |
675 | or die "$stem$_so: $!"; |
738 | or die "$stem$_so: $!"; |
676 | |
739 | |
677 | #unlink "$stem$_so"; |
740 | #unlink "$stem$_so"; |
678 | |
741 | |
679 | map +(DynaLoader::dl_find_symbol $so, $_), @source |
742 | $f->{ptr} = DynaLoader::dl_find_symbol $so, $f->{func} |
|
|
743 | or die "$f->{func} not found in $stem$_so: $!"; |
|
|
744 | } |
|
|
745 | |
|
|
746 | seek $meta_fh, 0, 0 or die "$CACHEDIR/meta: $!"; |
|
|
747 | Storable::nstore_fd $meta, $meta_fh; |
|
|
748 | truncate $meta_fh, tell $meta_fh; |
|
|
749 | |
|
|
750 | # UNLOCK (by closing $meta_fh) |
680 | } |
751 | } |
681 | |
752 | |
682 | my %ignore; |
753 | my %ignore; |
683 | |
754 | |
684 | sub entersub { |
755 | sub entersub { |
… | |
… | |
686 | |
757 | |
687 | my $pkg = $cv->STASH->NAME; |
758 | my $pkg = $cv->STASH->NAME; |
688 | |
759 | |
689 | return if $ignore{$pkg}; |
760 | return if $ignore{$pkg}; |
690 | |
761 | |
691 | warn "compiling ", $cv->STASH->NAME, "\n" |
762 | warn "optimising ", $cv->STASH->NAME, "\n" |
692 | if $verbose; |
763 | if $verbose; |
693 | |
764 | |
694 | eval { |
765 | eval { |
695 | my @cv; |
766 | my @func; |
696 | my @cv_source; |
767 | |
|
|
768 | push @func, { |
|
|
769 | cv => $cv, |
|
|
770 | name => "<>", |
|
|
771 | source => cv2c $cv, |
|
|
772 | }; |
697 | |
773 | |
698 | # always compile the whole stash |
774 | # always compile the whole stash |
699 | my %stash = $cv->STASH->ARRAY; |
775 | my %stash = $cv->STASH->ARRAY; |
700 | while (my ($k, $v) = each %stash) { |
776 | while (my ($k, $v) = each %stash) { |
701 | $v->isa (B::GV::) |
777 | $v->isa (B::GV::) |
… | |
… | |
704 | my $cv = $v->CV; |
780 | my $cv = $v->CV; |
705 | |
781 | |
706 | if ($cv->isa (B::CV::) |
782 | if ($cv->isa (B::CV::) |
707 | && ${$cv->START} |
783 | && ${$cv->START} |
708 | && $cv->START->name ne "null") { |
784 | && $cv->START->name ne "null") { |
|
|
785 | |
709 | push @cv, $cv; |
786 | push @func, { |
|
|
787 | cv => $cv, |
|
|
788 | name => $k, |
710 | push @cv_source, cv2c $cv; |
789 | source => cv2c $cv, |
|
|
790 | }; |
711 | } |
791 | } |
712 | } |
792 | } |
713 | |
793 | |
714 | my @ptr = source2ptr @cv_source; |
794 | func2ptr @func; |
715 | |
795 | |
716 | for (0 .. $#cv) { |
796 | for my $f (@func) { |
717 | patch_cv $cv[$_], $ptr[$_]; |
797 | patch_cv $f->{cv}, $f->{ptr}; |
718 | } |
798 | } |
719 | }; |
799 | }; |
720 | |
800 | |
721 | if ($@) { |
801 | if ($@) { |
722 | $ignore{$pkg}++; |
802 | $ignore{$pkg}++; |
… | |
… | |
737 | =over 4 |
817 | =over 4 |
738 | |
818 | |
739 | =item FASTER_VERBOSE |
819 | =item FASTER_VERBOSE |
740 | |
820 | |
741 | Faster will output more informational messages when set to values higher |
821 | Faster will output more informational messages when set to values higher |
742 | than C<0>. Currently, C<1> outputs which packages are being compiled. |
822 | than C<0>. Currently, C<1> outputs which packages are being compiled, C<3> |
|
|
823 | outputs the cache directory and C<10> outputs information on which perl |
|
|
824 | function is compiled into which shared object. |
743 | |
825 | |
744 | =item FASTER_DEBUG |
826 | =item FASTER_DEBUG |
745 | |
827 | |
746 | Add debugging code when set to values higher than C<0>. Currently, this |
828 | Add debugging code when set to values higher than C<0>. Currently, this |
747 | adds 1-3 C<assert>'s per perl op, to ensure that opcode order and C |
829 | adds 1-3 C<assert>'s per perl op (FASTER_DEBUG > 1), to ensure that opcode |
748 | execution order are compatible. |
830 | order and C execution order are compatible. |
749 | |
831 | |
750 | =item FASTER_CACHE |
832 | =item FASTER_CACHE |
751 | |
833 | |
752 | NOT YET IMPLEMENTED |
|
|
753 | |
|
|
754 | Set a persistent cache directory that caches compiled code |
834 | Set a persistent cache directory that caches compiled code fragments. The |
755 | fragments. Normally, code compiled by Faster will be deleted immediately, |
835 | default is C<$HOME/.perl-faster-cache> if C<HOME> is set and a temporary |
756 | and every restart will recompile everything. Setting this variable to a |
836 | directory otherwise. |
757 | directory makes Faster cache the generated files for re-use. |
|
|
758 | |
837 | |
759 | This directory will always grow in contents, so you might need to erase it |
838 | This directory will always grow in size, so you might need to erase it |
760 | from time to time. |
839 | from time to time. |
761 | |
840 | |
762 | =back |
841 | =back |
763 | |
842 | |
764 | =head1 BUGS/LIMITATIONS |
843 | =head1 BUGS/LIMITATIONS |