… | |
… | |
8 | |
8 | |
9 | perl -MFaster ... |
9 | perl -MFaster ... |
10 | |
10 | |
11 | =head1 DESCRIPTION |
11 | =head1 DESCRIPTION |
12 | |
12 | |
13 | This module implements a very simple-minded JIT. It works by more or less |
13 | This module implements a very simple-minded "JIT" (or actually AIT, ahead |
14 | translating every function it sees into a C program, compiling it and then |
14 | of time compiler). It works by more or less translating every function it |
15 | replacing the function by the compiled code. |
15 | sees into a C program, compiling it and then replacing the function by the |
|
|
16 | compiled code. |
16 | |
17 | |
17 | As a result, startup times are immense, as every function might lead to a |
18 | As a result, startup times are immense, as every function might lead to a |
18 | full-blown compilation. |
19 | full-blown compilation. |
19 | |
20 | |
20 | The speed improvements are also not great, you can expect 20% or so on |
21 | The speed improvements are also not great, you can expect 20% or so on |
21 | average, for code that runs very often. |
22 | average, for code that runs very often. The reason for this is that data |
|
|
23 | handling is mostly being done by the same old code, it just gets called |
|
|
24 | a bit faster. Regexes and string operations won't get faster. Airhtmetic |
|
|
25 | doresn't become any faster. Just the operands and other stuff is put on |
|
|
26 | the stack faster, and the opcodes themselves have a bit less overhead. |
22 | |
27 | |
23 | Faster is in the early stages of development. Due to its design its |
28 | Faster is in the early stages of development. Due to its design its |
24 | relatively safe to use (it will either work or simply slowdown the program |
29 | relatively safe to use (it will either work or simply slowdown the program |
25 | immensely, but rarely cause bugs). |
30 | immensely, but rarely cause bugs). |
26 | |
31 | |
|
|
32 | More intelligent algorithms (loop optimisation, type inference) could |
|
|
33 | improve that easily, but requires a much more elaborate presentation and |
|
|
34 | optimiser than what is in place. There are no plans to improve Faster in |
|
|
35 | this way, yet, but it would provide a reasonably good place to start. |
|
|
36 | |
27 | Usage is very easy, just C<use Faster> and every function called from then |
37 | Usage is very easy, just C<use Faster> and every function called from then |
28 | on will be compiled. |
38 | on will be compiled. |
29 | |
39 | |
30 | Right now, Faster will leave lots of F<*.c>, F<*.o> and F<*.so> files in |
40 | Right now, Faster can leave lots of F<*.c> and F<*.so> files in your |
31 | F</tmp>, and it will even create those temporary files in an insecure |
41 | F<$FASTER_CACHEDIR> (by default F<$HOME/.perl-faster-cache>), and it will |
32 | manner, so watch out. |
42 | even create those temporary files in an insecure manner, so watch out. |
33 | |
43 | |
34 | =over 4 |
44 | =over 4 |
35 | |
45 | |
36 | =cut |
46 | =cut |
37 | |
47 | |
… | |
… | |
40 | no warnings; |
50 | no warnings; |
41 | |
51 | |
42 | use strict; |
52 | use strict; |
43 | use Config; |
53 | use Config; |
44 | use B (); |
54 | use B (); |
45 | #use Digest::MD5 (); |
|
|
46 | use DynaLoader (); |
55 | use DynaLoader (); |
47 | use Digest::MD5 (); |
56 | use Digest::MD5 (); |
48 | use Storable (); |
57 | use Storable (); |
|
|
58 | use Fcntl (); |
49 | |
59 | |
50 | BEGIN { |
60 | BEGIN { |
51 | our $VERSION = '0.01'; |
61 | our $VERSION = '0.01'; |
52 | |
62 | |
53 | require XSLoader; |
63 | require XSLoader; |
54 | XSLoader::load __PACKAGE__, $VERSION; |
64 | XSLoader::load __PACKAGE__, $VERSION; |
55 | } |
65 | } |
56 | |
66 | |
57 | my $CACHEDIR = $ENV{FASTER_CACHE} || do { |
67 | my $CACHEDIR = |
|
|
68 | $ENV{FASTER_CACHE} |
|
|
69 | || (exists $ENV{HOME} && "$ENV{HOME}/.perl-faster-cache") |
|
|
70 | || do { |
58 | require File::Temp; |
71 | require File::Temp; |
59 | File::Temp::tempdir (CLEANUP => 1) |
72 | File::Temp::tempdir (CLEANUP => 1) |
60 | }; |
73 | }; |
61 | |
74 | |
62 | my $COMPILE = "$Config{cc} -c -I$Config{archlibexp}/CORE $Config{optimize} $Config{ccflags} $Config{cccdlflags}"; |
75 | my $COMPILE = "$Config{cc} -c -I$Config{archlibexp}/CORE $Config{optimize} $Config{ccflags} $Config{cccdlflags}"; |
63 | my $LINK = "$Config{ld} $Config{ldflags} $Config{lddlflags} $Config{ccdlflags}"; |
76 | my $LINK = "$Config{ld} $Config{ldflags} $Config{lddlflags} $Config{ccdlflags}"; |
64 | my $LIBS = "$Config{libs}"; |
77 | my $LIBS = "$Config{libs}"; |
65 | my $_o = $Config{_o}; |
78 | my $_o = $Config{_o}; |
… | |
… | |
67 | |
80 | |
68 | # we don't need no steenking PIC on x86 |
81 | # we don't need no steenking PIC on x86 |
69 | $COMPILE =~ s/-f(?:PIC|pic)//g |
82 | $COMPILE =~ s/-f(?:PIC|pic)//g |
70 | if $Config{archname} =~ /^(i[3456]86)-/; |
83 | if $Config{archname} =~ /^(i[3456]86)-/; |
71 | |
84 | |
72 | my $opt_assert = $ENV{FASTER_DEBUG}; |
85 | my $opt_assert = $ENV{FASTER_DEBUG} > 1; |
73 | my $verbose = $ENV{FASTER_VERBOSE}+0; |
86 | my $verbose = $ENV{FASTER_VERBOSE}+0; |
|
|
87 | |
|
|
88 | warn "Faster: CACHEDIR is $CACHEDIR\n" if $verbose > 2; |
74 | |
89 | |
75 | our $source; |
90 | our $source; |
76 | |
91 | |
77 | our @ops; |
92 | our @ops; |
78 | our $insn; |
93 | our $insn; |
… | |
… | |
139 | # ops that do not need an ASYNC_CHECK |
154 | # ops that do not need an ASYNC_CHECK |
140 | my %f_noasync = map +($_ => undef), qw( |
155 | my %f_noasync = map +($_ => undef), qw( |
141 | mapstart grepstart match entereval |
156 | mapstart grepstart match entereval |
142 | enteriter entersub leaveloop |
157 | enteriter entersub leaveloop |
143 | |
158 | |
144 | pushmark nextstate |
159 | pushmark nextstate caller |
145 | |
160 | |
146 | const stub unstack |
161 | const stub unstack |
147 | last next redo seq |
162 | last next redo goto seq |
148 | padsv padav padhv padany |
163 | padsv padav padhv padany |
149 | aassign sassign orassign |
164 | aassign sassign orassign |
150 | rv2av rv2cv rv2gv rv2hv refgen |
165 | rv2av rv2cv rv2gv rv2hv refgen |
151 | gv gvsv |
166 | gv gvsv |
152 | add subtract multiply divide |
167 | add subtract multiply divide |
153 | complement cond_expr and or not |
168 | complement cond_expr and or not |
|
|
169 | bit_and bit_or bit_xor |
154 | defined |
170 | defined |
155 | method method_named bless |
171 | method method_named bless |
156 | preinc postinc predec postdec |
172 | preinc postinc predec postdec |
157 | aelem aelemfast helem delete exists |
173 | aelem aelemfast helem delete exists |
158 | pushre subst list join split concat |
174 | pushre subst list lslice join split concat |
159 | length substr stringify ord |
175 | length substr stringify ord |
160 | push pop shift unshift |
176 | push pop shift unshift |
161 | eq ne gt lt ge le |
177 | eq ne gt lt ge le |
162 | regcomp regcreset regcmaybe |
178 | regcomp regcreset regcmaybe |
163 | ); |
179 | ); |
… | |
… | |
219 | |
235 | |
220 | out_next; |
236 | out_next; |
221 | } |
237 | } |
222 | |
238 | |
223 | sub op_pushmark { |
239 | sub op_pushmark { |
224 | $source .= " PUSHMARK (PL_stack_sp);\n"; |
240 | $source .= " faster_PUSHMARK (PL_stack_sp);\n"; |
225 | |
241 | |
226 | out_next; |
242 | out_next; |
227 | } |
243 | } |
228 | |
244 | |
229 | if ($Config{useithreads} ne "define") { |
245 | if ($Config{useithreads} ne "define") { |
… | |
… | |
509 | local %op_regcomp; |
525 | local %op_regcomp; |
510 | |
526 | |
511 | my %opsseen; |
527 | my %opsseen; |
512 | my @todo = $cv->START; |
528 | my @todo = $cv->START; |
513 | my %op_target; |
529 | my %op_target; |
|
|
530 | my $numpushmark; |
514 | |
531 | |
515 | while (my $op = shift @todo) { |
532 | while (my $op = shift @todo) { |
516 | for (; $$op; $op = $op->next) { |
533 | for (; $$op; $op = $op->next) { |
517 | last if $opsseen{$$op}++; |
534 | last if $opsseen{$$op}++; |
518 | |
535 | |
… | |
… | |
552 | |
569 | |
553 | push @op_loop, \@targ; |
570 | push @op_loop, \@targ; |
554 | push @todo, @targ; |
571 | push @todo, @targ; |
555 | |
572 | |
556 | $op_target{$$_}++ for @targ; |
573 | $op_target{$$_}++ for @targ; |
|
|
574 | |
557 | } elsif ($class eq "COP") { |
575 | } elsif ($class eq "COP") { |
558 | $insn->{bblock}++ if defined $op->label; |
576 | $insn->{bblock}++ if defined $op->label; |
|
|
577 | |
|
|
578 | } else { |
|
|
579 | if ($name eq "pushmark") { |
|
|
580 | $numpushmark++; |
|
|
581 | } |
559 | } |
582 | } |
560 | } |
583 | } |
561 | } |
584 | } |
562 | |
585 | |
563 | $_->{bblock}++ for grep $op_target{${$_->{op}}}, @ops; |
586 | $_->{bblock}++ for grep $op_target{${$_->{op}}}, @ops; |
… | |
… | |
565 | local $source = <<EOF; |
588 | local $source = <<EOF; |
566 | OP *%%%FUNC%%% (pTHX) |
589 | OP *%%%FUNC%%% (pTHX) |
567 | { |
590 | { |
568 | register OP *nextop = (OP *)${$ops[0]->{op}}L; |
591 | register OP *nextop = (OP *)${$ops[0]->{op}}L; |
569 | EOF |
592 | EOF |
|
|
593 | |
|
|
594 | $source .= " faster_PUSHMARK_PREALLOC ($numpushmark);\n" |
|
|
595 | if $numpushmark; |
570 | |
596 | |
571 | while (@ops) { |
597 | while (@ops) { |
572 | $insn = shift @ops; |
598 | $insn = shift @ops; |
573 | |
599 | |
574 | $op = $insn->{op}; |
600 | $op = $insn->{op}; |
… | |
… | |
643 | |
669 | |
644 | sub func2ptr { |
670 | sub func2ptr { |
645 | my (@func) = @_; |
671 | my (@func) = @_; |
646 | |
672 | |
647 | #LOCK |
673 | #LOCK |
|
|
674 | mkdir $CACHEDIR, 0777; |
|
|
675 | sysopen my $meta_fh, "$CACHEDIR/meta", &Fcntl::O_RDWR | &Fcntl::O_CREAT, 0666 |
|
|
676 | or die "$$CACHEDIR/meta: $!"; |
|
|
677 | binmode $meta_fh, ":raw:perlio"; |
|
|
678 | fcntl_lock fileno $meta_fh |
|
|
679 | or die "$CACHEDIR/meta: $!"; |
|
|
680 | |
648 | my $meta = eval { Storable::retrieve "$CACHEDIR/meta" } || { version => 1 }; |
681 | my $meta = eval { Storable::fd_retrieve $meta_fh } || { version => 1 }; |
649 | |
682 | |
650 | for my $f (@func) { |
683 | for my $f (@func) { |
651 | $f->{func} = "F" . Digest::MD5::md5_hex ($f->{source}); |
684 | $f->{func} = "F" . Digest::MD5::md5_hex ($f->{source}); |
652 | $f->{so} = $meta->{$f->{func}}; |
685 | $f->{so} = $meta->{$f->{func}}; |
653 | } |
686 | } |
… | |
… | |
668 | |
701 | |
669 | #include "EXTERN.h" |
702 | #include "EXTERN.h" |
670 | #include "perl.h" |
703 | #include "perl.h" |
671 | #include "XSUB.h" |
704 | #include "XSUB.h" |
672 | |
705 | |
|
|
706 | #if 1 |
|
|
707 | # define faster_PUSHMARK_PREALLOC(count) while (PL_markstack_ptr + (count) >= PL_markstack_max) markstack_grow () |
|
|
708 | # define faster_PUSHMARK(p) *++PL_markstack_ptr = (p) - PL_stack_base |
|
|
709 | #else |
|
|
710 | # define faster_PUSHMARK_PREALLOC(count) 1 |
|
|
711 | # define faster_PUSHMARK(p) PUSHMARK(p) |
|
|
712 | #endif |
|
|
713 | |
673 | #define RUNOPS_TILL(op) \\ |
714 | #define RUNOPS_TILL(op) \\ |
674 | while (nextop != (op)) \\ |
715 | while (nextop != (op)) \\ |
675 | { \\ |
716 | { \\ |
676 | PERL_ASYNC_CHECK (); \\ |
717 | PERL_ASYNC_CHECK (); \\ |
677 | PL_op = nextop; nextop = (PL_op->op_ppaddr)(aTHX); \\ |
718 | PL_op = nextop; nextop = (PL_op->op_ppaddr)(aTHX); \\ |
… | |
… | |
688 | $meta->{$f->{func}} = $f->{so} = $stem; |
729 | $meta->{$f->{func}} = $f->{so} = $stem; |
689 | } |
730 | } |
690 | |
731 | |
691 | close $fh; |
732 | close $fh; |
692 | system "$COMPILE -o $stem$_o $stem.c"; |
733 | system "$COMPILE -o $stem$_o $stem.c"; |
693 | #d#unlink "$stem.c"; |
734 | unlink "$stem.c" unless $ENV{FASTER_DEBUG} > 0; |
694 | system "$LINK -o $stem$_so $stem$_o $LIBS"; |
735 | system "$LINK -o $stem$_so $stem$_o $LIBS"; |
695 | unlink "$stem$_o"; |
736 | unlink "$stem$_o"; |
696 | } |
737 | } |
697 | |
738 | |
698 | for my $f (@func) { |
739 | for my $f (@func) { |
… | |
… | |
705 | |
746 | |
706 | $f->{ptr} = DynaLoader::dl_find_symbol $so, $f->{func} |
747 | $f->{ptr} = DynaLoader::dl_find_symbol $so, $f->{func} |
707 | or die "$f->{func} not found in $stem$_so: $!"; |
748 | or die "$f->{func} not found in $stem$_so: $!"; |
708 | } |
749 | } |
709 | |
750 | |
|
|
751 | seek $meta_fh, 0, 0 or die "$CACHEDIR/meta: $!"; |
710 | Storable::nstore $meta, "$CACHEDIR/meta"; |
752 | Storable::nstore_fd $meta, $meta_fh; |
711 | # UNLOCK |
753 | truncate $meta_fh, tell $meta_fh; |
|
|
754 | |
|
|
755 | # UNLOCK (by closing $meta_fh) |
712 | } |
756 | } |
713 | |
757 | |
714 | my %ignore; |
758 | my %ignore; |
715 | |
759 | |
716 | sub entersub { |
760 | sub entersub { |
… | |
… | |
723 | warn "optimising ", $cv->STASH->NAME, "\n" |
767 | warn "optimising ", $cv->STASH->NAME, "\n" |
724 | if $verbose; |
768 | if $verbose; |
725 | |
769 | |
726 | eval { |
770 | eval { |
727 | my @func; |
771 | my @func; |
|
|
772 | |
|
|
773 | push @func, { |
|
|
774 | cv => $cv, |
|
|
775 | name => "<>", |
|
|
776 | source => cv2c $cv, |
|
|
777 | }; |
728 | |
778 | |
729 | # always compile the whole stash |
779 | # always compile the whole stash |
730 | my %stash = $cv->STASH->ARRAY; |
780 | my %stash = $cv->STASH->ARRAY; |
731 | while (my ($k, $v) = each %stash) { |
781 | while (my ($k, $v) = each %stash) { |
732 | $v->isa (B::GV::) |
782 | $v->isa (B::GV::) |
… | |
… | |
772 | =over 4 |
822 | =over 4 |
773 | |
823 | |
774 | =item FASTER_VERBOSE |
824 | =item FASTER_VERBOSE |
775 | |
825 | |
776 | Faster will output more informational messages when set to values higher |
826 | Faster will output more informational messages when set to values higher |
777 | than C<0>. Currently, C<1> outputs which packages are being compiled. |
827 | than C<0>. Currently, C<1> outputs which packages are being compiled, C<3> |
|
|
828 | outputs the cache directory and C<10> outputs information on which perl |
|
|
829 | function is compiled into which shared object. |
778 | |
830 | |
779 | =item FASTER_DEBUG |
831 | =item FASTER_DEBUG |
780 | |
832 | |
781 | Add debugging code when set to values higher than C<0>. Currently, this |
833 | Add debugging code when set to values higher than C<0>. Currently, this |
782 | adds 1-3 C<assert>'s per perl op, to ensure that opcode order and C |
834 | adds 1-3 C<assert>'s per perl op (FASTER_DEBUG > 1), to ensure that opcode |
783 | execution order are compatible. |
835 | order and C execution order are compatible. |
784 | |
836 | |
785 | =item FASTER_CACHE |
837 | =item FASTER_CACHE |
786 | |
838 | |
787 | NOT YET IMPLEMENTED CORRECTLY, SHARING BEETWEEN INSTANCES IS IMPOSSIBLE |
|
|
788 | |
|
|
789 | Set a persistent cache directory that caches compiled code |
839 | Set a persistent cache directory that caches compiled code fragments. The |
790 | fragments. Normally, code compiled by Faster will be deleted immediately, |
840 | default is C<$HOME/.perl-faster-cache> if C<HOME> is set and a temporary |
791 | and every restart will recompile everything. Setting this variable to a |
841 | directory otherwise. |
792 | directory makes Faster cache the generated files for re-use. |
|
|
793 | |
842 | |
794 | This directory will always grow in contents, so you might need to erase it |
843 | This directory will always grow in size, so you might need to erase it |
795 | from time to time. |
844 | from time to time. |
796 | |
845 | |
797 | =back |
846 | =back |
798 | |
847 | |
799 | =head1 BUGS/LIMITATIONS |
848 | =head1 BUGS/LIMITATIONS |