nix-archive-1(type directoryentry(namelibnode(type directoryentry(nameperl5node(type directoryentry(name site_perlnode(type directoryentry(name5.36.0node(type directoryentry(nameLinguanode(type directoryentry(nameStemnode(type directoryentry(nameFr.pmnode(typeregularcontentss9package Lingua::Stem::Fr; use strict; use warnings; require Exporter; our @ISA = qw(Exporter); # Items to export into callers namespace by default. Note: do not export # names by default without a very good reason. Use EXPORT_OK instead. # Do not simply export all your public functions/methods/constants. # This allows declaration use Lingua::Stem::Fr ':all'; # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK # will save memory. our %EXPORT_TAGS = (); our @EXPORT_OK = qw (stem stem_word clear_stem_cache stem_caching); our @EXPORT = (); our $VERSION = '0.02'; my $Stem_Caching = 0; my $Stem_Cache = {}; sub stem { return [] if ($#_ == -1); my $parm_ref; if (ref $_[0]) { $parm_ref = shift; } else { $parm_ref = { @_ }; } my $words = []; my $locale = 'fr'; my $exceptions = {}; foreach (keys %$parm_ref) { my $key = lc ($_); if ($key eq '-words') { @$words = @{$parm_ref->{$key}}; } elsif ($key eq '-exceptions') { $exceptions = $parm_ref->{$key}; } elsif ($key eq '-locale') { $locale = $parm_ref->{$key}; } else { croak (__PACKAGE__ . "::stem() - Unknown parameter '$key' with value '$parm_ref->{$key}'\n"); } } local( $_ ); foreach (@$words) { # Flatten case $_ = lc $_; # Check against exceptions list if (exists $exceptions->{$_}) { $_ = $exceptions->{$_}; next; } # Check against cache of stemmed words my $original_word = $_; if ($Stem_Caching && exists $Stem_Cache->{$original_word}) { $_ = $Stem_Cache->{$original_word}; next; } $_ = stem_word($_); $Stem_Cache->{$original_word} = $_ if $Stem_Caching; } $Stem_Cache = {} if ($Stem_Caching < 2); return $words; } sub stem_word { our($word) = @_; $word = lc $word; # Check against cache of stemmed words if ($Stem_Caching && exists $Stem_Cache->{$word}) { return $Stem_Cache->{$word}; } our($RV, $R1, $R2); ### u, i between vowels into upper case. $word =~ s/([aeiouy])([ui])([aeiouy])/$1.uc($2).$3/eg; ### y preceded or followed by a vowel into upper case. $word =~ s/([aeiouy])(y)/$1.uc($2)/eg; $word =~ s/(y)([aeiouy])/uc($1).$2/eg; ### u after q into upper case. $word =~ s/(q)(u)/$1.uc($2)/eg; #### RV is defined as follows $RV = $word; #### If the first two letters are vowels if($word =~ /^[aeiouy][aeiouy]/) { #### RV is the region after the third letter unless ( $RV =~ s/^...// ) { $RV = ""; } } elsif ( $word =~ /^.+?[aeiouy].+/ ) { #### RV is after the first vowel not beginning or end the word $RV =~ s/^.+?[aeiouy]//; } else { #### RV is the end of the word $RV = ""; } #print "Word=$word\nRV=$RV\n"; #### Defining R1 and R2 $R1 = $word; #### R1 is the region after the first non-vowel following a #### vowel, or is the null region at the end of the word if #### there is no such non-vowel. unless($R1 =~ s/^.*?[aeiouy][^aeiouy]//) { $R1 = ""; } #print "R1=$R1\n"; #### R2 is the region after the first non-vowel following a #### vowel in R1, or is the null region at the end of the #### word if there is no such non-vowel. $R2 = $R1; if($R2) { unless($R2 =~ s/^.*?[aeiouy][^aeiouy]//) { $R2 = ""; } } #print "R2=$R2\n"; #### Step 1: Standard suffix removal my $step1 = 0; #### Search for the longest among the following suffixes, #### and perform the action indicated my @suffix = qw( ance iqUe isme able iste eux ances iqUes ismes ables istes ); #### delete if in R2 $step1 += stem_killer( $R2, "", "", @suffix ); @suffix = qw( trice ateur ation atrices ateurs ations ); #### delete if in R2 #### if preceded by ic, delete if in R2 #print "Word=$word RV=$RV R1=$R1 R2=$R2\n"; $step1 += stem_killer( $R2, "ic", "", @suffix ) || stem_killer( $R1, "ic", "iqU", @suffix ) || stem_killer( $R2, "", "", @suffix ); @suffix = qw( logie logies ); #### replace with log if in R2 $step1 += stem_killer( $R2, "", "log", @suffix ); @suffix = qw( usion ution usions utions ); #### replace with u if in R2 $step1 += stem_killer( $R2, "", "u", @suffix ); @suffix = qw( ence ences ); #### replace with ent if in R2 $step1 += stem_killer( $R2, "", "ent", @suffix ); @suffix = qw( issement issements ); #### delete if in R1 and preceded by a non-vowel if ( nvprec( $R1, @suffix ) ) { $step1 += stem_killer( $R1, "", "", @suffix); } @suffix = qw( ement ements ); #### delete if in RV #### if preceded by iv, delete if in R2 #### (and if further preceded by at, delete if in R2), otherwise, #### if preceded by eus, delete if in R2, else replace by eux if in R1, otherwise, #### if preceded by abl or iqU, delete if in R2, otherwise, #### if preceded by ir or Ir, replace by i if in RV $step1 += stem_killer( $RV, "ativ", "", @suffix ) || stem_killer( $R2, "iv", "", @suffix ) || stem_killer( $R2, "(abl|iqU)", "", @suffix ) || stem_killer( $R2, "(ir|Ir)", "i", @suffix ) || stem_killer( $R2, "eus", "", @suffix ) || stem_killer( $R1, "eus", "eux", @suffix ) || stem_killer( $RV, "", "", @suffix ); @suffix = qw( it its ); #### delete if in R2 #### if preceded by abil, delete if in R2, else replace by abl, otherwise, #### if preceded by ic, delete if in R2, else replace by iqU, otherwise, #### if preceded by iv, delete if in R2 $step1 += stem_killer( $R2, "(abil|ic|iv)", "", @suffix ) || stem_killer( $word, "abil", "abl", @suffix ) || stem_killer( $word, "ic", "iqU", @suffix ) || stem_killer( $R2, "", "", @suffix ); @suffix = qw( if ive ifs ives ); #### delete if in R2 #### if preceded by at, delete if in R2 #### (and if further preceded by ic, delete if in R2, else replace by iqU) $step1 += stem_killer( $R2, "icat", "", @suffix) || stem_killer( $R2, "at", "", @suffix) || stem_killer( $word, "icat", "iqU", @suffix) || stem_killer( $R2, "", "", @suffix); @suffix = qw( eaux ); #### replace with eau $step1 += stem_killer( $word, "", "eau", @suffix); @suffix = qw( aux ); #### replace with eau $step1 += stem_killer( $R1, "", "al", @suffix); @suffix = qw( euse euses ); #### delete if in R2, else replace by eux if in R1 $step1 += stem_killer( $R2, "", "", @suffix) || stem_killer( $R1, "", "eux", @suffix); @suffix = qw( emment ); #### replace with ent my $sufstep2 += stem_killer( $RV, "", "ent", @suffix); @suffix = qw( amment ); #### replace with ant $sufstep2 += stem_killer( $RV, "", "ant", @suffix); @suffix = qw( ment ments ); #### delete if preceded by a vowel in RV if ( vprec ( $RV, @suffix) ) { $sufstep2 += stem_killer( $RV, "", "", @suffix); } #### Step 2: Verb suffixes #### Do step 2a if no ending was removed by step 1. my $step2a = 0; if( ($step1 == 0) || ($sufstep2 > 0) ) { #### Search for the longest among the following suffixes in RV, #### and if found, delete. @suffix = qw( mes t tes i ie ies ir ira irai iraIent irais irait iras irent irez iriez irions irons iront is issaIent issais issait issant issante issantes issants isse issent isses issez issiez issions issons it ); if ( nvprec( $RV, @suffix) ) { #print "word:$word RV:$RV R1:$R1 R2:$R2\n"; $step2a += stem_killer( $RV, "", "", @suffix ); } } my $step2b = 0; if ( $step2a == 0 ) { @suffix = qw( ions ); #### delete if in R2 $step2b += stem_killer( $R2, "", "", @suffix); @suffix = qw( e es s rent er era erai eraIent erais erait eras erez eriez erions erons eront ez iez ); #### delete $step2b += stem_killer( $RV, "", "", @suffix); #print "Avant word:$word RV:$RV R1:$R1 R2:$R2\n"; @suffix = qw( mes t tes a ai aIent ais ait ant ante antes ants as asse assent asses assiez assions ); #### delete #### if preceded by e, delete $step2b += stem_killer( $RV, "e", "", @suffix) || stem_killer( $RV, "", "", @suffix); #print "Apres word:$word RV:$RV R1:$R1 R2:$R2\n"; } my $step4 = 1; if ( $step1 > 0 || $step2a > 0 || $step2b > 0 ) { #### Step 3 #### Replace final Y with i or final with c if ( $word =~ /Y$|$/ ) { $word =~ s/Y$/i/; $word =~ s/$/c/; $step4 = 0; } } if ( $step4 == 1 && $step1 == 0 && $step2a == 0 && $step2b == 0 ) { #### Step 4 #### If the word ends s, not preceded by a, i, o, u, or s, delete it. #print "word:$word RV:$RV\n"; if ( $word =~ /[^aious]s$/ ) { stem_killer( $word , "", "", "s" ); } @suffix = qw( ent ); #### delete if in R2 stem_killer( $R2, "", "", @suffix); @suffix = qw( ion ); #### delete if in R2 and preceded by s or t if ( $R2 =~ /ion$/ && $RV =~ /tion|sion/ ) { stem_killer( $R2, "", "", @suffix); } #(So note that ion is removed only when it is in R2 - as well as being in RV - and preceded by s or t which must be in RV.) @suffix = qw( ier ire Ier Ire ); #### replace with i stem_killer( $RV, "", "i", @suffix); @suffix = qw( e ); #### e delete #print "word:$word RV:$RV R1:$R1 R2:$R2\n"; stem_killer( $RV, "", "", @suffix); @suffix = qw( ); #### if preceded by gu, delete if ( $RV =~ /gu$/ ) { stem_killer( $RV, "", "", @suffix); } } #### Always do Step 5 and Step 6 #### step 5 : Undouble #### If the word ends enn, onn, ett, ell or eill, delete the last letter $word =~ s/enn$/en/; $word =~ s/onn$/on/; $word =~ s/ett$/et/; $word =~ s/ell$/el/; $word =~ s/eill$/eil/; #### step 6 :Un-accent #### If the words ends or followed by at least one non-vowel, #### remove the accent from the e $word =~ s/[]([^aeiouy]+?)$/e$1/; #### And finally: #### Turn any remaining I, U and Y letters into lower case. $word =~ s/([IUY])/lc($1)/eg; return $word; } sub nvprec { my($where, @list) = @_; use vars qw($RV $R1 $R2 $word); foreach my $p ( sort { length($b) <=> length($a) } @list) { if ($where =~ /[^aeiouy]$p$/) { return 1; } } return; } sub vprec { my($where, @list) = @_; use vars qw($RV $R1 $R2 $word); foreach my $p ( sort { length($b) <=> length($a) } @list) { if ($where =~ /[aeiouy]$p$/) { return 1; } } return; } sub stem_killer { my($where, $pre, $with, @list) = @_; use vars qw($RV $R1 $R2 $word); my $done = 0; foreach my $P (sort { length($b) <=> length($a) } @list) { if($where =~ /$pre$P$/) { $R2 =~ s/$pre$P$/$with/; $R1 =~ s/$pre$P$/$with/; $RV =~ s/$pre$P$/$with/; $word =~ s/$pre$P$/$with/; $done = 1; last; } } return $done; } sub stem_caching { my $parm_ref; if (ref $_[0]) { $parm_ref = shift; } else { $parm_ref = { @_ }; } my $caching_level = $parm_ref->{-level}; if (defined $caching_level) { if ($caching_level !~ m/^[012]$/) { croak(__PACKAGE__ . "::stem_caching() - Legal values are '0','1' or '2'. '$caching_level' is not a legal value"); } $Stem_Caching = $caching_level; } return $Stem_Caching; } sub clear_stem_cache { $Stem_Cache = {}; } 1; __END__ =head1 NAME Lingua::Stem::Fr - Perl French Stemming =head1 SYNOPSIS use Lingua::Stem::Fr; my $stems = Lingua::Stem::Fr::stem({ -words => $word_list_reference, -locale => 'fr', -exceptions => $exceptions_hash, }); my $stem = Lingua::Stem::Fr::stem_word( $word ); =head1 DESCRIPTION This module use the a modified version of the Porter Stemming Algorithm to return a stemmed words. The algorithm is implemented as described in: http://snowball.tartarus.org/french/stemmer.html with some improvement. The code is carefully crafted to work in conjunction with the L module by Benjamin Franz. This french version is based too, on the work of Aldo Calpini (Italian Version) =head1 METHODS =over 4 =item stem({ -words => \@words, -locale => 'fr', -exceptions => \%exceptions }); Stems a list of passed words. Returns an anonymous list reference to the stemmed words. Example: my $stemmed_words = Lingua::Stem::Fr::stem({ -words => \@words, -locale => 'fr', -exceptions => \%exceptions, }); =item stem_word( $word ); Stems a single word and returns the stem directly. Example: my $stem = Lingua::Stem::Fr::stem_word( $word ); =item stem_caching({ -level => 0|1|2 }); Sets the level of stem caching. '0' means 'no caching'. This is the default level. '1' means 'cache per run'. This caches stemming results during a single call to 'stem'. '2' means 'cache indefinitely'. This caches stemming results until either the process exits or the 'clear_stem_cache' method is called. =item clear_stem_cache; Clears the cache of stemmed words =back =cut =head1 HISTORY =over 8 =item 0.01 Original version; created by h2xs 1.23 with options -ACX -n Lingua::Stem::Fr =item 0.02 Minor change in documentation and disable of limitation to perl 5.8.3+ =back =head1 SEE ALSO You can see the French stemming algorithm from Mr Porter here : http://snowball.tartarus.org/french/stemmer.html Another French stemming tool in Perl (French page) : http://www.univ-nancy2.fr/pers/namer/Telecharger_Flemm.html =head1 AUTHOR Sbastien Darribere-Pleyt, Esebastien.darribere@lefute.comE =head1 COPYRIGHT AND LICENSE Copyright (C) 2003 by Aldo Calpini Copyright (C) 2004 by Sbastien Darribere-Pleyt This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.3 or, at your option, any later version of Perl 5 you may have available. =cut ))))))entry(nameriscv64-linux-thread-multinode(type directoryentry(nameautonode(type directoryentry(nameLinguanode(type directoryentry(nameStemnode(type directoryentry(nameFrnode(type directoryentry(name .packlistnode(typeregularcontents/gnu/store/3pv2r8slvb18mg4za8fx2gi8nb2i7a1i-perl-lingua-stem-fr-0.02/lib/perl5/site_perl/5.36.0/Lingua/Stem/Fr.pm /gnu/store/3pv2r8slvb18mg4za8fx2gi8nb2i7a1i-perl-lingua-stem-fr-0.02/share/man/man3/Lingua::Stem::Fr.3 ))))))))))))))))))))entry(namesharenode(type directoryentry(namemannode(type directoryentry(nameman3node(type directoryentry(nameLingua::Stem::Fr.3.zstnode(typeregularcontentsr (/d%Kzi`9mj:@cvw֟Ba~TyS]ziGP2((cn$*JA@P"IdʨI4yIDeN#l2(bgA VUlJ8kVxjW&DD@0qbY΁i7 xKQ )Rεyx\.˦'o-%p$W^8E9L.kX^rRfo,ڌ!Lֱ*: .Ыm d18 ZyxXr2*|I'"01 {z_ӂ6{ #m-& 6 JC'mA9 ɬK&02qwE</lyȤ[= r{m ^rPmø͎Z^G`rŸhJaj,Zx[*j==R^5Se`t8'T0ÆLcck!Fp(bP `Li,ɽPCsR ēD(qbK(R]2S'DĠdfxl,ڸmfChnŢY1|4Vh߭yH gD{G+%(*| KAE"!趙р04W``aрd(i=q*8v=n`V\2t+@h5GΈhVF-G,"(78C9딘ssL/:7't@0h *Z !^H)!PY%㶞 !sI&|: E]uΩ"I 4"\M)kЈH E%I= ?IQk( ;{8B[aC~P_Wlm/Sg9Z9p,pIK$ Rw4k7̢DƒF f$wieA}LuE|) d(mB_:_ 9轄]IQsy>(." }"vjN{kK/s2Yv9}NA; feTP pHE)PyV )9`p(JiFS6[Xitp4=Gg?gQK ҍC=V9LBgP_ 8ESTڇκ2F9RD fKMh$:%D*Խjh9OfN \4Uʕ-slIhfs|B0ᕙ1C~û Dd.`;v<1VTh,u a\Uo Ljzd%-nb;=#c7kZCpNCTG7$u]ȶ kޙx!:2&_:{PK9ĀYoA /=%@o_Y !mϓCW\\ԕ3 z/RUY֭9&3*sa4qqׯ&djy\=6Hm:%mpNs_xL[^#KK]f=pI)Ỏ\+o#-Zh81Ys2'KeY]l۔Sft)N`"Ԉ* c q+yGB: 'B&]B8[Kp=nH쌔V53|kdQ>w | V ($ /ʤ zmT5_yWX<0\(z\]&qUUdtQQ5 MAA* TXvEZt?;4Ic!Uz@8W<\kbqy N/oQl"MllWWxZ9hK( 6>+)))))))))