diff options
author | Anno Siegel <anno4000@lublin.zrz.tu-berlin.de> | 2006-06-22 00:39:51 +0200 |
---|---|---|
committer | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2006-06-23 16:28:03 +0000 |
commit | 1e73acc8af3eecb1b36ee831483e1e9a7b3d1662 (patch) | |
tree | a723894f28f77db0a42b0de6599f6be9243cc439 /ext/Hash | |
parent | 7d3c2c289ea5236d2681b4bfba871738a7751375 (diff) | |
download | perl-1e73acc8af3eecb1b36ee831483e1e9a7b3d1662.tar.gz |
Re: [PATCH] Hash::Util::FieldHash
Message-Id: <974A5B4B-7614-4F3F-BA7C-828960D82C55@mailbox.tu-berlin.de>
p4raw-id: //depot/perl@28419
Diffstat (limited to 'ext/Hash')
-rw-r--r-- | ext/Hash/Util/Changes | 4 | ||||
-rw-r--r-- | ext/Hash/Util/FieldHash/Changes | 6 | ||||
-rw-r--r-- | ext/Hash/Util/FieldHash/FieldHash.xs | 353 | ||||
-rw-r--r-- | ext/Hash/Util/FieldHash/Makefile.PL | 20 | ||||
-rw-r--r-- | ext/Hash/Util/FieldHash/lib/Hash/Util/FieldHash.pm | 442 | ||||
-rw-r--r-- | ext/Hash/Util/FieldHash/t/01_load.t | 59 | ||||
-rw-r--r-- | ext/Hash/Util/FieldHash/t/02_function.t | 215 | ||||
-rw-r--r-- | ext/Hash/Util/FieldHash/t/03_class.t | 116 | ||||
-rw-r--r-- | ext/Hash/Util/FieldHash/t/04_thread.t | 68 | ||||
-rw-r--r-- | ext/Hash/Util/FieldHash/t/05_perlhook.t | 174 | ||||
-rw-r--r-- | ext/Hash/Util/FieldHash/t/10_hash.t | 116 | ||||
-rw-r--r-- | ext/Hash/Util/FieldHash/t/11_hashassign.t | 319 | ||||
-rw-r--r-- | ext/Hash/Util/FieldHash/t/12_hashwarn.t | 60 | ||||
-rw-r--r-- | ext/Hash/Util/Makefile.PL | 1 | ||||
-rw-r--r-- | ext/Hash/Util/lib/Hash/Util.pm | 41 |
15 files changed, 1992 insertions, 2 deletions
diff --git a/ext/Hash/Util/Changes b/ext/Hash/Util/Changes index f6ba16bf4a..06589b56ff 100644 --- a/ext/Hash/Util/Changes +++ b/ext/Hash/Util/Changes @@ -15,4 +15,6 @@ on top of code by Nick Ing-Simmons and Jeffrey Friedl. - +0.07 Sun Jun 11 21:24:15 CEST 2006 + - added front-end support for the new Hash::Util::FieldHash + (Anno Siegel) diff --git a/ext/Hash/Util/FieldHash/Changes b/ext/Hash/Util/FieldHash/Changes new file mode 100644 index 0000000000..071dcaa4ff --- /dev/null +++ b/ext/Hash/Util/FieldHash/Changes @@ -0,0 +1,6 @@ +Revision history for Perl extension Hash::Util::FieldHash. + +0.01 Sat Jun 3 16:24:12 2006 + - original version; created by h2xs 1.23 with options + -A -g --skip-ppport -nHash::Util::FieldHash + diff --git a/ext/Hash/Util/FieldHash/FieldHash.xs b/ext/Hash/Util/FieldHash/FieldHash.xs new file mode 100644 index 0000000000..32c936180b --- /dev/null +++ b/ext/Hash/Util/FieldHash/FieldHash.xs @@ -0,0 +1,353 @@ +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" + +/* support for Hash::Util::FieldHash, prefix HUF_ */ + +/* The object registry, a package variable */ +#define HUF_OB_REG "Hash::Util::FieldHash::ob_reg" +/* Magic cookies to recognize object id's. Hi, Eva, David */ +#define HUF_COOKIE 2805.1980 +#define HUF_REFADDR_COOKIE 1811.1976 + + +/* For global cache of object registry */ +#define MY_CXT_KEY "Hash::Util::FieldHash::_guts" XS_VERSION +typedef struct { + HV* ob_reg; /* Cache object registry */ +} my_cxt_t; +START_MY_CXT + +/* Deal with global context */ +#define HUF_INIT 1 +#define HUF_CLONE 0 +#define HUF_RESET -1 + +void HUF_global(I32 how) { + if (how == HUF_INIT) { + MY_CXT_INIT; + MY_CXT.ob_reg = get_hv(HUF_OB_REG, 1); + } else if (how == HUF_CLONE) { + MY_CXT_CLONE; + MY_CXT.ob_reg = get_hv(HUF_OB_REG, 0); + } else if (how == HUF_RESET) { + dMY_CXT; + MY_CXT.ob_reg = get_hv(HUF_OB_REG, 0); + } +} + +/* the id as an SV, optionally marked in the nv (unused feature) */ +SV* HUF_id(SV* ref, NV cookie) { + SV* id = sv_newmortal(); + if (cookie == 0 ) { + SvUPGRADE(id, SVt_PVIV); + } else { + SvUPGRADE(id, SVt_PVNV); + SvNV_set(id, cookie); + SvNOK_on(id); + } + SvIV_set(id, (IV)SvRV(ref)); + SvIOK_on(id); + return id; +} + +/* plain id, only used for field hash entries in field lists */ +SV* HUF_field_id(SV* obj) { + return HUF_id(obj, 0.0); +} + +/* object id (may be different in future) */ +SV* HUF_obj_id(SV* obj) { + return HUF_id(obj, 0.0); +} + +/* set up uvar magic for any sv */ +void HUF_add_uvar_magic( + SV* sv, /* the sv to enchant, visible to * get/set */ + I32(* val)(pTHX_ IV, SV*), /* "get" function */ + I32(* set)(pTHX_ IV, SV*), /* "set" function */ + I32 index, /* get/set will see this */ + SV* thing /* any associated info */ +) { + MAGIC* mg; + struct ufuncs uf; + uf.uf_val = val; + uf.uf_set = set; + uf.uf_index = index; + sv_magic(sv, thing, PERL_MAGIC_uvar, (char*)&uf, sizeof(uf)); +} + +/* Fetch the data container of a trigger */ +AV* HUF_get_trigger_content(SV* trigger) { + MAGIC* mg; + if (trigger && (mg = mg_find(trigger, PERL_MAGIC_uvar))) + return (AV*)mg->mg_obj; + return NULL; +} + +/* Delete an object from all field hashes it may occur in. Also delete + * the object's entry from the object registry. + */ +I32 HUF_destroy_obj(pTHX_ IV index, SV* trigger) { + /* Do nothing if the weakref wasn't undef'd. Also don't bother + * during global destruction. (MY_CXT.ob_reg is sometimes funny there) */ + if (!SvROK(trigger) && (!PL_in_clean_all)) { + dMY_CXT; + AV* cont = HUF_get_trigger_content(trigger); + SV* ob_id = *av_fetch(cont, 0, 0); + HV* field_tab = (HV*) *av_fetch(cont, 1, 0); + HE* ent; + hv_iterinit(field_tab); + while (ent = hv_iternext(field_tab)) { + SV* field_ref = HeVAL(ent); + SV* field = SvRV(field_ref); + hv_delete_ent((HV*)field, ob_id, G_DISCARD, 0); + } + /* make it safe in case we must run in global clenaup, after all */ + if (PL_in_clean_all) + HUF_global(HUF_RESET); + hv_delete_ent(MY_CXT.ob_reg, ob_id, G_DISCARD, 0); + } + return 0; +} + +/* Create a trigger for an object. The trigger is a magical weak ref + * that fires when the weak ref expires. it holds the original id of + * the object, and a list of field hashes from which the object may + * have to be deleted. The trigger is stored in the object registry + * and also deleted when the object expires. + */ +SV* HUF_new_trigger(SV* obj, SV* ob_id) { + dMY_CXT; + SV* trigger = sv_rvweaken(newRV_inc(SvRV(obj))); + AV* cont = newAV(); + sv_2mortal((SV*)cont); + av_store(cont, 0, SvREFCNT_inc(ob_id)); + av_store(cont, 1, (SV*)newHV()); + HUF_add_uvar_magic(trigger, NULL, &HUF_destroy_obj, 0, (SV*)cont); + hv_store_ent(MY_CXT.ob_reg, ob_id, trigger, 0); + return trigger; +} + +/* retrieve a trigger for obj if one exists, return NULL otherwise */ +SV* HUF_ask_trigger(SV* ob_id) { + dMY_CXT; + HE* ent; + if (ent = hv_fetch_ent(MY_CXT.ob_reg, ob_id, 0, 0)) + return HeVAL(ent); + return NULL; +} + +/* get the trigger for an object, creating it if necessary */ +SV* HUF_get_trigger(SV* obj, SV* ob_id) { + SV* trigger; + if (!(trigger = HUF_ask_trigger(ob_id))) + trigger = HUF_new_trigger(obj, ob_id); + return trigger; +} + +/* mark an object (trigger) as having been used with a field */ +void HUF_mark_field(SV* trigger, SV* field) { + AV* cont = HUF_get_trigger_content(trigger); + HV* field_tab = (HV*) *av_fetch(cont, 1, 0); + SV* field_ref = newRV_inc(field); + SV* field_id = HUF_field_id(field_ref); + hv_store_ent(field_tab, field_id, field_ref, 0); +} + +/* The key exchange function. It communicates with S_hv_magic_uvar_xkey + * in hv.c */ +IV HUF_watch_key(pTHX_ IV action, SV* field) { + MAGIC* mg = mg_find(field, PERL_MAGIC_uvar); + SV* keysv = mg->mg_obj; + if (keysv && SvROK(keysv)) { + SV* ob_id = HUF_obj_id(keysv); + SV* trigger = HUF_get_trigger(keysv, ob_id); + HUF_mark_field(trigger, field); + mg->mg_obj = ob_id; /* key replacement */ + } + return 0; +} + +/* see if something is a field hash */ +int HUF_get_status(HV* hash) { + int ans = 0; + if (hash && (SvTYPE(hash) == SVt_PVHV)) { + dMY_CXT; + MAGIC* mg; + struct ufuncs* uf; + ans = (mg = mg_find((SV*)hash, PERL_MAGIC_uvar)) && + (uf = (struct ufuncs *)mg->mg_ptr) && + (uf->uf_val == &HUF_watch_key) && + (uf->uf_set == NULL); + } + return ans; +} + +/* Thread support. These routines are called by CLONE (and nothing else) */ + +/* Fix entries for one object in all field hashes */ +void HUF_fix_trigger(SV* trigger, SV* new_id) { + AV* cont = HUF_get_trigger_content(trigger); + HV* field_tab = (HV*) *av_fetch(cont, 1, 0); + HV* new_tab = newHV(); + HE* ent; + SV* old_id = *av_fetch(cont, 0, 0); + hv_iterinit(field_tab); + while (ent = hv_iternext(field_tab)) { + SV* field_ref = HeVAL(ent); + SV* field_id = HUF_field_id(field_ref); + HV* field = (HV*)SvRV(field_ref); + SV* val; + /* recreate field tab entry */ + hv_store_ent(new_tab, field_id, SvREFCNT_inc(field_ref), 0); + /* recreate field entry, if any */ + if (val = hv_delete_ent(field, old_id, 0, 0)) + hv_store_ent(field, new_id, SvREFCNT_inc(val), 0); + } + /* update the trigger */ + av_store(cont, 0, SvREFCNT_inc(new_id)); + av_store(cont, 1, (SV*)new_tab); +} + +/* Go over object registry and fix all objects. Also fix the object + * registry. + */ +void HUF_fix_objects() { + dMY_CXT; + I32 i, len; + HE* ent; + AV* oblist = (AV*)sv_2mortal((SV*)newAV()); + hv_iterinit(MY_CXT.ob_reg); + while(ent = hv_iternext(MY_CXT.ob_reg)) + av_push(oblist, SvREFCNT_inc(hv_iterkeysv(ent))); + len = av_len(oblist); + for (i = 0; i <= len; ++i) { + SV* old_id = *av_fetch(oblist, i, 0); + SV* trigger = hv_delete_ent(MY_CXT.ob_reg, old_id, 0, 0); + SV* new_id = HUF_obj_id(trigger); + HUF_fix_trigger(trigger, new_id); + hv_store_ent(MY_CXT.ob_reg, new_id, SvREFCNT_inc(trigger), 0); + } +} + +/* test support (not needed for functionality) */ + +static SV* counter; +IV HUF_inc_var(pTHX_ IV index, SV* which) { + sv_setiv(counter, 1 + SvIV(counter)); + return 0; +} + +MODULE = Hash::Util::FieldHash PACKAGE = Hash::Util::FieldHash + +BOOT: +{ + HUF_global(HUF_INIT); /* create variables */ +} + +int +_fieldhash(SV* href, int mode) +PROTOTYPE: $$ +CODE: + HV* field; + RETVAL = 0; + if (mode && + href && SvROK(href) && + (field = (HV*)SvRV(href)) && + SvTYPE(field) == SVt_PVHV + ) { + HUF_add_uvar_magic( + SvRV(href), + &HUF_watch_key, + NULL, + 0, + NULL + ); + RETVAL = HUF_get_status(field); + } +OUTPUT: + RETVAL + +void +CLONE(char* class) +CODE: + if (0 == strcmp(class, "Hash::Util::FieldHash")) { + HUF_global(HUF_CLONE); + HUF_fix_objects(); + } + +SV* +_get_obj_id(SV* obj) +CODE: + RETVAL = NULL; + if (SvROK(obj)) + RETVAL = HUF_obj_id(obj); +OUTPUT: + RETVAL + +SV* +_active_fields(SV* obj) +PPCODE: + if (SvROK(obj)) { + SV* ob_id = HUF_obj_id(obj); + SV* trigger = HUF_ask_trigger(ob_id); + if (trigger) { + AV* cont = HUF_get_trigger_content(trigger); + HV* field_tab = (HV*) *av_fetch(cont, 1, 0); + HE* ent; + hv_iterinit(field_tab); + while (ent = hv_iternext(field_tab)) { + HV* field = (HV*)SvRV(HeVAL(ent)); + if (hv_exists_ent(field, ob_id, 0)) + XPUSHs(sv_2mortal(newRV_inc((SV*)field))); + } + } + } + +void +_test_uvar_get(SV* svref, SV* countref) +CODE: + if (SvROK(svref) && SvROK(countref)) { + counter = SvRV(countref); + sv_setiv(counter, 0); + HUF_add_uvar_magic( + SvRV(svref), + &HUF_inc_var, + NULL, + 0, + SvRV(countref) + ); + } + +void +_test_uvar_set(SV* svref, SV* countref) +CODE: + if (SvROK(svref) && SvROK(countref)) { + counter = SvRV(countref); + sv_setiv(counter, 0); + counter = SvRV(countref); + HUF_add_uvar_magic( + SvRV(svref), + NULL, + &HUF_inc_var, + 0, + SvRV(countref) + ); + } + +void +_test_uvar_same(SV* svref, SV* countref) +CODE: + if (SvROK(svref) && SvROK(countref)) { + counter = SvRV(countref); + sv_setiv(counter, 0); + HUF_add_uvar_magic( + SvRV(svref), + &HUF_inc_var, + &HUF_inc_var, + 0, + NULL + ); + } + diff --git a/ext/Hash/Util/FieldHash/Makefile.PL b/ext/Hash/Util/FieldHash/Makefile.PL new file mode 100644 index 0000000000..1ec06efe27 --- /dev/null +++ b/ext/Hash/Util/FieldHash/Makefile.PL @@ -0,0 +1,20 @@ +use 5.009004; +use ExtUtils::MakeMaker; +# See lib/ExtUtils/MakeMaker.pm for details of how to influence +# the contents of the Makefile that is written. +WriteMakefile( + NAME => 'Hash::Util::FieldHash', + VERSION_FROM => 'lib/Hash/Util/FieldHash.pm', # finds $VERSION + PREREQ_PM => {}, # e.g., Module::Name => 1.1 + (grep( /^PERL_CORE=1$/, @ARGV) ? (MAN3PODS => {}) : ()), + ($] >= 5.005 ? ## Add these new keywords supported since 5.005 + (ABSTRACT_FROM => 'lib/Hash/Util/FieldHash.pm', # retrieve abstract from module + AUTHOR => 'Anno Siegel <anno@zrz.tu-berlin.de>') : ()), + LIBS => [''], # e.g., '-lm' + DEFINE => '', # e.g., '-DHAVE_SOMETHING' + # Insert -I. if you add *.h files later: + INC => '', # e.g., '-I/usr/include/other' + # Un-comment this if you add C files to link with later: + # OBJECT => '$(O_FILES)', # link all the C files too + CCFLAGS => '-Wuninitialized', +); diff --git a/ext/Hash/Util/FieldHash/lib/Hash/Util/FieldHash.pm b/ext/Hash/Util/FieldHash/lib/Hash/Util/FieldHash.pm new file mode 100644 index 0000000000..cf20f55876 --- /dev/null +++ b/ext/Hash/Util/FieldHash/lib/Hash/Util/FieldHash.pm @@ -0,0 +1,442 @@ +package Hash::Util::FieldHash; + +use 5.009004; +use strict; +use warnings; +use Carp qw( croak); +use Scalar::Util qw( reftype); + +require Exporter; +our @ISA = qw(Exporter); +our %EXPORT_TAGS = ( + 'all' => [ qw( + fieldhash + fieldhashes + )], +); +our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); +our @EXPORT = qw( +); + +our $VERSION = '0.01'; + +{ + require XSLoader; + our %ob_reg; # silence possible 'once' warning in XSLoader + XSLoader::load('Hash::Util::FieldHash', $VERSION); +} + +sub fieldhash (\%) { + for ( shift ) { + return unless ref() && reftype( $_) eq 'HASH'; + return $_ if Hash::Util::FieldHash::_fieldhash( $_, 0); + return $_ if Hash::Util::FieldHash::_fieldhash( $_, 1); + return; + } +} + +sub fieldhashes { map &fieldhash( $_), @_ } + +1; +__END__ + +=head1 NAME + +Hash::Util::FieldHash - Associate references with data + +=head1 SYNOPSIS + + use Hash::Util qw(fieldhash fieldhashes); + + # Create a single field hash + fieldhash my %foo; + + # Create three at once... + fieldhashes \ my(%foo, %bar, %baz); + # ...or any number + fieldhashes @hashrefs; + +=head1 Functions + +Two functions generate field hashes: + +=over + +=item fieldhash + + fieldhash %hash; + +Creates a single field hash. The argument must be a hash. Returns +a reference to the given hash if successful, otherwise nothing. + +=item fieldhashes + + fieldhashes @hashrefs; + +Creates any number of field hashes. Arguments must be hash references. +Returns the converted hashrefs in list context, their number in scalar +context. + +=back + +=head1 Description + +=head2 Features + +Field hashes have three basic features: + +=over + +=item Key exchange + +If a I<reference> is used as a field hash key, it is replaced by +the integer value of the reference address. + +=item Thread support + +In a new I<thread> a field hash is updated so that its keys reflect +the new reference addresses of the original objects. + +=item Garbage collection + +When a reference goes I<stale> after having been used as a field hash key, +the hash entry will be deleted. + +=back + +Field hashes are designed to maintain an association of a reference +with a value. The association is independent of the bless status of +the key, it is thread safe and garbage-collected. These properties +are desirable in the construction of inside-out classes. + +When used with keys that are plain scalars (not references), field +hashes behave like normal hashes. + +=head2 Rationale + +The association of a reference (namely an object) with a value is +central to the concept of inside-out classes. These classes don't +store the values of object variables (fields) inside the object itself, +but outside, as it were, in private hashes keyed by the object. + +Normal hashes can be used for the purpose, but turn out to have +some disadvantages: + +=over + +=item Stringification + +The stringification of references depends on the bless status of the +reference. A plain hash reference C<$ref> may stringify as C<HASH(0x1801018)>, +but after being blessed into class C<foo> the same reference will look like +as C<foo=HASH(0x1801018)>, unless class C<foo> overloads stringification, +in which case it may show up as C<wurzelzwerg>. In a normal hash, the +stringified reference wouldn't be found again after the blessing. + +Bypassing stringification by use of C<Scalar::Util::refaddr> has been +used to correct this. Field hashes automatically stringify their +keys to the reference address in decimal. + +=item Thread Dependency + +When a new thread is created, the Perl interpreter is cloned, which +implies that all variables change their reference address. Thus, +in a daughter thread, the "same" reference C<$ref> contains a different +address, but the cloned hash still holds the key based on the original +address. Again, the association is broken. + +A C<CLONE> method is required to update the hash on thread creation. +Field hashes come with an appropriate C<CLONE>. + +=item Garbage Collection + +When a reference (an object) is used as a hash key, the entry stays +in the hash when the object eventually goes out of scope. That can result +in a memory leak because the data associated with the object is not +freed. Worse than that, it can lead to a false association if the +reference address of the original object is later re-used. This +is not a remote possibility, address re-use happens all the time and +is a certainty under many conditions. + +If the references in question are indeed objects, a C<DESTROY> method +I<must> clean up hashes that the object uses for storage. Special +methods are needed when unblessed references can occur. + +Field hashes have garbage collection built in. If a reference +(blessed or unblessed) goes out of scope, corresponding entries +will be deleted from all field hashes. + +=back + +Thus, an inside-out class based on field hashes doesn't need a C<DESTROY> +method, nor a C<CLONE> method for thread support. That facilitates the +construction considerably. + +=head2 How to use + +Traditionally, the definition of an inside-out class contains a bare +block inside which a number of lexical hashes are declared and the +basic accessor methods defined, usually through C<Scalar::Util::refaddr>. +Further methods may be defined outside this block. There has to be +a DESTROY method and, for thread support, a CLONE method. + +When field hashes are used, the basic structure reamins the same. +Each lexical hash will be made a field hash. The call to C<refaddr> +can be omitted from the accessor methods. DESTROY and CLONE methods +are not necessary. + +If you have an existing inside-out class, simply making all hashes +field hashes with no other change should make no difference. Through +the calls to C<refaddr> or equivalent, the field hashes never get to +see a reference and work like normal hashes. Your DESTROY (and +CLONE) methods are still needed. + +To make the field hashes kick in, it is easiest to redefine C<refaddr> +as + + sub refaddr { shift } + +instead of importing it from C<Scalar::Util>. It should now be possible +to disable DESTROY and CLONE. Note that while it isn't disabled, +DESTROY will be called before the garbage collection of field hashes, +so it will be invoked with a functional object. + +It is not necessary to import the functions C<fieldhash> and/or +C<fieldhashes> into every class that is going to use them. When +the class is up and running, these functions have no business there. +If there are only a few field hashes to declare, it is simplest to + + use Hash::Util::FieldHash; + +early and call the functions qualified: + + Hash::Util::FieldHash::fieldhash my %foo; + +Otherwise, import the functions into a convenient package like +C<HUF> or, more generic, C<Aux> + + { + package Aux; + use Hash::Util::FieldHash ':all'; + } + +and call + + Aux::fieldhash my %foo; + +as needed. + +=head2 Examples + +Well... really only one example, and a rather trivial one at that. +There isn't much to exemplify. + +=head3 A simple class... + +The following example shows an utterly simple inside-out class +C<TimeStamp>, created using field hashes. It has a single field, +incorporated as the field hash C<%time>. Besides C<new> it has only +two methods: an initializer called C<stamp> that sets the field to +the current time, and a read-only accessor C<when> that returns the +time in C<localtime> format. + + # The class TimeStamp + + use Hash::Util::FieldHash; + { + package TimeStamp; + + Hash::Util::FieldHash::fieldhash my %time; + + sub stamp { $time{ $_[ 0]} = time; shift } # initializer + sub when { scalar localtime $time{ shift()} } # read accessor + sub new { bless( do { \ my $x }, shift)->stamp } # creator + } + + # See if it works + my $ts = TimeStamp->new; + print $ts->when, "\n"; + +Remarkable about this class definition is what isn't there: there +is no C<DESTROY> method, inherited or local, and no C<CLONE> method +is needed to make it thread-safe. Not to mention no need to call +C<refaddr> or something similar in the accessors. + +=head3 ...in action + +The outstanding property of inside-out classes is their "inheritability". +Like all inside-out classes, C<TimeStamp> is a I<universal base class>. +We can put it on the C<@ISA> list of arbitrary classes and its methods +will just work, no matter how the host class is constructed. This is +demonstrated by the following program: + + # Make a sample of objects to add time stamps to. + + use Math::Complex; + use IO::Handle; + + my @objects = ( + Math::Complex->new( 12, 13), + IO::Handle->new(), + qr/abc/, # in class Regexp + bless( [], 'Boing'), # made up on the spot + ); + + # Prepare for use with TimeStamp + + for ( @objects ) { + no strict 'refs'; + push @{ ref() . '::ISA' }, 'TimeStamp'; + } + + # Now apply TimeStamp methods to all objects and show the result + + for my $obj ( @objects ) { + $obj->stamp; + report( $obj, $obj->when); + } + + # print a description of the object and the result of ->when + + use Scalar::Util qw( reftype); + sub report { + my ( $obj, $when) = @_; + my $msg = sprintf "This is a %s object(a %s), its time is %s", + ref $obj, + reftype $obj, + $when; + $msg =~ s/\ba(?= [aeiouAEIOU])/an/g; # grammar matters :) + print "$msg\n"; + } + +=head2 Garbage-Collected Hashes + +Garbage collection in a field hash means that entries will "spontaneously" +disappear when the object that created them disappears. That must be +borne in mind, especially when looping over a field hash. If anything +you do inside the loop could cause an object to go out of scope, a +random key may be deleted from the hash you are looping over. That +can throw the loop iterator, so it's best to cache a consistent snapshot +of the keys and/or values and loop over that. You will still have to +check that a cached entry still exists when you get to it. + +Garbage collection can be confusing when keys are created in a field hash +from normal scalars as well as references. Once a reference is I<used> with +a field hash, the entry will be collected, even if it was later overwritten +with a plain scalar key (every positive integer is a candidate). This +is true even if the original entry was deleted in the meantime. In fact, +deletion from a field hash, and also a test for existence constitute +I<use> in this sense and create a liability to delete the entry when +the reference goes out of scope. If you happen to create an entry +with an identical key from a string or integer, that will be collected +instead. Thus, mixed use of references and plain scalars as field hash +keys is not entirely supported. + +=head1 Guts + +To make C<Hash::Util::FieldHash> work, there were two changes to +F<perl> itself. C<PERL_MAGIC_uvar> was made avaliable for hashes, +and weak references now call uvar C<get> magic after a weakref has been +cleared. The first feature is used to make field hashes intercept +their keys upon access. The second one triggers garbage collection. + +=head2 The C<PERL_MAGIC_uvar> interface for hashes + +C<PERL_MAGIC_uvar> I<get> magic is called from C<hv_fetch_common> and +C<hv_delete_common> through the function C<hv_magic_uvar_xkey>, which +defines the interface. The call happens for hashes with "uvar" magic +if the C<ufuncs> structure has equal values in the C<uf_val> and C<uf_set> +fields. Hashes are unaffected if (and as long as) these fields +hold different values. + +Upon the call, the C<mg_obj> field will hold the hash key to be accessed. +Upon return, the C<SV*> value in C<mg_obj> will be used in place of the +original key in the hash access. The integer index value in the first +parameter will be the C<action> value from C<hv_fetch_common>, or -1 +if the call is from C<hv_delete_common>. + +This is a template for a function suitable for the C<uf_val> field in +a C<ufuncs> structure for this call. The C<uf_set> and C<uf_index> +fields are irrelevant. + + IV watch_key(pTHX_ IV action, SV* field) { + MAGIC* mg = mg_find(field, PERL_MAGIC_uvar); + SV* keysv = mg->mg_obj; + /* Do whatever you need to. If you decide to + supply a different key newkey, return it like this + */ + sv_2mortal(newkey); + mg->mg_obj = newkey; + return 0; + } + +=head2 Weakrefs call uvar magic + +When a weak reference is stored in an C<SV> that has "uvar" magic, C<set> +magic is called after the reference has gone stale. This hook can be +used to trigger further garbage-collection activities associated with +the referenced object. + +=head2 How field hashes work + +The three features of key hashes, I<key replacement>, I<thread support>, +and I<garbage collection> are supported by a data structure called +the I<object registry>. This is currently the hash +C<Hash::Utils::FieldHash::ob_reg> though there may be a more private +place for it in the future. An "object" is any reference (blessed +or unblessed) that has been used as a field hash key. + +The object registry keeps track of references that have been used as +field hash keys. The keys are generated from the reference address +like in a field hash (though the registry isn't a field hash). Each +value is a weak copy of the original reference, stored in an C<SV> that +is itself magical (C<PERL_MAGIC_uvar> again). The magical structure +holds a list (another hash, really) of field hashes that the reference +has been used with. When the weakref becomes stale, the magic is +activated and uses the list to delete the reference from all field +hashes it has been used with. After that, the entry is removed from +the object registry itself. Implicitly, that frees the magic structure +and the storage it has been using. + +Whenever a reference is used as a field hash key, the object registry +is checked and a new entry is made if necessary. The field hash is +then added to the list of fields this reference has used. + +The object registry is also used to repair a field hash after thread +cloning. Here, the entire object registry is processed. For every +reference found there, the field hashes it has used are visited and +the entry is updated. + +=head2 Internal function Hash::Util::FieldHash::_fieldhash + + # test if %hash is a field hash + my $result = _fieldhash \ %hash, 0; + + # make %hash a field hash + my $result = _fieldhash \ %hash, 1; + +C<_fieldhash> is the internal function used to create field hashes. +It takes two arguments, a hashref and a mode. If the mode is boolean +false, the hash is not changed but tested if it is a field hash. If +the hash isn't a field hash the return value is boolean false. If it +is, the return value indicates the mode of field hash. When called with +a boolean true mode, it turns the given hash into a field hash of this +mode, returning the mode of the created field hash. C<_fieldhash> +does not erase the given hash. + +Currently there is only one type of field hash, and only the boolean +value of the mode makes a difference, but that may change. + +=head1 AUTHOR + +Anno Siegel, E<lt>anno4000@zrz.tu-berlin.deE<gt> + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2006 by (icke) + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.8.7 or, +at your option, any later version of Perl 5 you may have available. + +=cut diff --git a/ext/Hash/Util/FieldHash/t/01_load.t b/ext/Hash/Util/FieldHash/t/01_load.t new file mode 100644 index 0000000000..952f2a3532 --- /dev/null +++ b/ext/Hash/Util/FieldHash/t/01_load.t @@ -0,0 +1,59 @@ +#!perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +use strict; use warnings; + +use Test::More tests => 8; + +# see that Hash::Util::FieldHash and Hash::Util load and export what +# they should + +# note to self: this test only works in the perl build environment, +# not in my homely test environment (haven't got the right Hash::Util.pm +# there). mask it. + +BEGIN { + use_ok( 'Hash::Util'); + ok( defined( &Hash::Util::lock_keys), "Hash::Util::lock_keys found"); + ok( !defined( &Hash::Util::FieldHash::fieldhashes), + "Hash::Util::FieldHash not loaded", + ); +} + +package one; +use Test::More; +use Hash::Util qw( lock_keys); +BEGIN { + ok( defined( &lock_keys), "lock_keys imported from Hash::Util"); +} + +use Hash::Util qw( fieldhashes); +BEGIN { + ok( defined( &Hash::Util::FieldHash::fieldhashes), + "Hash::Util::FieldHash loaded", + ); + ok( defined( &fieldhashes), + "fieldhashes imported from Hash::Util", + ); +} + +package two; +use Test::More; +use Hash::Util::FieldHash qw( fieldhashes); +BEGIN { + ok( defined( &fieldhashes), + "fieldhashes imported from Hash::Util::FieldHash", + ); +} + +use Hash::Util::FieldHash qw( :all); +BEGIN { + ok( defined( &fieldhash), + "fieldhash imported from Hash::Util::FieldHash via :all", + ); +} + diff --git a/ext/Hash/Util/FieldHash/t/02_function.t b/ext/Hash/Util/FieldHash/t/02_function.t new file mode 100644 index 0000000000..a89bf2e92b --- /dev/null +++ b/ext/Hash/Util/FieldHash/t/02_function.t @@ -0,0 +1,215 @@ +#!perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +use strict; use warnings; +use Test::More; +my $n_tests = 0; + +use Hash::Util::FieldHash qw( :all); + +######################### + +# define ref types to use with some tests +my @test_types; +BEGIN { + # skipping CODE refs, they are differently scoped + @test_types = qw( SCALAR ARRAY HASH GLOB); +} + +### Object registry + +BEGIN { $n_tests += 3 } +{ + my $ob_reg = \ %Hash::Util::FieldHash::ob_reg; + { + my $obj = {}; + { + my $h; + fieldhash %$h; + $h->{ $obj} = 123; + is( keys %$ob_reg, 1, "one object registered"); + } + # field hash stays alive until $obj dies + is( keys %$ob_reg, 1, "object still registered"); + } + is( keys %$ob_reg, 0, "object unregistered"); +} + +### existence/retrieval/deletion +BEGIN { $n_tests += 6 } +{ + no warnings 'misc'; + my $val = 123; + fieldhash my %h; + for ( [ str => 'abc'], [ ref => {}] ) { + my ( $keytype, $key) = @$_; + $h{ $key} = $val; + ok( exists $h{ $key}, "existence ($keytype)"); + is( $h{ $key}, $val, "retrieval ($keytype)"); + delete $h{ $key}; + is( keys %h, 0, "deletion ($keytype)"); + } +} + +### id-action (stringification independent of bless) +BEGIN { $n_tests += 4 } +{ + my( %f, %g, %h, %i); + fieldhash %f; + fieldhash %g; + my $val = 123; + my $key = []; + $f{ $key} = $val; + is( $f{ $key}, $val, "plain key set in field"); + bless $key; + is( $f{ $key}, $val, "access through blessed"); + $key = []; + $h{ $key} = $val; + is( $h{ $key}, $val, "plain key set in hash"); + bless $key; + isnt( $h{ $key}, $val, "no access through blessed"); +} + +# Garbage collection +BEGIN { $n_tests += 1 + 2*( 3*@test_types + 5) + 1 } + +{ + fieldhash my %h; + $h{ []} = 123; + is( keys %h, 0, "blip"); +} + +for my $preload ( [], [ map {}, 1 .. 3] ) { + my $pre = @$preload ? ' (preloaded)' : ''; + fieldhash my %f; + my @preval = map "$_", @$preload; + @f{ @$preload} = @preval; + # Garbage collection separately + for my $type ( @test_types) { + { + my $ref = gen_ref( $type); + $f{ $ref} = $type; + my ( $val) = grep $_ eq $type, values %f; + is( $val, $type, "$type visible$pre"); + is( + keys %Hash::Util::FieldHash::ob_reg, + 1 + @$preload, + "$type obj registered$pre" + ); + } + is( keys %f, @$preload, "$type gone$pre"); + } + + # Garbage collection collectively + is( keys %Hash::Util::FieldHash::ob_reg, @$preload, "no objs remaining$pre"); + { + my @refs = map gen_ref( $_), @test_types; + @f{ @refs} = @test_types; + ok( + eq_set( [ values %f], [ @test_types, @preval]), + "all types present$pre", + ); + is( + keys %Hash::Util::FieldHash::ob_reg, + @test_types + @$preload, + "all types registered$pre", + ); + } + die "preload gone" unless defined $preload; + ok( eq_set( [ values %f], \ @preval), "all types gone$pre"); + is( keys %Hash::Util::FieldHash::ob_reg, @$preload, "all types unregistered$pre"); +} +is( keys %Hash::Util::FieldHash::ob_reg, 0, "preload gone after loop"); + +# big key sets +BEGIN { $n_tests += 8 } +{ + my $size = 10_000; + fieldhash( my %f); + { + my @refs = map [], 1 .. $size; + $f{ $_} = 1 for @refs; + is( keys %f, $size, "many keys singly"); + is( + keys %Hash::Util::FieldHash::ob_reg, + $size, + "many objects singly", + ); + } + is( keys %f, 0, "many keys singly gone"); + is( + keys %Hash::Util::FieldHash::ob_reg, + 0, + "many objects singly unregistered", + ); + + { + my @refs = map [], 1 .. $size; + $f{ $_} = 1 for @refs; + is( keys %f, $size, "many keys at once"); + is( + keys %Hash::Util::FieldHash::ob_reg, + $size, + "many objects at once", + ); + } + is( keys %f, 0, "many keys at once gone"); + is( + keys %Hash::Util::FieldHash::ob_reg, + 0, + "many objects at once unregistered", + ); +} + +# many field hashes +BEGIN { $n_tests += 6 } +{ + my $n_fields = 1000; + my @fields = map &fieldhash( {}), 1 .. $n_fields; + my @obs = map gen_ref( $_), @test_types; + my $n_obs = @obs; + for my $field ( @fields ) { + @{ $field }{ @obs} = map ref, @obs; + } + my $err = grep keys %$_ != @obs, @fields; + is( $err, 0, "$n_obs entries in $n_fields fields"); + is( keys %Hash::Util::FieldHash::ob_reg, @obs, "$n_obs obs registered"); + pop @obs; + $err = grep keys %$_ != @obs, @fields; + is( $err, 0, "one entry gone from $n_fields fields"); + is( keys %Hash::Util::FieldHash::ob_reg, @obs, "one ob unregistered"); + @obs = (); + $err = grep keys %$_ != @obs, @fields; + is( $err, 0, "all entries gone from $n_fields fields"); + is( keys %Hash::Util::FieldHash::ob_reg, @obs, "all obs unregistered"); +} + +{ + + BEGIN { $n_tests += 1 } + fieldhash my %h; + bless \ %h, 'abc'; # this bus-errors with a certain bug + ok( 1, "no bus error on bless") +} + +BEGIN { plan tests => $n_tests } + +####################################################################### + +use Symbol qw( gensym); + +BEGIN { + my %gen = ( + SCALAR => sub { \ my $x }, + ARRAY => sub { [] }, + HASH => sub { {} }, + GLOB => sub { gensym }, + CODE => sub { sub {} }, + ); + + sub gen_ref { $gen{ shift()}->() } +} diff --git a/ext/Hash/Util/FieldHash/t/03_class.t b/ext/Hash/Util/FieldHash/t/03_class.t new file mode 100644 index 0000000000..027b43c198 --- /dev/null +++ b/ext/Hash/Util/FieldHash/t/03_class.t @@ -0,0 +1,116 @@ +#!perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +use strict; use warnings; +use Test::More; +my $n_tests = 0; + +use Config; +BEGIN { $n_tests += 2 } +{ + my $p = Impostor->new( 'Donald Duck'); + is( $p->greeting, "Hi, I'm Donald Duck", "blank title"); + $p->assume_title( 'Mr'); + is( $p->greeting, "Hi, I'm Mr Donald Duck", "changed title"); +} + +# thread support? +BEGIN { $n_tests += 5 } +SKIP: { + skip "No thread support", 5 unless $Config{ usethreads}; + require threads; + treads->import if threads->can( 'import'); + + my $ans; + my $p = Impostor->new( 'Donald Duck'); + $ans = threads->create( sub { $p->greeting })->join; + is( $ans, "Hi, I'm Donald Duck", "thread: blank title"); + $p->assume_title( 'Mr'); + $ans = threads->create( sub { $p->greeting })->join; + is( $ans, "Hi, I'm Mr Donald Duck", "thread: changed title"); + $ans = threads->create( + sub { + $p->assume_title( 'Uncle'); + $p->greeting; + } + )->join; + is( $ans, "Hi, I'm Uncle Donald Duck", "thread: local change"); + is( $p->greeting, "Hi, I'm Mr Donald Duck", "thread: change is local"); + + # second generation thread + $ans = threads->create( + sub { + threads->create( sub { $p->greeting })->join; + } + )->join; + is( $ans, "Hi, I'm Mr Donald Duck", "double thread: got greeting"); +} + +BEGIN { plan tests => $n_tests } + +############################################################################ + +# must do this in BEGIN so that field hashes are declared before +# first use above + +BEGIN { + package CFF; + use Hash::Util::FieldHash qw( :all); + + package Person; + + { + CFF::fieldhash my %name; + CFF::fieldhash my %title; + + sub init { + my $p = shift; + $name{ $p} = shift || ''; + $title{ $p} = shift || ''; + $p; + } + + sub name { $name{ shift()} } + sub title { $title{ shift() } } + } + + sub new { + my $class = shift; + bless( \ my $x, $class)->init( @_); + } + + sub greeting { + my $p = shift; + my $greet = "Hi, I'm "; + $_ and $greet .= "$_ " for $p->title; + $greet .= $p->name; + $greet; + } + + package Impostor; + use base 'Person'; + + { + CFF::fieldhash my %assumed_title; + + sub init { + my $p = shift; + my ( $name, $title) = @_; + $p->Person::init( $name, $title); + $p->assume_title( $title); + $p; + } + + sub title { $assumed_title{ shift()} } + + sub assume_title { + my $p = shift; + $assumed_title{ $p} = shift || ''; + $p; + } + } +} diff --git a/ext/Hash/Util/FieldHash/t/04_thread.t b/ext/Hash/Util/FieldHash/t/04_thread.t new file mode 100644 index 0000000000..0693522181 --- /dev/null +++ b/ext/Hash/Util/FieldHash/t/04_thread.t @@ -0,0 +1,68 @@ +#!perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +use strict; use warnings; +use Test::More; +my $n_tests; + +use Hash::Util::FieldHash qw( :all); + +{ + my $n_basic; + BEGIN { + $n_basic = 6; # 6 tests per call of basic_func() + $n_tests += 5*$n_basic; + } + my $ob_reg = \ %Hash::Util::FieldHash::ob_reg; + my %h; + fieldhash %h; + + sub basic_func { + my $level = shift; + + my @res; + my $push_is = sub { + my ( $hash, $should, $name) = @_; + push @res, [ scalar keys %$hash, $should, $name]; + }; + + my $obj = []; + $push_is->( \ %h, 0, "$level: initially clear"); + $push_is->( $ob_reg, 0, "$level: ob_reg initially clear"); + $h{ $obj} = 123; + $push_is->( \ %h, 1, "$level: one object"); + $push_is->( $ob_reg, 1, "$level: ob_reg one object"); + undef $obj; + $push_is->( \ %h, 0, "$level: garbage collected"); + $push_is->( $ob_reg, 0, "$level: ob_reg garbage collected"); + @res; + } + + &is( @$_) for basic_func( "home"); + + SKIP: { + require Config; + skip "No thread support", 3*$n_basic unless + $Config::Config{ usethreads}; + require threads; + my ( $t) = threads->create( \ &basic_func, "thread 1"); + &is( @$_) for $t->join; + + &is( @$_) for basic_func( "back home"); + + ( $t) = threads->create( sub { + my ( $t) = threads->create( \ &basic_func, "thread 2"); + $t->join; + }); + &is( @$_) for $t->join; + } + + &is( @$_) for basic_func( "back home again"); + +} + +BEGIN { plan tests => $n_tests } diff --git a/ext/Hash/Util/FieldHash/t/05_perlhook.t b/ext/Hash/Util/FieldHash/t/05_perlhook.t new file mode 100644 index 0000000000..73f8654118 --- /dev/null +++ b/ext/Hash/Util/FieldHash/t/05_perlhook.t @@ -0,0 +1,174 @@ +#!perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +use strict; use warnings; +use Test::More; +my $n_tests; + +use Hash::Util::FieldHash; +use Scalar::Util qw( weaken); + +# The functions in Hash::Util::FieldHash +# _test_uvar_get, _test_uvar_get and _test_uvar_both + +# _test_uvar_get( $anyref, \ $counter) makes the referent of $anyref +# "uvar"-magical with get magic only. $counter is reset if the magic +# could be established. $counter will be incremented each time the +# magic "get" function is called. + +# _test_uvar_set does the same for "set" magic. _test_uvar_both +# sets both magic functions identically. Both use the same counter. + +# magical weak ref (patch to sv.c) +{ + my( $magref, $counter); + + $counter = 123; + Hash::Util::FieldHash::_test_uvar_set( \ $magref, \ $counter); + is( $counter, 0, "got magical scalar"); + + my $ref = []; + $magref = $ref; + is( $counter, 1, "store triggers magic"); + + weaken $magref; + is( $counter, 1, "weaken doesn't trigger magic"); + + { my $x = $magref } + is( $counter, 1, "read doesn't trigger magic"); + + undef $ref; + is( $counter, 2, "ref expiry triggers magic (weakref patch worked)"); + + is( $magref, undef, "weak ref works normally"); + + # same, but overwrite weakref before expiry + $counter = 0; + weaken( $magref = $ref = []); + is( $counter, 1, "setup for overwrite"); + + $magref = my $other_ref = []; + is( $counter, 2, "overwrite triggers"); + + undef $ref; + is( $counter, 2, "ref expiry doesn't trigger after overwrite"); + + is( $magref, $other_ref, "weak ref doesn't kill overwritten value"); + + BEGIN { $n_tests += 10 } +} + +# magical hash (patches to mg.c and hv.c) +{ + # the hook is only sensitive if the set function is NULL + my ( %h, $counter); + $counter = 123; + Hash::Util::FieldHash::_test_uvar_get( \ %h, \ $counter); + is( $counter, 0, "got magical hash"); + + %h = ( abc => 123); + is( $counter, 1, "list assign triggers"); + + $h{ def} = 456; + is( $counter, 3, "lvalue assign triggers twice"); + + exists $h{ def}; + is( $counter, 4, "good exists triggers"); + + exists $h{ xyz}; + is( $counter, 5, "bad exists triggers"); + + delete $h{ def}; + is( $counter, 6, "good delete triggers"); + + delete $h{ xyz}; + is( $counter, 7, "bad delete triggers"); + + my $x = $h{ abc}; + is( $counter, 8, "good read triggers"); + + $x = $h{ xyz}; + is( $counter, 9, "bad read triggers"); + + bless \ %h; + is( $counter, 9, "bless triggers(!)"); + + + $x = keys %h; + is( $counter, 9, "scalar keys doesn't trigger"); + + () = keys %h; + is( $counter, 9, "list keys doesn't trigger"); + + $x = values %h; + is( $counter, 9, "scalar values doesn't trigger"); + + () = values %h; + is( $counter, 9, "list values doesn't trigger"); + + $x = each %h; + is( $counter, 9, "scalar each doesn't trigger"); + + () = each %h; + is( $counter, 9, "list each doesn't trigger"); + + # see that normal set magic doesn't trigger (identity condition) + my %i; + Hash::Util::FieldHash::_test_uvar_set( \ %i, \ $counter); + is( $counter, 0, "got magical hash"); + + %i = ( abc => 123); + $i{ def} = 456; + exists $i{ def}; + exists $i{ xyz}; + delete $i{ def}; + delete $i{ xyz}; + $x = $i{ abc}; + $x = $i{ xyz}; + $x = keys %i; + () = keys %i; + $x = values %i; + () = values %i; + $x = each %i; + () = each %i; + + is( $counter, 0, "normal set magic never triggers"); + + bless \ %i, 'abc'; + is( $counter, 1, "...except with bless"); + + # see that magic with both set and get doesn't trigger (identity condition) + $counter = 123; + my %j; + Hash::Util::FieldHash::_test_uvar_same( \ %j, \ $counter); + is( $counter, 0, "got magical hash"); + + %j = ( abc => 123); + $j{ def} = 456; + exists $j{ def}; + exists $j{ xyz}; + delete $j{ def}; + delete $j{ xyz}; + $x = $j{ abc}; + $x = $j{ xyz}; + $x = keys %j; + () = keys %j; + $x = values %j; + () = values %j; + $x = each %j; + () = each %j; + + is( $counter, 0, "normal get magic never triggers"); + + bless \ %j, 'abc'; + is( $counter, 1, "...except for bless"); + + BEGIN { $n_tests += 22 } +} + +BEGIN { plan tests => $n_tests } + diff --git a/ext/Hash/Util/FieldHash/t/10_hash.t b/ext/Hash/Util/FieldHash/t/10_hash.t new file mode 100644 index 0000000000..80de72291d --- /dev/null +++ b/ext/Hash/Util/FieldHash/t/10_hash.t @@ -0,0 +1,116 @@ +#!./perl -w + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +use Test::More; + +use strict; +use Hash::Util::FieldHash qw( :all); + +no warnings 'misc'; + +plan tests => 5; + +fieldhash my %h; + +ok (!Internals::HvREHASH(%h), "hash doesn't start with rehash flag on"); + +foreach (1..10) { + $h{"\0"x$_}++; +} + +ok (!Internals::HvREHASH(%h), "10 entries doesn't trigger rehash"); + +foreach (11..20) { + $h{"\0"x$_}++; +} + +ok (Internals::HvREHASH(%h), "20 entries triggers rehash"); + + + + +# second part using an emulation of the PERL_HASH in perl, mounting an +# attack on a prepopulated hash. This is also useful if you need normal +# keys which don't contain \0 -- suitable for stashes + +use constant MASK_U32 => 2**32; +use constant HASH_SEED => 0; +use constant THRESHOLD => 14; +use constant START => "a"; + +# some initial hash data +fieldhash my %h2; +%h2 = map {$_ => 1} 'a'..'cc'; + +ok (!Internals::HvREHASH(%h2), + "starting with pre-populated non-pathalogical hash (rehash flag if off)"); + +my @keys = get_keys(\%h2); +$h2{$_}++ for @keys; +ok (Internals::HvREHASH(%h2), + scalar(@keys) . " colliding into the same bucket keys are triggerring rehash"); + +sub get_keys { + my $hr = shift; + + # the minimum of bits required to mount the attack on a hash + my $min_bits = log(THRESHOLD)/log(2); + + # if the hash has already been populated with a significant amount + # of entries the number of mask bits can be higher + my $keys = scalar keys %$hr; + my $bits = $keys ? log($keys)/log(2) : 0; + $bits = $min_bits if $min_bits > $bits; + + $bits = int($bits) < $bits ? int($bits) + 1 : int($bits); + # need to add 2 bits to cover the internal split cases + $bits += 2; + my $mask = 2**$bits-1; + print "# using mask: $mask ($bits)\n"; + + my @keys; + my $s = START; + my $c = 0; + # get 2 keys on top of the THRESHOLD + my $hash; + while (@keys < THRESHOLD+2) { + # next if exists $hash->{$s}; + $hash = hash($s); + next unless ($hash & $mask) == 0; + $c++; + printf "# %2d: %5s, %10s\n", $c, $s, $hash; + push @keys, $s; + } continue { + $s++; + } + + return @keys; +} + + +# trying to provide the fastest equivalent of C macro's PERL_HASH in +# Perl - the main complication is that it uses U32 integer, which we +# can't do it perl, without doing some tricks +sub hash { + my $s = shift; + my @c = split //, $s; + my $u = HASH_SEED; + for (@c) { + # (A % M) + (B % M) == (A + B) % M + # This works because '+' produces a NV, which is big enough to hold + # the intermidiate result. We only need the % before any "^" and "&" + # to get the result in the range for an I32. + # and << doesn't work on NV, so using 1 << 10 + $u += ord; + $u += $u * (1 << 10); $u %= MASK_U32; + $u ^= $u >> 6; + } + $u += $u << 3; $u %= MASK_U32; + $u ^= $u >> 11; $u %= MASK_U32; + $u += $u << 15; $u %= MASK_U32; + $u; +} diff --git a/ext/Hash/Util/FieldHash/t/11_hashassign.t b/ext/Hash/Util/FieldHash/t/11_hashassign.t new file mode 100644 index 0000000000..205f36ebcb --- /dev/null +++ b/ext/Hash/Util/FieldHash/t/11_hashassign.t @@ -0,0 +1,319 @@ +#!./perl -w + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +use Test::More; + +# use strict; +use Hash::Util::FieldHash qw( :all); +no warnings 'misc'; + +plan tests => 215; + +my @comma = ("key", "value"); + +# The peephole optimiser already knows that it should convert the string in +# $foo{string} into a shared hash key scalar. It might be worth making the +# tokeniser build the LHS of => as a shared hash key scalar too. +# And so there's the possiblility of it going wrong +# And going right on 8 bit but wrong on utf8 keys. +# And really we should also try utf8 literals in {} and => in utf8.t + +# Some of these tests are (effectively) duplicated in each.t +fieldhash my %comma; +%comma = @comma; +ok (keys %comma == 1, 'keys on comma hash'); +ok (values %comma == 1, 'values on comma hash'); +# defeat any tokeniser or optimiser cunning +my $key = 'ey'; +is ($comma{"k" . $key}, "value", 'is key present? (unoptimised)'); +# now with cunning: +is ($comma{key}, "value", 'is key present? (maybe optimised)'); +#tokeniser may treat => differently. +my @temp = (key=>undef); +is ($comma{$temp[0]}, "value", 'is key present? (using LHS of =>)'); + +@temp = %comma; +ok (eq_array (\@comma, \@temp), 'list from comma hash'); + +@temp = each %comma; +ok (eq_array (\@comma, \@temp), 'first each from comma hash'); +@temp = each %comma; +ok (eq_array ([], \@temp), 'last each from comma hash'); + +my %temp = %comma; + +ok (keys %temp == 1, 'keys on copy of comma hash'); +ok (values %temp == 1, 'values on copy of comma hash'); +is ($temp{'k' . $key}, "value", 'is key present? (unoptimised)'); +# now with cunning: +is ($temp{key}, "value", 'is key present? (maybe optimised)'); +@temp = (key=>undef); +is ($comma{$temp[0]}, "value", 'is key present? (using LHS of =>)'); + +@temp = %temp; +ok (eq_array (\@temp, \@temp), 'list from copy of comma hash'); + +@temp = each %temp; +ok (eq_array (\@temp, \@temp), 'first each from copy of comma hash'); +@temp = each %temp; +ok (eq_array ([], \@temp), 'last each from copy of comma hash'); + +my @arrow = (Key =>"Value"); + +fieldhash my %arrow; +%arrow = @arrow; +ok (keys %arrow == 1, 'keys on arrow hash'); +ok (values %arrow == 1, 'values on arrow hash'); +# defeat any tokeniser or optimiser cunning +$key = 'ey'; +is ($arrow{"K" . $key}, "Value", 'is key present? (unoptimised)'); +# now with cunning: +is ($arrow{Key}, "Value", 'is key present? (maybe optimised)'); +#tokeniser may treat => differently. +@temp = ('Key', undef); +is ($arrow{$temp[0]}, "Value", 'is key present? (using LHS of =>)'); + +@temp = %arrow; +ok (eq_array (\@arrow, \@temp), 'list from arrow hash'); + +@temp = each %arrow; +ok (eq_array (\@arrow, \@temp), 'first each from arrow hash'); +@temp = each %arrow; +ok (eq_array ([], \@temp), 'last each from arrow hash'); + +%temp = %arrow; + +ok (keys %temp == 1, 'keys on copy of arrow hash'); +ok (values %temp == 1, 'values on copy of arrow hash'); +is ($temp{'K' . $key}, "Value", 'is key present? (unoptimised)'); +# now with cunning: +is ($temp{Key}, "Value", 'is key present? (maybe optimised)'); +@temp = ('Key', undef); +is ($arrow{$temp[0]}, "Value", 'is key present? (using LHS of =>)'); + +@temp = %temp; +ok (eq_array (\@temp, \@temp), 'list from copy of arrow hash'); + +@temp = each %temp; +ok (eq_array (\@temp, \@temp), 'first each from copy of arrow hash'); +@temp = each %temp; +ok (eq_array ([], \@temp), 'last each from copy of arrow hash'); + +fieldhash my %direct; +fieldhash my %slow; +%direct = ('Camel', 2, 'Dromedary', 1); +$slow{Dromedary} = 1; +$slow{Camel} = 2; + +ok (eq_hash (\%slow, \%direct), "direct list assignment to hash"); +%direct = (Camel => 2, 'Dromedary' => 1); +ok (eq_hash (\%slow, \%direct), "direct list assignment to hash using =>"); + +$slow{Llama} = 0; # A llama is not a camel :-) +ok (!eq_hash (\%direct, \%slow), "different hashes should not be equal!"); + +my (%names, %names_copy); +fieldhash %names; +%names = ('$' => 'Scalar', '@' => 'Array', # Grr ' + '%', 'Hash', '&', 'Code'); +%names_copy = %names; +ok (eq_hash (\%names, \%names_copy), "check we can copy our hash"); + +sub in { + my %args = @_; + return eq_hash (\%names, \%args); +} + +ok (in (%names), "pass hash into a method"); + +sub in_method { + my $self = shift; + my %args = @_; + return eq_hash (\%names, \%args); +} + +ok (main->in_method (%names), "pass hash into a method"); + +sub out { + return %names; +} +%names_copy = out (); + +ok (eq_hash (\%names, \%names_copy), "pass hash from a subroutine"); + +sub out_method { + my $self = shift; + return %names; +} +%names_copy = main->out_method (); + +ok (eq_hash (\%names, \%names_copy), "pass hash from a method"); + +sub in_out { + my %args = @_; + return %args; +} +%names_copy = in_out (%names); + +ok (eq_hash (\%names, \%names_copy), "pass hash to and from a subroutine"); + +sub in_out_method { + my $self = shift; + my %args = @_; + return %args; +} +%names_copy = main->in_out_method (%names); + +ok (eq_hash (\%names, \%names_copy), "pass hash to and from a method"); + +my %names_copy2 = %names; +ok (eq_hash (\%names, \%names_copy2), "check copy worked"); + +# This should get ignored. +%names_copy = ('%', 'Associative Array', %names); + +ok (eq_hash (\%names, \%names_copy), "duplicates at the start of a list"); + +# This should not +%names_copy = ('*', 'Typeglob', %names); + +$names_copy2{'*'} = 'Typeglob'; +ok (eq_hash (\%names_copy, \%names_copy2), "duplicates at the end of a list"); + +%names_copy = ('%', 'Associative Array', '*', 'Endangered species', %names, + '*', 'Typeglob',); + +ok (eq_hash (\%names_copy, \%names_copy2), "duplicates at both ends"); + +# And now UTF8 + +foreach my $chr (60, 200, 600, 6000, 60000) { + # This little game may set a UTF8 flag internally. Or it may not. :-) + my ($key, $value) = (chr ($chr) . "\x{ABCD}", "$chr\x{ABCD}"); + chop ($key, $value); + my @utf8c = ($key, $value); + fieldhash my %utf8c; + %utf8c = @utf8c; + + ok (keys %utf8c == 1, 'keys on utf8 comma hash'); + ok (values %utf8c == 1, 'values on utf8 comma hash'); + # defeat any tokeniser or optimiser cunning + is ($utf8c{"" . $key}, $value, 'is key present? (unoptimised)'); + my $tempval = sprintf '$utf8c{"\x{%x}"}', $chr; + is (eval $tempval, $value, "is key present? (maybe $tempval is optimised)"); + $tempval = sprintf '@temp = ("\x{%x}" => undef)', $chr; + eval $tempval or die "'$tempval' gave $@"; + is ($utf8c{$temp[0]}, $value, 'is key present? (using LHS of $tempval)'); + + @temp = %utf8c; + ok (eq_array (\@utf8c, \@temp), 'list from utf8 comma hash'); + + @temp = each %utf8c; + ok (eq_array (\@utf8c, \@temp), 'first each from utf8 comma hash'); + @temp = each %utf8c; + ok (eq_array ([], \@temp), 'last each from utf8 comma hash'); + + %temp = %utf8c; + + ok (keys %temp == 1, 'keys on copy of utf8 comma hash'); + ok (values %temp == 1, 'values on copy of utf8 comma hash'); + is ($temp{"" . $key}, $value, 'is key present? (unoptimised)'); + $tempval = sprintf '$temp{"\x{%x}"}', $chr; + is (eval $tempval, $value, "is key present? (maybe $tempval is optimised)"); + $tempval = sprintf '@temp = ("\x{%x}" => undef)', $chr; + eval $tempval or die "'$tempval' gave $@"; + is ($temp{$temp[0]}, $value, "is key present? (using LHS of $tempval)"); + + @temp = %temp; + ok (eq_array (\@temp, \@temp), 'list from copy of utf8 comma hash'); + + @temp = each %temp; + ok (eq_array (\@temp, \@temp), 'first each from copy of utf8 comma hash'); + @temp = each %temp; + ok (eq_array ([], \@temp), 'last each from copy of utf8 comma hash'); + + my $assign = sprintf '("\x{%x}" => "%d")', $chr, $chr; + print "# $assign\n"; + my (@utf8a) = eval $assign; + + fieldhash my %utf8a; + %utf8a = @utf8a; + ok (keys %utf8a == 1, 'keys on utf8 arrow hash'); + ok (values %utf8a == 1, 'values on utf8 arrow hash'); + # defeat any tokeniser or optimiser cunning + is ($utf8a{$key . ""}, $value, 'is key present? (unoptimised)'); + $tempval = sprintf '$utf8a{"\x{%x}"}', $chr; + is (eval $tempval, $value, "is key present? (maybe $tempval is optimised)"); + $tempval = sprintf '@temp = ("\x{%x}" => undef)', $chr; + eval $tempval or die "'$tempval' gave $@"; + is ($utf8a{$temp[0]}, $value, "is key present? (using LHS of $tempval)"); + + @temp = %utf8a; + ok (eq_array (\@utf8a, \@temp), 'list from utf8 arrow hash'); + + @temp = each %utf8a; + ok (eq_array (\@utf8a, \@temp), 'first each from utf8 arrow hash'); + @temp = each %utf8a; + ok (eq_array ([], \@temp), 'last each from utf8 arrow hash'); + + %temp = %utf8a; + + ok (keys %temp == 1, 'keys on copy of utf8 arrow hash'); + ok (values %temp == 1, 'values on copy of utf8 arrow hash'); + is ($temp{'' . $key}, $value, 'is key present? (unoptimised)'); + $tempval = sprintf '$temp{"\x{%x}"}', $chr; + is (eval $tempval, $value, "is key present? (maybe $tempval is optimised)"); + $tempval = sprintf '@temp = ("\x{%x}" => undef)', $chr; + eval $tempval or die "'$tempval' gave $@"; + is ($temp{$temp[0]}, $value, "is key present? (using LHS of $tempval)"); + + @temp = %temp; + ok (eq_array (\@temp, \@temp), 'list from copy of utf8 arrow hash'); + + @temp = each %temp; + ok (eq_array (\@temp, \@temp), 'first each from copy of utf8 arrow hash'); + @temp = each %temp; + ok (eq_array ([], \@temp), 'last each from copy of utf8 arrow hash'); + +} + +# now some tests for hash assignment in scalar and list context with +# duplicate keys [perl #24380] +{ + my %h; my $x; my $ar; + fieldhash %h; + is( (join ':', %h = (1) x 8), '1:1', + 'hash assignment in list context removes duplicates' ); + is( scalar( %h = (1,2,1,3,1,4,1,5) ), 2, + 'hash assignment in scalar context' ); + is( scalar( ($x,%h) = (0,1,2,1,3,1,4,1,5) ), 3, + 'scalar + hash assignment in scalar context' ); + $ar = [ %h = (1,2,1,3,1,4,1,5) ]; + is( $#$ar, 1, 'hash assignment in list context' ); + is( "@$ar", "1 5", '...gets the last values' ); + $ar = [ ($x,%h) = (0,1,2,1,3,1,4,1,5) ]; + is( $#$ar, 2, 'scalar + hash assignment in list context' ); + is( "@$ar", "0 1 5", '...gets the last values' ); +} + +# test stringification of keys +{ + no warnings 'once', 'misc'; + my @types = qw( SCALAR ARRAY HASH CODE GLOB); + my @refs = ( \ do { my $x }, [], {}, sub {}, \ *x); + my(%h, %expect); + fieldhash %h; + @h{@refs} = @types; + @expect{map "$_", @refs} = @types; + ok (!eq_hash(\%h, \%expect), 'unblessed ref stringification different'); + + bless $_ for @refs; + %h = (); %expect = (); + @h{@refs} = @types; + @expect{map "$_", @refs} = @types; + ok (!eq_hash(\%h, \%expect), 'blessed ref stringification different'); +} diff --git a/ext/Hash/Util/FieldHash/t/12_hashwarn.t b/ext/Hash/Util/FieldHash/t/12_hashwarn.t new file mode 100644 index 0000000000..94509d25e1 --- /dev/null +++ b/ext/Hash/Util/FieldHash/t/12_hashwarn.t @@ -0,0 +1,60 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +use Test::More; + +plan( tests => 12 ); + +use strict; +use warnings; +use Hash::Util::FieldHash qw( :all); + +use vars qw{ @warnings }; + +BEGIN { + $SIG{'__WARN__'} = sub { push @warnings, @_ }; + $| = 1; +} + +my $fail_odd = 'Odd number of elements in hash assignment at '; +my $fail_odd_anon = 'Odd number of elements in anonymous hash at '; +my $fail_ref = 'Reference found where even-sized list expected at '; +my $fail_not_hr = 'Not a HASH reference at '; + +{ + @warnings = (); + fieldhash my %hash; + %hash = (1..3); + cmp_ok(scalar(@warnings),'==',1,'odd count'); + cmp_ok(substr($warnings[0],0,length($fail_odd)),'eq',$fail_odd,'odd msg'); + + @warnings = (); + %hash = 1; + cmp_ok(scalar(@warnings),'==',1,'scalar count'); + cmp_ok(substr($warnings[0],0,length($fail_odd)),'eq',$fail_odd,'scalar msg'); + + @warnings = (); + %hash = { 1..3 }; + cmp_ok(scalar(@warnings),'==',2,'odd hashref count'); + cmp_ok(substr($warnings[0],0,length($fail_odd_anon)),'eq',$fail_odd_anon,'odd hashref msg 1'); + cmp_ok(substr($warnings[1],0,length($fail_ref)),'eq',$fail_ref,'odd hashref msg 2'); + + @warnings = (); + %hash = [ 1..3 ]; + cmp_ok(scalar(@warnings),'==',1,'arrayref count'); + cmp_ok(substr($warnings[0],0,length($fail_ref)),'eq',$fail_ref,'arrayref msg'); + + @warnings = (); + %hash = sub { print "fenice" }; + cmp_ok(scalar(@warnings),'==',1,'coderef count'); + cmp_ok(substr($warnings[0],0,length($fail_odd)),'eq',$fail_odd,'coderef msg'); + + @warnings = (); + $_ = { 1..10 }; + cmp_ok(scalar(@warnings),'==',0,'hashref assign'); + +} diff --git a/ext/Hash/Util/Makefile.PL b/ext/Hash/Util/Makefile.PL index a328bfee85..7b7c166c75 100644 --- a/ext/Hash/Util/Makefile.PL +++ b/ext/Hash/Util/Makefile.PL @@ -8,6 +8,7 @@ WriteMakefile( MAN3PODS => {}, # Pods will be built by installman. NAME => "Hash::Util", DEFINE => "-DPERL_EXT", + DIR => ['FieldHash'], ); package MY; diff --git a/ext/Hash/Util/lib/Hash/Util.pm b/ext/Hash/Util/lib/Hash/Util.pm index c62a8bf2b6..a4f143ea85 100644 --- a/ext/Hash/Util/lib/Hash/Util.pm +++ b/ext/Hash/Util/lib/Hash/Util.pm @@ -10,6 +10,8 @@ use Scalar::Util qw(reftype); require Exporter; our @ISA = qw(Exporter); our @EXPORT_OK = qw( + fieldhash fieldhashes + all_keys lock_keys unlock_keys lock_value unlock_value @@ -26,11 +28,21 @@ our @EXPORT_OK = qw( hash_seed hv_store ); -our $VERSION = 0.06; +our $VERSION = 0.07; require DynaLoader; local @ISA = qw(DynaLoader); bootstrap Hash::Util $VERSION; +sub import { + my $class = shift; + if ( grep /fieldhash/, @_ ) { + require Hash::Util::FieldHash; + Hash::Util::FieldHash->import(':all'); # for re-export + } + unshift @_, $class; + goto &Exporter::import; +} + =head1 NAME @@ -38,6 +50,20 @@ Hash::Util - A selection of general-utility hash subroutines =head1 SYNOPSIS + # Field hashes + + use Hash::Util qw(fieldhash fieldhashes); + + # Create a single field hash + fieldhash my %foo; + + # Create three at once... + fieldhashes \ my(%foo, %bar, %baz); + # ...or any number + fieldhashes @hashrefs; + + # Restricted hashes + use Hash::Util qw( hash_seed all_keys lock_keys unlock_keys @@ -79,6 +105,19 @@ don't really warrant a keyword. By default C<Hash::Util> does not export anything. +=head2 Field hashes + +Field hashes are designed to maintain an association of a reference +with a value. The association is independent of the bless status of +the key, it is thread safe and garbage-collected. These properties +are desirable in the construction of inside-out classes. + +When used with keys that are plain scalars (not references), field +hashes behave like normal hashes. + +Field hashes are defined in a separate module for which C<Hash::Util> +is a front end. For a detailed description see L<Hash::Util::FieldHash>. + =head2 Restricted hashes 5.8.0 introduces the ability to restrict a hash to a certain set of |