summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorunknown <stewart@mysql.com>2005-09-12 15:54:23 +1000
committerunknown <stewart@mysql.com>2005-09-12 15:54:23 +1000
commitf28e81d03e28d6260d3cb89d13ef31e0cd9a3888 (patch)
tree9ceb7975cb919d60cd45784fda31eb8bacaa6bc1
parente6f860aad0fd3f831dc6db9ecb94fbf71f3b9ed9 (diff)
downloadmariadb-git-f28e81d03e28d6260d3cb89d13ef31e0cd9a3888.tar.gz
W#2776 Utility for calculating storage requirements for NDB
Initial ndb_size.pl. Connects to an existing MySQL database and produces an XHTML document containing an estimation of storage requirements. ndb/tools/ndb_size.pl: Initial checkin of ndb_size.pl. ndb/tools/ndb_size.tmpl: Initial checkin of XHTML template file for ndb_size.pl.
-rw-r--r--ndb/tools/ndb_size.pl260
-rw-r--r--ndb/tools/ndb_size.tmpl175
2 files changed, 435 insertions, 0 deletions
diff --git a/ndb/tools/ndb_size.pl b/ndb/tools/ndb_size.pl
new file mode 100644
index 00000000000..92ecf5ca784
--- /dev/null
+++ b/ndb/tools/ndb_size.pl
@@ -0,0 +1,260 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+use DBI;
+use POSIX;
+use HTML::Template;
+
+# MySQL Cluster size estimator
+# ----------------------------
+#
+# (C)2005 MySQL AB
+#
+#
+# The purpose of this tool is to work out storage requirements
+# from an existing MySQL database.
+#
+# This involves connecting to a mysql server and throwing a bunch
+# of queries at it.
+#
+# We currently estimate sizes for 4.1, 5.0 and 5.1, with varying degrees
+# of accuracy.
+#
+# There is no warranty.
+#
+# BUGS
+# ----
+# - enum/set is 0 byte storage! Woah - efficient!
+# - some float stores come out weird (when there's a comma e.g. 'float(4,1)')
+# - no disk data values
+# - computes the storage requirements of views (and probably MERGE)
+# - ignores character sets.
+
+# Load the XHTML report template from the current directory.
+# die_on_bad_params => 0 lets us set parameters that the template does not
+# reference without HTML::Template treating that as a fatal error.
+my $template = HTML::Template->new(filename => 'ndb_size.tmpl',
+                                   die_on_bad_params => 0);
+
+# Database handle shared by every query below.
+my $dbh;
+
+{
+    # Command line: database hostname port user password
+    my ($database, $hostname, $port, $user, $password) = @ARGV;
+    die "Usage: $0 database hostname port user password\n"
+        if !defined($database);
+
+    # Missing host/port interpolate as empty strings (exactly what undef
+    # produced before), but without "uninitialized value" warnings under -w.
+    $hostname = '' if !defined($hostname);
+    $port     = '' if !defined($port);
+
+    my $dsn = "DBI:mysql:database=$database;host=$hostname;port=$port";
+
+    # Stop with a readable message instead of failing later on an
+    # undefined handle.
+    $dbh= DBI->connect($dsn, $user, $password)
+        or die "Unable to connect to $dsn: $DBI::errstr\n";
+
+    $template->param(db => $database);
+    $template->param(dsn => $dsn);
+}
+
+# One entry per release we estimate for; the template loops over these to
+# produce one column per release.
+my @releases = ({rel=>'4.1'},{rel=>'5.0'},{rel=>'5.1'});
+$template->param(releases => \@releases);
+
+# Array ref of rows, one per table; element 0 of each row is the table name.
+my $tables = $dbh->selectall_arrayref("show tables")
+    or die "Unable to list tables: " . $dbh->errstr . "\n";
+
+# Accumulates one hashref of results per table for the template.
+my @table_size;
+
+# align($to, @values)
+#
+# Returns a list, in the same order and of the same length as @values, in
+# which each whole-number value has been rounded up to the next multiple
+# of $to (values already on a multiple are returned unchanged).
+sub align {
+    my $to = shift;
+    return map { $to * POSIX::floor(($_ + $to - 1) / $to) } @_;
+}
+
+# Walk every table returned by SHOW TABLES and estimate, for each release in
+# @releases (4.1, 5.0, 5.1 - always in that order), the DataMemory and
+# IndexMemory the table would need. One hashref per table is appended to
+# @table_size, which is finally handed to the template and printed.
+foreach my $row (@{$tables})
+{
+    my $table= $row->[0];
+
+    # Quote identifiers so reserved words or unusual characters in table and
+    # column names cannot break the generated SQL.
+    my $qtable= $dbh->quote_identifier($table);
+
+    my @columns;
+
+    # Fetch DESCRIBE output as an array of hashrefs (not a hash keyed by
+    # Field) so columns are reported in the order the server returns them.
+    my $info= $dbh->selectall_arrayref("describe $qtable", {Slice => {}}) || [];
+
+    my ($count) = $dbh->selectrow_array("select count(*) from $qtable");
+    $count= 0 if !defined($count);   # count query failed; estimate for 0 rows
+
+    my %columnsize; # used for index calculations
+
+    # We now work out the DataMemory usage
+
+    # sizes for 4.1, 5.0, 5.1
+    my @totalsize= (0,0,0);
+
+    foreach my $col (@{$info})
+    {
+        my @realsize = (0,0,0);
+        my $name= $col->{Field};
+        my $type= $col->{Type};
+        my $size;
+
+        # Split e.g. "varchar(32)" into type "varchar" and size 32. Types
+        # without a single numeric size (e.g. "text", "float(4,1)") keep
+        # the full string as the type and leave $size undefined.
+        if($type =~ /^(.*?)\((\d+)\)/)
+        {
+            ($type, $size) = ($1, $2);
+        }
+
+        # Numeric stand-in for $size so the arithmetic below never touches
+        # undef; 0 is what undef evaluated to before, minus the warning.
+        my $len= defined($size) ? $size : 0;
+
+        # Order matters: more specific patterns must precede the generic
+        # ones they contain (tinyint before int, datetime before date, ...).
+        # Types matched by nothing here (e.g. enum, set) stay at 0 bytes.
+        if($type =~ /tinyint/)
+        {
+            @realsize=(1,1,1);
+        }
+        elsif($type =~ /smallint/)
+        {
+            @realsize=(2,2,2);
+        }
+        elsif($type =~ /mediumint/)
+        {
+            @realsize=(3,3,3);
+        }
+        elsif($type =~ /bigint/)
+        {
+            @realsize=(8,8,8);
+        }
+        elsif($type =~ /int/)
+        {
+            @realsize=(4,4,4);
+        }
+        elsif($type =~ /float/)
+        {
+            @realsize= $len<=24 ? (4,4,4) : (8,8,8);
+        }
+        elsif($type =~ /double/ || $type =~ /real/)
+        {
+            @realsize=(8,8,8);
+        }
+        elsif($type =~ /bit/)
+        {
+            # Whole bytes needed for $len bits.
+            my $bytes= floor(($len+7)/8);
+            @realsize = ($bytes,$bytes,$bytes);
+        }
+        elsif($type =~ /datetime/)
+        {
+            @realsize=(8,8,8);
+        }
+        elsif($type =~ /timestamp/)
+        {
+            @realsize=(4,4,4);
+        }
+        elsif($type =~ /date/ || $type =~ /time/)
+        {
+            @realsize=(3,3,3);
+        }
+        elsif($type =~ /year/)
+        {
+            @realsize=(1,1,1);
+        }
+        elsif($type =~ /varchar/ || $type =~ /varbinary/)
+        {
+            # 4.1 and 5.0 are charged the declared length plus one byte;
+            # 5.1 is charged the average length actually stored.
+            my $fixed= 1+$len;
+            my $qname= $dbh->quote_identifier($name);
+            my ($avg)= $dbh->selectrow_array("select avg(length($qname)) from $qtable");
+            $avg= 0 if !$avg;
+            @realsize= ($fixed,$fixed,ceil($avg));
+        }
+        elsif($type =~ /binary/ || $type =~ /char/)
+        {
+            @realsize=($len,$len,$len);
+        }
+        elsif($type =~ /text/ || $type =~ /blob/)
+        {
+            @realsize=(256,256,1); # FIXME check if 5.1 is correct
+        }
+
+        # Every column is padded to a multiple of 4 bytes.
+        @realsize= align(4,@realsize);
+
+        $totalsize[$_]+=$realsize[$_] foreach 0..$#totalsize;
+
+        # The template wants a list of {val=>...} hashes per release.
+        my @realout= map { +{val=>$_} } @realsize;
+
+        push @columns, {
+            name=>$name,
+            type=>$type,
+            size=>$size,
+            key=>$col->{Key},
+            datamemory=>\@realout,
+        };
+
+        $columnsize{$name}= \@realsize; # used for index calculations
+    }
+
+    # And now... the IndexMemory usage.
+    #
+    # Firstly, we assemble some information about the indexes.
+    # We use SHOW INDEX instead of using INFORMATION_SCHEMA so
+    # we can still connect to pre-5.0 mysqlds.
+    my %indexes;
+    {
+        my $sth= $dbh->prepare("show index from $qtable");
+        $sth->execute;
+        # SHOW INDEX returns one row per (index, column) pair; collect the
+        # columns of each index in Seq_in_index order.
+        while(my $i = $sth->fetchrow_hashref)
+        {
+            my $key= $i->{Key_name};
+
+            $indexes{$key}= {
+                type=>$i->{Index_type},
+                unique=>!$i->{Non_unique},
+                comment=>$i->{Comment},
+            } if !defined($indexes{$key});
+
+            $indexes{$key}{columns}[$i->{Seq_in_index}-1]= $i->{Column_name};
+        }
+    }
+
+    # A table without a primary key is reported with a hidden 8 byte one.
+    if(!defined($indexes{PRIMARY})) {
+        $indexes{PRIMARY}= {
+            type=>'BTREE',
+            unique=>1,
+            comment=>'Hidden pkey created by NDB',
+            columns=>['HIDDEN_NDB_PKEY'],
+        };
+        push @columns, {
+            name=>'HIDDEN_NDB_PKEY',
+            type=>'bigint',
+            size=>8,
+            key=>'PRI',
+            datamemory=>[{val=>8},{val=>8},{val=>8}],
+        };
+        $columnsize{'HIDDEN_NDB_PKEY'}= [8,8,8];
+    }
+
+    my @IndexDataMemory= ({val=>0},{val=>0},{val=>0});
+    my @RowIndexMemory= ({val=>0},{val=>0},{val=>0});
+
+    my @indexes;
+    # Sorted so the index table comes out in the same order on every run.
+    foreach my $index (sort keys %indexes) {
+        # 4.1: 25 bytes plus the size of every indexed column;
+        # 5.0 and 5.1: a flat 25 bytes. 10 bytes of DataMemory in all three.
+        my $im41= 25;
+        $im41+=$columnsize{$_}[0] foreach @{$indexes{$index}{columns}};
+        my @im = ({val=>$im41},{val=>25},{val=>25});
+        my @dm = ({val=>10},{val=>10},{val=>10});
+        push @indexes, {
+            name=>$index,
+            type=>$indexes{$index}{type},
+            columns=>join(',',@{$indexes{$index}{columns}}),
+            indexmemory=>\@im,
+            datamemory=>\@dm,
+        };
+        $IndexDataMemory[$_]{val}+=$dm[$_]{val} foreach 0..2;
+        $RowIndexMemory[$_]{val}+=$im[$_]{val} foreach 0..2;
+    }
+
+    # total size + 16 bytes overhead
+    my @TotalDataMemory;
+    $TotalDataMemory[$_]{val}=$IndexDataMemory[$_]{val}+$totalsize[$_]+16 foreach 0..2;
+
+    my @RowDataMemory= map { +{val=>$_} } @totalsize;
+
+    # Usable bytes per 32kb data page and per 8kb index page.
+    my $datapage= 32768-128;
+    my $indexpage= 8192;
+
+    my (@RowPerPage, @DataMemory, @RowPerIndexPage, @IndexMemory);
+    foreach my $r (0..$#TotalDataMemory) {
+        my $rowbytes= $TotalDataMemory[$r]{val};
+        my $idxbytes= $RowIndexMemory[$r]{val};
+
+        my $perpage= floor($datapage/$rowbytes);
+        my $peridxpage= floor($indexpage/$idxbytes);
+        push @RowPerPage,{val=>$perpage};
+        push @RowPerIndexPage,{val=>$peridxpage};
+
+        # Pages needed for the current row count. When a single row (or
+        # index entry) is wider than a page, rows-per-page is 0; count whole
+        # pages per row instead of dividing by zero.
+        my $pages= $perpage > 0
+            ? ceil($count/$perpage)
+            : $count*ceil($rowbytes/$datapage);
+        my $idxpages= $peridxpage > 0
+            ? ceil($count/$peridxpage)
+            : $count*ceil($idxbytes/$indexpage);
+
+        # Reported in kb: 32 per data page, 8 per index page.
+        push @DataMemory,{val=>$pages*32};
+        push @IndexMemory,{val=>$idxpages*8};
+    }
+
+    # Same row count repeated once per release column.
+    my @counts;
+    $counts[$_]{val}= $count foreach 0..$#releases;
+
+    push @table_size, {
+        table=>$table,
+        indexes=>\@indexes,
+        columns=>\@columns,
+        count=>\@counts,
+        RowDataMemory=>\@RowDataMemory,
+        releases=>\@releases,
+        IndexDataMemory=>\@IndexDataMemory,
+        TotalDataMemory=>\@TotalDataMemory,
+        RowPerPage=>\@RowPerPage,
+        DataMemory=>\@DataMemory,
+        RowIndexMemory=>\@RowIndexMemory,
+        RowPerIndexPage=>\@RowPerIndexPage,
+        IndexMemory=>\@IndexMemory,
+
+    };
+}
+
+# Hand the per-table results to the template and write the report to STDOUT.
+$template->param(tables => \@table_size);
+print $template->output;
diff --git a/ndb/tools/ndb_size.tmpl b/ndb/tools/ndb_size.tmpl
new file mode 100644
index 00000000000..d83d5d2c6af
--- /dev/null
+++ b/ndb/tools/ndb_size.tmpl
@@ -0,0 +1,175 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+ <head>
+ <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8"/>
+ <meta name="keywords" content="MySQL Cluster" />
+ <title>MySQL Cluster size estimate for <TMPL_VAR NAME="db" ESCAPE="HTML"></title>
+<style type="text/css">
+table { border-collapse: collapse }
+td,th { border: 1px solid black }
+</style>
+ </head>
+<body>
+<h1>MySQL Cluster analysis for <TMPL_VAR NAME="db" escape="html"></h1>
+<p>This is an automated analysis of the <TMPL_VAR NAME="DSN" escape="html"> database for migration into <a href="http://www.mysql.com/">MySQL</a> Cluster. No warranty is made as to the accuracy of the information.</p>
+
+<p>This information should be valid for MySQL 4.1</p>
+
+<ul>
+<!-- Table names are HTML-escaped so characters such as & or < keep the document well-formed. -->
+<TMPL_LOOP NAME="tables">
+<li><TMPL_VAR NAME="table" ESCAPE="HTML"></li>
+</TMPL_LOOP>
+</ul>
+
+<hr/>
+
+<TMPL_LOOP NAME="tables">
+<h2><TMPL_VAR NAME="table" ESCAPE="HTML"></h2>
+<table>
+ <tr>
+ <th>Column</th>
+ <th>Type</th>
+ <th>Size</th>
+ <th>Key</th>
+ <TMPL_LOOP NAME=releases>
+ <th><TMPL_VAR NAME=rel> NDB Size</th>
+ </TMPL_LOOP>
+ </tr>
+ <!-- One row per column; name and type come from the database and are HTML-escaped. -->
+ <TMPL_LOOP NAME="columns">
+ <tr>
+ <td><TMPL_VAR NAME=name ESCAPE=HTML></td>
+ <td><TMPL_VAR NAME=type ESCAPE=HTML></td>
+ <td><TMPL_VAR NAME=size></td>
+ <td><TMPL_VAR NAME=key></td>
+ <TMPL_LOOP NAME=datamemory>
+ <td><TMPL_VAR NAME=val></td>
+ </TMPL_LOOP>
+ </tr>
+ </TMPL_LOOP>
+</table>
+
+<p>&nbsp;</p>
+
+<h3>Indexes</h3>
+
+<p>We assume that indexes are ORDERED (not created USING HASH). If order is not required, 10 bytes of data memory can be saved per row if the index is created USING HASH</p>
+<table>
+<tr>
+ <th>Index</th>
+ <th>Type</th>
+ <th>Columns</th>
+ <TMPL_LOOP NAME=releases>
+ <th><TMPL_VAR NAME=rel> IdxMem</th>
+ </TMPL_LOOP>
+ <TMPL_LOOP NAME=releases>
+ <th><TMPL_VAR NAME=rel> DatMem</th>
+ </TMPL_LOOP>
+</tr>
+<!-- One row per index; name, type and column list come from the database and are HTML-escaped. -->
+<TMPL_LOOP NAME="indexes">
+ <tr>
+ <td><TMPL_VAR NAME=name ESCAPE=HTML></td>
+ <td><TMPL_VAR NAME=type ESCAPE=HTML></td>
+ <td><TMPL_VAR NAME=columns ESCAPE=HTML></td>
+ <TMPL_LOOP NAME=indexmemory>
+ <td><TMPL_VAR NAME=val></td>
+ </TMPL_LOOP>
+ <TMPL_LOOP NAME=datamemory>
+ <td><TMPL_VAR NAME=val></td>
+ </TMPL_LOOP>
+ </tr>
+</TMPL_LOOP>
+</table>
+
+<h3>DataMemory Usage</h3>
+<table>
+<tr>
+ <th>&nbsp;</th>
+ <TMPL_LOOP NAME=releases>
+ <th><TMPL_VAR NAME=rel></th>
+ </TMPL_LOOP>
+</tr>
+<tr>
+ <th>Row Overhead</th>
+ <TMPL_LOOP NAME=releases>
+ <td>16</td>
+ </TMPL_LOOP>
+</tr>
+<tr>
+ <th>Column DataMemory/Row</th>
+ <TMPL_LOOP NAME=RowDataMemory>
+ <td><TMPL_VAR NAME=val></td>
+ </TMPL_LOOP>
+</tr>
+<tr>
+ <th>Index DataMemory/Row</th>
+ <TMPL_LOOP NAME=IndexDataMemory>
+ <td><TMPL_VAR NAME=val></td>
+ </TMPL_LOOP>
+</tr>
+<tr>
+ <th>Total DataMemory/Row</th>
+ <TMPL_LOOP NAME=TotalDataMemory>
+ <td><TMPL_VAR NAME=val></td>
+ </TMPL_LOOP>
+</tr>
+<tr>
+ <th>Rows per 32kb page</th>
+ <TMPL_LOOP NAME=RowPerPage>
+ <td><TMPL_VAR NAME=val></td>
+ </TMPL_LOOP>
+</tr>
+<tr>
+ <th>Current number of rows</th>
+ <TMPL_LOOP NAME=count>
+ <td><TMPL_VAR NAME=val></td>
+ </TMPL_LOOP>
+</tr>
+<tr>
+ <th>Total DataMemory (kb)</th>
+ <TMPL_LOOP NAME=DataMemory>
+ <td><TMPL_VAR NAME=val></td>
+ </TMPL_LOOP>
+</tr>
+</table>
+
+<h3>IndexMemory Usage</h3>
+<table>
+<tr>
+ <th>&nbsp;</th>
+ <TMPL_LOOP NAME=releases>
+ <th><TMPL_VAR NAME=rel></th>
+ </TMPL_LOOP>
+</tr>
+<tr>
+ <th>IndexMemory/Row</th>
+ <TMPL_LOOP NAME=RowIndexMemory>
+ <td><TMPL_VAR NAME=val></td>
+ </TMPL_LOOP>
+</tr>
+<tr>
+ <th>Rows per 8kb page</th>
+ <TMPL_LOOP NAME=RowPerIndexPage>
+ <td><TMPL_VAR NAME=val></td>
+ </TMPL_LOOP>
+</tr>
+<tr>
+ <th>Current number of rows</th>
+ <TMPL_LOOP NAME=count>
+ <td><TMPL_VAR NAME=val></td>
+ </TMPL_LOOP>
+</tr>
+<tr>
+ <th>Total IndexMemory (kb)</th>
+ <TMPL_LOOP NAME=IndexMemory>
+ <td><TMPL_VAR NAME=val></td>
+ </TMPL_LOOP>
+</tr>
+</table>
+
+<hr/>
+</TMPL_LOOP>
+
+<p>This is the output of ndb_size.pl.</p>
+</body>
+</html>
+