Sunday, June 7, 2009

dev usage

-- Idempotent (re)install guard: drop any existing copy of the procedure
-- before it is recreated below, and report whether the drop succeeded.
-- NOTE(review): both PRINT branches emit the same '<<<>>>' placeholder,
-- which looks like a mangled success/failure message -- confirm intended text.
IF OBJECT_ID('dbo.sp_dba_devusage') IS NOT NULL
BEGIN
    DROP PROCEDURE dbo.sp_dba_devusage
    -- Re-check: OBJECT_ID still non-null means the drop failed.
    IF OBJECT_ID('dbo.sp_dba_devusage') IS NOT NULL
        PRINT '<<<>>>'
    ELSE
        PRINT '<<<>>>'
END
go


create proc sp_dba_devusage
as
-- Reports database device usage server-wide:
--   1. one row per database with its total size (MB), dbid and create date
--   2. one row per physical device with vdevno, default-disk flag, and
--      total / used / free space in MB.
-- Sizes assume 2K pages (512 pages per MB) -- TODO confirm on servers
-- with a different page size.
set nocount on

-- Per-database totals.  sysusages.size is in pages; divide by 512. (float)
-- so fractional megabytes survive the str(...,10,1) one-decimal format.
-- (The original divided by the integer 512, truncating before formatting.)
select distinct
     name = d.name,
     db_size = str(sum(u.size) / 512., 10, 1) + " MB",
     dbid = d.dbid,
     created = convert(char(14), d.crdate, 107)
     from master.dbo.sysdatabases d, master.dbo.sysusages u
     where d.dbid = u.dbid
     group by d.dbid, u.dbid
     order by d.dbid

-- Byte offset of the virtual device number within sysdevices.low; the
-- server publishes this platform-dependent offset in spt_values ("E", 3).
declare @vsize int
select @vsize = low
from master.dbo.spt_values
where type="E"
  and number = 3

-- Per-device usage.  "group by all" (Sybase extension) keeps devices with
-- no allocations in the result set; the HAVING repeats the device filter
-- so those group-by-all rows are still restricted to disks (cntrltype=0).
select device = substring(name,1,15),
       vdevno = convert(tinyint,substring(convert(binary(4),low),@vsize,1)),
       "default disk?" = "      " + substring("NY",(status & 1)+1,1),
       "total (MB)" = str(round((high-low)/512.,2),7,2),
       used = str(round(isnull(sum(size),0)/512.,2),7,2),
       free = str(round(abs((high-low-isnull(sum(size),0))/512.),2),7,2)
from master.dbo.sysusages u,
     master.dbo.sysdevices d
where vstart between low and high
  and cntrltype=0
group by all name
  having cntrltype=0
order by vdevno


go
-- Force unchained transaction mode so the procedure can be executed from
-- sessions running in either chained or unchained mode.
EXEC sp_procxmode 'dbo.sp_dba_devusage','unchained'
go
-- Post-install verification.  NOTE(review): both branches print the same
-- '<<<>>>' placeholder -- apparently a mangled success/failure message.
IF OBJECT_ID('dbo.sp_dba_devusage') IS NOT NULL
    PRINT '<<<>>>'
ELSE
    PRINT '<<<>>>'
go
-- NOTE(review): USE sybsystemprocs comes AFTER the CREATE above, so the
-- procedure is created in whatever database was current when the script
-- started -- confirm the script is always run from sybsystemprocs.
USE sybsystemprocs
go
GRANT EXECUTE ON dbo.sp_dba_devusage TO public
go

check for blocks in database

-- Idempotent (re)install guard: drop any existing copy of the procedure
-- before it is recreated below, and report whether the drop succeeded.
-- NOTE(review): both PRINT branches emit the same '<<<>>>' placeholder,
-- which looks like a mangled success/failure message -- confirm intended text.
IF OBJECT_ID('dbo.sp_dba_dblock') IS NOT NULL
BEGIN
    DROP PROCEDURE dbo.sp_dba_dblock
    -- Re-check: OBJECT_ID still non-null means the drop failed.
    IF OBJECT_ID('dbo.sp_dba_dblock') IS NOT NULL
        PRINT '<<<>>>'
    ELSE
        PRINT '<<<>>>'
END
go

create procedure sp_dba_dblock
as
-- Reports locks held in the current database.  Prints a banner when no
-- locks are held; otherwise returns one row per lock showing the spid,
-- login name, lock type, table, page number and the client host/process.
-- Run from within the database of interest.  Returns 0.

declare @PRNTBUFFER varchar(150),   -- formatted line for print output
@CURRENTDB varchar(30),             -- name of the current database
@CURRENTDBID smallint,              -- dbid of the current database
        @LOCKCOUNT int              -- number of locks held in this database

select @CURRENTDB = rtrim(db_name())
select @CURRENTDBID = db_id()

-- Count locks belonging to this database only.
select @LOCKCOUNT = count(dbid)
       from master.dbo.syslocks
       where master.dbo.syslocks.dbid = @CURRENTDBID

if ( @LOCKCOUNT = 0 )
begin
select @PRNTBUFFER = " " + replicate("=",68)
print @PRNTBUFFER
select @PRNTBUFFER = " No Locks In Database : " + rtrim(@CURRENTDB)
print @PRNTBUFFER
                select @PRNTBUFFER = " " + replicate("=",68)
                print @PRNTBUFFER
end

else

  begin
                select @PRNTBUFFER = " " + replicate("=",68)
                print @PRNTBUFFER
                select @PRNTBUFFER = " Locks In Database : " + rtrim(@CURRENTDB)
                print @PRNTBUFFER
                select @PRNTBUFFER = " " + replicate("=",68)
                print @PRNTBUFFER

    -- One row per lock.  spt_values type "L" translates the numeric lock
    -- type into its name; sysprocesses supplies the client host and pid.
    -- NOTE(review): "order by 1,4,5" sorts the char-converted spid
    -- lexicographically ("10" before "2"), not numerically.
    select Spid = convert(char(4),master.dbo.syslocks.spid),
           SybaseUser = convert(char(12),master.dbo.syslogins.name),
           LockType = convert(char(14),master.dbo.spt_values.name),
           TableName = convert(char(18),object_name(master.dbo.syslocks.id)),
           PageNumber = convert(char(14),master.dbo.syslocks.page),
  UnixHost = convert(char(8),master.dbo.sysprocesses.hostname),
           UnixPid = convert(char(8),master.dbo.sysprocesses.hostprocess)
      from master.dbo.syslocks,
           master.dbo.spt_values,
           master.dbo.syslogins,
           master.dbo.sysprocesses
     where master.dbo.syslocks.type = master.dbo.spt_values.number
       and master.dbo.sysprocesses.suid = master.dbo.syslogins.suid
       and master.dbo.spt_values.type = "L"
       and master.dbo.syslocks.spid = master.dbo.sysprocesses.spid
       and master.dbo.syslocks.dbid = @CURRENTDBID
     order by 1,4,5
  end

return (0)


go
-- Force unchained transaction mode so the procedure can be executed from
-- sessions running in either chained or unchained mode.
EXEC sp_procxmode 'dbo.sp_dba_dblock','unchained'
go
-- Post-install verification.  NOTE(review): both branches print the same
-- '<<<>>>' placeholder -- apparently a mangled success/failure message.
IF OBJECT_ID('dbo.sp_dba_dblock') IS NOT NULL
    PRINT '<<<>>>'
ELSE
    PRINT '<<<>>>'
go
-- NOTE(review): USE sybsystemprocs comes AFTER the CREATE above, so the
-- procedure is created in whatever database was current when the script
-- started -- confirm the script is always run from sybsystemprocs.
USE sybsystemprocs
go
GRANT EXECUTE ON dbo.sp_dba_dblock TO public
go

gen database ddl


create procedure sp_dba_gendbddl @dbname varchar(32) = null
as
-- Generates the "create database" / "alter database" DDL needed to rebuild
-- the device layout of @dbname.  @dbname may be null (all databases) or a
-- LIKE pattern.  One DDL fragment is emitted per sysusages row, in lstart
-- order, so replaying the output recreates fragments in allocation order.
-- Returns 0 on success, 1 when no database matches or no usage info found.

declare @db_name varchar(30),     -- name of the database being scripted
@dbid smallint,                   -- dbid fetched from the cursor
@segmap smallint,                 -- segment map of the fragment (4 = log only)
@segs varchar(4),
@dev_frag varchar(30),            -- device name of the fragment
@frag_size char(10),              -- fragment size in MB (string)
@sql_string varchar(255),         -- current line of generated DDL
@alter_status tinyint,            -- 0 while still inside "create database"
@counter int,                     -- data fragments emitted so far
@seq_no int,                      -- output ordering key for #db_info
    @numdbs     int               -- number of databases matching @dbname

set nocount on

-- Null argument means "all databases".
if @dbname is null select @dbname = "%"

-- Match databases if given else do them all
--
select @numdbs = count(*) from master.dbo.sysdatabases where name like @dbname

-- DB exists ?
--
if @numdbs = 0
begin
/* 17590, "The specified database does not exist." */
raiserror 17590
return (1)
end

-- Initialize dbids temp table from sysdatabases
--
select dbid into #dbids from master.dbo.sysdatabases where name like @dbname

declare @curdbid smallint /* the one we're currently working on */
declare @dbdesc varchar(102) /* the total description for the db */
declare @bitdesc varchar(30) /* the bit description for the db */

-- Set @curdbid to the first dbid.
--
select @curdbid = min(dbid) from #dbids

-- Numbered fragments of generated DDL; seq_no preserves output order.
create table #db_info(
    seq_no int not null,
    sql_string varchar(255) null
    )

-- Iterate through by dbid
--
while @curdbid is not NULL
begin

    -- Reset the fragment table and per-database state for this dbid.
    delete #db_info

    select @alter_status = 0
    select @counter = 0
    select @seq_no = 0
    select @db_name = db_name(@curdbid)

    /* Cursor over this database's device fragments.  The ORDER BY on   */
    /* usg.lstart is critical: fragments must be processed in the exact */
    /* sequence they were originally allocated.                         */
    declare get_db_info cursor for
    select db.dbid, usg.segmap, "device fragment"=substring(dev.name,1,15), "size(MB)" = str(usg.size/512.,10,0)
    from master.dbo.sysusages usg,
     master.dbo.sysdevices dev,
     master.dbo.sysdatabases db
    where vstart between low and high
    and cntrltype = 0
    and db.dbid = @curdbid
    and db.dbid =usg.dbid
    order by db.dbid,usg.lstart,dev.name

    -- Prepare initial SQL string
    select @sql_string = 'create database ' + @db_name + ' on '

    -- Store it numbered so it can be extracted in the correct sequence.
    insert into #db_info (seq_no, sql_string) values (@seq_no, @sql_string)

    select @seq_no = @seq_no + 1

    -- Open the cursor and fetch the first row
    open get_db_info

    fetch get_db_info into @dbid,@segmap,@dev_frag,@frag_size

    if (@@sqlstatus=2)
    begin
     select "No information found for database"
     close get_db_info
     deallocate cursor get_db_info
     -- Clean up and signal failure.  (The original fell through with no
     -- return status and left both temp tables behind.)
     drop table #db_info
     drop table #dbids
     return (1)
    end

    while (@@sqlstatus = 0)
    begin

     if @alter_status = 0 /* Still part of original 'create database ...' command */
     begin
     if @segmap = 4 /* Must refer to log device */
        begin
     select @sql_string = '    log on ' + @dev_frag + ' = ' +
     ltrim(convert(varchar(20),@frag_size)) + char(10) + 'go' + char(10)
     select @alter_status = 1
     end
     else /* Must refer to data device */
     begin
     if @counter = 0 /* 1st data device of "create database ..." */
        begin
        select @sql_string = '    ' + @dev_frag + ' = ' + ltrim(convert(varchar(20),@frag_size))
        select @counter = @counter + 1
        end
     else /* Additional data device for "create database ..." */
        begin
        select @sql_string = '   ,' + @dev_frag + ' = ' + ltrim(convert(varchar(20),@frag_size))
        end
     end
     end
     else /* Must be an alter database command */
     begin

     select @sql_string = 'alter database ' + @db_name

     insert into #db_info (seq_no, sql_string) values (@seq_no, @sql_string)

     select @seq_no = @seq_no + 1

            -- Log = 4 or data
            --
     if @segmap = 4
     select @sql_string = '    log on ' + @dev_frag + ' = ' +
     ltrim(convert(varchar(20),@frag_size)) + char(10) + 'go' + char(10)
     else
     select @sql_string = '    on ' + @dev_frag + ' = ' +
     ltrim(convert(varchar(20),@frag_size)) + char(10) + 'go' + char(10)
     end

     insert into #db_info (seq_no, sql_string) values (@seq_no, @sql_string)

     select @seq_no = @seq_no + 1

     -- Get next row
     fetch get_db_info into @dbid,@segmap,@dev_frag,@frag_size
    end

    close get_db_info
    deallocate cursor get_db_info

    -- Emit each piece of generated SQL, ordered by sequence number.
    declare print_createdb_cursor cursor for
     select sql_string
     from #db_info
     order by seq_no
     for read only

    open print_createdb_cursor

    fetch print_createdb_cursor into @sql_string

    while (@@sqlstatus = 0)
    begin
     print @sql_string
     fetch print_createdb_cursor into @sql_string
    end

    close print_createdb_cursor
    deallocate cursor print_createdb_cursor

    -- Get next dbid.
    select @curdbid = min(dbid) from #dbids where dbid > @curdbid

end

drop table #db_info
drop table #dbids

return(0)

check table space

-- Idempotent (re)install guard: drop any existing copy of the procedure
-- before it is recreated below, and report whether the drop succeeded.
-- NOTE(review): both PRINT branches emit the same '<<<>>>' placeholder,
-- which looks like a mangled success/failure message -- confirm intended text.
IF OBJECT_ID('dbo.sp_dba_dbspace') IS NOT NULL

BEGIN

    DROP PROCEDURE dbo.sp_dba_dbspace

    -- Re-check: OBJECT_ID still non-null means the drop failed.
    IF OBJECT_ID('dbo.sp_dba_dbspace') IS NOT NULL

        PRINT '<<<>>>'

    ELSE

        PRINT '<<<>>>'

END

go


create procedure sp_dba_dbspace
as
-- Space usage report for the current database:
--   * overall database / data-segment / log-segment sizes
--   * per-table and per-index page counts in five sections (system tables,
--     nonclustered indexes on system tables, user tables with no clustered
--     index, user tables with a clustered index, nonclustered user indexes)
--   * summary totals with percent-used figures (log on its own segment vs.
--     data and log sharing a segment are handled separately)
--   * total reserved space per segment and actual usage per device.
-- Run from within the database of interest.  Returns 0 on success.
set nocount on

declare @data_size float,      -- pages allocated on non-log segments
@log_size float,               -- pages allocated on the log-only segment
@psize_kb float,               -- server page size in KB
@log_id int,                   -- object id of syslogs
@rsrvTTL int,                  -- total reserved pages (shared-segment case)
@log_segment int,              -- segmap mask meaning "log only"
@message char(80)              -- formatted banner line for print

/* get the segmap mask for log only segments */
select @log_segment = number
from master..spt_values
where name = "log only"
if @@error != 0 return

/* get the id of the syslogs table */
select @log_id = id
from sysobjects where name = "syslogs"
if @@error != 0 return

/* get page size from system proc values table */
select distinct @psize_kb = convert(float, low) / 1024
from master..spt_values
where number = 1 and type = "E"
if @@error != 0 return

/* get the db size excluding the log segment size */
select distinct @data_size = sum(size)
from master..sysusages
where dbid = db_id() and segmap != @log_segment
if @@error != 0 return

/* get the log segment size */
select distinct @log_size = sum(size)
from master..sysusages
where dbid = db_id() and segmap = @log_segment
if @@error != 0 return

if @log_size is NULL
  select @log_size = 0   /* case where the db has no log segment */

/* print out the overall database stats */
select @message = " " + replicate("=",68)
print @message

select DbName = rtrim(db_name()),
      DbSize = ltrim(str(sum(@data_size + @log_size) * @psize_kb, 12, 0) + " KB"),
      DataSegments = ltrim(str(@data_size * @psize_kb, 10, 0) + " KB"),
      LogSegments = ltrim(str(@log_size * @psize_kb, 10, 0) + " KB")
if @@error != 0 return

select @message = " " + replicate("=",68)
print @message

/* get page counts for all tables except syslogs */
select
      o.name "Tname",
      i.name "Iname",
      i.segment,
      i.id,
      i.indid,
      Rrows = rowcnt(i.doampg),
      rsrv = (reserved_pgs(i.id,i.doampg) + reserved_pgs(i.id,i.ioampg)),
      data = data_pgs(i.id,i.doampg),
      idx = data_pgs(i.id,i.ioampg),
      unused = ((reserved_pgs(i.id,i.doampg) + reserved_pgs(i.id,i.ioampg)) -
                (data_pgs(i.id,i.doampg) + data_pgs(i.id,i.ioampg)))
      into #tableCount
      from sysobjects o, sysindexes i
      where o.id = i.id
      and o.id != @log_id
if @@error != 0 return

/* print out results
 * order by system tables, system tables nonclustered idx
 * then user tables with no clustered idx, clustered idx tabs and
 * finally non clustered idx
 */
print "             SYSTEM TABLES"
select @message = " " + replicate("=",68)
print @message

/* System tables have object ids below 100 (user objects are > 100).
 * The source had a truncated predicate here ("where tp.id <>"), which
 * is a syntax error; restored as the complement of the user-table filter. */
select rtrim(Tname) "TableName",
       rtrim(Iname) "IndexName",
       ltrim(str(convert(float,Rrows),12,0)) "RowCount",
       ltrim(str(convert(float,rsrv),10,0)) "ReservedPages",
       ltrim(str(convert(float,rsrv)*@psize_kb,10,2)+" KB") "Space",
       ltrim(str(convert(float,data),10,0)) "DataPages",
       ltrim(str(convert(float,idx),10,0)) "IndexPages",
       ltrim(str(convert(float,unused),10,0)) "UnusedPages"
       from #tableCount tp
       where tp.id < 100
       and ( tp.indid = 0 or tp.indid = 1 )
       order by id

select @message = " " + replicate("=",68)
print @message
print "  NON CLUSTERED INDEXES ON SYSTEM TABLES"
select @message = " " + replicate("=",68)
print @message

/* Same restored system-table predicate as above (was "where tp.id <>"). */
select rtrim(Tname) "TableName",
       rtrim(Iname) "IndexName",
       ltrim(str(convert(float,rsrv),10,0)) "ReservedPages",
       ltrim(str(convert(float,rsrv)*@psize_kb,10,2)+" KB") "Space",
       ltrim(str(convert(float,idx),10,0)) "IndexPages",
       ltrim(str(convert(float,unused),10,0)) "UnusedPages"
       from #tableCount tp
       where tp.id < 100
       and tp.indid > 1
       order by id

select @message = " " + replicate("=",68)
print @message
print "         USER TABLES WITH NO CLUSTERED INDEXES"
select @message = " " + replicate("=",68)
print @message

select rtrim(Tname) "TableName",
       ltrim(str(convert(float,Rrows),12,0)) "RowCount",
       ltrim(str(convert(float,rsrv),10,0)) "ReservedPages",
       ltrim(str(convert(float,rsrv)*@psize_kb,10,2)+" KB") "Space",
       ltrim(str(convert(float,data),10,0)) "DataPages",
       ltrim(str(convert(float,unused),10,0)) "UnusedPages"
       from #tableCount tp
       where tp.id > 100
       and tp.indid = 0
       order by Tname

select @message = " " + replicate("=",68)
print @message
print "            USER TABLES WITH CLUSTERED INDEX"
select @message = " " + replicate("=",68)
print @message

select rtrim(Tname) "TableName",
       rtrim(Iname) "IndexName",
       ltrim(str(convert(float,Rrows),12,0)) "RowCount",
       ltrim(str(convert(float,rsrv),10,0)) "ReservedPages",
       ltrim(str(convert(float,rsrv)*@psize_kb,10,2)+" KB") "Space",
       ltrim(str(convert(float,data),10,0)) "DataPages",
       ltrim(str(convert(float,idx),10,0)) "IndexPages",
       ltrim(str(convert(float,unused),10,0)) "UnusedPages"
       from #tableCount tp
       where tp.id > 100
       and tp.indid = 1
       order by Tname

select @message = " " + replicate("=",68)
print @message
print "        NON CLUSTERED INDEXES ON USER TABLES"
select @message = " " + replicate("=",68)
print @message

select rtrim(Tname) "TableName",
       rtrim(Iname) "IndexName",
       ltrim(str(convert(float,rsrv),12,0)) "ReservedPages",
       ltrim(str(convert(float,rsrv)*@psize_kb,10,2)+" KB") "Space",
       ltrim(str(convert(float,idx),10,0)) "IndexPages",
       ltrim(str(convert(float,unused),10,0)) "UnusedPages"
       from #tableCount tp
       where tp.id > 100
       and tp.indid > 1
       order by Tname

/* now get space for syslogs (idx column forced to 0: syslogs has no
 * separate index pages) */
insert into #tableCount
select
       o.name,
       i.name,
       i.segment,
       i.id,
       i.indid,
       rowcnt(i.doampg),
       reserved_pgs(i.id, i.doampg),
       data_pgs(i.id,i.doampg),
       0,
       unused = reserved_pgs(i.id, i.doampg) - data_pgs(i.id,i.doampg)
       from sysobjects o, sysindexes i
       where o.id = i.id
       and o.id = @log_id
if @@error != 0 return

/* compute summary results showing Total data seg allocated
 * whats been reserved and unused.
 * compute a usage as a percentage
 */
select @message = " " + replicate("=",68)
print @message
print "      SUMMARY RESULTS "

/* if log on its own segment then use @log_size else use data_size */
if @log_size != 0
 begin

 /* "Indixes" is (sic) -- kept to preserve the report column heading. */
 select
 DataSegTTL = ltrim(str(@data_size * @psize_kb, 10, 0) + " KB"),
 Reserved = ltrim(str(convert(float,sum(rsrv))*@psize_kb,10,0)+" KB"),
 Objects = ltrim(str(convert(float,sum(data))*@psize_kb,10,0)+" KB"),
 Indixes = ltrim(str(convert(float,sum(idx))*@psize_kb,10,0)+" KB"),
 FreeSpace = str((@data_size*@psize_kb)-(sum(rsrv)*@psize_kb)) + " KB",
 Usage = str((sum(rsrv)*@psize_kb)/(@data_size * @psize_kb) * 100,6,2) + " % Used"
   from #tableCount
   where id != @log_id
   if @@error != 0 return

 select
 LogSegTTL = str(@log_size * @psize_kb, 10, 0) + " KB",
 Reserved = str(rsrv*@psize_kb) + " KB",
 FreeSpace = str((@log_size*@psize_kb)-(rsrv*@psize_kb)) + " KB",
 Usage = str((((rsrv*@psize_kb)/(@log_size * @psize_kb)) * 100),6,2) + " % Used"
   from #tableCount
   where id = @log_id
   if @@error != 0 return

 end

else

 begin

 select @message = "DATA AND LOG SHARE SAME SEGMENT"
 print @message

 /* get the total of reserved pages from #tableCount */
 select @rsrvTTL = sum(rsrv) from #tableCount
 if @@error != 0 return

 select
 DataSegTTL = ltrim(str(@data_size * @psize_kb, 10, 0) + " KB"),
 Reserved = ltrim(str(convert(float,sum(rsrv))*@psize_kb,10,0)+" KB"),
 Objects = ltrim(str(convert(float,sum(data))*@psize_kb,10,0)+" KB"),
 Indixes = ltrim(str(convert(float,sum(idx))*@psize_kb,10,0)+" KB"),
 FreeSpace = str((@data_size*@psize_kb)-(@rsrvTTL*@psize_kb)) + " KB",
 Usage = str((@rsrvTTL*@psize_kb)/(@data_size * @psize_kb) * 100,6,2) + " % Used"
 from #tableCount
 where id != @log_id

 select
 DataSegTTL = str(@data_size * @psize_kb, 10, 0) + " KB",
 LogReserved = str(rsrv*@psize_kb) + " KB",
 FreeSpace = str((@data_size*@psize_kb)-(@rsrvTTL*@psize_kb)) + " KB",
 Usage = str((((@rsrvTTL*@psize_kb)/(@data_size * @psize_kb)) * 100),6,2) + " % Used"
 from #tableCount
 where id = @log_id
 if @@error != 0 return

 end

select @message = " " + replicate("=",68)
print @message
select @message = "TOTAL SPACE RESERVED PER SEGMENT"
print @message

select
       s.name "SegName",
       Reserved = ltrim(str(convert(float,sum(rsrv))*@psize_kb,10,0)+" KB")
       from syssegments s, #tableCount t
       where s.segment = t.segment
       group by s.name
       order by s.segment

select @message = " " + replicate("=",68)
print @message

/* how much space the current db uses on each device */
select size = convert(float, high-low+1) * @psize_kb,
       alloc = convert(float, sum(size)) * @psize_kb,
       device = name,
       total_alloc = convert(float, 0)
into #devSum
from master..sysusages, master..sysdevices
where vstart >= low and vstart <= high and cntrltype = 0
and dbid = db_id()
group by name
if @@error != 0 return

/* update total usage for each device (all databases, not just this one) */
update #devSum
set total_alloc = ( select sum(su.size) from master..sysusages su,
                    master..sysdevices sd
                    where vstart >= low and vstart <= high and cntrltype = 0
                    and #devSum.device = sd.name
                  )
if @@error != 0 return

update #devSum
set total_alloc = total_alloc * @psize_kb
if @@error != 0 return

/* add summary lines to the device sum table */
insert #devSum
select sum(size), sum(alloc), "TOTAL", sum(total_alloc)
from #devSum
if @@error != 0 return

/* now print stats */
select @message = "      DISK ACTUAL USAGE BY DEVICE FOR: " + db_name()
print @message
select @message = " " + replicate("=",68)
print @message

select DeviceName = device,
       Size = str(size, 10, 0) + " KB",
       ThisDb = str(alloc, 10, 0) + " KB",
       OtherDb = str(total_alloc - alloc, 10, 0) + " KB",
       UnAlloc = str(size - total_alloc, 10 ,0) + " KB",
       Usage = str(total_alloc/size *100,6,2) + " % Used"
       from #devSum
       where device != "TOTAL"
       order by device
if @@error != 0 return

select @message = " " + replicate("=",68)
print @message

select TOTAL = device,
       Size = str(size, 10, 0) + " KB",
       ThisDb = str(alloc, 10, 0) + " KB",
       OtherDb = str(total_alloc - alloc, 10, 0) + " KB",
       UnAlloc = str(size - total_alloc, 10 ,0) + " KB",
       Usage = str(total_alloc/size *100,6,2) + " % Used"
       from #devSum
       where device = "TOTAL"
if @@error != 0 return

select @message = " " + replicate("=",68)
print @message

drop table #devSum
drop table #tableCount

return 0



go

-- Force unchained transaction mode so the procedure can be executed from
-- sessions running in either chained or unchained mode.
EXEC sp_procxmode 'dbo.sp_dba_dbspace','unchained'

go

-- Post-install verification.  NOTE(review): both branches print the same
-- '<<<>>>' placeholder -- apparently a mangled success/failure message.
IF OBJECT_ID('dbo.sp_dba_dbspace') IS NOT NULL

    PRINT '<<<>>>'

ELSE

    PRINT '<<<>>>'

go

-- NOTE(review): USE sybsystemprocs comes AFTER the CREATE above, so the
-- procedure is created in whatever database was current when the script
-- started -- confirm the script is always run from sybsystemprocs.
USE sybsystemprocs

go

GRANT EXECUTE ON dbo.sp_dba_dbspace TO public

go


Sybase IPG Group White Paper

Sybase IPG Group White Paper

Series :            Sybase IQ 12 Technical Series

                        Index Selection Criteria

Sybase IQ 12.5

Index Selection Criteria

Richard Soundy
                        Sybase IPG (Analytics) Group for EMEA
                        November 2003


Abstract

This document details the basic steps for the determination of the required indexes for a Sybase IQ 12.5 database.  It is not considered that this document is in any way a complete blueprint for the required indexes, it is intended more to be a layout or a basis for the layout.

In addition this document only seeks to provide the information for the 12.5 version of the IQ product.  It is very likely that future releases will require a different choice of indexes, or maybe provide a different set of index to select from.

Initial Information Requirements

In order to determine the “best” initial fit of indexes for a specific database and application the following information will need to be derived.  It is understood that for a new system some if not much of this information will not be available, and in this event the best information should be obtained.

In all cases the monitoring of the performance of the server/database/application will be required to correct any errors in the initial index determination.  The tools and techniques used for this monitoring are outside the scope of this document, and are a subject for future documentation.

Table Related Information Required.

For each and every table the following information needs to be obtained:

1.      Number of Rows in the table using the following bands:

                                i.            Less than 2,000 rows

                              ii.            Between 2,000 and 1,000,000 rows

                            iii.            Greater than 1,000,000 rows.

The first two bands above (less than 1,000,000 rows) relate, in the majority of data warehouse designs, to dimension or look-up tables; the final band relates to the fact or event tables.

The reasons for the split are two fold.  An IQ system will only access a table of less than 2,000 rows with Default or Fast Projection Indexes[1].  Any enumerated indexes are only of use to the Optimizer, and are not used by the Run-Time Engine at all.  This is not to say that the enumerated indexes are of no use, in fact to the contrary, they provide priceless information to the optimizer in planning the query.  Also given the small size of the table(s) the indexes are also of similar size.

The banding of 2,000 to 1,000,000 rows is given by the relative sizes of the Low Fast (LF) and High Group (HG) index types.  It is considered that these two index types are merely reflections of each other.  This is very wrong.  They are internally very different, have very different performance capabilities, and have differing load and storage costs.  This document is not the place to go in to the structures of the HG or LF indexes, however the following two examples show the differing sizes of the two index types, which leads to the reasons behind the indexing guidelines presented further on in this document.

If we consider a table of 10,000 rows, which has a column that has a cardinality of 1,000[2], we should consider the differing storage sizes for the two index types under discussion (LF and HG).

An LF index will have 1 page of storage for the Index ID page and 1 bitmap for each and every discrete value the column is storing.  For a 10,000-row table, given a page size of 218 Kbytes[3], the bitmaps will be 1 page in size.  Hence the size of the LF index will be 1,001 pages.  It should be noted that this will not use 64 times 1,001 Kbytes of disk space as the compression routines will shrink this to a smaller number.  However to access a given value the system will have to read 2 pages (ID + 1 bitmap).

For the HG index the calculation is somewhat different.  The HG will still have and ID page (which will also contain the B-Tree component of the index), but the G-Array component will be stored in some 2- 3 pages.  This is because a G-Array page can hold some 8,000 row-id values; hence 10,000 row-ids (rows) can be stored in 2 to 3 pages.  So the overall size of the HG index will be 4 pages.  Also the access to a given value will still only be 2 I/Os (pages).

Given that the optimizer will select (all other things being equal) the fast search index (LF or HG) that is physically smaller, the optimizer will always select the HG index.  Hence for a table of 10,000 rows an HG is far more efficient than an LF.  There is a question of skewed data  - but even this does not affect the overall size of the HG index to any great degree, as this will only add a couple of pages to the overall size of the index.  The difference in the load time between an LF and an HG index is for all intents and purposes irrelevant for a table of only 10,000 rows.

The break even point on the relative size of the two index types is easily calculated for each table, but the larger the table, the more the likelihood is that it will be dynamic, and hence favour the use of the LF index for low cardinality columns.

We can continue to provide sizing costs for the two indexes, but the logic is clear: for smaller tables the HG index should be considered to be the “index of choice”.

So where is the break point?  Well, there is no firm “number-of-rows” at which the LF becomes a “better” index than the HG, but we would consider that somewhere in the few million rows this takes place.  Of course other questions need to be answered; for example, if the table has a very low volatility[4] or if size compression is not an issue, then you might wish to use the HG for low cardinality columns up to 10 or 20 million row tables.

Column Related Information Required

For each and every column the following information should be obtained:

1.      The cardinality of the column

2.      The data type of the column

3.      The expected use of the column

                                i.            Simple searches (equality/inequality, EXISTS, IN)

                              ii.            Range Searches (<, >, between)

                            iii.            Aggregation functions (SUM(), AVG(), DISTINCT, COUNT)

                            iv.            Intra-table comparison

                              v.            Free Text searches (CONTAINS, LIKE)

                            vi.            Ordering and grouping (ORDER BY, GROUP BY)

This information is required to determine the LF/HG and other index requirements.  This is also deemed to be the most difficult information to gather for a new system.  If the Sybase IQ database is to accelerate or replace an existing system, there should be no barrier to any of the above information.

As the volume of this information is likely to be high it is suggested that some form of schema design tool is adopted to hold the information.  A product such as Sybase Power Designer is recommended for this area as it provides many other features in addition to its documentation functions.

Database Index Selection

The following list needs to be applied to each and every column in each and every table (with the possible exception of tables of less than 2,000 rows) in the database.

  1. For each column the cardinality needs to be applied to the CREATE TABLE IQ UNIQUE constraint.  This will provide the best version of the FP index (1-byte FP, 2-byte FP or Flat FP).  Note that the MINIMIZE_STORAGE option will auto-generate an IQ UNIQUE value of 255 for all columns for which IQ UNIQUE is not specified.

Note: for a column that has a data type of FLOAT, REAL or DOUBLE then the only indexes that can be applied are flat FP.  For a column of data type BIT there is a custom index structure automatically generated under it, hence there is no requirement for any further indexes to be placed upon it.  A column of data type VARCHAR() with a length greater than 255 can only have a Flat FP and (if required) a Word (WRD) index placed on it.

  2. If a column is a PRIMARY KEY or UNIQUE then this should be specified within the CREATE TABLE statement, this will auto generate a unique HG index on the column, allowing improved storage, better join processing and more statistics available to the optimizer.
  3. If a column is a FOREIGN KEY then this should also be defined in the CREATE TABLE statement, also allowing automatic construction of an HG index, this is vital for both the optimizer and for join performance.
  4. If a table has a multi-column PRIMARY or FOREIGN KEY, this can be specified, and the system will auto-generate the correct type of Multi-Column HG index on the columns (at this point it should be noted that the individual columns within the key will not have an HG index placed upon them – so it is the responsibility of the designer to ensure that if these columns are used for searches or whatever then the appropriate indexes are placed upon them).
  5. For any and every other join column (column that is used in a join – but not in a PRIMARY KEY/FOREIGN KEY join), then this column should also have an HG index placed upon it.

At this point all of the above information applies to columns of any cardinality and to tables of any size.  The following steps may require the column cardinality and the table size to be considered.

  1. For a column that is going to be used for searching (Equality, inequality) for certain aggregations (COUNT, DISTINCT), for grouping and ordering, then either an HG or an LF index is required.  For a table of less than around 1 million rows (see discussion above) a HG index is more efficient for all cardinalities.  For a table greater in size than this use an LF for columns with a cardinality of less than around 1,500 and an HG of columns that have a cardinality greater than this.
  2. For a column that has a data type of DATE, TIME or DATETIME, then it is suggested that the DATE, TIME and DTTM indexes are applied.  This will provide for both range searches and date function searches (DATEPART() etc.).
  3. For a non-date/time column that is involved in range searches and/or SUM() and AVG() aggregation functions it is suggested that an HNG index is applied.  This may not be required if the column has a high percentage of NULL values (greater than 10% of the rows).  Also the use of HNG columns should be monitored, as HG and LF can provide (in some cases) a faster range search capability.
  4. Any column that is compared with another column in the same table should have a compare (CMP) index applied to the comparison pair of columns.
  5. Any VARCHAR() or CHAR() columns that are likely to be used for word searches should have a word (WRD) index applied to them.

Conclusion

That is really all that is required for the implementation of indexes to an IQ database.  Naturally there are always exceptions (for both table sizes cardinality [HG vs. LF]) and the performance of the system must be monitored to determine if further indexes are required or maybe some indexes to be removed.

For further information on the structure and function of the various index types then the Sybase IQ manual set can be consulted, in addition the Sybase Professional Services Groups can be used both as implementers and trainers in some of the more esoteric functionality of the Sybase IQ index sets.

Richard Soundy

November 2003

London.

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

Sybase, Inc.

Worldwide Headquarters

One Sybase Drive

Dublin, CA 94568-7902 USA

Tel: +800 8 SYBASE

www.sybase.com



[1] In this document I generally refer to Default Indexes as Fast Projection or FP indexes.

[2] The column supports the storage of 1,000 different values.

[3] The default IQ Page Size for an IQ 12.5 Database

[4] A table that is low volatility does not change very often; hence the loading time is not of issue.