IF OBJECT_ID('dbo.sp_dba_devusage') IS NOT NULL
BEGIN
DROP PROCEDURE dbo.sp_dba_devusage
IF OBJECT_ID('dbo.sp_dba_devusage') IS NOT NULL
PRINT '<<< FAILED DROPPING PROCEDURE dbo.sp_dba_devusage >>>'
ELSE
PRINT '<<< DROPPED PROCEDURE dbo.sp_dba_devusage >>>'
END
go
create proc sp_dba_devusage
as
set nocount on
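/* list every database with its total size, dbid and creation date */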
select distinct
name = d.name,
db_size = str(sum(u.size) / 512, 10, 1) + " MB",
dbid = d.dbid,
created = convert(char(14), d.crdate, 107)
from master.dbo.sysdatabases d, master.dbo.sysusages u
where d.dbid = u.dbid
group by d.dbid, u.dbid
order by d.dbid
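/* master..spt_values type "E", number 3 holds the platform-dependent
** byte position used to extract the virtual device number (vdevno)
** from sysdevices.low -- the same technique sp_helpdevice uses */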
declare @vsize int
select @vsize = low
from master.dbo.spt_values
where type="E"
and number = 3
select device = substring(name,1,15),
vdevno = convert(tinyint,substring(convert(binary(4),low),@vsize,1)),
"default disk?" = " " + substring("NY",(status & 1)+1,1),
"total (MB)" = str(round((high-low)/512.,2),7,2),
used = str(round(isnull(sum(size),0)/512.,2),7,2),
free = str(round(abs((high-low-isnull(sum(size),0))/512.),2),7,2)
from master.dbo.sysusages u,
master.dbo.sysdevices d
where vstart between low and high
and cntrltype=0
group by all name
having cntrltype=0
order by vdevno
go
EXEC sp_procxmode 'dbo.sp_dba_devusage','unchained'
go
IF OBJECT_ID('dbo.sp_dba_devusage') IS NOT NULL
PRINT '<<< CREATED PROCEDURE dbo.sp_dba_devusage >>>'
ELSE
PRINT '<<< FAILED CREATING PROCEDURE dbo.sp_dba_devusage >>>'
go
USE sybsystemprocs
go
GRANT EXECUTE ON dbo.sp_dba_devusage TO public
go
IF OBJECT_ID('dbo.sp_dba_dbspace') IS NOT NULL
BEGIN
DROP PROCEDURE dbo.sp_dba_dbspace
IF OBJECT_ID('dbo.sp_dba_dbspace') IS NOT NULL
PRINT '<<< FAILED DROPPING PROCEDURE dbo.sp_dba_dbspace >>>'
ELSE
PRINT '<<< DROPPED PROCEDURE dbo.sp_dba_dbspace >>>'
END
go
create procedure sp_dba_dbspace
as
set nocount on
declare @data_size float,
@log_size float,
@psize_kb float,
@log_id int,
@rsrvTTL int,
@log_segment int,
@message char(80)
/* get the segmap mask for log only segments */
select @log_segment = number
from master..spt_values
where name = "log only"
if @@error != 0 return
/* get the id of the syslogs table */
select @log_id = id
from sysobjects where name = "syslogs"
if @@error != 0 return
/* get page size from system proc values table */
select distinct @psize_kb = convert(float, low) / 1024
from master..spt_values
where number = 1 and type = "E"
if @@error != 0 return
/* get the db size excluding the log segment size */
select distinct @data_size = sum(size)
from master..sysusages
where dbid = db_id() and segmap != @log_segment
if @@error != 0 return
/* get the log segment size */
select distinct @log_size = sum(size)
from master..sysusages
where dbid = db_id() and segmap = @log_segment
if @@error != 0 return
if @log_size is NULL
select @log_size = 0 /* case where the db has no log segment */
/* print out the overall database stats */
select @message = " " + replicate("=",68)
print @message
select DbName = rtrim(db_name()),
DbSize = ltrim(str(sum(@data_size + @log_size) * @psize_kb, 12, 0) + " KB"),
DataSegments = ltrim(str(@data_size * @psize_kb, 10, 0) + " KB"),
LogSegments = ltrim(str(@log_size * @psize_kb, 10, 0) + " KB")
if @@error != 0 return
select @message = " " + replicate("=",68)
print @message
/* get page counts for all tables except syslogs */
select
o.name "Tname",
i.name "Iname",
i.segment,
i.id,
i.indid,
Rrows = rowcnt(i.doampg),
rsrv = (reserved_pgs(i.id,i.doampg) + reserved_pgs(i.id,i.ioampg)),
data = data_pgs(i.id,i.doampg),
idx = data_pgs(i.id,i.ioampg),
unused = ((reserved_pgs(i.id,i.doampg) + reserved_pgs(i.id,i.ioampg)) -
(data_pgs(i.id,i.doampg) + data_pgs(i.id,i.ioampg)))
into #tableCount
from sysobjects o, sysindexes i
where o.id = i.id
and o.id != @log_id
if @@error != 0 return
/* print out results
* order by system tables, system tables nonclustered idx
* then user tables with no clustered idx, clustered idx tabs and
* finally non clustered idx
*/
print " SYSTEM TABLES"
select @message = " " + replicate("=",68)
print @message
select rtrim(Tname) "TableName",
rtrim(Iname) "IndexName",
ltrim(str(convert(float,Rrows),12,0)) "RowCount",
ltrim(str(convert(float,rsrv),10,0)) "ReservedPages",
ltrim(str(convert(float,rsrv)*@psize_kb,10,2)+" KB") "Space",
ltrim(str(convert(float,data),10,0)) "DataPages",
ltrim(str(convert(float,idx),10,0)) "IndexPages",
ltrim(str(convert(float,unused),10,0)) "UnusedPages"
from #tableCount tp
where tp.id <= 100
and ( tp.indid = 0 or tp.indid = 1 )
order by id
select @message = " " + replicate("=",68)
print @message
print " NON CLUSTERED INDEXES ON SYSTEM TABLES"
select @message = " " + replicate("=",68)
print @message
select rtrim(Tname) "TableName",
rtrim(Iname) "IndexName",
ltrim(str(convert(float,rsrv),10,0)) "ReservedPages",
ltrim(str(convert(float,rsrv)*@psize_kb,10,2)+" KB") "Space",
ltrim(str(convert(float,idx),10,0)) "IndexPages",
ltrim(str(convert(float,unused),10,0)) "UnusedPages"
from #tableCount tp
where tp.id <= 100
and tp.indid > 1
order by id
select @message = " " + replicate("=",68)
print @message
print " USER TABLES WITH NO CLUSTERED INDEXES"
select @message = " " + replicate("=",68)
print @message
select rtrim(Tname) "TableName",
ltrim(str(convert(float,Rrows),12,0)) "RowCount",
ltrim(str(convert(float,rsrv),10,0)) "ReservedPages",
ltrim(str(convert(float,rsrv)*@psize_kb,10,2)+" KB") "Space",
ltrim(str(convert(float,data),10,0)) "DataPages",
ltrim(str(convert(float,unused),10,0)) "UnusedPages"
from #tableCount tp
where tp.id > 100
and tp.indid = 0
order by Tname
select @message = " " + replicate("=",68)
print @message
print " USER TABLES WITH CLUSTERED INDEX"
select @message = " " + replicate("=",68)
print @message
select rtrim(Tname) "TableName",
rtrim(Iname) "IndexName",
ltrim(str(convert(float,Rrows),12,0)) "RowCount",
ltrim(str(convert(float,rsrv),10,0)) "ReservedPages",
ltrim(str(convert(float,rsrv)*@psize_kb,10,2)+" KB") "Space",
ltrim(str(convert(float,data),10,0)) "DataPages",
ltrim(str(convert(float,idx),10,0)) "IndexPages",
ltrim(str(convert(float,unused),10,0)) "UnusedPages"
from #tableCount tp
where tp.id > 100
and tp.indid = 1
order by Tname
select @message = " " + replicate("=",68)
print @message
print " NON CLUSTERED INDEXES ON USER TABLES"
select @message = " " + replicate("=",68)
print @message
select rtrim(Tname) "TableName",
rtrim(Iname) "IndexName",
ltrim(str(convert(float,rsrv),12,0)) "ReservedPages",
ltrim(str(convert(float,rsrv)*@psize_kb,10,2)+" KB") "Space",
ltrim(str(convert(float,idx),10,0)) "IndexPages",
ltrim(str(convert(float,unused),10,0)) "UnusedPages"
from #tableCount tp
where tp.id > 100
and tp.indid > 1
order by Tname
/* now get space for syslogs */
insert into #tableCount
select
o.name,
i.name,
i.segment,
i.id,
i.indid,
rowcnt(i.doampg),
reserved_pgs(i.id, i.doampg),
data_pgs(i.id,i.doampg),
0,
unused = reserved_pgs(i.id, i.doampg) - data_pgs(i.id,i.doampg)
from sysobjects o, sysindexes i
where o.id = i.id
and o.id = @log_id
if @@error != 0 return
/* compute summary results showing Total data seg allocated
* whats been reserved and unused.
* compute a usage as a percentage
*/
select @message = " " + replicate("=",68)
print @message
print " SUMMARY RESULTS "
/* if log on its own segment then use @log_size else use data_size */
if @log_size != 0
begin
select
DataSegTTL = ltrim(str(@data_size * @psize_kb, 10, 0) + " KB"),
Reserved = ltrim(str(convert(float,sum(rsrv))*@psize_kb,10,0)+" KB"),
Objects = ltrim(str(convert(float,sum(data))*@psize_kb,10,0)+" KB"),
Indexes = ltrim(str(convert(float,sum(idx))*@psize_kb,10,0)+" KB"),
FreeSpace = str((@data_size*@psize_kb)-(sum(rsrv)*@psize_kb)) + " KB",
Usage = str((sum(rsrv)*@psize_kb)/(@data_size * @psize_kb) * 100,6,2) + " % Used"
from #tableCount
where id != @log_id
if @@error != 0 return
select
LogSegTTL = str(@log_size * @psize_kb, 10, 0) + " KB",
Reserved = str(rsrv*@psize_kb) + " KB",
FreeSpace = str((@log_size*@psize_kb)-(rsrv*@psize_kb)) + " KB",
Usage = str((((rsrv*@psize_kb)/(@log_size * @psize_kb)) * 100),6,2) + " % Used"
from #tableCount
where id = @log_id
if @@error != 0 return
end
else
begin
select @message = "DATA AND LOG SHARE SAME SEGMENT"
print @message
/* get the total of reserved pages from #tableCount */
select @rsrvTTL = sum(rsrv) from #tableCount
if @@error != 0 return
select
DataSegTTL = ltrim(str(@data_size * @psize_kb, 10, 0) + " KB"),
Reserved = ltrim(str(convert(float,sum(rsrv))*@psize_kb,10,0)+" KB"),
Objects = ltrim(str(convert(float,sum(data))*@psize_kb,10,0)+" KB"),
Indexes = ltrim(str(convert(float,sum(idx))*@psize_kb,10,0)+" KB"),
FreeSpace = str((@data_size*@psize_kb)-(@rsrvTTL*@psize_kb)) + " KB",
Usage = str((@rsrvTTL*@psize_kb)/(@data_size * @psize_kb) * 100,6,2) + " % Used"
from #tableCount
where id != @log_id
select
DataSegTTL = str(@data_size * @psize_kb, 10, 0) + " KB",
LogReserved = str(rsrv*@psize_kb) + " KB",
FreeSpace = str((@data_size*@psize_kb)-(@rsrvTTL*@psize_kb)) + " KB",
Usage = str((((@rsrvTTL*@psize_kb)/(@data_size * @psize_kb)) * 100),6,2) + " % Used"
from #tableCount
where id = @log_id
if @@error != 0 return
end
select @message = " " + replicate("=",68)
print @message
select @message = "TOTAL SPACE RESERVED PER SEGMENT"
print @message
select
s.name "SegName",
Reserved = ltrim(str(convert(float,sum(rsrv))*@psize_kb,10,0)+" KB")
from syssegments s, #tableCount t
where s.segment = t.segment
group by s.name
order by s.segment
select @message = " " + replicate("=",68)
print @message
/* how much space the current db uses on each device */
select size = convert(float, high-low+1) * @psize_kb,
alloc = convert(float, sum(size)) * @psize_kb,
device = name,
total_alloc = convert(float, 0)
into #devSum
from master..sysusages, master..sysdevices
where vstart >= low and vstart <= high and cntrltype = 0
and dbid = db_id()
group by name
if @@error != 0 return
/* update total usage for each device. */
update #devSum
set total_alloc = ( select sum(su.size) from master..sysusages su,
master..sysdevices sd
where vstart >= low and vstart <= high and cntrltype = 0
and #devSum.device = sd.name
)
if @@error != 0 return
update #devSum
set total_alloc = total_alloc * @psize_kb
if @@error != 0 return
/* add summary lines to the device sum table */
insert #devSum
select sum(size), sum(alloc), "TOTAL", sum(total_alloc)
from #devSum
if @@error != 0 return
/* now print stats */
select @message = " DISK ACTUAL USAGE BY DEVICE FOR: " + db_name()
print @message
select @message = " " + replicate("=",68)
print @message
select DeviceName = device,
Size = str(size, 10, 0) + " KB",
ThisDb = str(alloc, 10, 0) + " KB",
OtherDb = str(total_alloc - alloc, 10, 0) + " KB",
UnAlloc = str(size - total_alloc, 10 ,0) + " KB",
Usage = str(total_alloc/size *100,6,2) + " % Used"
from #devSum
where device != "TOTAL"
order by device
if @@error != 0 return
select @message = " " + replicate("=",68)
print @message
select TOTAL = device,
Size = str(size, 10, 0) + " KB",
ThisDb = str(alloc, 10, 0) + " KB",
OtherDb = str(total_alloc - alloc, 10, 0) + " KB",
UnAlloc = str(size - total_alloc, 10 ,0) + " KB",
Usage = str(total_alloc/size *100,6,2) + " % Used"
from #devSum
where device = "TOTAL"
if @@error != 0 return
select @message = " " + replicate("=",68)
print @message
drop table #devSum
drop table #tableCount
return 0
go
EXEC sp_procxmode 'dbo.sp_dba_dbspace','unchained'
go
IF OBJECT_ID('dbo.sp_dba_dbspace') IS NOT NULL
PRINT '<<< CREATED PROCEDURE dbo.sp_dba_dbspace >>>'
ELSE
PRINT '<<< FAILED CREATING PROCEDURE dbo.sp_dba_dbspace >>>'
go
USE sybsystemprocs
go
GRANT EXECUTE ON dbo.sp_dba_dbspace TO public
go
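A minimal usage sketch, assuming a user database named pubs2 (substitute your own): sp_dba_dbspace reports on the database from which it is executed.
use pubs2
go
exec sp_dba_dbspace
go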
Sybase IPG Group White Paper
Series: Sybase IQ 12 Technical Series
Sybase IQ 12.5 Index Selection Criteria
Richard Soundy
Sybase IPG (Analytics) Group for EMEA
November 2003
Abstract
This document details the basic steps for determining the required indexes for a Sybase IQ 12.5 database. It is not intended as a complete blueprint for the required indexes; rather, it provides a starting layout and a basis for refining that layout.
In addition, this document only covers the 12.5 version of the IQ product. It is very likely that future releases will require a different choice of indexes, or perhaps provide a different set of indexes to select from.
Initial Information Requirements
In order to determine the “best” initial fit of indexes for a specific database and application, the following information will need to be gathered. It is understood that for a new system some, if not much, of this information will not be available; in that event the best available information should be used.
In all cases the monitoring of the performance of the server/database/application will be required to correct any errors in the initial index determination. The tools and techniques used for this monitoring are outside the scope of this document, and are a subject for future documentation.
Table Related Information Required
For each and every table the following information needs to be obtained:
1. Number of Rows in the table using the following bands:
i. Less than 2,000 rows
ii. Between 2,000 and 1,000,000 rows
iii. Greater than 1,000,000 rows.
The first two bands above (less than 1,000,000 rows) relate, in the majority of data warehouse designs, to dimension or look-up tables; the final band relates to the fact or event tables.
The reasons for the split are twofold. An IQ system will only access a table of fewer than 2,000 rows with Default or Fast Projection indexes[1]. Any enumerated indexes are only of use to the Optimizer, and are not used by the Run-Time Engine at all. This is not to say that the enumerated indexes are of no use; on the contrary, they provide priceless information to the optimizer in planning the query. And given the small size of the table(s), the indexes are of similarly small size.
The banding of 2,000 to 1,000,000 rows is driven by the relative sizes of the Low Fast (LF) and High Group (HG) index types. These two index types are often considered to be merely reflections of each other. This is very wrong. They are internally very different, have very different performance capabilities, and have differing load and storage costs. This document is not the place to go into the structures of the HG or LF indexes; however, the following two examples show the differing sizes of the two index types, which leads to the indexing guidelines presented further on in this document.
If we consider a table of 10,000 rows with a column that has a cardinality of 1,000[2], we can compare the differing storage sizes of the two index types under discussion (LF and HG).
An LF index will have 1 page of storage for the Index ID page and 1 bitmap for each and every discrete value the column is storing. For a 10,000-row table, given a page size of 64 Kbytes[3], each bitmap will be 1 page in size. Hence the size of the LF index will be 1,001 pages. It should be noted that this will not use 64 times 1,001 Kbytes of disk space, as the compression routines will shrink this to a smaller number. However, to access a given value the system will have to read 2 pages (ID + 1 bitmap).
For the HG index the calculation is somewhat different. The HG will still have an ID page (which will also contain the B-Tree component of the index), but the G-Array component will be stored in some 2 to 3 pages. This is because a G-Array page can hold some 8,000 row-id values; hence 10,000 row-ids (rows) can be stored in 2 to 3 pages. So the overall size of the HG index will be around 4 pages. Access to a given value will still only be 2 I/Os (pages).
Given that the optimizer will select (all other things being equal) the fast search index (LF or HG) that is physically smaller, the optimizer will always select the HG index here. Hence for a table of 10,000 rows an HG is far more efficient than an LF. There is a question of skewed data, but even this does not affect the overall size of the HG index to any great degree, as it will only add a couple of pages to the overall size of the index. The difference in load time between an LF and an HG index is, for all intents and purposes, irrelevant for a table of only 10,000 rows.
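The arithmetic behind these estimates can be summarised as follows (a rough sketch using the figures above; actual sizes will vary with compression, B-Tree overhead and data skew):
LF size = 1 ID page + (cardinality x pages per bitmap)
        = 1 + (1,000 x 1) = 1,001 pages
HG size = 1 ID/B-Tree page + ceiling(rows / row-ids per G-Array page)
        = 1 + ceiling(10,000 / 8,000) = 3 pages (around 4 with overhead)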
The break-even point on the relative size of the two index types is easily calculated for each table; but the larger the table, the more likely it is to be dynamic, and hence to favour the use of the LF index for low cardinality columns.
We could continue to provide sizing costs for the two indexes, but the logic is clear: for smaller tables the HG index should be considered the “index of choice”.
So where is the break point? There is no firm number of rows at which the LF becomes a “better” index than the HG, but we would expect it to occur somewhere in the few million rows. Of course, other questions need to be answered; for example, if the table has very low volatility[4] or if size compression is not an issue, then you might wish to use the HG for low cardinality columns on tables of up to 10 or 20 million rows.
Column Related Information Required
For each and every column the following information should be obtained:
1. The cardinality of the column
2. The data type of the column
3. The expected use of the column (illustrative queries follow this list):
i. Simple searches (equality/inequality, EXISTS, IN)
ii. Range Searches (<, >, between)
iii. Aggregation functions (SUM(), AVG(), DISTINCT, COUNT)
iv. Intra-table comparison
v. Free Text searches (CONTAINS, LIKE)
vi. Ordering and grouping (ORDER BY, GROUP BY)
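By way of illustration only (using hypothetical table and column names), the usage categories above correspond to queries such as the following:
SELECT COUNT(*) FROM sales WHERE region = 'EMEA';                          -- i. simple search
SELECT * FROM sales WHERE sale_date BETWEEN '2003-01-01' AND '2003-06-30'; -- ii. range search
SELECT SUM(amount), AVG(amount) FROM sales;                                -- iii. aggregation
SELECT * FROM sales WHERE amount > discount;                               -- iv. intra-table comparison
SELECT * FROM sales WHERE CONTAINS (notes, 'rebate');                      -- v. free text search
SELECT region, COUNT(*) FROM sales GROUP BY region ORDER BY region;        -- vi. ordering and grouping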
This information is required to determine the LF/HG and other index requirements. It is also deemed the most difficult information to gather for a new system. If the Sybase IQ database is to accelerate or replace an existing system, there should be no barrier to obtaining any of the above information.
As the volume of this information is likely to be high, it is suggested that some form of schema design tool be adopted to hold it. A product such as Sybase PowerDesigner is recommended for this area, as it provides many other features in addition to its documentation functions.
Database Index Selection
The following list needs to be applied to each and every column in each and every table (with the possible exception of tables of less than 2,000 rows) in the database.
- For each column the cardinality needs to be supplied to the CREATE TABLE IQ UNIQUE constraint (a sketch follows this list). This will provide the best version of the FP index (1-byte FP, 2-byte FP or Flat FP). Note that the MINIMIZE_STORAGE option will auto-generate an IQ UNIQUE value of 255 for all columns for which IQ UNIQUE is not specified.
Note: for a column that has a data type of FLOAT, REAL or DOUBLE, the only index that can be applied is the Flat FP. For a column of data type BIT a custom index structure is automatically generated under it, hence there is no requirement for any further indexes to be placed upon it. A column of data type VARCHAR() with a length greater than 255 can only have a Flat FP and (if required) a Word (WRD) index placed on it.
- If a column is a PRIMARY KEY or UNIQUE then this should be specified within the CREATE TABLE statement; this will auto-generate a unique HG index on the column, allowing improved storage, better join processing and more statistics for the optimizer.
- If a column is a FOREIGN KEY then this should also be defined in the CREATE TABLE statement, again allowing automatic construction of an HG index; this is vital both for the optimizer and for join performance.
- If a table has a multi-column PRIMARY or FOREIGN KEY, this can be specified, and the system will auto-generate the correct type of Multi-Column HG index on the columns. (Note that the individual columns within the key will not have an HG index placed upon them, so it is the responsibility of the designer to ensure that, if these columns are used for searches, the appropriate indexes are placed upon them.)
- Any other join column (a column that is used in a join, but not in a PRIMARY KEY/FOREIGN KEY join) should also have an HG index placed upon it.
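As a sketch only, the following CREATE TABLE illustrates the points above; the table, column names and cardinalities (sales, store_dim, region and so on) are hypothetical. IQ UNIQUE records a measured cardinality, the PRIMARY KEY auto-generates a unique HG index, and the FOREIGN KEY auto-generates an HG index:
CREATE TABLE sales
(
    sale_id    INT           NOT NULL,
    store_id   INT           NOT NULL,
    region     CHAR(4)       NOT NULL IQ UNIQUE(200), -- measured cardinality of about 200
    sale_date  DATE          NOT NULL,
    amount     NUMERIC(12,2),
    discount   NUMERIC(12,2),
    notes      VARCHAR(300), -- longer than 255: Flat FP (plus optional word index) only
    PRIMARY KEY (sale_id),   -- auto-generates a unique HG index
    FOREIGN KEY (store_id) REFERENCES store_dim (store_id) -- auto-generates an HG index
);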
At this point all of the above information applies to columns of any cardinality and to tables of any size. The following steps may require the column cardinality and the table size to be considered.
- For a column that is going to be used for searching (equality, inequality), for certain aggregations (COUNT, DISTINCT), or for grouping and ordering, either an HG or an LF index is required. For a table of less than around 1 million rows (see discussion above) an HG index is more efficient for all cardinalities. For a table greater in size than this, use an LF for columns with a cardinality of less than around 1,500 and an HG for columns that have a cardinality greater than this.
- For a column that has a data type of DATE, TIME or DATETIME, it is suggested that the corresponding DATE, TIME or DTTM index is applied. This will provide for both range searches and date function searches (DATEPART() etc.).
- For a non-date/time column that is involved in range searches and/or SUM() and AVG() aggregation functions, it is suggested that an HNG index is applied. This may not be required if the column has a high percentage of NULL values (greater than 10% of the rows). The use of HNG indexes should also be monitored, as HG and LF can provide (in some cases) a faster range search capability.
- Any column that is compared with another column in the same table should have a compare (CMP) index applied to the comparison pair of columns.
- Any VARCHAR() or CHAR() columns that are likely to be used for word searches should have a word (WRD) index applied to them; a sketch of the corresponding CREATE INDEX statements follows this list.
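A sketch of CREATE INDEX statements for the hypothetical sales table above. These illustrate the guidelines only; note that the actual DDL keyword for the word index in IQ 12.5 is WD, and that a CMP index requires a pair of columns of identical data type and precision:
CREATE HG INDEX   sales_store_hg   ON sales (store_id);         -- non-key join column
CREATE LF INDEX   sales_region_lf  ON sales (region);           -- low cardinality, large table
CREATE DATE INDEX sales_date_dte   ON sales (sale_date);        -- range and DATEPART() searches
CREATE HNG INDEX  sales_amount_hng ON sales (amount);           -- range searches, SUM()/AVG()
CREATE CMP INDEX  sales_amt_cmp    ON sales (amount, discount); -- intra-table comparison pair
CREATE WD INDEX   sales_notes_wd   ON sales (notes);            -- word searches (the WRD above)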
Conclusion
That is really all that is required for the implementation of indexes in an IQ database. Naturally there are always exceptions (for both table sizes and cardinality [HG vs. LF]), and the performance of the system must be monitored to determine whether further indexes are required, or perhaps whether some indexes should be removed.
For further information on the structure and function of the various index types, the Sybase IQ manual set can be consulted; in addition, the Sybase Professional Services Groups can be used both as implementers and as trainers in some of the more esoteric functionality of the Sybase IQ index sets.
Richard Soundy
November 2003
Sybase, Inc.
Worldwide Headquarters
Tel: +800 8 SYBASE
www.sybase.com
[1] In this document I generally refer to Default Indexes as Fast Projection or FP indexes.
[2] The column supports the storage of 1,000 different values.
[3] The default IQ Page Size for an IQ 12.5 Database.
[4] A table that is low volatility does not change very often; hence the loading time is not of issue.