среда, 23. октобар 2013.

MySQL - space reclaim and Chuck Norris

Chuck Norris is the only one able to reclaim space from InnoDB engine without doing logical dump and restore ....
And community is screaming about Postgres' VACUUM FULL ...

субота, 18. фебруар 2012.

SVN 1.7 on Ubuntu 11.10

This is just a reminder for myself (maybe you will find it useful):


echo "deb http://opensource.wandisco.com/ubuntu lucid svn17" | sudo tee /etc/apt/sources.list.d/svn.list
sudo wget -q http://opensource.wandisco.com/wandisco-debian.gpg -O- | sudo apt-key add -
sudo apt-get update
sudo apt-get install subversion
sudo apt-get install libsvn-java


For Eclipe Indigo (or any other ...) add the following to eclipse.ini:


-Djava.library.path=/usr/lib/jni

уторак, 6. децембар 2011.

Recreate primary keys as CLUSTERED (SQL Server 2008)

Let's say that you want to re-create all nonclustered PKs and make them clustered.
Here is stored procedure which will generate necessary DDLs.

Table that will store DDLs:


create table log_table
(
id integer identity,
t text
);



Stored procedure:


IF OBJECT_ID ( 'sp_fk_pk', 'P' ) IS NOT NULL
DROP PROCEDURE sp_fk_pk;
GO

CREATE PROCEDURE sp_fk_pk
AS

-- Declare variables to store data returned by cursor
DECLARE
@childColumnList varchar(2000),
@parentColumnList varchar(2000),
@childSchema varchar(100),
@childTable varchar(100),
@fkName varchar(100),
@parentSchema varchar(100),
@parentTable varchar(100);

-- clean log table
TRUNCATE TABLE log_table;


-- generate DDL to drop all foreign keys
INSERT INTO LOG_TABLE (t)
SELECT
'alter table ['+ SCHEMA_NAME(TB.schema_id)+'].['+OBJECT_NAME(F.PARENT_OBJECT_ID)+'] drop constraint ' + F.NAME +';'
FROM SYS.FOREIGN_KEYS AS F
JOIN sys.tables AS TB ON F.parent_object_id = TB.object_id
JOIN sys.tables AS FT ON F.REFERENCED_OBJECT_ID = FT.object_id;



/**
PRIMARY KEY
**/



-- declare the cursor to get all NONCLUSTERED indexes on primary keys
DECLARE dm_cursor_pk Cursor
FOR
select TABLE_SCHEMA, TABLE_NAME, name
from sys.indexes i, INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc
where name=tc.CONSTRAINT_NAME
and type_desc='NONCLUSTERED';

-- open cursor
OPEN dm_cursor_pk

-- iterate over recordset
FETCH NEXT from dm_cursor_pk
INTO
@childSchema,
@childTable,
@fkName;

WHILE (@@FETCH_STATUS <> -1)
BEGIN
IF ( @@FETCH_STATUS <> -2 )

SET @childColumnList = NULL;
-- get list of columns that make PK
select @childColumnList = coalesce(@childColumnList + ', ', '') + column_name
from INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE
where constraint_name=@fkName;

-- insert DDL for index recreation into log table
INSERT INTO LOG_TABLE (T) VALUES ('create unique clustered index ' + @fkName + ' on [' + @childSchema + '].[' + @childTable + '](' + @childColumnList + ') with drop_existing;');

-- iterate over recordset
FETCH NEXT from dm_cursor_pk
INTO
@childSchema,
@childTable,
@fkName;
END

-- close the cursor
CLOSE dm_cursor_pk

-- deallocate the space
DEALLOCATE dm_cursor_pk



/*
FOREIGN KEYS CREATION
*/


-- declare the cursor to get all foreign keys
DECLARE dm_cursor_fk Cursor
FOR
SELECT
distinct(F.NAME)
FROM SYS.FOREIGN_KEYS AS F
JOIN SYS.FOREIGN_KEY_COLUMNS AS FC ON F.OBJECT_ID = FC.CONSTRAINT_OBJECT_ID
JOIN sys.tables AS TB ON F.parent_object_id = TB.object_id
JOIN sys.tables AS FT ON F.REFERENCED_OBJECT_ID = FT.object_id;


-- open cursor
OPEN dm_cursor_fk

-- iterate over recordset
FETCH NEXT from dm_cursor_fk
INTO
@fkName;

WHILE (@@FETCH_STATUS <> -1)
BEGIN
IF ( @@FETCH_STATUS <> -2 )
SET @childColumnList = NULL;
SET @parentColumnList = NULL;

-- get list of columns that make FK
SELECT
@childSchema = SCHEMA_NAME(TB.schema_id),
@childTable = OBJECT_NAME(F.PARENT_OBJECT_ID),
@childColumnList = coalesce(@childColumnList + ', ', '') + COL_NAME(FC.PARENT_OBJECT_ID,FC.PARENT_COLUMN_ID),
@parentSchema = SCHEMA_NAME(FT.schema_id),
@parentTable = OBJECT_NAME (F.REFERENCED_OBJECT_ID),
@parentColumnList = coalesce(@parentColumnList + ', ', '') + COL_NAME(FC.REFERENCED_OBJECT_ID,FC.REFERENCED_COLUMN_ID)
FROM (SELECT * FROM SYS.FOREIGN_KEYS WHERE NAME=@fkName) AS F
JOIN SYS.FOREIGN_KEY_COLUMNS AS FC ON F.OBJECT_ID = FC.CONSTRAINT_OBJECT_ID
JOIN sys.tables AS TB ON F.parent_object_id = TB.object_id
JOIN sys.tables AS FT ON F.REFERENCED_OBJECT_ID = FT.object_id
ORDER BY SCHEMA_NAME(TB.schema_id), OBJECT_NAME(F.PARENT_OBJECT_ID);

-- generate DDL to recreate FK
INSERT INTO LOG_TABLE (T) VALUES ('alter table [' + @childSchema + '].[' + @childTable + '] '
+ ' add constraint ' + @fkName + ' foreign key (' + @childColumnList + ') references ' + '[' + @parentSchema + '].[' + @parentTable + '] '
+ '(' + @parentColumnList + '); ');
-- iterate over recordset
FETCH NEXT from dm_cursor_fk
INTO
@fkName;

END

-- close the cursor
CLOSE dm_cursor_fk

-- deallocate the space
DEALLOCATE dm_cursor_fk


GO




Invocation of the procedure:

exec sp_fk_pk



Check the DDLs:

select t from log_table order by id


Note: use generated DDLs at your own risk

Sample schema:

create table a (
id integer not null,
t varchar(10)
);
alter table a add constraint pk_a primary key nonclustered(id);

create table b (
id integer not null,
a_id integer
);
alter table b add constraint pk_b primary key nonclustered(id);
alter table b add constraint fk_b_a foreign key (a_id) references a (id);

create table c (
id integer not null,
t varchar(10)
);
alter table c add constraint pk_c primary key nonclustered(id);

create table d (
id integer not null,
id1 integer not null,
t varchar(10)
);
alter table d add constraint pk_d primary key nonclustered(id,id1);


create table e (
id integer not null,
d_id integer,
d_id1 integer
);
alter table e add constraint pk_e primary key nonclustered(id);
alter table e add constraint fk_e_d foreign key (d_id,d_id1) references d (id,id1);

петак, 26. август 2011.

Why select max(id) is wrong with PostgreSQL


Today I got complaints from ex-team leader about some stored procedure and "bad coding practice".

I'm going to elaborate problem a bit (it's classic in DB development). And it's real error, not bad coding practice ...

The stored procedure is part of API for a primitive queuing mechanism written in plpgsql for my ex company.
The queuing mechanism was written 5.5 years ago.


Problematic procedure contains code like this:

....
insert into table a (....)
select max(id) from a

....

So I did basic mistake: first insert into table, followed by immediate selection of max id.

Default isolation level for PostgreSQL is READ COMMITTED, so (theoreticaly) the same procedure invoked from the other transaction could commit between insert and select of the previous transaction moving MAX(ID) to a higher value.


Let's "construct" an example.

First, let's create two tables: table a will be main table which will store IDs and the second one, b, will store inserted ID (i.e. expected ID) and MAX(ID) (i.e. the ID we got).

create table a (id serial);
create table b (a_id integer, a_max_id integer);


Here is the store procedure:

create or replace function insert_into_a (_sleep_time numeric(12,4) )

returns integer as
$body$
declare
_id integer;
begin
-- I did small trick here: I prefetched nextval from a_id_seq (as I had to do 6 years ago! shame on me!)
-- to show the difference between what we excpected to get and what we got.
select nextval('a_id_seq') into _id;

-- ok, let's insert nextval
insert into a values (_id);

-- let's sleep for given number of seconds
perform pg_sleep(_sleep_time);

-- insert generated id and max id
insert into b select _id, max(id) from a;

return _id;

end;

$body$
language plpgsql volatile
cost 100;



The procedure invokes pg_sleep: handy procedure which can help you create race condition tests and emulate real execution.
pg_sleep accepts one numeric parameter which represents number of seconds. The parameter is double precision, so fractional-second delays can be specified.

OK, now we have tables and stored procedure: let's test the system.

First single transaction which sleeps 0 seconds:

milos=# select * from b;

a_id | a_max_id
------+----------
(0 rows)

milos=# begin;
BEGIN
milos=# select insert_into_a(0);
insert_into_a
---------------
1
(1 row)

milos=# commit;
COMMIT
milos=# select * from b;
a_id | a_max_id
------+----------
1 | 1
(1 row)


OK, expected result.


Now real action.

Transaction T1

milos=# begin;

BEGIN
milos=# select insert_into_a(100);
....

100 seconds: plenty of time to create second transaction which will screw our final result.


Transaction T2

milos=# begin;

BEGIN
milos=# select insert_into_a(0);
insert_into_a
---------------
3
(1 row)

milos=# select * from b;
a_id | a_max_id
------+----------
1 | 1
3 | 3
(2 rows)

milos=# commit;
COMMIT


So far, so good.


Back to committed transaction T1:

 insert_into_a 

---------------
2
(1 row)

milos=# select * from b;
a_id | a_max_id
------+----------
1 | 1
3 | 3
2 | 3
(3 rows)


HA!
Last pair (2,3) clearly shows "bad coding practice" :)

So, the proper way is to prefetch the ID using nextval('a_id_seq') and than to re-use it using currval('a_id_seq').

OK, I agree that this example is fake: we achieved with pg_sleep something that cannot be done with real procedure, but still we have race condition, and sometimes it can produce error which can be hard to trace and find (maybe it didn't fail in 5.5 years but ....).



четвртак, 16. септембар 2010.

Oracle SOA Suite 11.1.1.2: purge dehydration store

Recently, while working on one major, mission critical system, we have discovered problem in Oracle SOA Suite 11.1.1.2 purging scripts.
After a day spent on Metalink searching for patches and solutions we opened SR. Oracle Support recommended patch no. 9645701 (note ID 1131775.1; this patch is part of 11.1.1.3).

The main problem was purging of couple of major tables: xml_document, dlv_message, document_dlv_msg_ref.

However, patching didn't bring solution for orphaned messages left behind by buggy 11.1.1.2 scripts.

Since I was in-charge for the database (in general), role to dive into into purge scripts which come with Repository Creation Utility was mine.
After couple of queries, few major tests (God bless big teams: lot of developers' schemes for testing!), I released purge bridge: one-time shot script which should be used to remove "junk" left by .2 scripts.

In order to check critical tables affected by instance growth in your DB, execute diagnostic.sql (I'd be happy to get results from your environment; feel free to mail them to me).



-- extract distinct states
select distinct (case when state=0 then 'running'
when state=1 then 'recovery required'
when state=2 then 'completed successfully'
when state=3 then 'faulted'
when state=4 then 'terminated by user'
when state=5 then 'suspended'
when state=6 then 'stale'
else state || ''
end) as states_from_comp_inst
from composite_instance;

-- extract number of instances per state
select case when state=0 then 'running'
when state=1 then 'recovery required'
when state=2 then 'completed successfully'
when state=3 then 'faulted'
when state=4 then 'terminated by user'
when state=5 then 'suspended'
when state=6 then 'stale'
else state || ''
end, count(*) as num_of_comp_inst
from composite_instance group by state;


-- extract distinct states
select distinct (case when state=1 then 'running'
when state=2 then 'open suspended'
when state=3 then 'open faulted'
when state=5 then 'completed'
when state=6 then 'closed faulted'
when state=8 then 'aborted'
when state=9 then 'closed stale'
else state || ''
end) as state_from_cube_inst
from cube_instance;

-- extract number grouped by state
select case when state=1 then 'running'
when state=2 then 'open suspended'
when state=3 then 'open faulted'
when state=5 then 'completed'
when state=6 then 'closed faulted'
when state=8 then 'aborted'
when state=9 then 'closed stale'
else state || ''
end, count(*) as num_of_cube_inst
from cube_instance group by state;


select 'AUDIT_TRAIL', count(*) as audit_trail_num from audit_trail;
select 'CUBE_SCOPE', count(*) as cube_scope_num from cube_scope;
select 'CI_INDEXES', count(*) as ci_indexes_num from ci_indexes where cikey not in ( select cikey from cube_instance );
select 'WORK_ITEM', count(*) as work_item_num from work_item;
select 'WI_FAULT', count(*) as wi_fault_num from wi_fault;
select 'AUDIT_DETAILS', count(*) as audit_details_num from audit_details;
select 'TEST_DETAILS', count(*) as test_details_num from test_details;
select 'DOCUMENT_CI_REF', count(*) as document_ci_ref_num from document_ci_ref;
select 'AG_INSTANCE', count(*) as ag_instance_num from ag_instance;
select 'DLV_SUBSCRIPTION', count(*) as dlv_subscription_num from dlv_subscription;



select 'XML_DOCUMENT' as n, a.xml_document_num as total, b.xml_document_orph_num as orph, 100*b.xml_document_orph_num/a.xml_document_num as "%"
from
( select count(*) as xml_document_num from xml_document ) a,
(select count(*) as xml_document_orph_num from xml_document where document_id not in
(select document_id from document_dlv_msg_ref r, dlv_message m, cube_instance c
where r.message_guid=m.message_guid and m.cikey=c.cikey)
and document_id not in ( select payload_key from instance_payload ))b
union all
select 'DOCUMENT_DLV_MSG_REF' as n, a.xml_document_dlv_msg_ref_num, b.xml_doc_dlv_msg_ref_orp_num, 100*b.xml_doc_dlv_msg_ref_orp_num/a.xml_document_dlv_msg_ref_num as perc_of_orph
from
(select count(*) as xml_document_dlv_msg_ref_num from document_dlv_msg_ref) a,
(select count(*) as xml_doc_dlv_msg_ref_orp_num from document_dlv_msg_ref where message_guid not in
(select m.message_guid from dlv_message m, cube_instance c
where m.cikey=c.cikey)) b
union all
select 'XML_DOCUMENT_REF' as n, a.xml_document_ref_num, b.xml_document_ref_orph_num, 100*b.xml_document_ref_orph_num/a.xml_document_ref_num as perc_of_orph
from
(select count(*) as xml_document_ref_num from xml_document_ref) a,
(select count(*) as xml_document_ref_orph_num from xml_document_ref where document_id not in
(select document_id from document_dlv_msg_ref r, dlv_message m, cube_instance c
where r.message_guid=m.message_guid and m.cikey=c.cikey)
and document_id not in ( select payload_key from instance_payload )) b
union all
select 'HEADERS_PROPERTIES' as n, a.headers_properties_num, b.headers_properties_orph_num, 100*b.headers_properties_orph_num/a.headers_properties_num as perc_of_orph
from
(select count(*) as headers_properties_num from headers_properties) a,
(select count(*) as headers_properties_orph_num from headers_properties where message_guid in
(select m.message_guid from dlv_message m where m.cikey not in ( select cikey from cube_instance ))) b
union all
select 'DLV_MESSAGE' as n, a.dlv_message_num, b.dlv_message_orph_num, 100*b.dlv_message_orph_num/a.dlv_message_num as perc_of_orph
from
(select count(*) as dlv_message_num from dlv_message) a,
(select count(*) as dlv_message_orph_num from dlv_message where cikey not in ( select cikey from cube_instance )) b;



After this, you can execute clean.sql.



/*
*
Script which purges the tables impacted by instance data growth as documented on:
http://download.oracle.com/docs/cd/E14571_01/core.1111/e10108/bpel.htm#BABFBDGF

The script removes junk left over by 11.1.1.2 purge scripts

*/

create table xml_document_x as
select * from xml_document where document_id in
(select document_id from document_dlv_msg_ref r, dlv_message m, cube_instance c
where r.message_guid=m.message_guid and m.cikey=c.cikey)
or document_id in ( select payload_key from instance_payload );
alter table b2b_data_storage disable constraint b2b_ds_doc_id_fk;
truncate table xml_document;
alter table b2b_data_storage enable constraint b2b_ds_doc_id_fk;
insert into xml_document select * from xml_document_x;
commit;
drop table xml_document_x;



create table xml_document_ref_x as
select * from xml_document_ref where document_id in
(select document_id from document_dlv_msg_ref r, dlv_message m, cube_instance c
where r.message_guid=m.message_guid and m.cikey=c.cikey)
or document_id in ( select payload_key from instance_payload );
truncate table xml_document_ref;
insert into xml_document_ref select * from xml_document_ref_x;
commit;
drop table xml_document_ref_x;



create table document_dlv_msg_ref_x as
select * from document_dlv_msg_ref where message_guid in
(select m.message_guid from dlv_message m, cube_instance c
where m.cikey=c.cikey);
truncate table document_dlv_msg_ref;
insert into document_dlv_msg_ref select * from document_dlv_msg_ref_x;
commit;
drop table document_dlv_msg_ref_x;




create table headers_properties_x as
select * from headers_properties where message_guid in (select m.message_guid from dlv_message m where m.cikey in ( select cikey from cube_instance ));
commit;
truncate table headers_properties;
insert into headers_properties select * from headers_properties_x;
commit;
drop table headers_properties_x;




create table dlv_message_x as
select * from dlv_message where cikey in ( select cikey from cube_instance );
commit;
truncate table dlv_message;
insert into dlv_message select * from dlv_message_x;
drop table dlv_message_x;



purge recyclebin;





Note: make backup :)


Cheers.

уторак, 19. јануар 2010.

Delete cascade - PL/SQL version

Lets suppose that:

  • foreign key constraints are created without "CASCADE" option enabled

  • all primary keys are simple numeric(19) fields named 'ID'

  • no cyclic references (i.e. tableA -> tableB and tableB -> tableA)



You got request from developer to delete "one problematic row" in XY table.
Unfortunately, your row is referenced by multiple child rows in other tables, those child rows are referenced by other rows and so on, and so on ...
This scenario is not the worst one if you made the database design. But if you are just
simple DBA on complicated project, this can spoil your daily routine.




create or replace procedure delete_cascade( table_name_ in varchar2, id_ in numeric ) is
/*
Generates set of DELETE statements which have to be executed prior to final entry deletion.

Stored procedure have to be invoked with parameters:
TABLE_NAME_ : name of the table from which record have to be deleted
ID_ : id of the record

Stored procedure assumes:
- that all primary keys are named 'ID' and that pk's data type is 'NUMERIC'
- that cycle references (i.e. tableA -> tableB and tableB -> tableA) do not exist

Author: Milos Babic

*/
TYPE refcur IS REF CURSOR;

childid_ numeric(19);
fetch_sql_ varchar2(2000);
cur_child_tables refcur;

cursor ref_tables is
select fk.table_name as child_table_name_, fk_c.column_name as child_column_name_
from user_constraints fk,
user_constraints pk,
user_cons_columns fk_c
where fk.constraint_type='R' and fk.r_constraint_name=pk.constraint_name
and pk.table_name=table_name_ and fk_c.constraint_name=fk.constraint_name;

begin

for iterator in ref_tables loop

fetch_sql_ := 'select id from ' || iterator.child_table_name_ || ' where ' || iterator.child_column_name_ || '=' || id_;

open cur_child_tables for fetch_sql_;

loop
fetch cur_child_tables into childid_;
exit when cur_child_tables%notfound;
delete_cascade ( iterator.child_table_name_, childid_ );
end loop;

close cur_child_tables;

insert into delete_log values (delete_log_seq.nextval, 'delete from ' || iterator.child_table_name_ || ' where ' || iterator.child_column_name_ || ' = ' || id_);

end loop;

end delete_cascade;




This procedure expects that you have created table DELETE_LOG and sequence DELETE_LOG_SEQ:



create sequence delete_log_seq;
create table delete_log (
id numeric(19),
statement varchar2(2000)
);



Upon successful execution, you can obtain list of delete statements from DELETE_LOG table.


select statement from delete_log order by id




In case you want to try, this is sample DB



create table a (
id numeric(19),
a_log varchar2(100),
constraint pk_a primary key (id)
) tablespace user_data;


create table a_a (
id numeric(19),
a_id numeric(19),
a_a_log varchar2(100),
constraint pk_a_a primary key (id)
) tablespace user_data;
alter table a_a
add constraint fk_a_a_a foreign key (a_id)
references a (id)
initially immediate deferrable;


create table a_b (
id numeric(19),
a_id numeric(19),
a_b_log varchar2(100),
constraint pk_a_b primary key (id)
) tablespace user_data;
alter table a_b
add constraint fk_a_b_a foreign key (a_id)
references a (id)
initially immediate deferrable;

create table a_a_a (
id numeric(19),
a_a_id numeric(19),
a_a_a_log varchar2(100),
constraint pk_a_a_a primary key (id)
) tablespace user_data;
alter table a_a_a
add constraint fk_a_a_a_a foreign key (a_a_id)
references a_a (id)
initially immediate deferrable;


insert into a values (1,'1');
insert into a values (2,'2');
insert into a values (3,'3');

insert into a_a values (1,1,'1');
insert into a_a values (2,1,'1');
insert into a_a values (3,2,'1');
insert into a_a values (4,3,'1');

insert into a_b values (1,1,'1');
insert into a_b values (2,1,'1');
insert into a_b values (3,2,'1');
insert into a_b values (4,3,'1');


insert into a_a_a values (1,1,'1');
insert into a_a_a values (2,1,'1');
insert into a_a_a values (3,2,'1');
insert into a_a_a values (4,2,'1');