Tuesday, March 12, 2019

Advance Oracle Full Text Indexing on External OS File Storage


Here is my case

Files are storing on separate server (NAS).  I want to build a table in Oracle for indexing only, the table size must as small as possible.  I can full search my NAS with SQL to find where my file(s) is located.

1. Setup Connection to NAS from Oracle Server

First I need to setup my SMB client connection in Oracle Server which is mapped to a logical path
/apexmedia/hknas, to do this setup the /etc/fstab
here is my setting
#
# /etc/fstab
# Created by anaconda on Thu Nov 16 16:36:31 2017
#
# Accessible filesystems, by reference, are maintained under '/dev/disk'
# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) for more info
#
/dev/mapper/vg_cloud-lv_root /                       ext4    defaults        1 1
UUID=16f3bc2b-1a1c-4eca-bee7-116c63db1af7 /boot                   ext4    defaults        1 2
UUID=B740-E4E8          /boot/efi               vfat    umask=0077,shortname=winnt 0 0
/dev/mapper/vg_cloud-lv_home /home                   ext4    defaults        1 2
/dev/mapper/vg_cloud-lv_swap swap                    swap    defaults        0 0
tmpfs                   /dev/shm                tmpfs   defaults        0 0
devpts                  /dev/pts                devpts  gid=5,mode=620  0 0
sysfs                   /sys                    sysfs   defaults        0 0
proc                    /proc                   proc    defaults        0 0
\\192.168.0.200\hv-dept   /apexmedia/hknas        cifs    user,uid=502,r,suid,credentials=/root/secret.txt        0 0
 

and i can access my NAS from oracle server with path /apexmedia/hknas

2.  Setup Oracle Table to hold file list of my NAS (you will need additional package from http://plsqlexecoscomm.sourceforge.net

create table demo_filelist as
select * from table(file_pkg.get_file_list(file_pkg.get_file('/apexmedia/hknas/MIS/Full Index Test')));



select * from demo_filelist;

FILE_PATH
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
FILE_NAME
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 FILE_SIZE LAST_MODIFIED    I I I F
---------- ---------------- - - - -
/apexmedia/hknas/MIS/Full Index Test/rfc2822.pdf                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             
rfc2822.pdf                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
    156631 2019-01-31 17:33 N Y Y Y

/apexmedia/hknas/MIS/Full Index Test/sample.ods                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              
sample.ods                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
      8661 2019-02-19 11:30 N Y Y Y

FILE_PATH
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
FILE_NAME
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 FILE_SIZE LAST_MODIFIED    I I I F
---------- ---------------- - - - -

/apexmedia/hknas/MIS/Full Index Test/sample.xlsx                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             
sample.xlsx                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
      5341 2019-02-19 11:31 N Y Y Y

/apexmedia/hknas/MIS/Full Index Test/Thumbs.db                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
Thumbs.db                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    

FILE_PATH
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
FILE_NAME
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 FILE_SIZE LAST_MODIFIED    I I I F
---------- ---------------- - - - -
      8704 2019-02-19 11:30 N Y Y Y


3. Create an SP for accessing my NAS datastore

to create the sp demo_ds, you will also need these two objects HKNAS_DIR and  fn_get_blobfile

create or replace directory HKNAS_DIR as '/apexmedia/hknas/'

create or replace function fn_get_blobfile
(
  p_dir in varchar2
, p_file in varchar2
) return blob

as
  l_blob blob;
  l_bfile bfile;
begin
      DBMS_LOB.CREATETEMPORARY(l_blob, TRUE);

      l_bfile := bfilename(p_dir, p_file );
     
      dbms_lob.fileopen( l_bfile );

      dbms_lob.loadfromfile( l_blob, l_bfile,
                               dbms_lob.getlength( l_bfile ) );
      DBMS_LOB.filecloseall;
      return l_blob;
exception
  when others then
      return null; 
     
end fn_get_blobfile;




create or replace procedure demo_ds(rid in rowid, dataout in out blob)
as
  l_file_path       demo_filelist.file_path%type;
  l_is_dir          demo_filelist.is_dir%type;
  l_is_readable     demo_filelist.is_readable%type;
  l_file_exists     demo_filelist.file_exists%type;

begin
  select a.file_path, a.is_dir, a.is_readable, a.file_exists into l_file_path, l_is_dir, l_is_readable, l_file_exists
    from demo_filelist a where  rowid = rid ;
  if l_is_dir = 'N' and l_is_readable = 'Y' and l_file_exists = 'Y' then
    select fn_get_blobfile('HKNAS_DIR', replace(l_file_path,'/apexmedia/hknas','')) into dataout from dual;
  else
    dataout := null;
  end if;
end demo_ds;



4. create a user data store(demo_ud) attributes for full text indexing

begin
  ctx_ddl.create_preference('demo_ud', 'user_datastore');
  ctx_ddl.set_attribute('demo_ud', 'procedure', 'demo_ds');
  ctx_ddl.set_attribute('demo_ud', 'output_type', 'blob_loc');
end;


note that you might need to remove the demo_ud with following statement if encounter any problem to rerun step 4.

exec ctx_ddl.drop_preference('demo_ud');








5. create the full text index

create index demo_filelist_ctx on demo_filelist(file_path) indextype is ctxsys.context
parameters ('filter ctxsys.auto_filter datastore demo_ud nopopulate')
parallel 20;


5a.-- generating pending indexing to queue
exec ctx_ddl.populate_pending('demo_filelist_ctx');

-- you can see the indexing queue is generated
SELECT pnd_index_name, pnd_rowid, to_char(
        pnd_timestamp, 'dd-mon-yyyy hh24:mi:ss'
        ) timestamp FROM ctx_user_pending;



-- process indexing objects in queue
exec ctx_ddl.sync_index(idx_name =>'demo_filelist_ctx',maxtime=>2) ;


--for large data store indexing takes long time.  
--after indexing finish, the queue should be empty.
SELECT pnd_index_name, pnd_rowid, to_char(
        pnd_timestamp, 'dd-mon-yyyy hh24:mi:ss'
        ) timestamp FROM ctx_user_pending;


--if indexing error occurs, you can check error with following statement. 
select * from ctx_user_index_errors  where lower(err_index_name) = 'demo_filelist_ctx' order by err_timestamp desc;

--to clear error log, you can
delete from ctx_user_index_errors where err_index_name = 'DMS_FILELIST_CTX';
commit;




6. Check the full text search

select a.*, fn_get_blobfile('HKNAS_DIR',replace(a.file_path,'/apexmedia/hknas','')) blobfile
from demo_filelist a where contains(file_path, 'Text You want to search',1)>0;

-- you can download the file from column blobfile...



No comments: