вторник, 25 ноября 2014 г.

Extract duplicate keywords from string

SQL example using wm_concat function:

 
select wm_concat(distinct keyword) keywords
  from (
        SELECT LEVEL                                 AS keyword_no
           ,      REGEXP_SUBSTR(keywords, '[^,]+', 1, LEVEL) AS keyword
           --, str     
           FROM  (
                  SELECT ROWNUM AS id
                  ,      'SCOTT,ALLEN,KING,SCOTT,12345,SCOTT,12345,ALLEN,SCOTT' as keywords
                  FROM   dual
                 )
           CONNECT BY
                  -- INSTR for compatibility with Oracle 10g 
                  --INSTR(keywords, ',', 1, LEVEL-1) > 0
                  -- From Oracle 11g may used regexp_count 
                  LEVEL <= regexp_count(keywords,',') + 1
                  AND id = PRIOR id
                  AND PRIOR DBMS_RANDOM.VALUE IS NOT NULL
  )          
;

KEYWORDS                  
--------------------------------------------
12345,ALLEN,KING,SCOTT                      
1 row selected.


SQL example using listagg function:

 
select listagg(keyword, ',') WITHIN GROUP (ORDER BY keyword) keywords 
  from (
        select distinct keyword
          from (
                SELECT LEVEL                                 AS keyword_no
                   ,      REGEXP_SUBSTR(keywords, '[^,]+', 1, LEVEL) AS keyword
                   --, str     
                   FROM  (
                          SELECT ROWNUM AS id
                          ,      'SCOTT,ALLEN,KING,SCOTT,12345,SCOTT,12345,ALLEN,SCOTT' as keywords
                          FROM   dual
                         )
                   CONNECT BY
                          -- INSTR for compatibility with Oracle 10g 
                          --INSTR(keywords, ',', 1, LEVEL-1) > 0
                          -- From Oracle 11g may used regexp_count 
                          LEVEL <= regexp_count(keywords,',') + 1
                          AND id = PRIOR id
                          AND PRIOR DBMS_RANDOM.VALUE IS NOT NULL
          )
       )                
;

KEYWORDS                                                                        
--------------------------------------------------------------------------------
12345,ALLEN,KING,SCOTT                                                          
1 row selected.

Using PL/SQL function example:

 
DECLARE
    l_keywords varchar2(32767) := '10,20,30,40, 10, 20,SCOTT,ALLEN,KING,SCOTT,12345,SCOTT,12345,ALLEN,SCOTT';
    
    function extract_duplicate_keywords(p_keywords varchar2) return varchar2
    is   
       l_result varchar2(32767);
       cnt pls_integer;
       
       --l_str varchar2(4000);
       TYPE   nested_nbr_type IS TABLE OF varchar2(4000);
       nnt2   nested_nbr_type := nested_nbr_type();
       --nnt3   nested_nbr_type;
       answer nested_nbr_type;
       j pls_integer;
    BEGIN
       nnt2 := nested_nbr_type();
       
       cnt := regexp_count(p_keywords, '[^,]+');
       --dbms_output.put_line('Count: ' || cnt);
       for i in 1 .. cnt + 1
       loop
         nnt2.extend;
         nnt2(nnt2.last) := trim(REGEXP_SUBSTR(p_keywords,'[^,]+', 1, i));
         --dbms_output.put_line(i || ': ' || l_str);
       end loop;
       --nnt3 := nnt2;
       answer := nnt2 MULTISET INTERSECT DISTINCT nnt2;
       
       j := answer.first;
       while j is not null
       loop
         if answer(j) is not null then
           l_result := l_result || ',' ||answer(j);
           --dbms_output.put_line(answer(j));
         end if;
         j := answer.next(j);
       end loop;
       return ltrim(l_result, ',');
    END;
BEGIN
   l_keywords := extract_duplicate_keywords(l_keywords);
   dbms_output.put_line(l_keywords);
END;