вторник, 25 ноября 2014 г.

Extract duplicate keywords from string

SQL example using wm_concat function:

 
  1. select wm_concat(distinct keyword) keywords
  2. from (
  3. SELECT LEVEL AS keyword_no
  4. , REGEXP_SUBSTR(keywords, '[^,]+', 1, LEVEL) AS keyword
  5. --, str
  6. FROM (
  7. SELECT ROWNUM AS id
  8. , 'SCOTT,ALLEN,KING,SCOTT,12345,SCOTT,12345,ALLEN,SCOTT' as keywords
  9. FROM dual
  10. )
  11. CONNECT BY
  12. -- INSTR for compatibility with Oracle 10g
  13. --INSTR(keywords, ',', 1, LEVEL-1) > 0
  14. -- From Oracle 11g may used regexp_count
  15. LEVEL <= regexp_count(keywords,',') + 1
  16. AND id = PRIOR id
  17. AND PRIOR DBMS_RANDOM.VALUE IS NOT NULL
  18. )
  19. ;
  20.  
  21. KEYWORDS
  22. --------------------------------------------
  23. 12345,ALLEN,KING,SCOTT
  24. 1 row selected.
  25.  

SQL example using listagg function:

 
  1. select listagg(keyword, ',') WITHIN GROUP (ORDER BY keyword) keywords
  2. from (
  3. select distinct keyword
  4. from (
  5. SELECT LEVEL AS keyword_no
  6. , REGEXP_SUBSTR(keywords, '[^,]+', 1, LEVEL) AS keyword
  7. --, str
  8. FROM (
  9. SELECT ROWNUM AS id
  10. , 'SCOTT,ALLEN,KING,SCOTT,12345,SCOTT,12345,ALLEN,SCOTT' as keywords
  11. FROM dual
  12. )
  13. CONNECT BY
  14. -- INSTR for compatibility with Oracle 10g
  15. --INSTR(keywords, ',', 1, LEVEL-1) > 0
  16. -- From Oracle 11g may used regexp_count
  17. LEVEL <= regexp_count(keywords,',') + 1
  18. AND id = PRIOR id
  19. AND PRIOR DBMS_RANDOM.VALUE IS NOT NULL
  20. )
  21. )
  22. ;
  23.  
  24. KEYWORDS
  25. --------------------------------------------------------------------------------
  26. 12345,ALLEN,KING,SCOTT
  27. 1 row selected.

Using PL/SQL function example:

 
  1. DECLARE
  2. l_keywords varchar2(32767) := '10,20,30,40, 10, 20,SCOTT,ALLEN,KING,SCOTT,12345,SCOTT,12345,ALLEN,SCOTT';
  3. function extract_duplicate_keywords(p_keywords varchar2) return varchar2
  4. is
  5. l_result varchar2(32767);
  6. cnt pls_integer;
  7. --l_str varchar2(4000);
  8. TYPE nested_nbr_type IS TABLE OF varchar2(4000);
  9. nnt2 nested_nbr_type := nested_nbr_type();
  10. --nnt3 nested_nbr_type;
  11. answer nested_nbr_type;
  12. j pls_integer;
  13. BEGIN
  14. nnt2 := nested_nbr_type();
  15. cnt := regexp_count(p_keywords, '[^,]+');
  16. --dbms_output.put_line('Count: ' || cnt);
  17. for i in 1 .. cnt + 1
  18. loop
  19. nnt2.extend;
  20. nnt2(nnt2.last) := trim(REGEXP_SUBSTR(p_keywords,'[^,]+', 1, i));
  21. --dbms_output.put_line(i || ': ' || l_str);
  22. end loop;
  23. --nnt3 := nnt2;
  24. answer := nnt2 MULTISET INTERSECT DISTINCT nnt2;
  25. j := answer.first;
  26. while j is not null
  27. loop
  28. if answer(j) is not null then
  29. l_result := l_result || ',' ||answer(j);
  30. --dbms_output.put_line(answer(j));
  31. end if;
  32. j := answer.next(j);
  33. end loop;
  34. return ltrim(l_result, ',');
  35. END;
  36. BEGIN
  37. l_keywords := extract_duplicate_keywords(l_keywords);
  38. dbms_output.put_line(l_keywords);
  39. END;