Skip to content

Latest commit

 

History

History
277 lines (259 loc) · 7.98 KB

nazaryev.com-kurs_2_stars.sql.md

File metadata and controls

277 lines (259 loc) · 7.98 KB
/*Левенштейн*/
CREATE OR REPLACE FUNCTION ld (
  as_src_i                         IN VARCHAR2
, as_trg_i                         IN VARCHAR2
)
RETURN NUMBER
DETERMINISTIC
AS
  ln_src_len                       PLS_INTEGER := NVL(LENGTH(as_src_i), 0);
  ln_trg_len                       PLS_INTEGER := NVL(LENGTH(as_trg_i), 0);
  ln_hlen                          PLS_INTEGER;
  ln_cost                          PLS_INTEGER;
  TYPE t_numtbl IS TABLE OF PLS_INTEGER INDEX BY BINARY_INTEGER;
  la_ldmatrix                         t_numtbl;

BEGIN
  IF (ln_src_len = 0)
  THEN
    RETURN ln_trg_len;
  ELSIF (ln_trg_len = 0)
  THEN
    RETURN ln_src_len;
  END IF;

  ln_hlen := ln_src_len + 1;
  FOR h IN 0 .. ln_src_len
  LOOP
    la_ldmatrix(h) := h;
  END LOOP;

  FOR v IN 0 .. ln_trg_len
  LOOP
    la_ldmatrix(v * ln_hlen) := v;
  END LOOP;

  FOR h IN 1 .. ln_src_len
  LOOP
    FOR v IN 1 .. ln_trg_len
    LOOP
      IF (SUBSTR(as_src_i, h, 1) = SUBSTR(as_trg_i, v, 1))
      THEN
        ln_cost := 0;
      ELSE
        ln_cost := 1;
      END IF;
      la_ldmatrix(v * ln_hlen + h) :=
         LEAST(
           la_ldmatrix((v - 1) * ln_hlen + h    ) + 1
         , la_ldmatrix( v      * ln_hlen + h - 1) + 1
         , la_ldmatrix((v - 1) * ln_hlen + h - 1) + ln_cost
         )
      ;
    END LOOP;
  END LOOP;
  RETURN la_ldmatrix(ln_trg_len * ln_hlen + ln_src_len);
END ld;
/

DECLARE
A NUMBER;
BEGIN
A:=ld('triim','troom');
DBMS_OUTPUT.ENABLE;
DBMS_OUTPUT.PUT_LINE(A);
END;
/

SELECT ld('Россия','Роисся') FROM DUAL;

/*Джаро-Винклер*/
CREATE OR REPLACE FUNCTION jws 
  (p_string1     IN VARCHAR2,
   p_string2     IN VARCHAR2)
  RETURN            NUMBER
  DETERMINISTIC
AS
  v_closeness       NUMBER := 0;
  v_temp            VARCHAR2 (32767);
  v_comp1           VARCHAR2 (32767);
  v_comp2           VARCHAR2 (32767);
  v_matches         NUMBER := 0; 
  v_char            VARCHAR2 (1);
  v_transpositions  NUMBER := 0;
  v_d_jaro          NUMBER := 0;
  v_leading         NUMBER := 0;
  v_d_winkler       NUMBER := 0;
  v_jws             NUMBER := 0;
BEGIN
  -- check for null strings:
  IF p_string1 IS NULL OR p_string2 IS NULL THEN 
    RETURN 0;
  END IF;
  -- closeness:
  v_closeness := (GREATEST (LENGTH (p_string1), LENGTH (p_string2)) / 2) - 1;
  -- find matching characters and transpositions within closeness:
  v_temp := p_string2;
  FOR i IN 1 .. LENGTH (p_string1) LOOP
    IF INSTR (v_temp, SUBSTR (p_string1, i, 1)) > 0 THEN
      v_char := SUBSTR (p_string1, i, 1);
      IF ABS (INSTR (p_string1, v_char) - INSTR (p_string2, v_char)) <= v_closeness THEN
        v_comp1 := v_comp1 || SUBSTR (p_string1, i, 1);
        v_temp := SUBSTR (v_temp, 1, INSTR (v_temp, SUBSTR (p_string1, i, 1)) - 1)
               || SUBSTR (v_temp, INSTR (v_temp, SUBSTR (p_string1, i, 1)) + 1);
      END IF;
    END IF;    
  END LOOP;
  v_temp := p_string1;
  FOR i IN 1 .. LENGTH (p_string2) LOOP
    IF INSTR (v_temp, SUBSTR (p_string2, i, 1)) > 0 THEN
      v_char := SUBSTR (p_string2, i, 1);
      IF ABS (INSTR (p_string2, v_char) - INSTR (p_string1, v_char)) <= v_closeness THEN
        v_comp2 := v_comp2 || SUBSTR (p_string2, i, 1);
        v_temp := SUBSTR (v_temp, 1, INSTR (v_temp, SUBSTR (p_string2, i, 1)) - 1)
               || SUBSTR (v_temp, INSTR (v_temp, SUBSTR (p_string2, i, 1)) + 1);
      END IF;
    END IF;    
  END LOOP;
  -- check for null strings:
  IF v_comp1 IS NULL OR v_comp2 IS NULL THEN 
    RETURN 0;
  END IF;
  -- count matches and transpositions within closeness:
  FOR i IN 1 .. LEAST (LENGTH (v_comp1), LENGTH (v_comp2)) LOOP
    IF SUBSTR (v_comp1, i, 1) = SUBSTR (v_comp2, i, 1) THEN
      v_matches := v_matches + 1;
    ELSE
      v_char := SUBSTR (v_comp1, i, 1);
      IF ABS (INSTR (p_string1, v_char) - INSTR (p_string2, v_char)) <= v_closeness THEN
        v_transpositions := v_transpositions + 1;
        v_matches := v_matches + 1;
      END IF; 
    END IF;
  END LOOP;
  v_transpositions := v_transpositions / 2;
  -- check for no matches:
  IF v_matches = 0
    THEN RETURN 0;
  END IF;
  -- Jaro:
  v_d_jaro := ((v_matches / LENGTH (p_string1)) + 
               (v_matches / LENGTH (p_string2)) +
               ((v_matches - v_transpositions) / v_matches)) 
               / 3;
  -- count matching leading characters (up to 4):
  FOR i IN 1 .. LEAST (LENGTH (p_string1), LENGTH (p_string2), 4) LOOP
    IF SUBSTR (p_string1, i, 1) = SUBSTR (p_string2, i, 1) THEN
      v_leading := v_leading + 1;
    ELSE
      EXIT;
    END IF;
  END LOOP;
  -- Winkler:
  v_d_winkler := v_d_jaro + ((v_leading * .1) * (1 - v_d_jaro));
  -- Jaro-Winkler similarity rounded:
  v_jws := ROUND (v_d_winkler * 100);
  RETURN v_jws;
END jws;
/

WITH strings AS
(SELECT NULL       string1, NULL     string2 FROM DUAL UNION ALL
SELECT 'test'       string1, NULL     string2 FROM DUAL UNION ALL
SELECT NULL       string1, 'test'     string2 FROM DUAL UNION ALL
SELECT 'CRATE'      string1, 'TRACE'      string2 FROM DUAL UNION ALL
SELECT 'MARTHA'     string1, 'MARHTA'     string2 FROM DUAL UNION ALL
SELECT 'DWAYNE'     string1, 'DUANE'      string2 FROM DUAL UNION ALL
SELECT 'DIXON'      string1, 'DICKSONX'   string2 FROM DUAL UNION ALL
SELECT 'Dunningham' string1, 'Cunningham' string2 FROM DUAL UNION ALL
SELECT 'Abroms'     string1, 'Abrams'     string2 FROM DUAL UNION ALL
SELECT 'Lampley'    string1, 'Campley'    string2 FROM DUAL UNION ALL
SELECT 'Jonathon'   string1, 'Jonathan'   string2 FROM DUAL UNION ALL
SELECT 'Jeraldine'  string1, 'Gerladine'  string2 FROM DUAL UNION ALL
SELECT 'test'       string1, 'blank'      string2 FROM DUAL UNION ALL
SELECT 'everybody'  string1, 'every'      string2 FROM DUAL UNION ALL
SELECT 'a'        string1, 'aaa'        string2 FROM DUAL)
SELECT string1, string2,
    UTL_MATCH.JARO_WINKLER_SIMILARITY (string1, string2) oracle_jws,
      jws (string1, string2) my_jws
  FROM   strings
  ORDER  BY my_jws DESC
/


SELECT jws ('Барак Абама','Парап Апама') FROM DUAL;

-- Hamming Distance
CREATE OR REPLACE FUNCTION hd (
  as_src_i                         IN VARCHAR2
, as_trg_i                         IN VARCHAR2
)
RETURN NUMBER
DETERMINISTIC
AS
  ln_src_len                       PLS_INTEGER := NVL(LENGTH(as_src_i), 0);
  ln_trg_len                       PLS_INTEGER := NVL(LENGTH(as_trg_i), 0);
  ln_distance                      PLS_INTEGER := 0;
BEGIN
  IF (ln_src_len <> ln_trg_len)
  THEN
    RETURN NULL;
  END IF;

  IF (ln_src_len = 0)
  THEN
    RETURN ln_src_len;
  END IF;

  FOR i IN 1..ln_src_len
  LOOP
    IF (SUBSTR(as_src_i, i, 1) <> SUBSTR(as_trg_i, i, 1))
    THEN
      ln_distance := ln_distance + 1;
    END IF;
  END LOOP;
  RETURN ln_distance;
END hd;
/

DECLARE
A NUMBER;
BEGIN
A:=hd('triim','troom');
DBMS_OUTPUT.ENABLE;
DBMS_OUTPUT.PUT_LINE(A);
END;
/

SELECT hd('Россия','Роисся') FROM DUAL;

-- ЗАПРОС **
SELECT * FROM(
    WITH COUNTRY_NAMES AS
    (SELECT NAME FROM CITIZENSHIP)
    SELECT COUNTRY_NAMES.NAME, jws(COUNTRY_NAMES.NAME,'Rassiya') mjws
    FROM COUNTRY_NAMES
    ORDER BY mjws DESC
)
WHERE ROWNUM = 1;

SELECT SUR, EN_PR, CIT FROM
  (
  SELECT SUR, EN_PR, C.NAME CIT
  FROM CITIZENSHIP C
  JOIN
    (
    SELECT H.SURNAME SUR, EN_PR, H.CITIZENSHIP_ID CIT_ID
    FROM HUMANS H
    JOIN
    (
      SELECT HUMAN_ID H_ID, EN_PR FROM
      ENTRANTS E JOIN
        (
        SELECT P.ENTRANT_ID ENT_ID, LISTAGG(P.PROGRAM_ID,', ')
        WITHIN GROUP (ORDER BY P.ENTRANT_ID) AS EN_PR
        FROM PRIORITIES P 
        GROUP BY P.ENTRANT_ID
        )
        ON E.ID = ENT_ID
    )
    ON H.ID = H_ID
    )
  ON C.ID = CIT_ID
  )
WHERE CIT IN 
(
  SELECT NAME FROM(
    WITH COUNTRY_NAMES AS
    (SELECT NAME FROM CITIZENSHIP)
    SELECT COUNTRY_NAMES.NAME, jws(COUNTRY_NAMES.NAME,'Rassiya') mjws
    FROM COUNTRY_NAMES
    ORDER BY mjws DESC
)
WHERE ROWNUM = 1
);