diff --git a/Pilot1/ST1/VocabFiles_spe/SPE_ChEMBL.txt b/Pilot1/ST1/VocabFiles_spe/SPE_ChEMBL.txt new file mode 100644 index 00000000..65da6743 --- /dev/null +++ b/Pilot1/ST1/VocabFiles_spe/SPE_ChEMBL.txt @@ -0,0 +1,3002 @@ +c c +C C +O ) +c 1 +c ( +C ( += O) +c 2 +C ) +cc cc +c 3 +C( =O) +cc c( +) cc +( C) +( =O) +2 ) +cc ( +C 1 +F ) +N ( +CC CC +c1 ccc( +3 ) +C O +c n +c1 cccc +c2 cccc +c 4 +C N +C(=O) N +O C) +)cc 1 +c2 ) +C 2 +n 1 +n c( +C( C) +c2 ccc( +N ) +[C@H] ( +[C@@H] ( +c2 c( += C( +CC 1 +c1 ) +Cl ) +c3 cccc +N C(=O) +cc 1 +O CC +c1 c( +c1 cc +c1 cc( += C +S (=O) +CC N( +( F) +c3 ) +N 1 +n 2 += O +c3 ccc( +S(=O) (=O) +c3 c( +CC (=O) +CC ( +/ C +c1cccc c1 +C( F) +F )cc +c2 cc +c2 n +CC C +N 2 +4 ) +c2cccc c2) +O C +Cl )cc +O =C( +C(F) (F) +c1 n +c( =O) +c2cccc c2 +(C) C) +c2 cc( +C( N +c1 ( +[C@H] 1 +=C / += N +n ( +[C@@H] 1 +C 3 +CC ) +CC O +CC N +CC 2) +1 ) +C # +c1 2 +CC 2 +C N( +c1cccc ( +c( - +CC (C) +c3cccc c3 +c( C) +cc 2) +N C( +cc 2 +( O) +c1cccc c1) +[C@H] 2 +C c1ccc( +c( OC) +C(=O) O) +c3cccc c3) +[C@@H] 2 +n 2) +nc( - +C(C) C) +c4 cccc +)cc1 ) +O = +/C =C/ +c3 cc +C 2) +CC C( +c 5 +3 )cc +CO c1ccc( +CN 1 +[O-] ) +C O) +C(F)(F) F) +cc1 ) +[C@H]( O) +n c2 +c( N +[C@@H]( O) +C( O) +c3 cc( +CC(=O) N +c s +CC1 ) +/ C( +CC 3) +[N+] (=O) +C S +c4cccc c4 +c1cc 2 +c3 n +c( O) +2 )cc1 +CC CN +nc( N +CC OCC +cc 3) +(C) C +(=O) =O) +n 3 +C1 ) +n c1 +S ) +C# N) +cc c1 +/ N +)cc ( +C(=O) O +[C@H] 3 +CCCC CCCC +( N +c1 cn +C 3) +Br ) +)cc c1 +c2 c1 +O=C( N +cccc 1 +[C@@H] 3 +c2cccc ( +C(C) (C)C) +c1cc c2c( +c4 ccc( +CN 2 +C = +- c2ccc( +- c2 +C(C) =O) +c( Cl) +CO C(=O) +)cc 2) +C [C@H]( +n (C) +C( N) +=C \ +n1 ) +C(C) (C) +cc 3 +c o +S ( +OCC ) +CC N1 +c [nH] +C( = +n cn +C n1 +C(F)(F) F)cc +O C(=O) +[C@H] (C) +( [O-]) +C c2ccc( +[C@@H] (C) +c4 c( +OC) c(OC) +C(=O)N 1 +)cc 2 +CN C(=O) +nc( C) +2) c1 +=O) cc1 +S(=O)(=O) N +CC 3 +C [C@@H]( +C(=O) N( +Cl )cc1 +n n +C c1 +O= C1 +C(=O)N 2 +S(=O) ( +C c2ccccc2) +O CO +C 4 +CN (C) +C1 =O +c4 ) +=N / +C c1cc +c2 cn +5 ) +cccc 2 +cc( - +)cc 3) +[N+](=O) [O-] +C1 ( +c1ccc2c( c1) +F )cc1 +C c1cc( +c1cccc 2 +c1cc2 c( +n 3) +CO c1cc( +CC C) +c2cc 3 +cc (C) +C(O) =O) +C c1ccccc1) +[C@] 1 +n c3 +- c2n +c2ccc( Cl)cc +[N+](=O) [O-]) +O C( +CCN( C(=O) +CC(C) (C) +[C@@] 2 +CC C1 +N 3 +=O) c1 +cccc 2) +[N+] ([O-]) += C2 +C# N +c( F) +[C@] 2 +n( - +s 1 +O 1 +CC C2 +o 1 +C c1ccccc1 +cc c2 +(C) CC +c( N) +[C@@] 1 +CC( O) +CCO C(=O) +N C(=O)N +C c1n +c4ccccc4 ) +cccc 3) +c1 c2c( +cn 1 +F)cc 2) +C c1c( +CC (C)C) +nc( N) +C [C@H]1 +O=C( O) +N =C( +c3cccc ( +C1 CCCC += C1 +C n2 += S) +c2cc c3c( +CC( N +- n2 +CCN( C +c3ccc( Cl)cc +c2 )cc1 +Cl) c( +c(=O) [nH] +c3 cn +C(N) =O) +c3ccc( F)cc +P (=O) +( C +N (C) +n2 )cc1 +c1c( C) +CCC N( +c2 [nH] +CCCC C2) +c2c( C) +CC [C@H]( +3)cc 2) +c2 s +O [C@H]( +C [C@@H]1 +OC) c1 +C) cc1 +C2 =O) +c4 cc +CC CO +c2) c1 +N S(=O)(=O) +Cl)cc ( +c12 c( +=N \ +c2 3) +c1 nc( +/N =C/ +C) =O) +N # +cccc c1 +3 )cc1 +N2 CCN( +N1 CCN( +CC(=O) O) +=C( \ +OCC (=O) +c1 =O +(C) ( +CC 4) +CC2 )cc1 +CCN (C) +c1 - +n cc +c2ccc( - +[C@] 3 +S (C) +O ( +C c1cccc( +CC NC(=O) +N( C(=O) +c2 1 +c2ccc(Cl)cc 2) +c1 s +cc c3 +CN 3 +CCCC ) +cc c2) +F )cc1) +cn 2 +CC 4 +C(=O)N [C@@H]( +c4 cc( +C 4) +Cl )cc1) +[N+]([O-]) =O) +CC O) +[C@H] 4 +c( S +CC S +CC CN1 +Cl) c1 +c2 o +[C@@H] 4 +C(=O) OC) +c2 nc( +c1ccc( Cl)cc1 +CCCC 1 +n n1 +c2ccc3c( c2) +C [C@H]2 +n o +c2ccccc2 1 +[C@@] 3 +C2 ( +c5 cccc +CC n1 +C) cc2) +CC N2 +c1ccc( N +3)cc 2 +N) N +cccc 3 +cccc 1) +CCCC 2) +c2cc( - +c( C(=O)N +OC) =O) +c2cc3 c( +c3ccc(F)cc 3) +C [C@@H]2 +C2 =O +/C =C\ +n( C +S(C) 
(=O)=O) +cn 2) +cn c1 +Cl) c(Cl) +c2ccc( OC) +N1 ( +c2cc cn +c2 c3c( +cc c1) +O) cc1 +( - +c1ccc( - +c2ccccc2 )cc1 +cn c3 +c3ccc(Cl)cc 3) +c(C) c1 +n [nH] +)cc 3 +OC) cc( +CCOCC 2) +N c1n +c2 =O) += C(C) +CO c1cccc( +O C(C)=O) +CC C(N +c2ccc( F)cc2) +c2cc n +F) c( +c2 nc(- +P(=O) (O) +N( C( +CO c1ccccc1 +N C +CCCC C3) +CCN( CC) +n cc1 +OCC O +c2) =O) +s 2) +O [C@@H]( +n 4 +C(= S) +/C =N/ +CC C(=O)N +O) c( +CCOCC 3) +N (C)C) +C1 2 +C2 )cc1 +c2 n1 +CCCC 3) +CC = +nc( S +CN( C(=O) +N( C +n2) c1 +c1ccc( OC) +C =C +=O) =O) +[nH] 1 +/C( =C/ +c3ccc( OC) +CC [C@@H]( +/C( =C\ +cc 4 +CN C( +O c1ccc( +c1 [nH] +C1 =O) +c3ccc( C) +c2cc (C) +CCC 3 +c5cccc c5 +c2 cc1 +CC2) c1 +O) c(O) +CCCC 2 +c1cc (C) +c1ccc( O +C2 =N +cc2 )cc1 +F)cc 2 +c2n ( +C1 CCN( +c3cc 4 +c2cccc c12 +cc 4) +N) =O) +[C@]1 2 +NC(=O) [C@H]( +c1 (C) +C1 CN( +C c3ccccc3) +c1 nc(- +OC)c(OC) c(OC) +[C@] 4 +Cl)cc 2) +N2 C(=O) +C3 =O) +C) c1 +c2cccc 3 +c2c( - +N c1ccc( +CCCC C1 +C( O +S(=O) (N +CC(=O) O +cc c3) +CC C(C) +C S(=O)(=O) +c1c( O) +c3 [nH] +OCC) =O) +OC) c( +C(=O) OC +[C@@H]1 O +c2n cn +c 6 +s 1) +C1 =C( +CCCN 2 +C [C@@H](O) +CCC 2) +(=O) =O +o c( +( O +N( CC) +c(=O) n( +n n2 +/C =C +c2c( =O) +C [C@H](O) +-c2 cn +c2c( cccc2) +c3c( C) +c1 O +- c2ccccc2) +- c1ccc( +NC( =S) +C(C) C +CCO CC1 +=C( / +C(=O)N C +c3c( cccc3) +( C(F)(F)F) +[n+] 1 +CC1 (C) +cc( O) +- c2cc( +N 4 +O 2) +C( OC)=O) +CCCC (=O) +- 2 +cn 3) +c3ccc( - += N) +CC # +c2) CC1 +[C@@H](O) [C@H](O) +C(=O)N [C@H]( +c( NC(=O) +c1cc2c( cc1 +c2 nc(N +C c1cn +C2 CC2) +O 2 +C(=O)N 3 +N1 C(=O) +[C@@H]( CO) +c1ccc( Cl)cc1) +c3 s +/C( =N\ +C =C( +S(=O)(=O) N2 +CCO c1ccc( +/C =C2 +(F) (F) +[C@H]1 O +CC [C@H]2 +cs 1 +c2ccc(OC) cc2) +c3 o +C1 C( +n c2) +c2cc co +cs 2) +c2c( O) +F)cc ( +[C@]2 (C) +[C@@] 4 +[C@@]2 (C) +O[C@H]( CO) +c1c( Cl) +c2cc( Cl)cc +Cl)cc 2 +CCCC ( +ccc( - +C1 N( +c1 C +c2cccc( Cl) +Cc1cc (C) +C1 = +cn 1) +CC(=O) N1 +n1 c( +c1ccc( F)cc1 +cccc 4) +c1 2) +S( N) +CN1 CCN( +CCC( O) +C3 CC3) +c3cc c4c( +c1c( OC) +CC C(=O)O) +C(=O) OCC) +Br )cc +n cc( +c1 o +n n2) +O CCCC +- c1ccccc1 +C2 =C( +C2 CCCCC2) +C1 (=O) +c2ccc( C)cc2) +c1cc2c( cc1) +/C =C(\ +nc2 c1 +c3cc n +c2ccc( O) +C1 =N +S 1 +C) cc1) +( CC) +C(=O) C( +CCN 3 +c2ccc(Cl)cc 2 +cn c2 +C c2ccccc2 +C [C@H]3 +n cc2 +CS c1n +CO C( +c3ccc(OC) cc3) +c3 c2 +c2c( Cl) +CCC 3) +[C@@] (C)( +Br )cc1 +C(F)(F) F +CC c1ccc( +[C@@H]( N) +C(C) (C)C +c(N 3 +c(=O) c( +C2 CCN( +c3cccc c23) +C(=O) OCC +S(N) (=O)=O) +s c1 +o 2) +C( OCC)=O) +/ C(C) +c1 nc(N +N C(N +CN1 C(=O) +S (N +CN ) +Cl)cc c1 +C( c2ccccc2) +CO c1cc +c2ccn cc2) +OCO 2 +OCO 3) +c(OC) c1 +c( O +c3 4) +c4 n +OC)c1 OC +4 )cc +I ) +c1) =O +n 4) +C [C@@H]3 +c4 cn +c5 c( +N) N) +CCCC C1) +OCC(=O) N +- c2ccccc2 +[C@@H]( NC(=O) +N2 CCOCC2) +N1 CCC( +C( CC) +CC2) =O) +n2 c( +CCO CC1) +C# C +[nH] c(=O) +C= C1 +C(=O)N (C) +CN( C +O) cc1) +[C@] (C)( +O) c1 +cc2 c( +F) c(F) +[C@@]1 2 +cccc ( +c2ccc( C) +c2 c[nH] +s 3) +C(=O)N ) +N( CC +n c12 +N CC +c2n cc +CCN (C)C) +n(C) c(=O) +c3ccccc3 2) +c1 =O) +c3n cn +O C1 +c3ccc(C) cc3) +P (O) +ccc( OC) +/C( =N/ +c3cc (C) +c( C#N) +c3ccc( O) +o c(- +C(F)(F) F)cc1 +C2 =O)cc1 +c1ccc( F)cc1) +( CC +C(=O)N CC +c2ccco 2) +O c2ccc( +C(= N)N +=O) cc2) +c3cc4 c( +C(C) ( +c2 C) +CCCCCCCC CCCC +Br )cc2) +=N /N +C( c1ccccc1) +C c2c( +O [C@H]1 +[C@]3 (C) +4 )cc3) +C1 CC +C =O) +[N+] (C) +CC [C@@H]1 +c3 2) +c1( N +C( N1 +c4ccc( F)cc +c2) n1 +C(=O) ( +C2 = +O [C@@H]1 +c1ccc( NC(=O) +C c2cc( +c( Br) +n c3) +c3ccc4c( c3) +c2n c3c( +5 )cc +CC [C@H]1 +- c1n +c2n cc( +c(Cl) c1 +c2cccc n2) +c1c( F) +C1CCCC C1 +=O) ccc1 +N C(C)=O) +c5 ccc( +n1 cn +[n+] 2 +n 
c1) +O 3) +c2cc cs2) +c1cc( - +[n+] ( +c3cc cn +C(F)(F)F)cc 2) +CC1 ( +[C@]2 ( +c3cc( Cl)cc +O= [N+]([O-]) +CC(=O) N( +c1 c[nH] +cc c4 +c2ccccc2) c1 +cs 1) +c2ccc( O +CN (C)C) +[nH] 2) +cc( OC) +c3cc co +c3 n( +n c4 +C1 CC1 +CC(O) =O) +O 1) +c1cc( OC) +3 C) +nc(N 3 +C( N2 +c1cc co +2 )ccc1 +/N =C(\ +CCC2 ( +n c2c( +CC( CC( +CC [C@@H]2 +c3ccccc3 )cc2) +OC 3 +[C@] ( +CCN( C( +CCCC 1) +C 5 +CC [C@H]3 +c4ccc( Cl)cc +c( C(N +c2ccccc2) CC1 +OCC ( +C(=O) /C=C/ +[C@@] ( +4 CCCC +cs 3) +n1 cc( +N=C( N) +/C=C2 \ +/ C1 +c2c( F) +=C1 \ +c3c( =O) +c3 c4c( +)cc c2 +c1c( N +Cl) c3) +nc( O) +c2n c3ccccc3 +C( C(=O)N +C n3 +C(F)(F)F) c1 +c3ccn cc3) +C(= C) +c1cc cnc1 +c1cccc2 c1 +[N+](=O)[O-] )cc1 +c1c( - +CC(=O) N2 +O) =O) +c2ccc( N +O (C) +CC) =O) +ccc( C) +c3cccc 4 +[C@]1 (C) +CC= C) +CCCC C3 +n2 cn +S(=O) (C) +C(C) =O +O= S(=O)( +c(=O) o +c2 nc(C) +N [C@@H]( +CC c1n +CC 3)cc +c2) C1 +O C2 += C) +c2cccc (F) +CCC 4 +n n( +[nH] c( +c2c( c1) +n1 2 +[C@H]( NC(=O) +c1c( =O) +c1cc( N +c3ccco 3) +C( C( +P(=O)(O) O) +CC(C) ( +c2cc3c( cc2) +c3cccc n3) +[C@@]1 (C) +C[C@H]( NC(=O) +ccc( F) +CCCC C2 +CCOCC 4) +CCN1 C(=O) +CC C1) +S C) +c(C) c( +C1CCCC C1) +c5 ) +CO c1c( +c2c( OC) +c1cccc (F) +C1 CC( +OC(=O) C) +[C@@]2 ( +c4cccc ( +C3 CCCCC3) +n nc1 +c1ccc(OC) cc1) +(O) ( +cccc 4 +c2c( cc( +s c( +n o1 +CS c2n +[C@H]( CO) +CO c1cc2 +c3n cc +c2cccc c12) +C [C@@H](C) +c(F) c1 +C /C=C\ +CC S(=O)(=O) +CC OC +CN (C)C +c1cn ( +CO C) += CC(=O) +[C@@H]1 2 +CC2 ( +(C) c1 +CC [C@]4 +nc( NC(=O) +n1 ( +C(=O)N c2ccc( +c3 =O) +CCCN C(=O) +c1 (=O) +Cc1 c[nH] +c1n ( +c1n cc( +c2cc( OC) +cn c1) +CCCC N +CCN (C)CC +c(- c3ccc( +[C@]1 ( +ccc( N +o 1) +N1 CC +c(- c2ccc( +CCCC CC +[C@@H] 5 +6 ) +F)cc (F) +c1( OC) +c1cc( O) +C( c1ccc( +NC(=S) N +c3) =O) +[C@@]3 (C) +c( OCC +C(=O)N c1ccc( +OCO 4) +O C(C)(C)C) +F)cc c1 +c2cccn c2) +=C 3 +O [C@@H]2 +c2 )ccc1 +C c3ccccc3 +c3ccc(Cl)cc 3 +c1 cs +c -2 +C(=O)O) cc1 +c2ccccc2 c1 +c2 3)cc1 +C2) c1 +OC)cc( OC) +-c2 nc( +n2) CC1 +c2ccccc2 n1 +[C@@]1 ( +n cc1) +c3cc( - +c1ccc(OC) cc1 +C3 ( +4) CC3) +CCCN 3 +c1) =O) +Cn1 cc( +n n1) +c1n cc +F C(F)(F) +C#N )cc +N c1ccccc1 +C1 (C) +P(=O) ( +c3c( Cl) +[C@H]1 2 +C(N) =N +c(- c3ccccc3) +[nH] 3) +c2cccc( - +c1( - +c2ccccc2) =O) +C( NC(=O) +/C=C(\ C) +c(=O) n1 +c3cccn c3) +c2cccc( C(F)(F)F) +C(N C( +O C(C)(C) +C(=O)N C( +C(F)(F)F)cc 3) +CC C(=O) +CO c2ccc( +n(- c2ccccc2) +OC c1ccccc1) +C( N( +CC [C@@H]3 +3) c1 +)cc( OC)c1OC +C c2n +[N+] ( +P ( +c3 nc( +CCC n1 +C(F) F) +CN2 C(=O) +S(=O)(=O)N 3 +c(O) c1 +s c2 +[C@H]( N) +3 c( +c1( Cl) +[C@H](O) [C@H](O) +=O) ccc( +n cc3 +cc cn +CCCC 3 +/C=C/ C(=O) +c3cc cs3) +c2ccccc2 Cl) +c1n c2c( +CN2 CCN( +c(- c2ccccc2) +CC (F)(F) +C(=S) N +c3c( F) +N) ncn +Cl c1ccc( +)cc 2)cc1 +o c(=O) +c5ccccc5 ) +O [C@H]2 +c1cccc( Cl)c1 +[N+](=O)[O-]) c1 +N2 CCC( +C1 C2 +CCC [C@H]1 +S 2 +[C@H]1 CC +CN( C( +c3ccc( Br +C(= N)N) +cn ( +Br )cc1) +Cl) c1) +N1 CCOCC1 +Cn1 cn +C(=O) C) +C(N [C@H]( +c1cc( Cl)ccc1 +(O) =O) +c1ccc( OCC +nc2 1 +CO c1ccc2c(c1) +c2cc c3ccccc3 +CN(C) C(=O) +C4 CC4) +CC1) =O +c3ccc( O +c3 C) +c1 (F) +O= S( +C( CCCC +CC2 )ccc1 +[C@@H]( NC( +c3ccccc3) CC2) +c2ccc( N3 +O=C(N c1ccc( +cc3 c( +C) =O +CC1 =C( +c2 c1) +n3 c( +(=O) o +=O) C +c2ccc(O) cc2) +cc( N +O=C( /C=C/ +C(=O)N O) +c2cc( O) +c( NC( +CC c1ccccc1) +C 5) +C[C@H]( N) +[S+] ([O-]) +c2 =O)cc1 +=O) CC1 +OC) cc2) +C= C(C) +c2 cs +/C =C(/ +c3 nc(- +c2cc( F)cc +c3 )cc2) +C2 CCCC2) +o 3) +C(O) ( +Cc1ccc( NC(=O) +C2 3 +S(=O)(=O) c1ccc( +C1 =C(C) +C(N [C@@H]( +C(F) ( +o 2)cc1 +c2ccc3c(c2) OCO3) +n2 C) +c2cccc(Cl) c2) +n1 C +c2n cccc2 +[C@@H]1 CC +[nH] 1) +n2 )ccc1 +c3cc 
cnc3 += S +Br )cc( +c2ccccc2 F) +c2ccc( F)cc2 +c2ccc( Br)cc2) +- c1c( +Cc2ccc( - +c2ccc( OCC +/C =N/N +C(C)C) =O) +C(F)(F) F)cc1) +P(=O)(O) O +c1cc ncc1 +c(O) c( +# N) +CCN C( +3)cc ( +n n3 +c - +CC2) C1 +c3cc( OC) +Cn1 c(=O) +CC2) CC1 +C [C@H](C) +C1 CC1) +OC [C@H]1 += [N+] +(C) (O) +[C@H] 5 +1 C +C#N )cc1 +c1( O) +c1c2c( ccc1) +[n+] ([O-]) +[N+](=O)[O-] )cc2) +nn 3) +/ c( +CN 4 +c4ccc(F)cc 4) +F c1ccc( +s 2)cc1 +Cc1 nc( +c( N2 +C [C@]12 +OCC 2 +c1cc c2[nH] +O) cc2) +[C@H]2 O) +c2ccc( NC(=O) +c1) N +c4 [nH] +=O) cc( +c1 nc(N) +CC OC) +C= C2 +3)cc 2)cc1 +N(C) C(=O) +c3c( O) +c1cccc n1 +(=O) N +nc2 n1 +c1cc( -c2ccc( +c1cc2 cccc +C( C +Br )cc2 +c1c( -c2ccc( +C( \ +O=C1 N +c2cc3c( cc2 +C( Cl) +c1 nc(C) +C2 CCCC +CC 5) +c3ccc(O) cc3) +c3c( - +c1 N +c1ccc( -c2ccc( +c3c( cc( +CC c1ccccc1 +/C=N/ NC(=O) +n( -c2ccc( +c3ccc(F)cc 3 +ccc( Cl) +CO c1cc2c(cc1 +C(F)(F)F)cc (C(F)(F)F) +C1 O +c2cccc (C) +c1cc( NC(=O) +C c3ccc( +C( C#N) +C(=O)O C(C)(C)C) +C(=O) c1ccccc1 +CC(C)(C) O +c3cccc (F) +CCCCCCCC CCCCCCCC +c2 F) +CC= C(C)C) +O c1c( +C( OC) +CC c2ccccc2) +c2 )cc( +=O) cc2 +c4ccccc4 3) +Br) c1 +(F) F +C(N) =O +n1 c2c( +=C( O) +n nc2 +c4cccc c34) +/C=C /C +Cc2ccc( F)cc2) +c1cccc n1) +O= c1[nH] +n(- c3ccccc3) +C( C(N +[C@H]3 CC +O[C@H](CO) [C@@H](O) +CCCC C) +- c2cc +c1ccc( C)cc1) +ncn 2 +c(=O) n2 +c2cccc( OC) +nc1 - +c2cc3 cccc +C4 CCCC +=O) cc1) +c( C(F)(F)F) +O C(C)C) +c3 c2) +o c1 +c2ccccc2 1) +c3 )cc +c2 c3ccccc3 +C(F)(F)F)cc 2 +c2n (C) +2) CC1 +s c2c1 +CCC3 ( +c1n cn2 +)cc1) =O +c4c( cccc4) +/C(C) =C/ +COc1ccc( NC(=O) +[C@H]1 ( +[C@@] 5 +Cl)cc( Cl) += N1 +c3 )cc2 +O= S(=O)(N +C1CCCC 1 +c1ccccc1 - +n2 cc( +c1( -c2ccc( +S CC(=O)N +[N+](=O)[O-] )cc +N# Cc1ccc( +c12c( cccc1) +OC c2ccccc2) +C(N 3 +C1 C +c2 Cl) +c1ccc( -c2n +CCCN( C(=O) +c1ccc( -n2 +CN2 CCC( +c1c( NC(=O) +CN C +N (C)C +c3n c4ccccc4 +c(=O)[nH] c1=O +[C@H](O) [C@@H](O) +=O) C1 +c3 c[nH] +C(=O) C1 +c3cccc( Cl)c3) +-c2 o +N [C@H]( +OC c1ccccc1 +O CC1 +n1 c(- +c3cc( F)cc +-c2 cs +c1ccc( O)cc1 +O[C@H](CO) [C@@H](O)[C@H](O) +C#N )cc2) +)cc (C) +c4ccc(Cl)cc 4) +[C@@]2 3 +ccc( O) +C c1ccc2c(c1) +Cc1cc co +CC( NC(=O) +Cc2ccc( Cl)cc2) +c2 O) +c3n cc( +C(=O) C2 +c1cc cs1 +CCCN (C)C) +n c3c( +[C@]2 3 +C1 CO +nc1 N +C(=O) C +CCN(C) CC3) +c2n cnc3 +CN1 CCC( +c2cc( OC)c(OC)c(OC) +c( C(=O)O) +c2 nc(N) +[n+] (C) +C3 =N +c2cc( NC(=O) +cc2 1 +c(C) cc1 +CCN1 CCN( +C2) C1 +c2cc c1 +S(=O)(C) =O) +C= C) +C(=O)N1 CCN( +n2 cc +[C@@H]1 ( +c3ccccc3 )cc2 +CN S(=O)(=O) +c4ccc( OC) +C( CO) +(C) (C) +C( /C=C/ +cc2) c1 +c2 N +CCCN1 C(=O) +c1) C +=C1 / +OCC (O) +)cc1 2 +c1ccc( N2 +C) c( +c2c( N +cc2 c1 +N c1nc( +[C@]3 ( +[C@@H]3 CC +n3 cn +N=C( N)N +o n1 +C(=O)N (C)C) +cc n1 +[C@@H]( OC(C)=O) +S(=O)(=O) O) +c3 nc(N +cn 3 +c2cc( OC)c(OC) +N( CC(=O)N +C3 CCCC3) +[nH] c(- +cc c4) +CC(C) = +O c1ccccc1 +Cc1ccccc1) NC(=O) +c2c( c1 +[C@@H]2 CC +c3c( OC) +C2 =O)c1 +C1 CCC( +C( S +c4ccc( C) +OC) cc1 +C c2cn +n n(C) +CC( N) +c3ccc( N4 +C(C)C) cc1 +c1n cn +CCC O) +c1ccccc1) c1ccccc1 +Cl)c(Cl) c1 +n1 cc +3) CC1 +C( C(=O)O) +ncn 1 +CN1 CC +c1ccco 1) +C12 CC3 +[C@@H]( N +N2 CCCCC2) +C3 =C( +c3cc( O) +[C@H] (C)C +c12 ccccc1 +c3cc( OC)c(OC) +2 )cc( +C(=O)N2 CCN( +[C@H]( O +c2cccc3 cccc +N1 CCCC1 +c(S CC(=O)N +CCOCC 2)cc1 +c12 cc( +[C@@H]( O +C2) CC1 +=O) [nH] +c3ccc(Br )cc3) +c(=O) n(C) +C( CO +C(= N) +c3ccccc3 Cl) +c( C(=O) +cc c2c1 +c4cc cn +N c1cc( +Cc1 o +OCO 2) +c1ccco 1 +Cc1c( C) +c3ccccc3) =O) +CC 3)cc2 += [N-] +c1cc cnc1) +C(C)(C)C) =O) +c( CC) +c1cccc(F) c1 +CC 5 +4 CCOCC4) +CCN(C) CC2) +3)cc c1 +c3 nc(C) +[C@@]3 ( +c2ccccc2 )cc1) +[C@@H]( CC +CCC2( CC1) +C(F)(F)F) c3) +c1cc 
cs1) +[C@H]( OC(C)=O) +c2cc( N +P(O) (=O) +=C( N) +/N =N/ +n nc( +CO 2 +C(=O)N( C +c1ccc( C)cc1 +CC# N) +O c3ccc( +cccc c12 +S(=O)(=O) C) +c1( NC( +CC[C@]4 (C) +CCCC N) +[nH] c1 +CCCC NC(=O) +( c2ccccc2) +nc1 C +c1cccc (C)c1 +c1c( N) +cc c12 +(C)C) =O) +C( CC +c4ccccc4 )cc3) +n2 C +CCN CC3) +c2c( N) +CCCC O +CCCC C +c3n cccc3 +c1cc( C(=O)N +Cl)cc 3) +s c3 +c2cc1 OC +c(C) c(C) +/C=C2 / +c1cc ncc1) +c2nc(N 3 +c3ccc( OCC +c2c(Cl) cccc2 +2 )cc1) +O C(C) +O= c1 +4) =O) +[nH] c2 +C(=O)N1 CCC( +[C@] 5 +[C@@H]( CO +C3 = +c3ccccc3 2)cc1 +c4ccccc4) CC3) +CC( N)=O) +c2cc( Cl)c( +c2cccc (N +[C@H]3 O) +c2n c1 +)cc2 c( +N =C1 +CC2) n1 +c5 cc +cc2) =O) +c1cccc( NC(=O) +OCCO 2 +c1cc( Cl)c( +[C@H]1 CC[C@H]( +n2) C1 +C /C=C/ +c( Cl)cc1 +ccc( Cl)c1 +C(C) =C( +C /C(=N\ +C1) C2 +c1ccc( Br)cc1 +CCC S +NC( N) +CN( CC) +3)cc c2 +C(N C +)cc c3 +c(OC) cc1 +c( C +nc(- c3ccccc3) +O O +c4ccc( O) +C4 =O) +S) =N +cc1) =O +=[N+] =[N-] +C /C(=C\ +/N =C(/ +N2 CC +O c2c( +CCCN ) +2) C1 +Cn1 cc +c3ccccc3 F) +c1ccc( C(=O)N +N =C2 +nc(S CC(=O)N +CCCC N1 +C1CCCC 1) +c1c(Cl) cccc1 += C(C)C) +n3 C) +c1cc(OC) ccc1 +C(=O) (N +F) c(Cl) +c3cccc c23)cc1 +c3cccc (C) +c1ccc( OC +n1 - +c1c(OC) ccc( +Cc1ccc( O)cc1) +c4cc ncc +cccc c4 +c( =S) +N(CC) CC) +n n(- +[C@@H]( C +c1n c2ccccc2 +c2 1) +c1cccc( N +4 )cc3 +c(=O)[nH] c(=O) +c2ccc( Cl)c(Cl) +c1c( NC( +O[C@H]( CO +n3 cc +Cc1ccc( Cl)cc1 +=O) =O +c1n cccc1 +( Cl) +/ C2 +o c(C) +c1cc(C) ccc1 +c1( NC(=O) +CC2 CC2) +n 3)cc2) +CCO 1 +c2cccc (O) +Cc1 nc(- +c2c(F) cccc2 +c1cc( -c2n +N2 CCCC +C( OCC +/C=C 3 +CC1 2 +C(O) =O +C[C@H]1 CN( += N)N +[C@H]( CC +C) ccc1 +c2 c1cccc2 +c1cn cc( +c1c( Cl)cc( +C3 CCN( +c2cccc(F) c2) +c3ccc4c(c3) OCO4) +N C1 +CCO c1ccccc1 +N1 CCN(C(=O) +F)cc 3) +Cc1 cs +c- 3 +n2cn c3c( +N2 CCCC2) +[C@@]4 (C) +- c2c( +c3cc( OC)c(OC)c(OC) +c1c(Cl) ccc( +n( CC) +c1( C(N +c5 cc( +C( c1cc( +[C@@H]( OC(=O) +[C@@H](O) [C@@H](O) +(=O)=O) cc1 +NC( =N) +c1c( C(=O)N +[C@@H] (C)C +cn c(N +CN CC +N2 CCN(C(=O) +N C1=O +[C@@]1 (O) +c2) ccc( +ccc2 1 +S C +NC(=O) C( +CC3) =O) +c2cc( C(F)(F)F)cc +)cc n1 +[N+] 1 +NC(=O) c1ccc( +CC [C@H](C) +c4cc 5 +c3cc4c( cc3 +N1 CCCCC1 +CO 1 +c1cc( F)ccc1 +CC(=O)N c1ccc( +CC 3)cc2) +N CCCC +CO c1ccc2c( +-c2 [nH] +c2cc(Cl)cc c2 +c2ccc( C(F)(F)F)cc2) +COc1cc2c(cc1 OC) +COc1ccc( -c2n +o c2c1 +=C( N)N) +cc2 )ccc1 +C3) C2 +C1 =C +S(=O)(=O) N( +CO C +CC [C@]3(C) +COc1ccc( C2 +C( c3ccccc3) +c3ccc( N +CC1 CC1) +)cc( - +n1 c(N +CC(C) (C)C) +n 3)cc +c2c( Cl)cc( +c1c(F) cccc1 +N2 C +[C@H](C) CO) +-c2 nc(- +[N+](=O)[O-] )cc1) +/C=C/ C(=O)N +C c1cc2 +CC(C) C +c1( -c2n +CN C(=O)N +C(=O)O 1 +C c1cccc2 +n n2)cc1 +c2cccc(C(F)(F)F) c2) +c3cc c4ccccc4 +F C(F)( +n2 c(=O) +S(=O)(=O) c2ccc( +Cc1cn ( +ccc(F) c1 +O [C@@H]3 +c3cccc(F) c3) +CC N) +c2o ccc2) +CCC 4) +C#N )cc3) +Cc1ccc( C(=O)N +OC[C@H]1 O[C@@H]( +c( C( +n c4) +C [n+]1 +F)cc 2)cc1 +[C@H](O) [C@@H]1O +ccc( NC(=O) +CC c1cc( +c3ccc( C(F)(F)F)cc3) +-c2 s +[C@] (O)( +c1n (C) +CS ) +c(F) c2) +C(O) C(O) +CC1 CCCCC1) +n1 c(=O) +C) C +N1 CCOCC1) +[C@@] (O)( +CC1 =O +c1ccc2c(c1) OCO2 +CCN CC2) +Cn2 cn +[C@@]2 (O) +c3 cs +Cn1 c( +c4 c(C) +c2cccc 3) +c2cc (=O) +c2 C1 +c(=O)c( C(=O)O) +Cc2ccccc2) CC1 +/C1 =C\ +n c2ccccc12 +CCCC CC) +c1c2c( cc( +c2c( n1) +c1( C( +Cc1ccc( F)cc1) +n2 )cc( +cccc1 2 +[N+](=O)[O-] )cc2 +c1c( C(N +c2[nH] 1 +C(=O) c2ccccc2 +CO C(=O)N +CCN( CC +Cc1ccc( F)cc1 +n nc(- +C1 CCCN( +C[C@@H]( N) +CCN CC1 +[C@@H]1 C +C2) =O) +[C@]1 (O) +/C(=N/ O) +cs 2)cc1 +c3cccc( OC) +C( =C/ +C(=O) CC +[C@H]( OC(=O) +ccc( NC( +c2n nc( +c1 3) +cccc 2)c1 +C(F)(F)F)cc ( +OCCO 3) +Cc1 s +c1n nc( +c1cc( Cl)cc +c4 3) +[C@@]2 1C +ncn 3) 
+c2ccc( [N+](=O)[O-])cc2) +CCN(C) CC1 +c2ccc( C(=O)N +CC1) =O) +c1cc( NC( +c1cc( Cl) +CC2 )cc( +C Cl) +[nH] c3 +c3ccccc3 )cc +cc( NC(=O) +c2cc( C(=O)N +c1ccc( O)cc1) +[N+](C) (C)C) +C1 CC2 +C( CN +cn 2)cc1 +cc2 C) +CC3 ( +Cc1ccc( N +C1) =O +c1cc( Cl)cc( +c(=O)[nH] 1 +c( OC +o 2)c1 +c1c( -c2n +COc1ccc( N +/C =C1\ +c2cccc3cccc c23) +N c1cccc( +c1c( O +Br )ccc1 +N = +OC)c(OC) c1 +CC(F)(F) F) +CC2 CCCCC2) +cc( C(=O)N +c3c(F) cccc3 +CC( C(=O)N +/N = +NC(=O) c1ccccc1 +c2c[nH] c3ccccc23) +c(N C +s 2)c1 +[C@H]1 CC[C@H]2 +c2 )cc1) +N c1cc +C23 CC4 +c12 cccc +[N+](C) (C) +[C@]2 (O) +c3cc(Cl)cc c3 +C(C)(C)C) cc1 +cc n2) +c1n nc(- +c2cc s +[C@H]1 C +N3 CCOCC3) +OCC N +F)cc 3 +c2ccc( OC +Cc1ccc( -c2ccccc2 +C1 CCN(C(=O) +cc(C) c1 +c2ccccc2) C1 +c3ccc( [N+](=O)[O-] +C(C)C) c1 +Cc1c[nH] c2ccccc12) +c1ccc( Br)cc1) +c4ccc( - +C3 )cc1 +(F) F) +CC3 CC3) +nn n2 +Cc1ccc( S(=O)(=O)N +Cc1ccc( - +N) c1 +c1( N) +nn n1 +ccc( O +cc 3)cc +Cc1ccc( Cl)cc1) +COc1ccc( -n2 +c1ccc( Cl) +c2cc( F) +[nH] c2c1 +CCO C( +n2 c1 +c1ccc( C) +c1ccc( [N+](=O)[O-])cc1 +C1=C( O) +C(F)(F)F) c1) +CC c3ccccc3) +Cc1ccc( -n2 +[C@]4 (C) +c3cc4c( cc3) +[C@@H]2 [C@@H]( +c3n c4c( +[C@H]( C +c2cc c3[nH] +n 5 +NC(=O) CS +nc2 N +c(C) c2) +S1 (=O) +CCCC CCC +C( =N +C1 (C)C +c1 c2cccc +OC)c(OC) c3) +CC(CC( C3) +c( [N+](=O)[O-]) +[C@H]( N +[C@H]( C(=O)O) +c2cccc( [N+](=O)[O-]) +ccc1 O +c(C) n1 +CCC(O) =O) +CC C1( +c1cc( O +COc1ccc( C(=O)N +[C@@H]2 O +c4cc c5c( +CCO C1 +B (O) +C2 C( +N# Cc1c( +CCO c1cc( +c1ccccc1) =O +CC(=O)N [C@@H]( +5)cc 4) +c3n (C) +NC(=O) CO +c3cccc( Cl) +S(=O)( N)=O) +C(=O)O) c1 +c2s ccc2) +c1c( C( +c2ccccc2) n1 +N3 CCN( +c3ccc( C(=O)N +c1 co +C(N CC +# N +OCC N( +=O) CC2) +n2 c3c( +c1(Cl) ccc( +N(CC) CC +c3 F) +COc1ccc( - +CCCC(=O) N +F)cc 2)c1 +c4cc co +c2ccc( OC)c(OC) +c1( C(=O)N +C(C)(C) O) +Cc1cc( N +c2 =O +N1 C +=C( N +c1ccccc1 2 +nc2 cc1 +c1s c( +S(C) (=O)=O +nc( Cl) +cn 4) +OCC CO +ncn 2) +CC( O +c4 s +c(O) cc1 +O=C( CSc1n +CC [C@H](O) +C(=O)O) cc2) +c4cc( Cl)cc +[nH] 2)cc1 +c1ccccc1 Cl) +Cl)c(Cl) c3) +c2 =O)c1 +c6 cccc +[C@H]( CC(C)C) +=O) C) +C(=O)N( CC) +CCCC 4) +c2nc(- c3ccccc3) +C2 C3 +c2cc( Br)cc +(- c3ccccc3) +c12 ccc( +CC [C@@H](C) +nc2 )cc1 +CC1 = +(F)(F) F) +C(F)(F)F)cc c1 +c12c( cc( +C(=O) c1c( +CCC [C@H]2 +C(=O)N[C@@H]( Cc1ccccc1) +CCC =C(C)C) +=[N+]=[N-] ) +=C 4 +n1 c(C) +c1cc( -n2 +Cn2 cc +n2) ccc( +c2s c( +OCO 4 +C [C@H]4 +c2ccc(Cl)cc2 Cl) +OC c3ccccc3) +c2cc( Cl) +Cl)c(Cl) c1) +[C@@]1 (C)CC +/N =C2 +c(- c2n +[C@H]( C(C)C) +c1c2c( ccc1 +c2cn ( +n1 (C) +Cl)cc 3 +c2ccccc2 OC) +c2c(- c3ccccc3) +/C( C#N) +no 2) +S(=O)(=O) c1ccccc1 +nc2 C) +c2s c3c( +=C /C +c3cc(OC)c(OC)c(OC) c3) +cc1 - +s 2 +c4ccc(OC) cc4) +CC(O) ( +Cc1 [nH] +c1( O +C(=O)N1 CC +n( CC(=O)N +n ccc1 +C S(=O)(=O)N +)cc 4) +c4 ncc +[C@] (C)(O) +C) C) +C2) n1 +Cc2ccc( OC)cc2) +c2cccn c2 +CCN (C)C +c1( -n2 +N c1c( +CCN2 C(=O) +C [C@@H]4 +[nH] 2 +[C@H]( NC( +c1ccc( NC( +OCC (N +COc1ccc( Cl)cc1 +CC1 (C)C +c4 c3 +CN2 CC +c4cccc n4) +NC(=O) [C@@H]1 +Cc1ccco 1) +C(=O)N2 CC +c2c( ccc( +cc 5) +cc1 2 +c1cc (=O) +NC(=O) C +CCn1 cc( +c1cccc( Cl)c1) +C2 )ccc1 +C2 CCN(C +c1cc( OC)c(OC)c(OC) +4 )cc2) +c1c( -n2 +c3cccc( C(F)(F)F)c3) +O=C1 c2ccccc2 +C[C@H]( N +( N) +CCCC (C) +c2ccc( Cl) +[N+](=O)[O-] )cc( +CCOCC 2)c1 +CC C(C)(C) +n2 )cc1) +Cn2 c(=O) +c2ncn ( +CC(C) N +c(=O) c1 +[C@]3 4 +C c3c( +)cc c1) +C1 (O) +CCC) =O) +c2ccccc2 )ccc1 +C(=O)N C(=O) +C( OCC) +C /C( +c1cn c( +[C@@H]1 O) +CC3 )cc1 +c1nc(- c2ccccc2) +c1cn 2 +/N =C1\ +CCC [C@@H]( +Nc1n cn +C= CC( +c1c(F) ccc( +c1ccc( C2 +[C@@H]( OC) +Cc1n o +c3) CC2) +OC [C@@H]1 +Cl)cc2 Cl) +C(N) =N) +cc 3)cc2) +[C@H]2 
[C@H]( +cccc 5) +-c2ccc( - +OCC OCC +C3 =O +N2 CCN(C +CO c1cccc2 +S) N +( CO) +c1ccccc1 Cl +c1cc c2ccccc2c1 +n cc(- +ccc(OC) cc1 +nc2 - +CC( C(=O)O) +c2ccc(OC) cc2 +c3cc o +C(N C)=O) +CCCC CCN +CC(C) (O) +c2cc3cccc c3 +=O) o +-c2 c3c( +(C)C) cc1 +OCO 3 +Cc1ccc( O +CC( c2ccccc2) +CC4 )cc3) +n cc2) +CCCCCCCC ) +c1cc2 cc( +(=O)=O) c1 +n2 ccc( +cc3) =O) +c1c( Br) +cc2c( c1) +c1ccccc1) =O) +N(CC Cl) +N( c2ccccc2) +Cc1ccc( -c2n +5 CCCC +O c1cc( +3)cc 2)c1 +Nc1n cc( +c2 C)cc1 +N( Cc2ccccc2) +CC )cc1 +c2c( C#N) +CC1 CCN( +OC)c(OC) cc1 +c1cccc( O +o n2) +ncn c(N +c1cn ccc1 +C1 N(C( +[C@@] (C)(O) +NC(=O) [C@@H]( +c3 c4ccccc4 +c1c( Cl)c( +/C=C/ C( +c2ccn cc2 +[C@H]( OC) +n c2ccccc21 +5 CCOCC +c4 cc(C) +Br) c2) +n c3ccccc3 +cnc2 c1ncn2 +CCCC N( +CC) c1 +cn n2 +=O) c( +c1( C2 +O C(C)(C)C +n(C c3ccccc3) +c(C) c3) +nc( C(=O)N +CC2 )cc1) +c2cc( N3 +c2c(Cl) cccc2) +c2ccc( C#N)cc2) +O 4) +c1cccc 2) +CC C(=O)O +c4ccc(C) cc4) +cn2) CC1 +OCC 3 +(- c2ccccc2) +N1 CCCC +c1cn c(N +c4) CC3) +c [n+]( +Cc1ccc( S(=O)(=O) +c4cccc( Cl) +c3cc s +CC1 =N +c[nH] 1 +c(=O) n(C)c(=O) +cc(- c3ccccc3) +OCC(=O) O) +c1(OC) ccc( +CC C(C)C) +c3cc( Cl)c( +c1- c1ccccc1 +C#N) c1 +c2ccc3ccccc3 c2) +CC(CC( C4) +C) cc( +c1cccc(F) c1) +c3ccc( C#N)cc3) +CN(C) c1ccc( +( c1ccccc1) +c1cc( C(N +c1c( OC)cc( +CC1 CCCO +CCC [C@@H]1 +nn1 2 +Cn2 cc( +(=O) O +[C@]12 C +[C@H]2 O +c1cc( C( +c(OC) c2) +CCC(N 3 +c3ccc(OC) cc3 +C2) C3) +NC(=O) C2 +c2cn ccc2) +C/C=C\ C/C=C\ +c4 o +NC( =N)N +[C@H]( CC) +c2cc( O +C(=O) CS +CCCC C2)cc1 +O=C( COC(=O) +c1ccc( O) +c2c(C) cc(C) +C(=O)N2 CCC( +ncn c3 +c3ccccc3 OC) +[C@@H]4 [C@@]5 +[C@H]2 CC[C@H]( +/N =C\ +[C@@H]2 O) +c3cccc( - +N( C(C)=O) +c3ccccc3 - +c3cc(OC)c(OC) cc3 +c2ccccc2 c1=O +S C(=S) +c1ccccc1) N +CCCN (C) +CCS C) +nc(C) c1 +NC(=O) C) +C(=O)N C(=O)N +c1c(C) ccc( +c2n cccc2) +c(N 4 +C(=O)N1 CCC[C@H]1 +CC(=O)N 3 +CN3 CCN( +c2ccc(O) cc2 +CCC( CC) +n2) n1 +[C@@H](C) O) +NC(=O) /C=C/ +=O) n( +[n+] 3 +C#N )cc2 +F C( +c2cc( Cl)cc(Cl) +N C1=N +c3c( N +c2c1 =O +CCC #N) +c1 O) +C c4ccccc4) +c2cccc3 c2 +N( Cc1ccccc1) +c6cccc c6 +ccc( OC)c1 +O C1=O +nc( C(F)(F)F) +C(=O)N C) +c2 co +c( F)cc1 +c3o ccc3) +O 2)cc1 +c1cccc2 ccccc12 +N=C( N +c1ccccc1 O +N1 CCN(C +n s +nc2 3) +c4 c3) +CO C(C)=O) +CC3 CC( +c3ccc( Cl) +CCCN( CCC) +c1cc( F)c( +[C@]3 (C)CC +c(C) cc( +c1cccc( C(F)(F)F)c1 +c1ccc( CN +C(=O) /C(=C/ +c3ccc(O) cc3 +c1cccc( - +cc n3) +- c1cc +c1c( -c2ccccc2) +S C1 +CO [C@@H]1 +c1c(C) cccc1 +O= S1(=O) +c1cccc( -c2n +c(C) cc(C) +cc 5 +c( CN +c3c(Cl) cccc3 +COc1ccc( OC)c( +C(=O)N [C@@H](C) +c3cc( C(F)(F)F)cc +n1) =O +=C( C#N) +=O) cccc1 +CO [C@H]1 +[C@H]( Cc2ccccc2) +c(=O) n3 +CCCN( C +CC(C) (C)C +c2c(OC) cccc2) +CN(C) S(=O)(=O) +s c(- +c1cccc (C) +Cc2ccc( O)cc2) +c1ccc( O)c(O) +[N+](=O)[O-] )ccc1 +c(=O)[nH] c2=O) +[C@@]3 (C)CC +nc3 2) +CO CCO +c2cc c3n +[C@H]( Cc1ccccc1) +CC [C@@H](O) +c3ccc( Cl)c(Cl)c3) +c2cc(C) ccc2 +c2ccccc2 C) +[C@@]2 1 +-c2 nc(N +[C@@]3 4 +/C =N\ +C =C\ +nc( OC) +CCC( N) +c2ccc(Cl)cc 2)cc1 +c1( S +[nH] n1 +ccccc1 2) +c1 c-2 +CN( CC +CO c1cc2c( +c2cc(OC)c(OC)c(OC) c2) +CCCN2 C(=O) +C2 CC +C(N O)=O) +C(=O)N [C@H]1 +CCCN1 CCN( +c2cccc( O +C2 (C) +CCCO 2) +cc3 C) +[C@@]2 (C)CC +)cc (O) +N( O) +C( C(=O) +c( /C=C/ +c2ccc(Cl)c(Cl) c2) +CC [C@H]4 +N( CCC) +C(=O)N CCCC +c3n cccc3) +n 3)cc2 +4 )cc2 +COc1ccc( C(=O) +cn 2)c1 +c4 5) +CCCN C(=N)N) +CCC [C@H]( +c3ccc([N+](=O)[O-] )cc3) +nc( N2 +c2n nc(- +CC n2 +c1( N2 +c4 )cc3) +C1= CC(=O) +S S +C2 1 +O=C(N /N=C/ +c(- n2 +nc3 c2 +C( N1CCN( +n( Cc2ccccc2) +C(O) =C( +N2CCN( C( +c2ccccc2 c1) +CC(O) CO +c2cc( F)c( +CC(=O) OC +C(=O)N c1ccccc1 +c3cc(OC) ccc3 +Br) c3) 
+4CCCC C4) +CCCC (N +C( C(C)C) +CC C(=O)N1 +C(=O)N /N=C/ +c1ccc(Cl)cc1 Cl +c3ccccc3 C) +c3 c2cccc3) +c1ccc( Cl)c( +4) c3) +o 2 +c3cccc (O) +C1 CCN(C +c(=O) cc( +[C@@]4 (C)CC +Cc1cc( O) +[C@H](C) N +Cc1c( Cl) +c2ccccc2) CC1) +c1(F) ccc( +CN( S(=O)(=O) +cc1) =O) +c1c( cccc1) +c1c( OCC +c3 nc(N) +c2c( F)cc( +c(OC) c3) +c2s ccc2 +CC2 =O +n ccc2 +CCC3( CC2) +Cn2 c( +CCCC(=O) O) +/C(=N\ O) +cccc 2)cc1 +c3c( ccc( +c2c(C) cccc2 +P(O) (O)=O) +CN =C( +C( OC +N 2) +cc c5 +cc (=O) +COc1cc( N +c1s ccc1 +O=C1 N( +/C=C3 \ +C(C)(C) O +CC )cc2) +c1c( F)cc( +C) C(=O) +C3 )cc2) +C2 (C)C) +c2c(=O) n1 +CCN S(=O)(=O) +c3n nn +C(=S) S +C [C@@]12 +c2ccc(- c3ccccc3)cc2) +c2ccc( N(C)C) +C1 N(C(=O) +Cc1cc(C) cc( +c1 oc( +[C@@H](C) CO) +c4 )cc +c( C(N)=O) +N O +/C=C /C=C/ +- n1 +c1ccc( C(F)(F)F)cc1 +c(N (C)C) +C1 (N +C(C) (O) +N3 C(=O) +C c1cccc(C)c1 +c3cc4 ccccc4 +C3 4 +( C(=O)O) +[C@H]1 [C@H]( +[C@H]1 O) +N( S(=O)(=O) +c2cccc(OC) c2) +O=C1 NC(=O) +CC(C) O +C2=C( O) +[C@H]2 CC +C(C)(C)C) cc2) +Cc1 c2c( +CCC [C@@H]2 +C2 CCN(C(=O) +c(OC) c1) +c4cc cs +COc1cc( OC) +NC(=O) c1cc( +c2cccc( NC(=O) +-c2ccc( F)cc2) +S(=O)(=O) N1 +c2cc(- c3ccccc3) +c( CO +c(C) c2 +P(O) (O) +CCN(CC) C(=O) +C( C(F)(F)F) +n c2n( +c2ccc(C) cc2 +n( C(C)C) +CC[C@]4 3C) +C(=O)N (C)C +c4ccco 4) +cn (C) +c2n cc(- +c1n ccc( +c3cn ccc3) +c3ccc(Cl)cc3 Cl) +N c1nc(N +no 1) +S1 (=O)=O +P(O)(=O) O) +n n2)c1 +c2ccc(F)cc2) CC1 +C2 (C)C +nc(S C +CCN 4 +CCCO 3) +c 7 +CC[C@]4(C) [C@H]3CC +c2ccc( S(=O)(=O)N +c2ccc( Br)cc2 +[N+] (C)( +c4cc5 c( +C(=O)N[C@@H]( CC(C)C) +c1ccc2c(c1) OCO2) +N C(=O)N1 +n(C) c1 +Cc1ccc( C) +Cc1 nc(N +c2cc(C) cc(C) +c1( OC)cc( +c2cc o +C( CCC) +COC(=O) c1ccc( +C) C1 +N1 2 +c3cccc c13) +[C@]3 (O) +c3s ccc3) +O= c1cc( +NC(=O)[C@H]( CC(C)C) +C4CCCC C4) +CCN1 CCC( +C2 CC( +c4cccc 5 +c2n (C +NS(=O)(=O) c1ccc( +CCCN =C(N)N) +n1 c(N) +cc4 c( +c2 c3 +N S(=O)( +OCCO 4) +N1( C( +nc(N3 CCOCC3) +- c1cc( +c4ccc5c( c4) +N )cc1 +c1ccccc1 F) +c4ccncc 4) +N =C +c3c( c1) +/N=C(\ C) +c4 n( +c3cc(C) ccc3 +c2o c(- +[C@@H]( C(=O)O) +CCC(O) ( +c3 Cl) +C12CC3 CC(CC(C3) +C c1ccc2c( +c(=O) c2c1 +c3ccc( C(=O)O) +/C1 =C/ +C(Cl) (Cl) +c2cc(OC) ccc2 +OC [C@@H]2 +C( C(O)=O) +cc(C) cc1 +c12 n( +3 )cc1) +Cn1 c2c( +ccc( Cl)cc1 +c3c( n2) +F) c1 +[C@@]3 (O) +O=C( CN1 +c1( -c2cc( +C3 CCC3) +c1( Cl)cc( +N( C(=O)N +c3cc( Br)cc +CCC1 2 +cc n1) +Cc1cc cnc1) +c3cccc4 cccc +[C@H]2 C +[C@H]2 [C@@H]( +CCC( C(=O)N +c3ccccc3) CC2)cc1 +CCCC CC3) +n(C)c(=O) n(C) +[C@H](O) [C@@H](O)[C@H](O) +N(CCCl) CC +Cc1ccco 1 +CCN2 CCOCC2) +[C@H]( CO +OC( CO) +c1- 2 +CC(C)(C) N +=C /C=C/ +C(=O) OCC(=O) +c4cccc (F) +c1ccc( S(=O)(=O)N +4CCCC 4) +/C=N/ NC( +c1cn (C) +c1c(C) cc( +F) c(- +C(C)(C) [C@@H]5 +c1ccc( S +C1= S +Cc1cc cs1) +c3n [nH] +CCN(C) CC1) +[nH] 2)c1 +c(N C(=O)N +c(Cl) c2) +=O) cc3) +c3cccc (N +c(S C) +c3 2)cc1 +c(F) c3) +c2 - +c3ncc cn3) +c3cc( N +-c2ccc( Cl)cc2) +c4 c(Cl) +c( OCC) +c3ccc( S(=O)(=O)N +nc(- c2ccccc2) +CCOCC 3)cc2) +c(=O) n(C +C(C)C) cc2) +c2n o +c4 c5c( +COc1ccccc1 N1CCN( +n 5) +CC COc1ccc( +co 1 +c1cccc( O)c1 +(N 3 +c1cc( OCC +N1 ) +=O) ( += N2 +nc( NC( +c4ccc( N +c1cccc( -n2 +OC 4 +c3c( N) +O [C@H](C) +n c2)c1 +c4ccc( C(F)(F)F)cc +c2cc( OCC +c2nc( O) +Cc1ccc( OC) +N1 (C +Cc1cc cnc1 +c4cccn c4) +c3 n2) +C( OC(=O) +c5 cn +O=C( CS +C1( c2ccccc2) +C(F)(F)F) c( +[C@H]1 CO +c4 ncn +C(C#N) =C(N) +C1 CN(C(=O) +C1 c2c( +c1( Br) +n[nH] 1 +c3c( c2) +c2 )cc +C(=O) C(C) +c4c( F) +c1ccc( C( +/C =C1/ +c(OC) c(OC) +[C@H]( C(=O)N +c1c(O) cc( +c1ccc( CN2 +[C@]2 (C)CC +Cc1n n(C) +CC(=O) OC) +c1c( C2 +C3 CCCCC3 +c1c( C#N) +c1(C) ccc( +C2=O) ccc1 +C) ( +[C@@H](O) [C@H]1O +c2ccccc2 O) +c2cc 
nc(N +c2c(F) cccc2) +P(=O) (O +)cc( OC)c1 +c3cc(O) ccc3 +C1 N +Br) c1) +[C@H]( Cc1ccccc1)NC(=O) +[nH] c12 +3)cc2 1 +cn c12 +c1cc( F)cc +c( C(C)C) +c4cc( F)cc +N( S( +CCn1 c(=O) +C(=O)N2 C +C( CS +s c1) +c2cn (C) +c1cc( C2 +c(N) n1 +C[C@H]( NC( +C [S+]([O-]) +c2cn n3 +CC[C@H]( NC(=O) +C#N )cc1) +c2ccc(- c3n +P (O +CCN(CC) CC) +c3cccc(OC) c3) +CCCC CCC) +OCCO 2) +CC c2c( +c2) nc1 +ccc( C)c1 +c3 - +c2ccc(Cl)cc2) CC1 +nc3 C) +C23CC4 CC(CC(C4) +c3ccc( C(F)(F)F)cc +C[C@@H]( NC(=O) +c2cc( S(=O)(=O)N +cc( Cl) +n1 ccc( +[C@]1 (C)CC +ncn c32) +c4ccc(Cl)cc 4 +c3cccc c3c(=O) +3)cc2) CC1 +N S( +c(C) cc2) +CC [C@@]4 +ccc1) =O +Cc1ccc( S(=O)(=O)N2 +cc nc1 +CC1 CC1 +c1ccc( Cl)c(Cl)c1 +c1c( OC)c(OC) +CC( CO +n( CC +c4 n3) +c2ccccc2 C1 +nc3 n2 +c2)cc1 OC +c1( Cl)c( +CO 2) +Cc2ccccc2) c1 +C=C 3 +C4 ( +N [C@H]1 +c1n cc(- +[N+]([O-])=O) cc2) +c4 nn +o c12 +/ C(O) +CN( Cc1ccccc1) +C3) =O) +[N+]([O-]) =O)cc1 +CCCCCCCC CC +-c2 c[nH] +C(=O) c2c( +c1( CN2 +c3 n2 +C1=C(C) N +C#N )cc( +C(=O)N [C@@H]1 +c1 C) +O=C( N1 +N( C(N +COc1cc c2[nH] +c2n ccc( +O) =O +cn cc1 +c3c(F)cccc3 F) +CC(C)C) =O) +[nH] cc2 +/C( =C(\ +c(=O) n(- +C[C@@H]( CO) +CCN3 CCOCC3) +c1cc( OC)c( +N1 (C) +c4ccc(F)cc 4 +[C@@H]2 C +n cc3) +O=C(N c1cccc( +cccc1) =O +c2ccc( S(=O)(=O)N3 +N# Cc1cccc( +c1c( F)c( +c1( CN +c3ccc(C) cc3 +c( [N+](=O)[O-])c1 +c( Cl)cc +c2cccc(C) c2) +c(Br) c1 +CC(=O) N1CCN( +c1c( C(F)(F)F) +[N+] 2 +CCN( S(=O)(=O) +C) cc2 +/C( =C(/ +c2c(Cl)cccc2 Cl) +[C@@]1 3 +c(- c4ccccc4) +nc1 S +O [C@H]3 +C(=O)O) cc1) +c2c(C) cccc2) +c1c( O)c( +n2) =O) +OCO 5) +O =C +C(N (C)C) +c1ccc(- c2ccccc2)cc1 +CC2 =O) +C( CNC(=O) +c5 [nH] +CC(=O)N (C) +cs 2)c1 +c2ncn c(N +c2ccc(Cl)cc 2)c1 +Cc1n c2c( +CC( CO) +c(S C +c(C) c1) +[n+] (C +C) CC1 +c3cccc(C) c3) +c3cn (C) +c1cn n2 +[nH] c2) +CC [C@]2(C) +c1cccc( [N+](=O)[O-])c1 +C(N)=O) c(N +c2ccc(- n3 +[C@@H]4 CC +C(=O)N S(=O)(=O) +C(=O)N C2 +c( F)cc2 +S C( +c1ccccc1 F +c1cc( Br) +c1cc n +[C@@H]1 [C@@H]( +c(OC) cc( +Cc2ccc( C)cc2) +c3c( Cl)cc( +c1cc c2nc( +[nH] c1) +c2cc( Br) +[C@@H]2 C1 +/C(C) =N/ +c3ccc( NC(=O) +ncc 4 +P(=O)( OCC) +c1n [nH] +CCN1 C +ccc( F)cc1 +OC)c(OC) c1) +N( S(=O)( +c1cc( O)c(O) +C1 C2( +CC(C) C[C@H](NC(=O) +c1c(O) ccc( +(O) =O +S2 (=O)=O +N( C(C)C) +C2 CCC2) +ncn c2 +c3ccccc3 C2=O) +C4 )cc3) +[C@@H]( CC(C)C) +OC(C)(C)C) =O) +[N+]([O-]) =O +C= CC(=O) +C) O +c3 c1 +c1( -c2ccccc2) +c3 O) +cn c2) +o c2 +n4 cn +N c1nc(- +CC( NC( +cn n1 +C(F)(F) C(F)(F) +c23) c1 +CCCC 2)cc1 +c3cc( C(=O)N +CC(=O) NC( +c1cc( Br)ccc1 +C(=O)N1 CCN(C(=O) +C(C)=O) cc1 +c2cc3c(cc2) OCO3) +nc(S C) +[C@@H]( CC) +OC [C@H]2 +c1ccc( C(N +C(=O) [C@H]( +C =N +c23) CC1 +[N+]([O-]) =O)c1 +3)cc c21 +)ccc1 O +[C@@H]1 (O) +/N=C2 \ +CC( c1ccccc1) +no 2)cc1 +n n(C +[C@H]1 (O) diff --git a/Pilot1/ST1/VocabFiles_spe/vocab_spe.txt b/Pilot1/ST1/VocabFiles_spe/vocab_spe.txt new file mode 100644 index 00000000..f7a74519 --- /dev/null +++ b/Pilot1/ST1/VocabFiles_spe/vocab_spe.txt @@ -0,0 +1,3132 @@ +[PAD] +[unused1] +[unused2] +[unused3] +[unused4] +[unused5] +[unused6] +[unused7] +[unused8] +[unused9] +[unused10] +[UNK] +[CLS] +[SEP] +[MASK] +[c-] +[SeH] +[N] +[C@@] +[Te] +[OH+] +n +[AsH] +[B] +b +[S@@] +o +) +[NH+] +[SH] +O +I +[C@] +- +[As+] +[Cl+2] +[P+] +[o+] +[C] +[C@H] +[CH2] +\ +P +[O-] +[NH-] +[S@@+] +[te] +[s+] +s +[B-] +B +F += +[te+] +[H] +[C@@H] +[Na] +[Si] +[CH2-] +[S@+] +C +[se+] +[cH-] +6 +N +[IH2] +[As] +[Si@] +[BH3-] +[Se] +Br +[C+] +[I+3] +[b-] +[P@+] +[SH2] +[I+2] +%11 +[Ag-3] +[O] +9 +c +[N-] +[BH-] +4 +[N@+] +[SiH] +[Cl+3] +# +( +[O+] +[S-] +[Br+2] +[nH] +[N+] +[n-] +3 +[Se+] +[P@@] +[Zn] +2 +[NH2+] +%10 +[SiH2] 
+[nH+] +[Si@@] +[P@@+] +/ +1 +[c+] +[S@] +[S+] +[SH+] +[B@@-] +8 +[B@-] +[C-] +7 +[P@] +[se] +S +[n+] +[PH] +[I+] +5 +p +[BH2-] +[N@@+] +[CH] +Cl +cc +CC +O) +c1 +c( +C( +=O) +c2 +C) +cccc +c3 +C(=O) +ccc( +)cc +(C) +(=O) +2) +cc( +C1 +F) +N( +CCCC +c1ccc( +3) +CO +cn +c1cccc +c2cccc +c4 +CN +C(=O)N +OC) +)cc1 +c2) +C2 +n1 +nc( +C(C) +c2ccc( +N) +[C@H]( +[C@@H]( +c2c( +=C( +CC1 +c1) +Cl) +c3cccc +NC(=O) +cc1 +OCC +c1c( +c1cc +c1cc( +=C +S(=O) +CCN( +(F) +c3) +N1 +n2 +=O +c3ccc( +S(=O)(=O) +c3c( +CC(=O) +CC( +/C +c1ccccc1 +C(F) +F)cc +c2cc +c2n +CCC +N2 +4) +c2ccccc2) +OC +Cl)cc +O=C( +C(F)(F) +c1n +c(=O) +c2ccccc2 +(C)C) +c2cc( +C(N +c1( +[C@H]1 +=C/ +=N +n( +[C@@H]1 +C3 +CC) +CCO +CCN +CC2) +1) +C# +c12 +CC2 +CN( +c1cccc( +c(- +CC(C) +c3ccccc3 +c(C) +cc2) +NC( +cc2 +(O) +c1ccccc1) +[C@H]2 +Cc1ccc( +c(OC) +C(=O)O) +c3ccccc3) +[C@@H]2 +n2) +nc(- +C(C)C) +c4cccc +)cc1) +O= +/C=C/ +c3cc +C2) +CCC( +c5 +3)cc +COc1ccc( +CN1 +[O-]) +CO) +C(F)(F)F) +cc1) +[C@H](O) +nc2 +c(N +[C@@H](O) +C(O) +c3cc( +CC(=O)N +cs +CC1) +/C( +CC3) +[N+](=O) +CS +c4ccccc4 +c1cc2 +c3n +c(O) +2)cc1 +CCCN +nc(N +CCOCC +cc3) +(C)C +(=O)=O) +n3 +C1) +nc1 +S) +C#N) +ccc1 +/N +)cc( +C(=O)O +[C@H]3 +CCCCCCCC +(N +c1cn +C3) +Br) +)ccc1 +c2c1 +O=C(N +cccc1 +[C@@H]3 +c2cccc( +C(C)(C)C) +c1ccc2c( +c4ccc( +CN2 +C= +-c2ccc( +-c2 +C(C)=O) +c(Cl) +COC(=O) +)cc2) +C[C@H]( +n(C) +C(N) +=C\ +n1) +C(C)(C) +cc3 +co +S( +OCC) +CCN1 +c[nH] +C(= +ncn +Cn1 +C(F)(F)F)cc +OC(=O) +[C@H](C) +([O-]) +Cc2ccc( +[C@@H](C) +c4c( +OC)c(OC) +C(=O)N1 +)cc2 +CNC(=O) +nc(C) +2)c1 +=O)cc1 +S(=O)(=O)N +CC3 +C[C@@H]( +C(=O)N( +Cl)cc1 +nn +Cc1 +O=C1 +C(=O)N2 +S(=O)( +Cc2ccccc2) +OCO +C4 +CN(C) +C1=O +c4) +=N/ +Cc1cc +c2cn +5) +cccc2 +cc(- +)cc3) +[N+](=O)[O-] +C1( +c1ccc2c(c1) +F)cc1 +Cc1cc( +c1cccc2 +c1cc2c( +n3) +COc1cc( +CCC) +c2cc3 +cc(C) +C(O)=O) +Cc1ccccc1) +[C@]1 +nc3 +-c2n +c2ccc(Cl)cc +[N+](=O)[O-]) +OC( +CCN(C(=O) +CC(C)(C) +[C@@]2 +CCC1 +N3 +=O)c1 +cccc2) +[N+]([O-]) +=C2 +C#N +c(F) +[C@]2 +n(- +s1 +O1 +CCC2 +o1 +Cc1ccccc1 +ccc2 +(C)CC +c(N) +[C@@]1 +CC(O) +CCOC(=O) +NC(=O)N +Cc1n +c4ccccc4) +cccc3) +c1c2c( +cn1 +F)cc2) +Cc1c( +CC(C)C) +nc(N) +C[C@H]1 +O=C(O) +N=C( +c3cccc( +C1CCCC +=C1 +Cn2 +=S) +c2ccc3c( +CC(N +-n2 +CCN(C +c3ccc(Cl)cc +c2)cc1 +Cl)c( +c(=O)[nH] +c3cn +C(N)=O) +c3ccc(F)cc +P(=O) +(C +N(C) +n2)cc1 +c1c(C) +CCCN( +c2[nH] +CCCCC2) +c2c(C) +CC[C@H]( +3)cc2) +c2s +O[C@H]( +C[C@@H]1 +OC)c1 +C)cc1 +C2=O) +c4cc +CCCO +c2)c1 +NS(=O)(=O) +Cl)cc( +c12c( +=N\ +c23) +c1nc( +/N=C/ +C)=O) +N# +ccccc1 +3)cc1 +N2CCN( +N1CCN( +CC(=O)O) +=C(\ +OCC(=O) +c1=O +(C)( +CC4) +CC2)cc1 +CCN(C) +c1- +ncc +c2ccc(- +[C@]3 +S(C) +O( +Cc1cccc( +CCNC(=O) +N(C(=O) +c21 +c2ccc(Cl)cc2) +c1s +ccc3 +CN3 +CCCC) +ccc2) +F)cc1) +cn2 +CC4 +C(=O)N[C@@H]( +c4cc( +C4) +Cl)cc1) +[N+]([O-])=O) +CCO) +[C@H]4 +c(S +CCS +CCCN1 +Cl)c1 +c2o +[C@@H]4 +C(=O)OC) +c2nc( +c1ccc(Cl)cc1 +CCCC1 +nn1 +c2ccc3c(c2) +C[C@H]2 +no +c2ccccc21 +[C@@]3 +C2( +c5cccc +CCn1 +C)cc2) +CCN2 +c1ccc(N +3)cc2 +N)N +cccc3 +cccc1) +CCCC2) +c2cc(- +c(C(=O)N +OC)=O) +c2cc3c( +c3ccc(F)cc3) +C[C@@H]2 +C2=O +/C=C\ +n(C +S(C)(=O)=O) +cn2) +cnc1 +Cl)c(Cl) +c2ccc(OC) +N1( +c2cccn +c2c3c( +ccc1) +O)cc1 +(- +c1ccc(- +c2ccccc2)cc1 +cnc3 +c3ccc(Cl)cc3) +c(C)c1 +n[nH] +)cc3 +OC)cc( +CCOCC2) +Nc1n +c2=O) +=C(C) +COc1cccc( +OC(C)=O) +CCC(N +c2ccc(F)cc2) +c2ccn +F)c( +c2nc(- +P(=O)(O) +N(C( +COc1ccccc1 +NC +CCCCC3) +CCN(CC) +ncc1 +OCCO +c2)=O) +s2) +O[C@@H]( +n4 +C(=S) +/C=N/ +CCC(=O)N +O)c( +CCOCC3) +N(C)C) +C12 +C2)cc1 +c2n1 +CCCC3) +CC= +nc(S +CN(C(=O) +N(C +n2)c1 +c1ccc(OC) +C=C +=O)=O) +[nH]1 +/C(=C/ +c3ccc(OC) +CC[C@@H]( 
+/C(=C\ +cc4 +CNC( +Oc1ccc( +c1[nH] +C1=O) +c3ccc(C) +c2cc(C) +CCC3 +c5ccccc5 +c2cc1 +CC2)c1 +O)c(O) +CCCC2 +c1cc(C) +c1ccc(O +C2=N +cc2)cc1 +F)cc2 +c2n( +C1CCN( +c3cc4 +c2ccccc12 +cc4) +N)=O) +[C@]12 +NC(=O)[C@H]( +c1(C) +C1CN( +Cc3ccccc3) +c1nc(- +OC)c(OC)c(OC) +[C@]4 +Cl)cc2) +N2C(=O) +C3=O) +C)c1 +c2cccc3 +c2c(- +Nc1ccc( +CCCCC1 +C(O +S(=O)(N +CC(=O)O +ccc3) +CCC(C) +CS(=O)(=O) +c1c(O) +c3[nH] +OCC)=O) +OC)c( +C(=O)OC +[C@@H]1O +c2ncn +c6 +s1) +C1=C( +CCCN2 +C[C@@H](O) +CCC2) +(=O)=O +oc( +(O +N(CC) +c(=O)n( +nn2 +/C=C +c2c(=O) +C[C@H](O) +-c2cn +c2c(cccc2) +c3c(C) +c1O +-c2ccccc2) +-c1ccc( +NC(=S) +C(C)C +CCOCC1 +=C(/ +C(=O)NC +c3c(cccc3) +(C(F)(F)F) +[n+]1 +CC1(C) +cc(O) +-c2cc( +N4 +O2) +C(OC)=O) +CCCC(=O) +-2 +cn3) +c3ccc(- +=N) +CC# +c2)CC1 +[C@@H](O)[C@H](O) +C(=O)N[C@H]( +c(NC(=O) +c1cc2c(cc1 +c2nc(N +Cc1cn +C2CC2) +O2 +C(=O)N3 +N1C(=O) +[C@@H](CO) +c1ccc(Cl)cc1) +c3s +/C(=N\ +C=C( +S(=O)(=O)N2 +CCOc1ccc( +/C=C2 +(F)(F) +[C@H]1O +CC[C@H]2 +cs1 +c2ccc(OC)cc2) +c3o +C1C( +nc2) +c2ccco +cs2) +c2c(O) +F)cc( +[C@]2(C) +[C@@]4 +[C@@]2(C) +O[C@H](CO) +c1c(Cl) +c2cc(Cl)cc +Cl)cc2 +CCCC( +ccc(- +C1N( +c1C +c2cccc(Cl) +Cc1cc(C) +C1= +cn1) +CC(=O)N1 +n1c( +c1ccc(F)cc1 +cccc4) +c12) +S(N) +CN1CCN( +CCC(O) +C3CC3) +c3ccc4c( +c1c(OC) +CCC(=O)O) +C(=O)OCC) +Br)cc +ncc( +c1o +nn2) +OCCCC +-c1ccccc1 +C2=C( +C2CCCCC2) +C1(=O) +c2ccc(C)cc2) +c1cc2c(cc1) +/C=C(\ +nc2c1 +c3ccn +c2ccc(O) +C1=N +S1 +C)cc1) +(CC) +C(=O)C( +CCN3 +c2ccc(Cl)cc2 +cnc2 +Cc2ccccc2 +C[C@H]3 +ncc2 +CSc1n +COC( +c3ccc(OC)cc3) +c3c2 +c2c(Cl) +CCC3) +[C@@](C)( +Br)cc1 +C(F)(F)F +CCc1ccc( +[C@@H](N) +C(C)(C)C +c(N3 +c(=O)c( +C2CCN( +c3ccccc23) +C(=O)OCC +S(N)(=O)=O) +sc1 +o2) +C(OCC)=O) +/C(C) +c1nc(N +NC(N +CN1C(=O) +S(N +CN) +Cl)ccc1 +C(c2ccccc2) +COc1cc +c2ccncc2) +OCO2 +OCO3) +c(OC)c1 +c(O +c34) +c4n +OC)c1OC +4)cc +I) +c1)=O +n4) +C[C@@H]3 +c4cn +c5c( +N)N) +CCCCC1) +OCC(=O)N +-c2ccccc2 +[C@@H](NC(=O) +N2CCOCC2) +N1CCC( +C(CC) +CC2)=O) +n2c( +CCOCC1) +C#C +[nH]c(=O) +C=C1 +C(=O)N(C) +CN(C +O)cc1) +[C@](C)( +O)c1 +cc2c( +F)c(F) +[C@@]12 +cccc( +c2ccc(C) +c2c[nH] +s3) +C(=O)N) +N(CC +nc12 +NCC +c2ncc +CCN(C)C) +n(C)c(=O) +c3ccccc32) +c1=O) +c3ncn +OC1 +c3ccc(C)cc3) +P(O) +ccc(OC) +/C(=N/ +c3cc(C) +c(C#N) +c3ccc(O) +oc(- +C(F)(F)F)cc1 +C2=O)cc1 +c1ccc(F)cc1) +(CC +C(=O)NCC +c2ccco2) +Oc2ccc( +C(=N)N +=O)cc2) +c3cc4c( +C(C)( +c2C) +CCCCCCCCCCCC +Br)cc2) +=N/N +C(c1ccccc1) +Cc2c( +O[C@H]1 +[C@]3(C) +4)cc3) +C1CC +C=O) +[N+](C) +CC[C@@H]1 +c32) +c1(N +C(N1 +c4ccc(F)cc +c2)n1 +C(=O)( +C2= +O[C@@H]1 +c1ccc(NC(=O) +Cc2cc( +c(Br) +nc3) +c3ccc4c(c3) +c2nc3c( +5)cc +CC[C@H]1 +-c1n +c2ncc( +c(Cl)c1 +c2ccccn2) +c1c(F) +C1CCCCC1 +=O)ccc1 +NC(C)=O) +c5ccc( +n1cn +[n+]2 +nc1) +O3) +c2cccs2) +c1cc(- +[n+]( +c3cccn +C(F)(F)F)cc2) +CC1( +[C@]2( +c3cc(Cl)cc +O=[N+]([O-]) +CC(=O)N( +c1c[nH] +ccc4 +c2ccccc2)c1 +cs1) +c2ccc(O +CN(C)C) +[nH]2) +cc(OC) +c3ccco +c3n( +nc4 +C1CC1 +CC(O)=O) +O1) +c1cc(OC) +3C) +nc(N3 +C(N2 +c1ccco +2)ccc1 +/N=C(\ +CCC2( +nc2c( +CC(CC( +CC[C@@H]2 +c3ccccc3)cc2) +OC3 +[C@]( +CCN(C( +CCCC1) +C5 +CC[C@H]3 +c4ccc(Cl)cc +c(C(N +c2ccccc2)CC1 +OCC( +C(=O)/C=C/ +[C@@]( +4CCCC +cs3) +n1cc( +N=C(N) +/C=C2\ +/C1 +c2c(F) +=C1\ +c3c(=O) +c3c4c( +)ccc2 +c1c(N +Cl)c3) +nc(O) +c2nc3ccccc3 +C(C(=O)N +Cn3 +C(F)(F)F)c1 +c3ccncc3) +C(=C) +c1cccnc1 +c1cccc2c1 +[N+](=O)[O-])cc1 +c1c(- +CC(=O)N2 +O)=O) +c2ccc(N +O(C) +CC)=O) +ccc(C) +c3cccc4 +[C@]1(C) +CC=C) +CCCCC3 +n2cn +S(=O)(C) +C(C)=O +O=S(=O)( +c(=O)o +c2nc(C) +N[C@@H]( +CCc1n +CC3)cc +c2)C1 +OC2 +=C) +c2cccc(F) +CCC4 +nn( +[nH]c( +c2c(c1) +n12 +[C@H](NC(=O) +c1c(=O) +c1cc(N +c3ccco3) 
+C(C( +P(=O)(O)O) +CC(C)( +c2cc3c(cc2) +c3ccccn3) +[C@@]1(C) +C[C@H](NC(=O) +ccc(F) +CCCCC2 +CCOCC4) +CCN1C(=O) +CCC1) +SC) +c(C)c( +C1CCCCC1) +c5) +COc1c( +c2c(OC) +c1cccc(F) +C1CC( +OC(=O)C) +[C@@]2( +c4cccc( +C3CCCCC3) +nnc1 +c1ccc(OC)cc1) +(O)( +cccc4 +c2c(cc( +sc( +no1 +CSc2n +[C@H](CO) +COc1cc2 +c3ncc +c2ccccc12) +C[C@@H](C) +c(F)c1 +C/C=C\ +CCS(=O)(=O) +CCOC +CN(C)C +c1cn( +COC) +=CC(=O) +[C@@H]12 +CC2( +(C)c1 +CC[C@]4 +nc(NC(=O) +n1( +C(=O)Nc2ccc( +c3=O) +CCCNC(=O) +c1(=O) +Cc1c[nH] +c1n( +c1ncc( +c2cc(OC) +cnc1) +CCCCN +CCN(C)CC +c(-c3ccc( +[C@]1( +ccc(N +o1) +N1CC +c(-c2ccc( +CCCCCC +[C@@H]5 +6) +F)cc(F) +c1(OC) +c1cc(O) +C(c1ccc( +NC(=S)N +c3)=O) +[C@@]3(C) +c(OCC +C(=O)Nc1ccc( +OCO4) +OC(C)(C)C) +F)ccc1 +c2cccnc2) +=C3 +O[C@@H]2 +c2)ccc1 +Cc3ccccc3 +c3ccc(Cl)cc3 +c1cs +c-2 +C(=O)O)cc1 +c2ccccc2c1 +c23)cc1 +C2)c1 +OC)cc(OC) +-c2nc( +n2)CC1 +c2ccccc2n1 +[C@@]1( +ncc1) +c3cc(- +c1ccc(OC)cc1 +C3( +4)CC3) +CCCN3 +c1)=O) +Cn1cc( +nn1) +c1ncc +FC(F)(F) +C#N)cc +Nc1ccccc1 +C1(C) +P(=O)( +c3c(Cl) +[C@H]12 +C(N)=N +c(-c3ccccc3) +[nH]3) +c2cccc(- +c1(- +c2ccccc2)=O) +C(NC(=O) +/C=C(\C) +c(=O)n1 +c3cccnc3) +c2cccc(C(F)(F)F) +C(NC( +OC(C)(C) +C(=O)NC( +C(F)(F)F)cc3) +CCC(=O) +COc2ccc( +n(-c2ccccc2) +OCc1ccccc1) +C(N( +CC[C@@H]3 +3)c1 +)cc(OC)c1OC +Cc2n +[N+]( +P( +c3nc( +CCCn1 +C(F)F) +CN2C(=O) +S(=O)(=O)N3 +c(O)c1 +sc2 +[C@H](N) +3c( +c1(Cl) +[C@H](O)[C@H](O) +=O)ccc( +ncc3 +cccn +CCCC3 +/C=C/C(=O) +c3cccs3) +c2ccccc2Cl) +c1nc2c( +CN2CCN( +c(-c2ccccc2) +CC(F)(F) +C(=S)N +c3c(F) +N)ncn +Clc1ccc( +)cc2)cc1 +oc(=O) +c5ccccc5) +O[C@H]2 +c1cccc(Cl)c1 +[N+](=O)[O-])c1 +N2CCC( +C1C2 +CCC[C@H]1 +S2 +[C@H]1CC +CN(C( +c3ccc(Br +C(=N)N) +cn( +Br)cc1) +Cl)c1) +N1CCOCC1 +Cn1cn +C(=O)C) +C(N[C@H]( +c1cc(Cl)ccc1 +(O)=O) +c1ccc(OCC +nc21 +COc1ccc2c(c1) +c2ccc3ccccc3 +CN(C)C(=O) +C4CC4) +CC1)=O +c3ccc(O +c3C) +c1(F) +O=S( +C(CCCC +CC2)ccc1 +[C@@H](NC( +c3ccccc3)CC2) +c2ccc(N3 +O=C(Nc1ccc( +cc3c( +C)=O +CC1=C( +c2c1) +n3c( +(=O)o +=O)C +c2ccc(O)cc2) +cc(N +O=C(/C=C/ +C(=O)NO) +c2cc(O) +c(NC( +CCc1ccccc1) +C5) +C[C@H](N) +[S+]([O-]) +c2=O)cc1 +=O)CC1 +OC)cc2) +C=C(C) +c2cs +/C=C(/ +c3nc(- +c2cc(F)cc +c3)cc2) +C2CCCC2) +o3) +C(O)( +Cc1ccc(NC(=O) +C23 +S(=O)(=O)c1ccc( +C1=C(C) +C(N[C@@H]( +C(F)( +o2)cc1 +c2ccc3c(c2)OCO3) +n2C) +c2cccc(Cl)c2) +n1C +c2ncccc2 +[C@@H]1CC +[nH]1) +n2)ccc1 +c3cccnc3 +=S +Br)cc( +c2ccccc2F) +c2ccc(F)cc2 +c2ccc(Br)cc2) +-c1c( +Cc2ccc(- +c2ccc(OCC +/C=N/N +C(C)C)=O) +C(F)(F)F)cc1) +P(=O)(O)O +c1ccncc1 +c(O)c( +#N) +CCNC( +3)cc( +nn3 +c- +CC2)C1 +c3cc(OC) +Cn1c(=O) +CC2)CC1 +C[C@H](C) +C1CC1) +OC[C@H]1 +=[N+] +(C)(O) +[C@H]5 +1C +C#N)cc1 +c1(O) +c1c2c(ccc1) +[n+]([O-]) +[N+](=O)[O-])cc2) +nn3) +/c( +CN4 +c4ccc(F)cc4) +Fc1ccc( +s2)cc1 +Cc1nc( +c(N2 +C[C@]12 +OCC2 +c1ccc2[nH] +O)cc2) +[C@H]2O) +c2ccc(NC(=O) +c1)N +c4[nH] +=O)cc( +c1nc(N) +CCOC) +C=C2 +3)cc2)cc1 +N(C)C(=O) +c3c(O) +c1ccccn1 +(=O)N +nc2n1 +c1cc(-c2ccc( +c1cc2cccc +C(C +Br)cc2 +c1c(-c2ccc( +C(\ +O=C1N +c2cc3c(cc2 +C(Cl) +c1nc(C) +C2CCCC +CC5) +c3ccc(O)cc3) +c3c(- +c1N +c1ccc(-c2ccc( +c3c(cc( +CCc1ccccc1 +/C=N/NC(=O) +n(-c2ccc( +c3ccc(F)cc3 +ccc(Cl) +COc1cc2c(cc1 +C(F)(F)F)cc(C(F)(F)F) +C1O +c2cccc(C) +c1cc(NC(=O) +Cc3ccc( +C(C#N) +C(=O)OC(C)(C)C) +C(=O)c1ccccc1 +CC(C)(C)O +c3cccc(F) +CCCCCCCCCCCCCCCC +c2F) +CC=C(C)C) +Oc1c( +C(OC) +CCc2ccccc2) +c2)cc( +=O)cc2 +c4ccccc43) +Br)c1 +(F)F +C(N)=O +n1c2c( +=C(O) +nnc2 +c4ccccc34) +/C=C/C +Cc2ccc(F)cc2) +c1ccccn1) +O=c1[nH] +n(-c3ccccc3) +C(C(N +[C@H]3CC +O[C@H](CO)[C@@H](O) +CCCCC) +-c2cc +c1ccc(C)cc1) +ncn2 +c(=O)n2 +c2cccc(OC) +nc1- +c2cc3cccc +C4CCCC +=O)cc1) +c(C(F)(F)F) 
+OC(C)C) +c3c2) +oc1 +c2ccccc21) +c3)cc +c2c3ccccc3 +C(F)(F)F)cc2 +c2n(C) +2)CC1 +sc2c1 +CCC3( +c1ncn2 +)cc1)=O +c4c(cccc4) +/C(C)=C/ +COc1ccc(NC(=O) +[C@H]1( +[C@@]5 +Cl)cc(Cl) +=N1 +c3)cc2 +O=S(=O)(N +C1CCCC1 +c1ccccc1- +n2cc( +c1(-c2ccc( +SCC(=O)N +[N+](=O)[O-])cc +N#Cc1ccc( +c12c(cccc1) +OCc2ccccc2) +C(N3 +C1C +c2Cl) +c1ccc(-c2n +CCCN(C(=O) +c1ccc(-n2 +CN2CCC( +c1c(NC(=O) +CNC +N(C)C +c3nc4ccccc4 +c(=O)[nH]c1=O +[C@H](O)[C@@H](O) +=O)C1 +c3c[nH] +C(=O)C1 +c3cccc(Cl)c3) +-c2o +N[C@H]( +OCc1ccccc1 +OCC1 +n1c(- +c3cc(F)cc +-c2cs +c1ccc(O)cc1 +O[C@H](CO)[C@@H](O)[C@H](O) +C#N)cc2) +)cc(C) +c4ccc(Cl)cc4) +[C@@]23 +ccc(O) +Cc1ccc2c(c1) +Cc1ccco +CC(NC(=O) +Cc2ccc(Cl)cc2) +c2O) +c3ncc( +C(=O)C2 +c1cccs1 +CCCN(C)C) +nc3c( +[C@]23 +C1CO +nc1N +C(=O)C +CCN(C)CC3) +c2ncnc3 +CN1CCC( +c2cc(OC)c(OC)c(OC) +c(C(=O)O) +c2nc(N) +[n+](C) +C3=N +c2cc(NC(=O) +cc21 +c(C)cc1 +CCN1CCN( +C2)C1 +c2ccc1 +S(=O)(C)=O) +C=C) +C(=O)N1CCN( +n2cc +[C@@H]1( +c3ccccc3)cc2 +CNS(=O)(=O) +c4ccc(OC) +C(CO) +(C)(C) +C(/C=C/ +cc2)c1 +c2N +CCCN1C(=O) +c1)C +=C1/ +OCC(O) +)cc12 +c1ccc(N2 +C)c( +c2c(N +cc2c1 +Nc1nc( +[C@]3( +[C@@H]3CC +n3cn +N=C(N)N +on1 +C(=O)N(C)C) +ccn1 +[C@@H](OC(C)=O) +S(=O)(=O)O) +c3nc(N +cn3 +c2cc(OC)c(OC) +N(CC(=O)N +C3CCCC3) +[nH]c(- +ccc4) +CC(C)= +Oc1ccccc1 +Cc1ccccc1)NC(=O) +c2c(c1 +[C@@H]2CC +c3c(OC) +C2=O)c1 +C1CCC( +C(S +c4ccc(C) +OC)cc1 +Cc2cn +nn(C) +CC(N) +c3ccc(N4 +C(C)C)cc1 +c1ncn +CCCO) +c1ccccc1)c1ccccc1 +Cl)c(Cl)c1 +n1cc +3)CC1 +C(C(=O)O) +ncn1 +CN1CC +c1ccco1) +C12CC3 +[C@@H](N +N2CCCCC2) +C3=C( +c3cc(O) +[C@H](C)C +c12ccccc1 +c3cc(OC)c(OC) +2)cc( +C(=O)N2CCN( +[C@H](O +c2cccc3cccc +N1CCCC1 +c(SCC(=O)N +CCOCC2)cc1 +c12cc( +[C@@H](O +C2)CC1 +=O)[nH] +c3ccc(Br)cc3) +c(=O)n(C) +C(CO +C(=N) +c3ccccc3Cl) +c(C(=O) +ccc2c1 +c4cccn +Nc1cc( +Cc1o +OCO2) +c1ccco1 +Cc1c(C) +c3ccccc3)=O) +CC3)cc2 +=[N-] +c1cccnc1) +C(C)(C)C)=O) +c(CC) +c1cccc(F)c1 +CC5 +4CCOCC4) +CCN(C)CC2) +3)ccc1 +c3nc(C) +[C@@]3( +c2ccccc2)cc1) +[C@@H](CC +CCC2(CC1) +C(F)(F)F)c3) +c1cccs1) +[C@H](OC(C)=O) +c2cc(N +P(O)(=O) +=C(N) +/N=N/ +nnc( +CO2 +C(=O)N(C +c1ccc(C)cc1 +CC#N) +Oc3ccc( +ccccc12 +S(=O)(=O)C) +c1(NC( +CC[C@]4(C) +CCCCN) +[nH]c1 +CCCCNC(=O) +(c2ccccc2) +nc1C +c1cccc(C)c1 +c1c(N) +ccc12 +(C)C)=O) +C(CC +c4ccccc4)cc3) +n2C +CCNCC3) +c2c(N) +CCCCO +CCCCC +c3ncccc3 +c1cc(C(=O)N +Cl)cc3) +sc3 +c2cc1OC +c(C)c(C) +/C=C2/ +c1ccncc1) +c2nc(N3 +c3ccc(OCC +c2c(Cl)cccc2 +2)cc1) +OC(C) +O=c1 +4)=O) +[nH]c2 +C(=O)N1CCC( +[C@]5 +[C@@H](CO +C3= +c3ccccc32)cc1 +c4ccccc4)CC3) +CC(N)=O) +c2cc(Cl)c( +c2cccc(N +[C@H]3O) +c2nc1 +)cc2c( +N=C1 +CC2)n1 +c5cc +cc2)=O) +c1cccc(NC(=O) +OCCO2 +c1cc(Cl)c( +[C@H]1CC[C@H]( +n2)C1 +C/C=C/ +c(Cl)cc1 +ccc(Cl)c1 +C(C)=C( +C/C(=N\ +C1)C2 +c1ccc(Br)cc1 +CCCS +NC(N) +CN(CC) +3)ccc2 +C(NC +)ccc3 +c(OC)cc1 +c(C +nc(-c3ccccc3) +OO +c4ccc(O) +C4=O) +S)=N +cc1)=O +=[N+]=[N-] +C/C(=C\ +/N=C(/ +N2CC +Oc2c( +CCCN) +2)C1 +Cn1cc +c3ccccc3F) +c1ccc(C(=O)N +N=C2 +nc(SCC(=O)N +CCCCN1 +C1CCCC1) +c1c(Cl)cccc1 +=C(C)C) +n3C) +c1cc(OC)ccc1 +C(=O)(N +F)c(Cl) +c3ccccc23)cc1 +c3cccc(C) +c1ccc(OC +n1- +c1c(OC)ccc( +Cc1ccc(O)cc1) +c4ccncc +ccccc4 +c(=S) +N(CC)CC) +nn(- +[C@@H](C +c1nc2ccccc2 +c21) +c1cccc(N +4)cc3 +c(=O)[nH]c(=O) +c2ccc(Cl)c(Cl) +c1c(NC( +O[C@H](CO +n3cc +Cc1ccc(Cl)cc1 +=O)=O +c1ncccc1 +(Cl) +/C2 +oc(C) +c1cc(C)ccc1 +c1(NC(=O) +CC2CC2) +n3)cc2) +CCO1 +c2cccc(O) +Cc1nc(- +c2c(F)cccc2 +c1cc(-c2n +N2CCCC +C(OCC +/C=C3 +CC12 +C(O)=O +C[C@H]1CN( +=N)N +[C@H](CC +C)ccc1 +c2c1cccc2 +c1cncc( +c1c(Cl)cc( +C3CCN( +c2cccc(F)c2) +c3ccc4c(c3)OCO4) +NC1 +CCOc1ccccc1 +N1CCN(C(=O) +F)cc3) +Cc1cs +c-3 +n2cnc3c( +N2CCCC2) 
+[C@@]4(C) +-c2c( +c3cc(OC)c(OC)c(OC) +c1c(Cl)ccc( +n(CC) +c1(C(N +c5cc( +C(c1cc( +[C@@H](OC(=O) +[C@@H](O)[C@@H](O) +(=O)=O)cc1 +NC(=N) +c1c(C(=O)N +[C@@H](C)C +cnc(N +CNCC +N2CCN(C(=O) +NC1=O +[C@@]1(O) +c2)ccc( +ccc21 +SC +NC(=O)C( +CC3)=O) +c2cc(C(F)(F)F)cc +)ccn1 +[N+]1 +NC(=O)c1ccc( +CC[C@H](C) +c4cc5 +c3cc4c(cc3 +N1CCCCC1 +CO1 +c1cc(F)ccc1 +CC(=O)Nc1ccc( +CC3)cc2) +NCCCC +COc1ccc2c( +-c2[nH] +c2cc(Cl)ccc2 +c2ccc(C(F)(F)F)cc2) +COc1cc2c(cc1OC) +COc1ccc(-c2n +oc2c1 +=C(N)N) +cc2)ccc1 +C3)C2 +C1=C +S(=O)(=O)N( +COC +CC[C@]3(C) +COc1ccc(C2 +C(c3ccccc3) +c3ccc(N +CC1CC1) +)cc(- +n1c(N +CC(C)(C)C) +n3)cc +c2c(Cl)cc( +c1c(F)cccc1 +N2C +[C@H](C)CO) +-c2nc(- +[N+](=O)[O-])cc1) +/C=C/C(=O)N +Cc1cc2 +CC(C)C +c1(-c2n +CNC(=O)N +C(=O)O1 +Cc1cccc2 +nn2)cc1 +c2cccc(C(F)(F)F)c2) +c3ccc4ccccc4 +FC(F)( +n2c(=O) +S(=O)(=O)c2ccc( +Cc1cn( +ccc(F)c1 +O[C@@H]3 +c3cccc(F)c3) +CCN) +c2occc2) +CCC4) +C#N)cc3) +Cc1ccc(C(=O)N +OC[C@H]1O[C@@H]( +c(C( +nc4) +C[n+]1 +F)cc2)cc1 +[C@H](O)[C@@H]1O +ccc(NC(=O) +CCc1cc( +c3ccc(C(F)(F)F)cc3) +-c2s +[C@](O)( +c1n(C) +CS) +c(F)c2) +C(O)C(O) +CC1CCCCC1) +n1c(=O) +C)C +N1CCOCC1) +[C@@](O)( +CC1=O +c1ccc2c(c1)OCO2 +CCNCC2) +Cn2cn +[C@@]2(O) +c3cs +Cn1c( +c4c(C) +c2cccc3) +c2cc(=O) +c2C1 +c(=O)c(C(=O)O) +Cc2ccccc2)CC1 +/C1=C\ +nc2ccccc12 +CCCCCC) +c1c2c(cc( +c2c(n1) +c1(C( +Cc1ccc(F)cc1) +n2)cc( +cccc12 +[N+](=O)[O-])cc2 +c1c(C(N +c2[nH]1 +C(=O)c2ccccc2 +COC(=O)N +CCN(CC +Cc1ccc(F)cc1 +nnc(- +C1CCCN( +C[C@@H](N) +CCNCC1 +[C@@H]1C +C2)=O) +[C@]1(O) +/C(=N/O) +cs2)cc1 +c3cccc(OC) +C(=C/ +C(=O)CC +[C@H](OC(=O) +ccc(NC( +c2nnc( +c13) +cccc2)c1 +C(F)(F)F)cc( +OCCO3) +Cc1s +c1nnc( +c1cc(Cl)cc +c43) +[C@@]21C +ncn3) +c2ccc([N+](=O)[O-])cc2) +CCN(C)CC1 +c2ccc(C(=O)N +CC1)=O) +c1cc(NC( +c1cc(Cl) +CC2)cc( +CCl) +[nH]c3 +c3ccccc3)cc +cc(NC(=O) +c2cc(C(=O)N +c1ccc(O)cc1) +[N+](C)(C)C) +C1CC2 +C(CN +cn2)cc1 +cc2C) +CC3( +Cc1ccc(N +C1)=O +c1cc(Cl)cc( +c(=O)[nH]1 +c(OC +o2)c1 +c1c(-c2n +COc1ccc(N +/C=C1\ +c2cccc3ccccc23) +Nc1cccc( +c1c(O +Br)ccc1 +N= +OC)c(OC)c1 +CC(F)(F)F) +CC2CCCCC2) +cc(C(=O)N +c3c(F)cccc3 +CC(C(=O)N +/N= +NC(=O)c1ccccc1 +c2c[nH]c3ccccc23) +c(NC +s2)c1 +[C@H]1CC[C@H]2 +c2)cc1) +Nc1cc +C23CC4 +c12cccc +[N+](C)(C) +[C@]2(O) +c3cc(Cl)ccc3 +C(C)(C)C)cc1 +ccn2) +c1nnc(- +c2ccs +[C@H]1C +N3CCOCC3) +OCCN +F)cc3 +c2ccc(OC +Cc1ccc(-c2ccccc2 +C1CCN(C(=O) +cc(C)c1 +c2ccccc2)C1 +c3ccc([N+](=O)[O-] +C(C)C)c1 +Cc1c[nH]c2ccccc12) +c1ccc(Br)cc1) +c4ccc(- +C3)cc1 +(F)F) +CC3CC3) +nnn2 +Cc1ccc(S(=O)(=O)N +Cc1ccc(- +N)c1 +c1(N) +nnn1 +ccc(O +cc3)cc +Cc1ccc(Cl)cc1) +COc1ccc(-n2 +c1ccc(Cl) +c2cc(F) +[nH]c2c1 +CCOC( +n2c1 +c1ccc(C) +c1ccc([N+](=O)[O-])cc1 +C1=C(O) +C(F)(F)F)c1) +CCc3ccccc3) +Cc1ccc(-n2 +[C@]4(C) +c3cc4c(cc3) +[C@@H]2[C@@H]( +c3nc4c( +[C@H](C +c2ccc3[nH] +n5 +NC(=O)CS +nc2N +c(C)c2) +S1(=O) +CCCCCCC +C(=N +C1(C)C +c1c2cccc +OC)c(OC)c3) +CC(CC(C3) +c([N+](=O)[O-]) +[C@H](N +[C@H](C(=O)O) +c2cccc([N+](=O)[O-]) +ccc1O +c(C)n1 +CCC(O)=O) +CCC1( +c1cc(O +COc1ccc(C(=O)N +[C@@H]2O +c4ccc5c( +CCOC1 +B(O) +C2C( +N#Cc1c( +CCOc1cc( +c1ccccc1)=O +CC(=O)N[C@@H]( +5)cc4) +c3n(C) +NC(=O)CO +c3cccc(Cl) +S(=O)(N)=O) +C(=O)O)c1 +c2sccc2) +c1c(C( +c2ccccc2)n1 +N3CCN( +c3ccc(C(=O)N +c1co +C(NCC +#N +OCCN( +=O)CC2) +n2c3c( +c1(Cl)ccc( +N(CC)CC +c3F) +COc1ccc(- +CCCC(=O)N +F)cc2)c1 +c4ccco +c2ccc(OC)c(OC) +c1(C(=O)N +C(C)(C)O) +Cc1cc(N +c2=O +N1C +=C(N +c1ccccc12 +nc2cc1 +c1sc( +S(C)(=O)=O +nc(Cl) +cn4) +OCCCO +ncn2) +CC(O +c4s +c(O)cc1 +O=C(CSc1n +CC[C@H](O) +C(=O)O)cc2) +c4cc(Cl)cc +[nH]2)cc1 +c1ccccc1Cl) +Cl)c(Cl)c3) +c2=O)c1 +c6cccc +[C@H](CC(C)C) +=O)C) +C(=O)N(CC) +CCCC4) +c2nc(-c3ccccc3) 
+C2C3 +c2cc(Br)cc +(-c3ccccc3) +c12ccc( +CC[C@@H](C) +nc2)cc1 +CC1= +(F)(F)F) +C(F)(F)F)ccc1 +c12c(cc( +C(=O)c1c( +CCC[C@H]2 +C(=O)N[C@@H](Cc1ccccc1) +CCC=C(C)C) +=[N+]=[N-]) +=C4 +n1c(C) +c1cc(-n2 +Cn2cc +n2)ccc( +c2sc( +OCO4 +C[C@H]4 +c2ccc(Cl)cc2Cl) +OCc3ccccc3) +c2cc(Cl) +Cl)c(Cl)c1) +[C@@]1(C)CC +/N=C2 +c(-c2n +[C@H](C(C)C) +c1c2c(ccc1 +c2cn( +n1(C) +Cl)cc3 +c2ccccc2OC) +c2c(-c3ccccc3) +/C(C#N) +no2) +S(=O)(=O)c1ccccc1 +nc2C) +c2sc3c( +=C/C +c3cc(OC)c(OC)c(OC)c3) +cc1- +s2 +c4ccc(OC)cc4) +CC(O)( +Cc1[nH] +c1(O +C(=O)N1CC +n(CC(=O)N +nccc1 +CS(=O)(=O)N +)cc4) +c4ncc +[C@](C)(O) +C)C) +C2)n1 +Cc2ccc(OC)cc2) +c2cccnc2 +CCN(C)C +c1(-n2 +Nc1c( +CCN2C(=O) +C[C@@H]4 +[nH]2 +[C@H](NC( +c1ccc(NC( +OCC(N +COc1ccc(Cl)cc1 +CC1(C)C +c4c3 +CN2CC +c4ccccn4) +NC(=O)[C@@H]1 +Cc1ccco1) +C(=O)N2CC +c2c(ccc( +cc5) +cc12 +c1cc(=O) +NC(=O)C +CCn1cc( +c1cccc(Cl)c1) +C2)ccc1 +C2CCN(C +c1cc(OC)c(OC)c(OC) +4)cc2) +c1c(-n2 +c3cccc(C(F)(F)F)c3) +O=C1c2ccccc2 +C[C@H](N +(N) +CCCC(C) +c2ccc(Cl) +[N+](=O)[O-])cc( +CCOCC2)c1 +CCC(C)(C) +n2)cc1) +Cn2c(=O) +c2ncn( +CC(C)N +c(=O)c1 +[C@]34 +Cc3c( +)ccc1) +C1(O) +CCC)=O) +c2ccccc2)ccc1 +C(=O)NC(=O) +C(OCC) +C/C( +c1cnc( +[C@@H]1O) +CC3)cc1 +c1nc(-c2ccccc2) +c1cn2 +/N=C1\ +CCC[C@@H]( +Nc1ncn +C=CC( +c1c(F)ccc( +c1ccc(C2 +[C@@H](OC) +Cc1no +c3)CC2) +OC[C@@H]1 +Cl)cc2Cl) +C(N)=N) +cc3)cc2) +[C@H]2[C@H]( +cccc5) +-c2ccc(- +OCCOCC +C3=O +N2CCN(C +COc1cccc2 +S)N +(CO) +c1ccccc1Cl +c1ccc2ccccc2c1 +ncc(- +ccc(OC)cc1 +nc2- +CC(C(=O)O) +c2ccc(OC)cc2 +c3cco +C(NC)=O) +CCCCCCN +CC(C)(O) +c2cc3ccccc3 +=O)o +-c2c3c( +(C)C)cc1 +OCO3 +Cc1ccc(O +CC(c2ccccc2) +CC4)cc3) +ncc2) +CCCCCCCC) +c1cc2cc( +(=O)=O)c1 +n2ccc( +cc3)=O) +c1c(Br) +cc2c(c1) +c1ccccc1)=O) +N(CCCl) +N(c2ccccc2) +Cc1ccc(-c2n +5CCCC +Oc1cc( +3)cc2)c1 +Nc1ncc( +c2C)cc1 +N(Cc2ccccc2) +CC)cc1 +c2c(C#N) +CC1CCN( +OC)c(OC)cc1 +c1cccc(O +on2) +ncnc(N +c1cnccc1 +C1N(C( +[C@@](C)(O) +NC(=O)[C@@H]( +c3c4ccccc4 +c1c(Cl)c( +/C=C/C( +c2ccncc2 +[C@H](OC) +nc2ccccc21 +5CCOCC +c4cc(C) +Br)c2) +nc3ccccc3 +cnc2c1ncn2 +CCCCN( +CC)c1 +cnn2 +=O)c( +c1(C2 +OC(C)(C)C +n(Cc3ccccc3) +c(C)c3) +nc(C(=O)N +CC2)cc1) +c2cc(N3 +c2c(Cl)cccc2) +c2ccc(C#N)cc2) +O4) +c1cccc2) +CCC(=O)O +c4ccc(C)cc4) +cn2)CC1 +OCC3 +(-c2ccccc2) +N1CCCC +c1cnc(N +c4)CC3) +c[n+]( +Cc1ccc(S(=O)(=O) +c4cccc(Cl) +c3ccs +CC1=N +c[nH]1 +c(=O)n(C)c(=O) +cc(-c3ccccc3) +OCC(=O)O) +c1(OC)ccc( +CCC(C)C) +c3cc(Cl)c( +c1-c1ccccc1 +C#N)c1 +c2ccc3ccccc3c2) +CC(CC(C4) +C)cc( +c1cccc(F)c1) +c3ccc(C#N)cc3) +CN(C)c1ccc( +(c1ccccc1) +c1cc(C(N +c1c(OC)cc( +CC1CCCO +CCC[C@@H]1 +nn12 +Cn2cc( +(=O)O +[C@]12C +[C@H]2O +c1cc(C( +c(OC)c2) +CCC(N3 +c3ccc(OC)cc3 +C2)C3) +NC(=O)C2 +c2cnccc2) +C/C=C\C/C=C\ +c4o +NC(=N)N +[C@H](CC) +c2cc(O +C(=O)CS +CCCCC2)cc1 +O=C(COC(=O) +c1ccc(O) +c2c(C)cc(C) +C(=O)N2CCC( +ncnc3 +c3ccccc3OC) +[C@@H]4[C@@]5 +[C@H]2CC[C@H]( +/N=C\ +[C@@H]2O) +c3cccc(- +N(C(C)=O) +c3ccccc3- +c3cc(OC)c(OC)cc3 +c2ccccc2c1=O +SC(=S) +c1ccccc1)N +CCCN(C) +CCSC) +nc(C)c1 +NC(=O)C) +C(=O)NC(=O)N +c1c(C)ccc( +c2ncccc2) +c(N4 +C(=O)N1CCC[C@H]1 +CC(=O)N3 +CN3CCN( +c2ccc(O)cc2 +CCC(CC) +n2)n1 +[C@@H](C)O) +NC(=O)/C=C/ +=O)n( +[n+]3 +C#N)cc2 +FC( +c2cc(Cl)cc(Cl) +NC1=N +c3c(N +c2c1=O +CCC#N) +c1O) +Cc4ccccc4) +c2cccc3c2 +N(Cc1ccccc1) +c6ccccc6 +ccc(OC)c1 +OC1=O +nc(C(F)(F)F) +C(=O)NC) +c2co +c(F)cc1 +c3occc3) +O2)cc1 +c1cccc2ccccc12 +N=C(N +c1ccccc1O +N1CCN(C +ns +nc23) +c4c3) +COC(C)=O) +CC3CC( +c3ccc(Cl) +CCCN(CCC) +c1cc(F)c( +[C@]3(C)CC +c(C)cc( +c1cccc(C(F)(F)F)c1 +c1ccc(CN +C(=O)/C(=C/ +c3ccc(O)cc3 +c1cccc(- +ccn3) +-c1cc +c1c(-c2ccccc2) +SC1 +CO[C@@H]1 +c1c(C)cccc1 +O=S1(=O) +c1cccc(-c2n 
+c(C)cc(C) +cc5 +c(CN +c3c(Cl)cccc3 +COc1ccc(OC)c( +C(=O)N[C@@H](C) +c3cc(C(F)(F)F)cc +n1)=O +=C(C#N) +=O)cccc1 +CO[C@H]1 +[C@H](Cc2ccccc2) +c(=O)n3 +CCCN(C +CC(C)(C)C +c2c(OC)cccc2) +CN(C)S(=O)(=O) +sc(- +c1cccc(C) +Cc2ccc(O)cc2) +c1ccc(O)c(O) +[N+](=O)[O-])ccc1 +c(=O)[nH]c2=O) +[C@@]3(C)CC +nc32) +COCCO +c2ccc3n +[C@H](Cc1ccccc1) +CC[C@@H](O) +c3ccc(Cl)c(Cl)c3) +c2cc(C)ccc2 +c2ccccc2C) +[C@@]21 +-c2nc(N +[C@@]34 +/C=N\ +C=C\ +nc(OC) +CCC(N) +c2ccc(Cl)cc2)cc1 +c1(S +[nH]n1 +ccccc12) +c1c-2 +CN(CC +COc1cc2c( +c2cc(OC)c(OC)c(OC)c2) +CCCN2C(=O) +C2CC +C(NO)=O) +C(=O)N[C@H]1 +CCCN1CCN( +c2cccc(O +C2(C) +CCCO2) +cc3C) +[C@@]2(C)CC +)cc(O) +N(O) +C(C(=O) +c(/C=C/ +c2ccc(Cl)c(Cl)c2) +CC[C@H]4 +N(CCC) +C(=O)NCCCC +c3ncccc3) +n3)cc2 +4)cc2 +COc1ccc(C(=O) +cn2)c1 +c45) +CCCNC(=N)N) +CCC[C@H]( +c3ccc([N+](=O)[O-])cc3) +nc(N2 +c2nnc(- +CCn2 +c1(N2 +c4)cc3) +C1=CC(=O) +SS +C21 +O=C(N/N=C/ +c(-n2 +nc3c2 +C(N1CCN( +n(Cc2ccccc2) +C(O)=C( +N2CCN(C( +c2ccccc2c1) +CC(O)CO +c2cc(F)c( +CC(=O)OC +C(=O)Nc1ccccc1 +c3cc(OC)ccc3 +Br)c3) +4CCCCC4) +CCCC(N +C(C(C)C) +CCC(=O)N1 +C(=O)N/N=C/ +c1ccc(Cl)cc1Cl +c3ccccc3C) +c3c2cccc3) +c1ccc(Cl)c( +4)c3) +o2 +c3cccc(O) +C1CCN(C +c(=O)cc( +[C@@]4(C)CC +Cc1cc(O) +[C@H](C)N +Cc1c(Cl) +c2ccccc2)CC1) +c1(F)ccc( +CN(S(=O)(=O) +cc1)=O) +c1c(cccc1) +c1c(OCC +c3nc(N) +c2c(F)cc( +c(OC)c3) +c2sccc2 +CC2=O +nccc2 +CCC3(CC2) +Cn2c( +CCCC(=O)O) +/C(=N\O) +cccc2)cc1 +c3c(ccc( +c2c(C)cccc2 +P(O)(O)=O) +CN=C( +C(OC +N2) +ccc5 +cc(=O) +COc1cc(N +c1sccc1 +O=C1N( +/C=C3\ +C(C)(C)O +CC)cc2) +c1c(F)cc( +C)C(=O) +C3)cc2) +C2(C)C) +c2c(=O)n1 +CCNS(=O)(=O) +c3nnn +C(=S)S +C[C@@]12 +c2ccc(-c3ccccc3)cc2) +c2ccc(N(C)C) +C1N(C(=O) +Cc1cc(C)cc( +c1oc( +[C@@H](C)CO) +c4)cc +c(C(N)=O) +NO +/C=C/C=C/ +-n1 +c1ccc(C(F)(F)F)cc1 +c(N(C)C) +C1(N +C(C)(O) +N3C(=O) +Cc1cccc(C)c1 +c3cc4ccccc4 +C34 +(C(=O)O) +[C@H]1[C@H]( +[C@H]1O) +N(S(=O)(=O) +c2cccc(OC)c2) +O=C1NC(=O) +CC(C)O +C2=C(O) +[C@H]2CC +C(C)(C)C)cc2) +Cc1c2c( +CCC[C@@H]2 +C2CCN(C(=O) +c(OC)c1) +c4cccs +COc1cc(OC) +NC(=O)c1cc( +c2cccc(NC(=O) +-c2ccc(F)cc2) +S(=O)(=O)N1 +c2cc(-c3ccccc3) +c(CO +c(C)c2 +P(O)(O) +CCN(CC)C(=O) +C(C(F)(F)F) +nc2n( +c2ccc(C)cc2 +n(C(C)C) +CC[C@]43C) +C(=O)N(C)C +c4ccco4) +cn(C) +c2ncc(- +c1nccc( +c3cnccc3) +c3ccc(Cl)cc3Cl) +Nc1nc(N +no1) +S1(=O)=O +P(O)(=O)O) +nn2)c1 +c2ccc(F)cc2)CC1 +C2(C)C +nc(SC +CCN4 +CCCO3) +c7 +CC[C@]4(C)[C@H]3CC +c2ccc(S(=O)(=O)N +c2ccc(Br)cc2 +[N+](C)( +c4cc5c( +C(=O)N[C@@H](CC(C)C) +c1ccc2c(c1)OCO2) +NC(=O)N1 +n(C)c1 +Cc1ccc(C) +Cc1nc(N +c2cc(C)cc(C) +c1(OC)cc( +c2cco +C(CCC) +COC(=O)c1ccc( +C)C1 +N12 +c3ccccc13) +[C@]3(O) +c3sccc3) +O=c1cc( +NC(=O)[C@H](CC(C)C) +C4CCCCC4) +CCN1CCC( +C2CC( +c4cccc5 +c2n(C +NS(=O)(=O)c1ccc( +CCCN=C(N)N) +n1c(N) +cc4c( +c2c3 +NS(=O)( +OCCO4) +N1(C( +nc(N3CCOCC3) +-c1cc( +c4ccc5c(c4) +N)cc1 +c1ccccc1F) +c4ccncc4) +N=C +c3c(c1) +/N=C(\C) +c4n( +c3cc(C)ccc3 +c2oc(- +[C@@H](C(=O)O) +CCC(O)( +c3Cl) +C12CC3CC(CC(C3) +Cc1ccc2c( +c(=O)c2c1 +c3ccc(C(=O)O) +/C1=C/ +C(Cl)(Cl) +c2cc(OC)ccc2 +OC[C@@H]2 +C(C(O)=O) +cc(C)cc1 +c12n( +3)cc1) +Cn1c2c( +ccc(Cl)cc1 +c3c(n2) +F)c1 +[C@@]3(O) +O=C(CN1 +c1(-c2cc( +C3CCC3) +c1(Cl)cc( +N(C(=O)N +c3cc(Br)cc +CCC12 +ccn1) +Cc1cccnc1) +c3cccc4cccc +[C@H]2C +[C@H]2[C@@H]( +CCC(C(=O)N +c3ccccc3)CC2)cc1 +CCCCCC3) +n(C)c(=O)n(C) +[C@H](O)[C@@H](O)[C@H](O) +N(CCCl)CC +Cc1ccco1 +CCN2CCOCC2) +[C@H](CO +OC(CO) +c1-2 +CC(C)(C)N +=C/C=C/ +C(=O)OCC(=O) +c4cccc(F) +c1ccc(S(=O)(=O)N +4CCCC4) +/C=N/NC( +c1cn(C) +c1c(C)cc( +F)c(- +C(C)(C)[C@@H]5 +c1ccc(S +C1=S +Cc1cccs1) +c3n[nH] +CCN(C)CC1) +[nH]2)c1 +c(NC(=O)N +c(Cl)c2) +=O)cc3) +c3cccc(N +c(SC) +c32)cc1 
+c(F)c3) +c2- +c3ncccn3) +c3cc(N +-c2ccc(Cl)cc2) +c4c(Cl) +c(OCC) +c3ccc(S(=O)(=O)N +nc(-c2ccccc2) +CCOCC3)cc2) +c(=O)n(C +C(C)C)cc2) +c2no +c4c5c( +COc1ccccc1N1CCN( +n5) +CCCOc1ccc( +co1 +c1cccc(O)c1 +(N3 +c1cc(OCC +N1) +=O)( +=N2 +nc(NC( +c4ccc(N +c1cccc(-n2 +OC4 +c3c(N) +O[C@H](C) +nc2)c1 +c4ccc(C(F)(F)F)cc +c2cc(OCC +c2nc(O) +Cc1ccc(OC) +N1(C +Cc1cccnc1 +c4cccnc4) +c3n2) +C(OC(=O) +c5cn +O=C(CS +C1(c2ccccc2) +C(F)(F)F)c( +[C@H]1CO +c4ncn +C(C#N)=C(N) +C1CN(C(=O) +C1c2c( +c1(Br) +n[nH]1 +c3c(c2) +c2)cc +C(=O)C(C) +c4c(F) +c1ccc(C( +/C=C1/ +c(OC)c(OC) +[C@H](C(=O)N +c1c(O)cc( +c1ccc(CN2 +[C@]2(C)CC +Cc1nn(C) +CC(=O)OC) +c1c(C2 +C3CCCCC3 +c1c(C#N) +c1(C)ccc( +C2=O)ccc1 +C)( +[C@@H](O)[C@H]1O +c2ccccc2O) +c2ccnc(N +c2c(F)cccc2) +P(=O)(O +)cc(OC)c1 +c3cc(O)ccc3 +C1N +Br)c1) +[C@H](Cc1ccccc1)NC(=O) +[nH]c12 +3)cc21 +cnc12 +c1cc(F)cc +c(C(C)C) +c4cc(F)cc +N(S( +CCn1c(=O) +C(=O)N2C +C(CS +sc1) +c2cn(C) +c1cc(C2 +c(N)n1 +C[C@H](NC( +C[S+]([O-]) +c2cnn3 +CC[C@H](NC(=O) +C#N)cc1) +c2ccc(-c3n +P(O +CCN(CC)CC) +c3cccc(OC)c3) +CCCCCCC) +OCCO2) +CCc2c( +c2)nc1 +ccc(C)c1 +c3- +c2ccc(Cl)cc2)CC1 +nc3C) +C23CC4CC(CC(C4) +c3ccc(C(F)(F)F)cc +C[C@@H](NC(=O) +c2cc(S(=O)(=O)N +cc(Cl) +n1ccc( +[C@]1(C)CC +ncnc32) +c4ccc(Cl)cc4 +c3ccccc3c(=O) +3)cc2)CC1 +NS( +c(C)cc2) +CC[C@@]4 +ccc1)=O +Cc1ccc(S(=O)(=O)N2 +ccnc1 +CC1CC1 +c1ccc(Cl)c(Cl)c1 +c1c(OC)c(OC) +CC(CO +n(CC +c4n3) +c2ccccc2C1 +nc3n2 +c2)cc1OC +c1(Cl)c( +CO2) +Cc2ccccc2)c1 +C=C3 +C4( +N[C@H]1 +c1ncc(- +[N+]([O-])=O)cc2) +c4nn +oc12 +/C(O) +CN(Cc1ccccc1) +C3)=O) +[N+]([O-])=O)cc1 +CCCCCCCCCC +-c2c[nH] +C(=O)c2c( +c1(CN2 +c3n2 +C1=C(C)N +C#N)cc( +C(=O)N[C@@H]1 +c1C) +O=C(N1 +N(C(N +COc1ccc2[nH] +c2nccc( +O)=O +cncc1 +c3c(F)cccc3F) +CC(C)C)=O) +[nH]cc2 +/C(=C(\ +c(=O)n(- +C[C@@H](CO) +CCN3CCOCC3) +c1cc(OC)c( +N1(C) +c4ccc(F)cc4 +[C@@H]2C +ncc3) +O=C(Nc1cccc( +cccc1)=O +c2ccc(S(=O)(=O)N3 +N#Cc1cccc( +c1c(F)c( +c1(CN +c3ccc(C)cc3 +c([N+](=O)[O-])c1 +c(Cl)cc +c2cccc(C)c2) +c(Br)c1 +CC(=O)N1CCN( +c1c(C(F)(F)F) +[N+]2 +CCN(S(=O)(=O) +C)cc2 +/C(=C(/ +c2c(Cl)cccc2Cl) +[C@@]13 +c(-c4ccccc4) +nc1S +O[C@H]3 +C(=O)O)cc1) +c2c(C)cccc2) +c1c(O)c( +n2)=O) +OCO5) +O=C +C(N(C)C) +c1ccc(-c2ccccc2)cc1 +CC2=O) +C(CNC(=O) +c5[nH] +CC(=O)N(C) +cs2)c1 +c2ncnc(N +c2ccc(Cl)cc2)c1 +Cc1nc2c( +CC(CO) +c(SC +c(C)c1) +[n+](C +C)CC1 +c3cccc(C)c3) +c3cn(C) +c1cnn2 +[nH]c2) +CC[C@]2(C) +c1cccc([N+](=O)[O-])c1 +C(N)=O)c(N +c2ccc(-n3 +[C@@H]4CC +C(=O)NS(=O)(=O) +C(=O)NC2 +c(F)cc2 +SC( +c1ccccc1F +c1cc(Br) +c1ccn +[C@@H]1[C@@H]( +c(OC)cc( +Cc2ccc(C)cc2) +c3c(Cl)cc( +c1ccc2nc( +[nH]c1) +c2cc(Br) +[C@@H]2C1 +/C(C)=N/ +c3ccc(NC(=O) +ncc4 +P(=O)(OCC) +c1n[nH] +CCN1C +ccc(F)cc1 +OC)c(OC)c1) +N(S(=O)( +c1cc(O)c(O) +C1C2( +CC(C)C[C@H](NC(=O) +c1c(O)ccc( +(O)=O +S2(=O)=O +N(C(C)C) +C2CCC2) +ncnc2 +c3ccccc3C2=O) +C4)cc3) +[C@@H](CC(C)C) +OC(C)(C)C)=O) +[N+]([O-])=O +C=CC(=O) +C)O +c3c1 +c1(-c2ccccc2) +c3O) +cnc2) +oc2 +n4cn +Nc1nc(- +CC(NC( +cnn1 +C(F)(F)C(F)(F) +c23)c1 +CCCC2)cc1 +c3cc(C(=O)N +CC(=O)NC( +c1cc(Br)ccc1 +C(=O)N1CCN(C(=O) +C(C)=O)cc1 +c2cc3c(cc2)OCO3) +nc(SC) +[C@@H](CC) +OC[C@H]2 +c1ccc(C(N +C(=O)[C@H]( +C=N +c23)CC1 +[N+]([O-])=O)c1 +3)ccc21 +)ccc1O +[C@@H]1(O) +/N=C2\ +CC(c1ccccc1) +no2)cc1 +nn(C +[C@H]1(O) diff --git a/Pilot1/ST1/config_st_spe_training.json b/Pilot1/ST1/config_st_spe_training.json new file mode 100644 index 00000000..43f18b90 --- /dev/null +++ b/Pilot1/ST1/config_st_spe_training.json @@ -0,0 +1,52 @@ +{ + "general": { + "use_hvd": true, + "batch_size": 64, + "epochs": 400, + "lr": 0.00000991301767144166, + "loss_fn": "mean_squared_error" + }, + + "data_loading": { + 
"data_path": "/lus/grand/projects/datascience/avasan/Data_Docking/2M-flatten", + "rec": "3CLPro_7BQY_A_1_F", + "pattern": "Orderable_zinc_db_enaHLL.sorted.4col.descriptors.parquet.xform-smiles.csv.reg" + }, + + "tokenization": { + "vocab_size": 3132, + "maxlen": 45, + "tokenizer": { + "category": "smilespair", + "spe_file": "VocabFiles/SPE_ChEMBL.txt", + "vocab_file": "VocabFiles/vocab_spe.txt" + } + }, + + "architecture": { + "embedding": { + "embed_dim": 128 + }, + "transformer_block": { + "num_blocks": 5, + "activation": "selu", + "ff_dim": 128, + "num_heads": 21, + "dr1": 0.12717945391278226, + "dr2": 0.12717945391278226, + "drop_mha": true + }, + "regressor_head": { + "activation": "selu", + "dr": 0.04990303516069576 + } + }, + + "callbacks": { + "checkpt_file": "smile_regress.autosave.model.h5", + "log_csv": "smile_regress.training.log", + "patience_red_lr": 20, + "patience_early_stop": 100 + } + +} diff --git a/Pilot1/ST1/polaris_sub_smiles_regress_transformer_spe.sh b/Pilot1/ST1/polaris_sub_smiles_regress_transformer_spe.sh new file mode 100755 index 00000000..4dc1990b --- /dev/null +++ b/Pilot1/ST1/polaris_sub_smiles_regress_transformer_spe.sh @@ -0,0 +1,27 @@ +#!/bin/bash +#PBS -N st_spe +#PBS -l select=4 +#PBS -l walltime=12:00:00 +#PBS -q preemptable +#PBS -l filesystems=grand +#PBS -A datascience +#PBS -o logs/ +#PBS -e logs/ +#PBS -m abe +#PBS -M avasan@anl.gov + +module load conda/2022-09-08 +conda activate + +cd /grand/datascience/avasan/ST_Benchmarks/Test_Tokenizers/SMILESPair_Encoder_continue + +NP=16 +PPN=4 +OUT=logfile.log +let NDEPTH=64/$NP +let NTHREADS=$NDEPTH + +TF_GPU_ALLOCATOR=cuda_malloc_async +export TF_FORCE_GPU_ALLOW_GROWTH=true + +mpiexec --np 16 -ppn 4 --cpu-bind verbose,list:0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16 -env NCCL_COLLNET_ENABLE=1 -env NCCL_NET_GDR_LEVEL=PHB python smiles_regress_transformer_run.py > $OUT diff --git a/Pilot1/ST1/smiles_pair_encoders_functions.py b/Pilot1/ST1/smiles_pair_encoders_functions.py new file mode 100644 index 00000000..15a8c6a2 --- /dev/null +++ b/Pilot1/ST1/smiles_pair_encoders_functions.py @@ -0,0 +1,420 @@ +# Tokenizaion classes for huggingface interface +# reference: https://github.com/huggingface/transformers/blob/master/src/transformers/tokenization_bert.py +# reference https://github.com/rxn4chemistry/rxnmapper + +import collections +import logging +import os +import re +import codecs +import unicodedata +from typing import List, Optional +from transformers import PreTrainedTokenizer +from SmilesPE.tokenizer import SPE_Tokenizer + +def load_vocab(vocab_file): + """Loads a vocabulary file into a dictionary.""" + vocab = collections.OrderedDict() + with open(vocab_file, "r", encoding="utf-8") as reader: + tokens = reader.readlines() + for index, token in enumerate(tokens): + token = token.rstrip("\n") + vocab[token] = index + return vocab + +class Atomwise_Tokenizer(object): + """Run atom-level SMILES tokenization""" + + def __init__(self): + """ Constructs a atom-level Tokenizer. + """ + self.regex_pattern = r"(\[[^\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\(|\)|\.|=|#|-|\+|\\|\/|:|~|@|\?|>>?|\*|\$|\%[0-9]{2}|[0-9])" + self.regex = re.compile(self.regex_pattern) + + def tokenize(self, text): + """ Basic Tokenization of a SMILES. + """ + tokens = [token for token in self.regex.findall(text)] + return tokens + +class SMILES_SPE_Tokenizer(PreTrainedTokenizer): + r""" + Constructs a SMILES tokenizer. Based on SMILES Pair Encoding (https://github.com/XinhaoLi74/SmilesPE). 
+ This tokenizer inherits from :class:`~transformers.PreTrainedTokenizer` which contains most of the methods. Users + should refer to the superclass for more information regarding methods. + Args: + vocab_file (:obj:`string`): + File containing the vocabulary. + spe_file (:obj:`string`): + File containing the trained SMILES Pair Encoding vocabulary. + unk_token (:obj:`string`, `optional`, defaults to "[UNK]"): + The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this + token instead. + sep_token (:obj:`string`, `optional`, defaults to "[SEP]"): + The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences + for sequence classification or for a text and a question for question answering. + It is also used as the last token of a sequence built with special tokens. + pad_token (:obj:`string`, `optional`, defaults to "[PAD]"): + The token used for padding, for example when batching sequences of different lengths. + cls_token (:obj:`string`, `optional`, defaults to "[CLS]"): + The classifier token which is used when doing sequence classification (classification of the whole + sequence instead of per-token classification). It is the first token of the sequence when built with + special tokens. + mask_token (:obj:`string`, `optional`, defaults to "[MASK]"): + The token used for masking values. This is the token used when training this model with masked language + modeling. This is the token which the model will try to predict. + """ + + def __init__( + self, + vocab_file, + spe_file, + unk_token="[UNK]", + sep_token="[SEP]", + pad_token="[PAD]", + cls_token="[CLS]", + mask_token="[MASK]", + **kwargs + ): + super().__init__( + unk_token=unk_token, + sep_token=sep_token, + pad_token=pad_token, + cls_token=cls_token, + mask_token=mask_token, + **kwargs, + ) + + if not os.path.isfile(vocab_file): + raise ValueError( + "Can't find a vocabulary file at path '{}'.".format(vocab_file) + ) + if not os.path.isfile(spe_file): + raise ValueError( + "Can't find a SPE vocabulary file at path '{}'.".format(spe_file) + ) + self.vocab = load_vocab(vocab_file) + self.spe_vocab = codecs.open(spe_file) + self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()]) + self.spe_tokenizer = SPE_Tokenizer(self.spe_vocab) + + @property + def vocab_size(self): + return len(self.vocab) + + def get_vocab(self): + return dict(self.vocab, **self.added_tokens_encoder) + + def _tokenize(self, text): + return self.spe_tokenizer.tokenize(text).split(' ') + + def _convert_token_to_id(self, token): + """ Converts a token (str) in an id using the vocab. """ + return self.vocab.get(token, self.vocab.get(self.unk_token)) + + def _convert_id_to_token(self, index): + """Converts an index (integer) in a token (str) using the vocab.""" + return self.ids_to_tokens.get(index, self.unk_token) + + def convert_tokens_to_string(self, tokens): + """ Converts a sequence of tokens (string) in a single string. """ + out_string = " ".join(tokens).replace(" ##", "").strip() + return out_string + + def build_inputs_with_special_tokens( + self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None + ) -> List[int]: + """ + Build model inputs from a sequence or a pair of sequence for sequence classification tasks + by concatenating and adding special tokens. 
+ A BERT sequence has the following format: + - single sequence: ``[CLS] X [SEP]`` + - pair of sequences: ``[CLS] A [SEP] B [SEP]`` + Args: + token_ids_0 (:obj:`List[int]`): + List of IDs to which the special tokens will be added + token_ids_1 (:obj:`List[int]`, `optional`, defaults to :obj:`None`): + Optional second list of IDs for sequence pairs. + Returns: + :obj:`List[int]`: list of `input IDs <../glossary.html#input-ids>`__ with the appropriate special tokens. + """ + if token_ids_1 is None: + return [self.cls_token_id] + token_ids_0 + [self.sep_token_id] + cls = [self.cls_token_id] + sep = [self.sep_token_id] + return cls + token_ids_0 + sep + token_ids_1 + sep + + def get_special_tokens_mask( + self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False + ) -> List[int]: + """ + Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding + special tokens using the tokenizer ``prepare_for_model`` method. + Args: + token_ids_0 (:obj:`List[int]`): + List of ids. + token_ids_1 (:obj:`List[int]`, `optional`, defaults to :obj:`None`): + Optional second list of IDs for sequence pairs. + already_has_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`): + Set to True if the token list is already formatted with special tokens for the model + Returns: + :obj:`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token. + """ + + if already_has_special_tokens: + if token_ids_1 is not None: + raise ValueError( + "You should not supply a second sequence if the provided sequence of " + "ids is already formated with special tokens for the model." + ) + return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0)) + + if token_ids_1 is not None: + return [1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1] + return [1] + ([0] * len(token_ids_0)) + [1] + + def create_token_type_ids_from_sequences( + self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None + ) -> List[int]: + """ + Creates a mask from the two sequences passed to be used in a sequence-pair classification task. + A BERT sequence pair mask has the following format: + :: + 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 + | first sequence | second sequence | + if token_ids_1 is None, only returns the first portion of the mask (0's). + Args: + token_ids_0 (:obj:`List[int]`): + List of ids. + token_ids_1 (:obj:`List[int]`, `optional`, defaults to :obj:`None`): + Optional second list of IDs for sequence pairs. + Returns: + :obj:`List[int]`: List of `token type IDs <../glossary.html#token-type-ids>`_ according to the given + sequence(s). + """ + sep = [self.sep_token_id] + cls = [self.cls_token_id] + if token_ids_1 is None: + return len(cls + token_ids_0 + sep) * [0] + return len(cls + token_ids_0 + sep) * [0] + len(token_ids_1 + sep) * [1] + + def save_vocabulary(self, vocab_path): + """ + Save the sentencepiece vocabulary (copy original file) and special tokens file to a directory. + Args: + vocab_path (:obj:`str`): + The directory in which to save the vocabulary. + Returns: + :obj:`Tuple(str)`: Paths to the files saved. 
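+        Note:
+            The directory branch below references a module-level
+            ``VOCAB_FILES_NAMES`` mapping, and the warning references a
+            ``logger``; neither is defined in this file, so as committed
+            ``vocab_path`` should be a plain file path and the vocabulary
+            indices should be consecutive.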
+ """ + index = 0 + if os.path.isdir(vocab_path): + vocab_file = os.path.join(vocab_path, VOCAB_FILES_NAMES["vocab_file"]) + else: + vocab_file = vocab_path + with open(vocab_file, "w", encoding="utf-8") as writer: + for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]): + if index != token_index: + logger.warning( + "Saving vocabulary to {}: vocabulary indices are not consecutive." + " Please check that the vocabulary is not corrupted!".format(vocab_file) + ) + index = token_index + writer.write(token + "\n") + index += 1 + return (vocab_file,) + +class Atomwise_Tokenizer(object): + """Run atom-level SMILES tokenization""" + + def __init__(self): + """ Constructs a atom-level Tokenizer. + """ + self.regex_pattern = r"(\[[^\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\(|\)|\.|=|#|-|\+|\\|\/|:|~|@|\?|>>?|\*|\$|\%[0-9]{2}|[0-9])" + self.regex = re.compile(self.regex_pattern) + + def tokenize(self, text): + """ Basic Tokenization of a SMILES. + """ + tokens = [token for token in self.regex.findall(text)] + return tokens + +class SMILES_Atomwise_Tokenizer(PreTrainedTokenizer): + r""" + Constructs a SMILES tokenizer. Based on SMILES Pair Encoding (https://github.com/XinhaoLi74/SmilesPE). + This tokenizer inherits from :class:`~transformers.PreTrainedTokenizer` which contains most of the methods. Users + should refer to the superclass for more information regarding methods. + Args: + vocab_file (:obj:`string`): + File containing the vocabulary. + unk_token (:obj:`string`, `optional`, defaults to "[UNK]"): + The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this + token instead. + sep_token (:obj:`string`, `optional`, defaults to "[SEP]"): + The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences + for sequence classification or for a text and a question for question answering. + It is also used as the last token of a sequence built with special tokens. + pad_token (:obj:`string`, `optional`, defaults to "[PAD]"): + The token used for padding, for example when batching sequences of different lengths. + cls_token (:obj:`string`, `optional`, defaults to "[CLS]"): + The classifier token which is used when doing sequence classification (classification of the whole + sequence instead of per-token classification). It is the first token of the sequence when built with + special tokens. + mask_token (:obj:`string`, `optional`, defaults to "[MASK]"): + The token used for masking values. This is the token used when training this model with masked language + modeling. This is the token which the model will try to predict. 
+ """ + + def __init__( + self, + vocab_file, + unk_token="[UNK]", + sep_token="[SEP]", + pad_token="[PAD]", + cls_token="[CLS]", + mask_token="[MASK]", + **kwargs + ): + super().__init__( + unk_token=unk_token, + sep_token=sep_token, + pad_token=pad_token, + cls_token=cls_token, + mask_token=mask_token, + **kwargs, + ) + + if not os.path.isfile(vocab_file): + raise ValueError( + "Can't find a vocabulary file at path '{}'.".format(vocab_file) + ) + self.vocab = load_vocab(vocab_file) + self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()]) + self.tokenizer = Atomwise_Tokenizer() + + @property + def vocab_size(self): + return len(self.vocab) + + def get_vocab(self): + return dict(self.vocab, **self.added_tokens_encoder) + + def _tokenize(self, text): + return self.tokenizer.tokenize(text) + + def _convert_token_to_id(self, token): + """ Converts a token (str) in an id using the vocab. """ + return self.vocab.get(token, self.vocab.get(self.unk_token)) + + def _convert_id_to_token(self, index): + """Converts an index (integer) in a token (str) using the vocab.""" + return self.ids_to_tokens.get(index, self.unk_token) + + def convert_tokens_to_string(self, tokens): + """ Converts a sequence of tokens (string) in a single string. """ + out_string = " ".join(tokens).replace(" ##", "").strip() + return out_string + + def build_inputs_with_special_tokens( + self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None + ) -> List[int]: + """ + Build model inputs from a sequence or a pair of sequence for sequence classification tasks + by concatenating and adding special tokens. + A BERT sequence has the following format: + - single sequence: ``[CLS] X [SEP]`` + - pair of sequences: ``[CLS] A [SEP] B [SEP]`` + Args: + token_ids_0 (:obj:`List[int]`): + List of IDs to which the special tokens will be added + token_ids_1 (:obj:`List[int]`, `optional`, defaults to :obj:`None`): + Optional second list of IDs for sequence pairs. + Returns: + :obj:`List[int]`: list of `input IDs <../glossary.html#input-ids>`__ with the appropriate special tokens. + """ + if token_ids_1 is None: + return [self.cls_token_id] + token_ids_0 + [self.sep_token_id] + cls = [self.cls_token_id] + sep = [self.sep_token_id] + return cls + token_ids_0 + sep + token_ids_1 + sep + + def get_special_tokens_mask( + self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False + ) -> List[int]: + """ + Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding + special tokens using the tokenizer ``prepare_for_model`` method. + Args: + token_ids_0 (:obj:`List[int]`): + List of ids. + token_ids_1 (:obj:`List[int]`, `optional`, defaults to :obj:`None`): + Optional second list of IDs for sequence pairs. + already_has_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`): + Set to True if the token list is already formatted with special tokens for the model + Returns: + :obj:`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token. + """ + + if already_has_special_tokens: + if token_ids_1 is not None: + raise ValueError( + "You should not supply a second sequence if the provided sequence of " + "ids is already formated with special tokens for the model." 
+ ) + return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0)) + + if token_ids_1 is not None: + return [1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1] + return [1] + ([0] * len(token_ids_0)) + [1] + + def create_token_type_ids_from_sequences( + self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None + ) -> List[int]: + """ + Creates a mask from the two sequences passed to be used in a sequence-pair classification task. + A BERT sequence pair mask has the following format: + :: + 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 + | first sequence | second sequence | + if token_ids_1 is None, only returns the first portion of the mask (0's). + Args: + token_ids_0 (:obj:`List[int]`): + List of ids. + token_ids_1 (:obj:`List[int]`, `optional`, defaults to :obj:`None`): + Optional second list of IDs for sequence pairs. + Returns: + :obj:`List[int]`: List of `token type IDs <../glossary.html#token-type-ids>`_ according to the given + sequence(s). + """ + sep = [self.sep_token_id] + cls = [self.cls_token_id] + if token_ids_1 is None: + return len(cls + token_ids_0 + sep) * [0] + return len(cls + token_ids_0 + sep) * [0] + len(token_ids_1 + sep) * [1] + + def save_vocabulary(self, vocab_path): + """ + Save the sentencepiece vocabulary (copy original file) and special tokens file to a directory. + Args: + vocab_path (:obj:`str`): + The directory in which to save the vocabulary. + Returns: + :obj:`Tuple(str)`: Paths to the files saved. + """ + index = 0 + if os.path.isdir(vocab_path): + vocab_file = os.path.join(vocab_path, VOCAB_FILES_NAMES["vocab_file"]) + else: + vocab_file = vocab_path + with open(vocab_file, "w", encoding="utf-8") as writer: + for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]): + if index != token_index: + logger.warning( + "Saving vocabulary to {}: vocabulary indices are not consecutive." 
+ " Please check that the vocabulary is not corrupted!".format(vocab_file) + ) + index = token_index + writer.write(token + "\n") + index += 1 + return (vocab_file,) + diff --git a/Pilot1/ST1/smiles_regress_transformer_spe_funcs.py b/Pilot1/ST1/smiles_regress_transformer_spe_funcs.py new file mode 100644 index 00000000..1082bede --- /dev/null +++ b/Pilot1/ST1/smiles_regress_transformer_spe_funcs.py @@ -0,0 +1,768 @@ +############# Module Loading ############## + +import argparse +import os +import numpy as np +import matplotlib +import pandas as pd +import json +from functools import partial + +matplotlib.use("Agg") + +import tensorflow as tf +from tensorflow import keras +from tensorflow.keras import backend as K +from tensorflow.keras import layers +from tensorflow.keras.callbacks import ( + CSVLogger, + EarlyStopping, + ModelCheckpoint, + ReduceLROnPlateau, +) +import codecs +from SmilesPE.tokenizer import * +#from SmilesPE.spe2vec import * +from smiles_pair_encoders_functions import * +from tensorflow.keras.optimizers import Adam +from tensorflow.keras.preprocessing import sequence, text +import horovod.keras as hvd ### importing horovod to use data parallelization in another step +from clr_callback import * +import deephyper +from deephyper.problem import HpProblem +from tensorflow.python.client import device_lib +import ray +from deephyper.evaluator import Evaluator +from deephyper.evaluator.callback import TqdmCallback +from deephyper.search.hps import CBO +from itertools import chain, repeat, islice + +def pad_infinite(iterable, padding=None): + return chain(iterable, repeat(padding)) + +def pad(iterable, size, padding=None): + return islice(pad_infinite(iterable, padding), size) + + +def ParamsJson(json_file): + with open(json_file) as f: + params = json.load(f) + return params + + +def ArgParsing(): + file_path = os.path.dirname(os.path.realpath(__file__)) + # psr and args take input outside of the script and assign: + # (1) file paths for data_path_train and data_path_vali + # (2) number of training epochs + + psr = argparse.ArgumentParser(description="input csv file") + psr.add_argument("--in_train", default="in_train") + psr.add_argument("--in_vali", default="in_vali") + psr.add_argument("--ep", type=int, default=400) + psr.add_argument("--num_heads", type=int, default=16) + psr.add_argument("--DR_TB", type=float, default=0.1) + psr.add_argument("--DR_ff", type=float, default=0.1) + psr.add_argument("--activation", default="activation") + psr.add_argument("--drop_post_MHA", type=bool, default=True) + psr.add_argument("--lr", type=float, default=1e-5) + psr.add_argument("--loss_fn", default="mean_squared_error") + psr.add_argument("--hvd_switch", type=bool, default=True) + + args = vars(psr.parse_args()) # returns dictionary mapping of an object + + ######## Set hyperparameters ######## + data_path_train = args["in_train"] + data_path_vali = args["in_vali"] + + EPOCH = args["ep"] + num_heads = args["num_heads"] + DR_TB = args["DR_TB"] + DR_ff = args["DR_ff"] + activation = args["activation"] + dropout1 = args["drop_post_MHA"] + lr = args["lr"] + loss_fn = args["loss_fn"] + hvd_switch = args["hvd_switch"] + + return data_path_train, data_path_vali, EPOCH, num_heads, DR_TB, DR_ff, activation, dropout1, lr, loss_fn, hvd_switch + +def initialize_hvd(): + hvd.init() + print("I am rank %d of %d" %(hvd.rank(), hvd.size())) + + #HVD-2: GPU pinning + gpus = tf.config.experimental.list_physical_devices('GPU') + # Ping GPU to each9 rank + for gpu in gpus: + 
tf.config.experimental.set_memory_growth(gpu,True) + if gpus: + tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'GPU') + + return + + +def split_data(data_x, data_y): + data_x = np.array_split(data_x, hvd.size())[hvd.rank()] + data_y = np.array_split(data_y, hvd.size())[hvd.rank()] + return (data_x, data_y) + + +#def implement_hvd(x_train, y_train): +# x_train = x_train[hvd.rank()] +# y_train = y_train[hvd.rank()] +# return (x_train, y_train) + +# Implement embedding layer +# Two seperate embedding layers, one for tokens, one for token index (positions). + +class TokenAndPositionEmbedding(layers.Layer): + def __init__(self, maxlen, vocab_size, embed_dim): + super(TokenAndPositionEmbedding, self).__init__() + self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim) + self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim) + + def call(self, x): + maxlen = tf.shape(x)[-1] + positions = tf.range(start=0, limit=maxlen, delta=1) + positions = self.pos_emb(positions) + x = self.token_emb(x) + return x + positions + +def prep_text(texts, tokenizer, max_sequence_length): + # Turns text into into padded sequences. + text_sequences = tokenizer.texts_to_sequences(texts) # turns text into tokens + return sequence.pad_sequences(text_sequences, maxlen=max_sequence_length) # pad all sequences so they all have same length + + +#def train_val_data(data_path_train, data_path_vali, hvd_switch, vocab_size, maxlen): + +def preprocess_smiles_pair_encoding(data, maxlen, vocab_file, spe_file): + # some default tokens from huggingface + default_toks = ['[PAD]', + '[unused1]', '[unused2]', '[unused3]', '[unused4]','[unused5]', '[unused6]', '[unused7]', '[unused8]', '[unused9]', '[unused10]', + '[UNK]', '[CLS]', '[SEP]', '[MASK]'] + + + # atom-level tokens used for trained the spe vocabulary + atom_toks = ['[c-]', '[SeH]', '[N]', '[C@@]', '[Te]', '[OH+]', 'n', '[AsH]', '[B]', 'b', + '[S@@]', 'o', ')', '[NH+]', '[SH]', 'O', 'I', '[C@]', '-', '[As+]', '[Cl+2]', + '[P+]', '[o+]', '[C]', '[C@H]', '[CH2]', '\\', 'P', '[O-]', '[NH-]', '[S@@+]', + '[te]', '[s+]', 's', '[B-]', 'B', 'F', '=', '[te+]', '[H]', '[C@@H]', '[Na]', + '[Si]', '[CH2-]', '[S@+]', 'C', '[se+]', '[cH-]', '6', 'N', '[IH2]', '[As]', + '[Si@]', '[BH3-]', '[Se]', 'Br', '[C+]', '[I+3]', '[b-]', '[P@+]', '[SH2]', '[I+2]', + '%11', '[Ag-3]', '[O]', '9', 'c', '[N-]', '[BH-]', '4', '[N@+]', '[SiH]', '[Cl+3]', '#', + '(', '[O+]', '[S-]', '[Br+2]', '[nH]', '[N+]', '[n-]', '3', '[Se+]', '[P@@]', '[Zn]', '2', + '[NH2+]', '%10', '[SiH2]', '[nH+]', '[Si@@]', '[P@@+]', '/', '1', '[c+]', '[S@]', '[S+]', + '[SH+]', '[B@@-]', '8', '[B@-]', '[C-]', '7', '[P@]', '[se]', 'S', '[n+]', '[PH]', '[I+]', '5', 'p', '[BH2-]', '[N@@+]', '[CH]', 'Cl'] + + tokenizer = SMILES_SPE_Tokenizer(vocab_file=vocab_file, spe_file= spe_file) + + tokenized_data = np.array([list(pad(tokenizer(smi)['input_ids'], maxlen, 0)) for smi in data]) + + return tokenized_data + + +def train_val_data(hyper_params): + + data_path = hyper_params['data_loading']['data_path'] + rec = hyper_params['data_loading']['rec'] + pattern = hyper_params['data_loading']['pattern'] + + tokenizer_params = hyper_params['tokenization']['tokenizer'] + #vocabulary = hyper_params['tokenization']['vocab'] + vocab_size = hyper_params['tokenization']['vocab_size'] + maxlen = hyper_params['tokenization']['maxlen'] + hvd_switch = hyper_params['general']['use_hvd'] + + data_train = pd.read_csv(f'{data_path}/ml.{rec}.{pattern}.train') + data_vali = 
pd.read_csv(f'{data_path}/ml.{rec}.{pattern}.val')
+
+    # data_train.head()  # debug: inspect the first rows
+    # Dataset has "type" (the regression target) and "smiles" as the two fields
+    # reshaping: y formatted as [[y_1],[y_2],...] with floats
+    x_smiles_train = data_train["smiles"]
+    x_smiles_val = data_vali["smiles"]
+    y_train = data_train["type"].values.reshape(-1, 1) * 1.0
+    y_val = data_vali["type"].values.reshape(-1, 1) * 1.0
+
+    if hvd_switch:
+        x_smiles_train, y_train = split_data(x_smiles_train, y_train)
+
+    if tokenizer_params['category'] == 'smilespair':
+        spe_file = tokenizer_params['spe_file']
+        vocab_file = tokenizer_params['vocab_file']
+        x_train = preprocess_smiles_pair_encoding(x_smiles_train,
+                                                  maxlen,
+                                                  vocab_file,
+                                                  spe_file)
+
+        x_val = preprocess_smiles_pair_encoding(x_smiles_val,
+                                                maxlen,
+                                                vocab_file,
+                                                spe_file)
+        # print(x_train)  # debug
+
+    else:
+        tokenizer = text.Tokenizer(num_words=vocab_size)
+        tokenizer.fit_on_texts(data_train["smiles"])
+
+        x_train = prep_text(data_train["smiles"], tokenizer, maxlen)
+        x_val = prep_text(data_vali["smiles"], tokenizer, maxlen)
+
+    ######## Implement horovod if necessary ########
+    #if hvd_switch:
+    #    x_train, y_train = initialize_hvd(x_train, y_train)
+    #    x_train, y_train = implement_hvd(x_train, y_train)
+
+    return x_train, y_train, x_val, y_val
+
+def get_available_gpus():
+    local_device_protos = device_lib.list_local_devices()
+    n_gpus = len([x.name for x in local_device_protos if x.device_type == "GPU"])
+    print(f"Num of gpus is {n_gpus}")
+    if n_gpus > 1:
+        n_gpus -= 1
+
+    is_gpu_available = n_gpus > 0
+
+    #if is_gpu_available:
+    #    print(f"{n_gpus} GPU{'s are' if n_gpus > 1 else ' is'} available.")
+    #else:
+    #    print("No GPU available")
+
+    return local_device_protos, [x.name for x in local_device_protos if x.device_type == "GPU"], n_gpus, is_gpu_available
+
+
+def r2(y_true, y_pred):
+    SS_res = K.sum(K.square(y_true - y_pred))
+    SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
+    return 1 - SS_res / (SS_tot + K.epsilon())
+
+
+# Implement a Transformer block as a layer
+# embed_dim: embedding dimension of each token; also used as key_dim for the multi-head attention
+# ff_dim: number of nodes in the feed-forward Dense layer
+# epsilon: small constant added inside LayerNormalization for numerical stability
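+#
+# A shape sketch under the committed config (maxlen=45, embed_dim=128,
+# num_heads=21, ff_dim=128, dr1=0.127, activation='selu', drop_mha=true):
+# one block maps (batch, maxlen, embed_dim) -> (batch, maxlen, embed_dim), e.g.
+#   tb = TransformerBlock(128, 21, 128, 0.127, 'selu', True)
+#   y = tb(tf.zeros((2, 45, 128)), training=False)  # y.shape == (2, 45, 128)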
+
+class TransformerBlock(layers.Layer):
+    # __init__: define the block's sublayers
+    def __init__(self, embed_dim, num_heads, ff_dim, rate, activation, dropout1):
+        super(TransformerBlock, self).__init__()
+        self.drop_chck = dropout1
+        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)#, activation=activation)
+        self.ffn = keras.Sequential(
+            [
+                layers.Dense(ff_dim, activation=activation),
+                layers.Dense(embed_dim),
+            ]
+        )
+        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
+        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
+        self.dropout1 = layers.Dropout(rate)
+        self.dropout2 = layers.Dropout(rate)
+    # call: self-attention -> (optional) dropout -> residual + layernorm ->
+    # feed-forward -> dropout -> residual + layernorm
+    def call(self, inputs, training):
+        attn_output = self.att(inputs, inputs)
+        if self.drop_chck:
+            attn_output = self.dropout1(attn_output, training=training)
+        out1 = self.layernorm1(inputs + attn_output)
+        ffn_output = self.ffn(out1)
+        ffn_output = self.dropout2(ffn_output, training=training)
+
+        return self.layernorm2(out1 + ffn_output)
+
+class ModelArchitecture(layers.Layer):
+    #def __init__(self, vocab_size, maxlen, embed_dim, num_heads, ff_dim, DR_TB, DR_ff, activation, dropout1, lr, loss_fn, hvd_switch):
+    def __init__(self, hyper_params):
+        super(ModelArchitecture, self).__init__()
+
+        lr = hyper_params['general']['lr']
+        vocab_size = hyper_params['tokenization']['vocab_size']
+        maxlen = hyper_params['tokenization']['maxlen']
+        hvd_switch = hyper_params['general']['use_hvd']
+
+        arch_params = hyper_params['architecture']
+        embed_dim = arch_params['embedding']['embed_dim']
+        num_heads = arch_params['transformer_block']['num_heads']
+        ff_dim = arch_params['transformer_block']['ff_dim']
+        DR_TB_1 = arch_params['transformer_block']['dr1']
+        DR_TB_2 = arch_params['transformer_block']['dr2']  # read from the config but not used below
+        DR_ff = arch_params['regressor_head']['dr']
+        activation_transformer = arch_params['transformer_block']['activation']
+        activation_regressor = arch_params['regressor_head']['activation']
+        dropout1 = arch_params['transformer_block']['drop_mha']
+
+        self.num_tb = arch_params['transformer_block']['num_blocks']
+        self.loss_fn = hyper_params['general']['loss_fn']
+
+        self.inputs = layers.Input(shape=(maxlen,))
+        self.embedding_layer = TokenAndPositionEmbedding(maxlen,
+                                                         vocab_size,
+                                                         embed_dim)
+
+        # note: a single TransformerBlock instance is applied num_blocks times
+        # in call(), so the stacked blocks share one set of weights
+        self.transformer_block = TransformerBlock(embed_dim,
+                                                  num_heads,
+                                                  ff_dim,
+                                                  DR_TB_1,
+                                                  activation_transformer,
+                                                  dropout1)
+
+        self.reshape = layers.Reshape((1, maxlen * embed_dim),
+                                      input_shape=(maxlen, embed_dim,))
+
+        self.dropout1 = layers.Dropout(DR_ff)
+        self.dropout2 = layers.Dropout(DR_ff)
+        self.dropout3 = layers.Dropout(DR_ff)
+        self.dropout4 = layers.Dropout(DR_ff)
+        self.dropout5 = layers.Dropout(DR_ff)
+
+        self.dense1 = layers.Dense(1024, activation=activation_regressor)
+        self.dense2 = layers.Dense(256, activation=activation_regressor)
+        self.dense3 = layers.Dense(64, activation=activation_regressor)
+        self.dense4 = layers.Dense(16, activation=activation_regressor)
+        self.dense5 = layers.Dense(1, activation=activation_regressor)
+
+        if hvd_switch:
+            # scale the learning rate by the number of workers and let Horovod
+            # average gradients across ranks
+            lr = lr * hvd.size()
+            self.opt = Adam(learning_rate=lr)
+            self.opt = hvd.DistributedOptimizer(self.opt)
+        else:
+            self.opt = Adam(learning_rate=lr)
+
+    def call(self):
+        x = self.embedding_layer(self.inputs)
+        for tb in range(self.num_tb):
+            x = self.transformer_block(x)
+
+        x = self.reshape(x)
+
+        x = self.dropout1(x)
+        x = self.dense1(x)
+
+        x = self.dropout2(x)
+        x = self.dense2(x)
+
+        x = self.dropout3(x)
+        x = self.dense3(x)
+
+        x = self.dropout4(x)
+        x 
= self.dense4(x) + + x = self.dropout5(x) + outputs = self.dense5(x) + + model = keras.Model(inputs=self.inputs, outputs=outputs) + model.summary() + + model.compile( + loss=self.loss_fn, optimizer=self.opt, metrics=["mae", r2], steps_per_execution=100 + ) + + return model + +class TrainingAndCallbacks: + #def __init__(self, hvd_switch, checkpt_file, lr, csv_file, patience_red_lr, patience_early_stop): + def __init__(self, hyper_params): + self.hvd_switch = hyper_params['general']['use_hvd'] + checkpt_file = hyper_params['callbacks']['checkpt_file'] + csv_file = hyper_params['callbacks']['log_csv'] + patience_red_lr = hyper_params['callbacks']['patience_red_lr'] + patience_early_stop = hyper_params['callbacks']['patience_early_stop'] + lr = hyper_params['general']['lr'] + if self.hvd_switch: + lr = lr * hvd.size() + + self.checkpointer = ModelCheckpoint( + filepath=checkpt_file,#"smile_regress.autosave.model.h5", + verbose=1, + save_weights_only=True, + save_best_only=True, + ) + + self.clr = CyclicLR(base_lr = lr, max_lr = 5*lr, step_size=2000.) + self.csv_logger = CSVLogger(csv_file) + + self.reduce_lr = ReduceLROnPlateau( + monitor="val_loss", + factor=0.75, + patience=patience_red_lr, + verbose=1, + mode="auto", + epsilon=0.0001, + cooldown=3, + min_lr=0.000000001, + ) + + self.early_stop = EarlyStopping( + monitor="val_loss", + patience=patience_early_stop, + verbose=1, + mode="auto", + ) + + if self.hvd_switch: + #HVD broadcast initial variables from rank0 to all other processes + self.hvd_broadcast = hvd.callbacks.BroadcastGlobalVariablesCallback(0) + + def callback_defining(self): + + if self.hvd_switch: + callbacks = [self.hvd_broadcast, self.reduce_lr, self.clr] + if hvd.rank() == 0: + callbacks.append(self.csv_logger) + callbacks.append(self.early_stop) + callbacks.append(self.checkpointer) + return callbacks + else: + return [self.reduce_lr, self.clr, self.csv_logger, self.early_stop, self.checkpointer] + + def training(self, model, x_train, y_train, validation_data, hyper_params): + BATCH = hyper_params['general']['batch_size'] + EPOCH = hyper_params['general']['epochs'] + + callbacks = self.callback_defining() + history = model.fit( + x_train, + y_train, + batch_size=BATCH, + epochs=EPOCH, + verbose=1, + validation_data=validation_data, + callbacks=callbacks, + ) + + return history + + + +class RunFnDeepHyper: + def __init__(self, x_train, y_train, x_val, y_val): + # Params that are currently static + self.vocab_size = 40000 + self.maxlen = 250 + self.embed_dim = 128 + self.ff_dim = 128 + self.BATCH = 32 + self.patience_red_lr = 20 + self.patience_early_stop = 100 + self.hvd_switch = False + self.checkpt_file = 'smile_regress.autosave.model.h5' + self.csv_file = 'smile_regress.training.log' + + self.x_train = x_train + self.y_train = y_train + self.x_val = x_val + self.y_val = y_val + + def run(self, config): + + num_heads = config["num_heads"] + DR_TB = config["DR_TB"] + DR_ff = config["DR_ff"] + activation = config["activation"] + dropout1 = config["dropout_aftermulti"] + lr = config["lr"] + loss_fn = config["loss_fn"] + EPOCH = config["epochs"] + validation_data = (self.x_val, self.y_val) + + model = ModelArchitecture(self.vocab_size, self.maxlen, self.embed_dim, num_heads, self.ff_dim, DR_TB, DR_ff, activation, dropout1, lr, loss_fn, self.hvd_switch).call() + + history = TrainingAndCallbacks(self.hvd_switch, self.checkpt_file, lr, self.csv_file, self.patience_red_lr, self.patience_early_stop).training( model, self.x_train, self.y_train, validation_data, self.BATCH, EPOCH) 
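+        # NOTE: ModelArchitecture and TrainingAndCallbacks are invoked here with
+        # their old positional signatures; the refactored classes above take a
+        # single hyper_params dict, so this legacy path needs the same update
+        # before it will run.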
+
+        # the model is compiled with mae/r2 metrics (no accuracy), and DeepHyper
+        # maximizes the returned objective, so report negative validation loss
+        return -history.history["val_loss"][-1]
+
+
+def run(config):
+
+    DATA_PATH='/grand/datascience/avasan/ST_Benchmarks/Data/1M-flatten'
+
+    TFIL='ml.3CLPro_7BQY_A_1_F.Orderable_zinc_db_enaHLL.sorted.4col.dd.parquet.xform-smiles.csv.reg.train'
+
+    VFIL='ml.3CLPro_7BQY_A_1_F.Orderable_zinc_db_enaHLL.sorted.4col.dd.parquet.xform-smiles.csv.reg.val'
+
+    data_path_train = f'{DATA_PATH}/{TFIL}'
+    data_path_vali = f'{DATA_PATH}/{VFIL}'
+    hvd_switch = False
+    BATCH = 32 # batch size used for training
+    vocab_size = 40000
+    maxlen = 250
+    embed_dim = 128 # Embedding size for each token
+    ff_dim = 128 # Hidden layer size in feed forward network inside transformer
+    checkpt_file = "smile_regress.autosave.model.h5"
+    csv_file = "smile_regress.training.log"
+    patience_red_lr = 20
+    patience_early_stop = 100
+
+    ########Create training and validation data#####
+    # NOTE: train_val_data() in this module now takes the hyper_params dict;
+    # this standalone path still uses the old positional signature and needs
+    # the same refactor as RunFnDeepHyper above.
+    x_train, y_train, x_val, y_val = train_val_data(data_path_train, data_path_vali, hvd_switch, vocab_size, maxlen)
+    num_heads = config["num_heads"]
+    DR_TB = config["DR_TB"]
+    DR_ff = config["DR_ff"]
+    activation = config["activation"]
+    dropout1 = config["dropout_aftermulti"]
+    lr = config["lr"]
+    loss_fn = config["loss_fn"]
+    EPOCH = config["epochs"]
+    validation_data = (x_val, y_val)
+
+    model = ModelArchitecture(vocab_size, maxlen, embed_dim, num_heads, ff_dim, DR_TB, DR_ff, activation, dropout1, lr, loss_fn, hvd_switch).call()
+
+    history = TrainingAndCallbacks(hvd_switch, checkpt_file, lr, csv_file, patience_red_lr, patience_early_stop).training( model, x_train, y_train, validation_data, BATCH, EPOCH)
+
+    return -history.history["val_loss"][-1]
+
+
+
+def hyper_param_problem():
+
+    ACTIVATIONS = [
+        "elu", "gelu", "hard_sigmoid", "linear", "relu", "selu",
+        "sigmoid", "softplus", "softsign", "swish", "tanh",
+    ]
+
+    LRS = [1e-6 * i for i in range(0, 11)]  # currently unused; lr is tuned over the continuous range below
+
+    LOSSFNS = ["mean_squared_error", "mean_absolute_error"]
+
+    problem = HpProblem()
+    problem.add_hyperparameter((12, 32), "num_heads", default_value = 16)
+    problem.add_hyperparameter((0.025, 0.5), "DR_TB", default_value = 0.1)
+    problem.add_hyperparameter((0.025, 0.5), "DR_ff", default_value = 0.1)
+    problem.add_hyperparameter(ACTIVATIONS, "activation", default_value = "elu")
+    problem.add_hyperparameter((1e-7, 1e-5), "lr", default_value = 1e-6)
+    problem.add_hyperparameter(LOSSFNS, "loss_fn", default_value = "mean_squared_error")
+    problem.add_hyperparameter((2,10), "epochs", default_value = 2)
+    problem.add_hyperparameter([True, False], "dropout_aftermulti", default_value = False)
+
+    return problem
+
+
+def default_evaluation(problem, is_gpu_available, n_gpus, run):
+    if is_gpu_available:
+        if not(ray.is_initialized()):
+            ray.init(num_cpus=n_gpus, num_gpus=n_gpus, log_to_driver=False)
+
+        run_default = ray.remote(num_cpus=1, num_gpus=1)(run)
+        objective_default = ray.get(run_default.remote(problem.default_configuration))
+    else:
+        if not(ray.is_initialized()):
+            ray.init(num_cpus=1, log_to_driver=False)
+        run_default = run
+        print(problem.default_configuration)
+        objective_default = run_default(problem.default_configuration)
+    return objective_default
+
+
+def get_evaluator(run_function, is_gpu_available, n_gpus):
+    # Default arguments for Ray: 1 worker and 1 worker per evaluation
+    method_kwargs = {
+        "num_cpus": 1,
+        "num_cpus_per_task": 1,
+        "callbacks": [TqdmCallback()]
+    }
+
+    # If GPU devices are detected then it will create 'n_gpus' workers
+    # and use 1 worker for each evaluation
+    if is_gpu_available:
+        method_kwargs["num_cpus"] = n_gpus
method_kwargs["num_gpus"] = n_gpus + method_kwargs["num_cpus_per_task"] = 1 + method_kwargs["num_gpus_per_task"] = 1 + + evaluator = Evaluator.create( + run_function, + method="ray", + method_kwargs=method_kwargs + ) + print(f"Created new evaluator with {evaluator.num_workers} worker{'s' if evaluator.num_workers > 1 else ''} and config: {method_kwargs}", ) + + return evaluator + + +def build_model_tuner(hp): + #units = hp.Int("units", min_value=32, max_value=512, step=32) + vocab_size = 40000 + maxlen = 250 + embed_dim = 128 + num_heads = hp.Int("num_heads", min_value=12, max_value=40, step=4) + ff_dim = 128 + DR_TB = hp.Float("DR_TB", min_value=0.025, max_value=0.5, step=0.025) + DR_ff = hp.Float("DR_TB", min_value=0.025, max_value=0.5, step=0.025) + activation = hp.Choice("activation", ["relu", "elu", "gelu"]) + #activation="elu" + dropout1 = hp.Boolean("dropout_aftermulti") + lr = hp.Float("lr", min_value=1e-6, max_value=1e-5, step=1e-6) + loss_fn = hp.Choice("loss_fn", ["mean_squared_error", "mean_absolute_error"]) + # call existing model-building code with the hyperparameter values. + model = ModelArchitecture(vocab_size, maxlen, embed_dim, num_heads, ff_dim, DR_TB, DR_ff, activation, dropout1, lr, loss_fn, hvd_switch).call() + + return model + + +#tfm.optimization.lars_optimizer.LARS( +# learning_rate = 0.0000025, +# momentum = 0.9, +# weight_decay_rate = 0.0, +# eeta = 0.001, +# nesterov = False, +# classic_momentum = True, +# exclude_from_weight_decay = None, +# exclude_from_layer_adaptation = None, +# name = 'LARS', +# ) + +def model_architecture(embed_dim, num_heads, ff_dim, DR_TB, DR_ff, activation, dropout1, lr, loss_fn, hvd_switch): + + vocab_size = 40000 #number of possible 'words' in SMILES data + maxlen = 250 #length of each SMILE sequence in input + inputs = layers.Input(shape=(maxlen,)) + embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim) + x = embedding_layer(inputs) + transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim, DR_TB, activation, dropout1) + # Use 4 transformer blocks here + x = transformer_block(x) + x = transformer_block(x) + x = transformer_block(x) + x = transformer_block(x) + + x = layers.Reshape((1, 32000), input_shape=(250, 128,))( + x + ) # reshaping increases parameters but improves accuracy a lot + x = layers.Dropout(DR_ff)(x) + x = layers.Dense(1024, activation=activation)(x) + x = layers.Dropout(DR_ff)(x) + x = layers.Dense(256, activation=activation)(x) + x = layers.Dropout(DR_ff)(x) + x = layers.Dense(64, activation=activation)(x) + x = layers.Dropout(DR_ff)(x) + x = layers.Dense(16, activation=activation)(x) + x = layers.Dropout(DR_ff)(x) + outputs = layers.Dense(1, activation=activation)(x) + + model = keras.Model(inputs=inputs, outputs=outputs) + + model.summary() + + # Train and Evaluate + + opt = Adam(learning_rate=lr) + + #HVD Wrap optimizer in hvd Distributed Optimizer delegates gradient comp to original optimizer, averages gradients, and applies averaged gradients + if hvd_switch: + opt = hvd.DistributedOptimizer(opt) + + model.compile( + loss=loss_fn, optimizer=opt, metrics=["mae", r2] + ) + return model + +def callback_setting(hvd_switch, checkpt_file, lr, csv_file, patience_red_lr, patience_early_stop): + + checkpointer = ModelCheckpoint( + filepath=checkpt_file,#"smile_regress.autosave.model.h5", + verbose=1, + save_weights_only=True, + save_best_only=True, + ) + + clr = CyclicLR(base_lr = lr, max_lr = 5*lr, step_size=2000.) 
+
+    csv_logger = CSVLogger(csv_file)
+
+    # learning rate tuning at each epoch
+    # is it possible to do batch size tuning at each epoch as well?
+    reduce_lr = ReduceLROnPlateau(
+        monitor="val_loss",
+        factor=0.75,
+        patience=patience_red_lr,
+        verbose=1,
+        mode="auto",
+        epsilon=0.0001,
+        cooldown=3,
+        min_lr=0.000000001,
+    )
+
+    early_stop = EarlyStopping(
+        monitor="val_loss",
+        patience=patience_early_stop,
+        verbose=1,
+        mode="auto",
+    )
+
+    if hvd_switch:
+        #HVD broadcast initial variables from rank0 to all other processes
+        hvd_broadcast = hvd.callbacks.BroadcastGlobalVariablesCallback(0)
+
+        callbacks = [hvd_broadcast, reduce_lr, clr]
+
+        if hvd.rank() == 0:
+            callbacks.append(csv_logger)
+            callbacks.append(early_stop)
+            callbacks.append(checkpointer)
+
+        return callbacks
+
+    else:
+        return [reduce_lr, clr, csv_logger, early_stop, checkpointer]
+
+
+def build_model_DeepHyper(x_train, y_train, x_val, y_val, config, hvd_switch=False, checkpt_file = 'smile_regress.autosave.model.h5', csv_file = 'smile_regress.training.log'):
+    #units = hp.Int("units", min_value=32, max_value=512, step=32)
+    embed_dim = 128
+    ff_dim = 128
+    BATCH = 32
+    patience_red_lr = 20
+    patience_early_stop = 100
+
+    num_heads = config["num_heads"]
+    DR_TB = config["DR_TB"]
+    DR_ff = config["DR_ff"]
+    activation = config["activation"]
+    dropout1 = config["dropout_aftermulti"]
+    lr = config["lr"]
+    loss_fn = config["loss_fn"]
+    EPOCH = config["epochs"]
+
+    # call existing model-building code with the hyperparameter values.
+    model = model_architecture (
+        embed_dim=embed_dim, num_heads=num_heads, ff_dim=ff_dim, DR_TB=DR_TB, DR_ff = DR_ff, activation=activation, dropout1=dropout1, lr=lr, loss_fn=loss_fn, hvd_switch=hvd_switch
+    )
+
+    callbacks = callback_setting (
+        hvd_switch,
+        checkpt_file,
+        lr,
+        csv_file,
+        patience_red_lr,
+        patience_early_stop
+    )
+
+    history = model.fit(
+        x_train,
+        y_train,
+        batch_size=BATCH,
+        epochs=EPOCH,
+        verbose=1,
+        validation_data=(x_val, y_val),
+        callbacks=callbacks,
+    )
+
+    # the model is compiled with mae/r2 metrics (no accuracy), and DeepHyper
+    # maximizes the returned objective, so report negative validation loss
+    return -history.history["val_loss"][-1]
+
+
+
+#def build_model(num_heads, DR_TB, DR_ff, activation, dropout1, lr, loss_fn, hvd_switch):
+#    #units = hp.Int("units", min_value=32, max_value=512, step=32)
+#    embed_dim = 128
+#    ff_dim = 128
+#    # call existing model-building code with the hyperparameter values.
+# model = model_architecture ( +# embed_dim=embed_dim, num_heads=num_heads, ff_dim=ff_dim, DR_TB=DR_TB, DR_ff = DR_ff, activation=activation, dropout1=dropout1, lr=lr, loss_fn=loss_fn, hvd_switch=hvd_switch +# ) +# return model + + + diff --git a/Pilot1/ST1/smiles_regress_transformer_spe_run.py b/Pilot1/ST1/smiles_regress_transformer_spe_run.py new file mode 100644 index 00000000..8124de42 --- /dev/null +++ b/Pilot1/ST1/smiles_regress_transformer_spe_run.py @@ -0,0 +1,53 @@ +############# Module Loading ############## +import argparse +import os +import numpy as np +import matplotlib +import pandas as pd +matplotlib.use("Agg") +import tensorflow as tf +from tensorflow import keras +from tensorflow.keras import backend as K +from tensorflow.keras import layers +from tensorflow.keras.callbacks import ( + CSVLogger, + EarlyStopping, + ModelCheckpoint, + ReduceLROnPlateau, +) +from tensorflow.keras.optimizers import Adam +from tensorflow.keras.preprocessing import sequence, text +import horovod.keras as hvd ### importing horovod to use data parallelization in another step + +from clr_callback import * +from smiles_regress_transformer_spe_funcs import * +from tensorflow.python.client import device_lib +import json + +#######HyperParamSetting############# + +json_file = 'config_st_spe_training.json' +hyper_params = ParamsJson(json_file) + +if hyper_params['general']['use_hvd']==True: + initialize_hvd() + +########Create training and validation data##### +x_train, y_train, x_val, y_val = train_val_data(hyper_params) + +######## Build model ############# + +model = ModelArchitecture(hyper_params).call() + +####### Set callbacks + train model ############## + +train_and_callbacks = TrainingAndCallbacks(hyper_params) + +history = train_and_callbacks.training( + model, + x_train, + y_train, + (x_val, y_val), + hyper_params + ) + diff --git a/Pilot1/ST1/sub_smiles_regress_transformer_spe.sh b/Pilot1/ST1/sub_smiles_regress_transformer_spe.sh new file mode 100755 index 00000000..4e8035db --- /dev/null +++ b/Pilot1/ST1/sub_smiles_regress_transformer_spe.sh @@ -0,0 +1,16 @@ + +module load conda/2022-09-08 +conda activate + +cd /grand/datascience/avasan/ST_Benchmarks/Test_Tokenizers/SMILESPair_Encoder_Github + +NP=16 +PPN=4 +OUT=logfile.log +let NDEPTH=64/$NP +let NTHREADS=$NDEPTH + +TF_GPU_ALLOCATOR=cuda_malloc_async +export TF_FORCE_GPU_ALLOW_GROWTH=true + +mpiexec --np 16 -ppn 4 --cpu-bind verbose,list:0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16 -env NCCL_COLLNET_ENABLE=1 -env NCCL_NET_GDR_LEVEL=PHB python smiles_regress_transformer_run.py > $OUT
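A quick composition sketch for reviewers (illustrative only; it assumes the VocabFiles directory and config_st_spe_training.json added above sit in the working directory):

    import json
    from itertools import chain, islice, repeat

    from smiles_pair_encoders_functions import SMILES_SPE_Tokenizer

    # Build the SPE tokenizer from the committed config.
    with open('config_st_spe_training.json') as f:
        params = json.load(f)
    tok_cfg = params['tokenization']
    tokenizer = SMILES_SPE_Tokenizer(vocab_file=tok_cfg['tokenizer']['vocab_file'],
                                     spe_file=tok_cfg['tokenizer']['spe_file'])

    # Encode one SMILES and right-pad the ids with 0 ([PAD]) to maxlen,
    # mirroring preprocess_smiles_pair_encoding() in the funcs module.
    ids = tokenizer('CC(=O)Oc1ccccc1C(=O)O')['input_ids']
    padded = list(islice(chain(ids, repeat(0)), tok_cfg['maxlen']))
    assert len(padded) == tok_cfg['maxlen']  # 45 in this config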