diff --git a/IP_Extraction_Fine_Tuning/code/dataset_small.csv b/IP_Extraction_Fine_Tuning/code/dataset_small.csv new file mode 100644 index 0000000..0b06600 --- /dev/null +++ b/IP_Extraction_Fine_Tuning/code/dataset_small.csv @@ -0,0 +1,230 @@ +ip,class,text +1.000.000.000,A, 1.000.000.000+ +1.0.0.1,A, 1.0.0.1 +1.0.0.1,A, 1.0.0.1 +1.1.1.1,A, 1.1.1.1 +1.1.1.1,A, 1.1.1.1 +608.80.24.1,N, 3608.80.24.1.8 +1.0.0.14,A, eng-USA-1.0.0.14-20170731 +1.0.0.12,A, cmn-CHN-1.0.0.12-20170804 +3.6.1.4,A, 3.6.1.4 +608.80.24.1,N, 3608.80.24.1.8 +12.6.0.1,A, 12.6.0.1 +100.73.156.224,A, 100.73.156.224 +192.95.36.142,C, obfs4 192.95.36.142:443 CDF2E852BF539B82BD10E27E9115A31734E378C2 cert=qUVQ0srL1JI/vO6V6m/24anYXiJD3QP2HgzUKQtQ7GRqqUvs7P+tG43RtAqdhLOALP7DJQ iat-mode=1 +38.229.1.78,A, obfs4 38.229.1.78:80 C8CBDB2464FC9804A69531437BCF2BE31FDD2EE4 cert=Hmyfd2ev46gGY7NoVxA9ngrPF2zCZtzskRTzoWXbxNkzeVnGFPWmrTtILRyqCTjHR+s9dg iat-mode=1 +38.229.33.83,A, obfs4 38.229.33.83:80 0BAC39417268B96B9F514E7F63FA6FBA1A788955 cert=VwEFpk9F/UN9JED7XpG1XOjm/O8ZCXK80oPecgWnNDZDv5pdkhq1OpbAH0wNqOT6H6BmRQ iat-mode=1 +37.218.240.34,A, obfs4 37.218.240.34:40035 88CD36D45A35271963EF82E511C8827A24730913 cert=eGXYfWODcgqIdPJ+rRupg4GGvVGfh25FWaIXZkit206OSngsp7GAIiGIXOJJROMxEqFKJg iat-mode=1 +37.218.245.14,A, obfs4 37.218.245.14:38224 D9A82D2F9C2F65A18407B1D2B764F130847F8B5D cert=bjRaMrr1BRiAW8IE9U5z27fQaYgOhX1UCmOpg2pFpoMvo6ZgQMzLsaTzzQNTlm7hNcb+Sg iat-mode=0 +85.31.186.98,A, obfs4 85.31.186.98:443 011F2599C0E9B27EE74B353155E244813763C3E5 cert=ayq0XzCwhpdysn5o0EyDUbmSOx3X/oTEbzDMvczHOdBJKlvIdHHLJGkZARtT4dcBFArPPg iat-mode=0 +85.31.186.26,A, obfs4 85.31.186.26:443 91A6354697E6B02A386312F68D82CF86824D3606 cert=PBwr+S8JTVZo6MPdHnkTwXJPILWADLqfMGoVvhZClMq/Urndyd42BwX9YFJHZnBB3H0XCw iat-mode=0 +216.252.162.21,A, obfs4 216.252.162.21:46089 0DB8799466902192B6C7576D58D4F7F714EC87C1 cert=XPUwcQPxEXExHfJYX58gZXN7mYpos7VNAHbkgERNFg+FCVNzuYo1Wp+uMscl3aR9hO2DRQ iat-mode=0 +144.217.20.138,B, obfs4 144.217.20.138:80 
FB70B257C162BF1038CA669D568D76F5B7F0BABB cert=vYIV5MgrghGQvZPIi1tJwnzorMgqgmlKaB77Y3Z9Q/v94wZBOAXkW+fdx4aSxLVnKO+xNw iat-mode=0 +193.11.166.194,C, obfs4 193.11.166.194:27015 2D82C2E354D531A68469ADF7F878FA6060C6BACA cert=4TLQPJrTSaDffMK7Nbao6LC7G9OW/NHkUwIdjLSS3KYf0Nv4/nQiiI8dY2TcsQx01NniOg iat-mode=0 +193.11.166.194,C, obfs4 193.11.166.194:27020 86AC7B8D430DAC4117E9F42C9EAED18133863AAF cert=0LDeJH4JzMDtkJJrFphJCiPqKx7loozKN7VNfuukMGfHO0Z8OGdzHVkhVAOfo1mUdv9cMg iat-mode=0 +193.11.166.194,C, obfs4 193.11.166.194:27025 1AE2C08904527FEA90C4C4F8C1083EA59FBC6FAF cert=ItvYZzW5tn6v3G4UnQa6Qz04Npro6e81AP70YujmK/KXwDFPTs3aHXcHp4n8Vt6w/bv8cA iat-mode=0 +209.148.46.65,A, obfs4 209.148.46.65:443 74FAD13168806246602538555B5521A0383A1875 cert=ssH+9rP8dG2NLDN2XuFw63hIO/9MNNinLmxQDpVa+7kTOa9/m+tGWT1SmSYpQ9uTBGa6Hw iat-mode=0 +146.57.248.225,B, obfs4 146.57.248.225:22 10A6CD36A537FCE513A322361547444B393989F0 cert=K1gDtDAIcUfeLqbstggjIw2rtgIKqdIhUlHp82XRqNSq/mtAjp1BIC9vHKJ2FAEpGssTPw iat-mode=0 +3.3.7.0,A, 3.3.7.0 +6.8.4.2,A, https://pm-gateway.supersonicads.com/auction?appKey=67052255&SDKVersion=6.8.4.2&initCountry=US&platform=iphone&advId=0D99ABE9-B1D0-41C1-8C45-2681A498AD97 +3.3.7.0,A, 3.3.7.0 +6.8.4.2,A, 6.8.4.2 +6.8.4.2,A, 6.8.4.2 +2.19.40.12,A, Sticker_2.19.40.12 +2.19.20.2,A, Sticker_2.19.20.2 +2.19.40.24,A, Sticker_2.19.40.24 +2.19.40.24,A, Sticker_2.19.40.24 +17.57.144.52,A, 17.57.144.52 +1.0.0.1,A, 1.0.0.1 +12.6.0.1,A, 12.6.0.1 +12.6.0.1,A, 12.6.0.1 +135.0.0.22,B, 135.0.0.22.118 (206072521) +12.6.0.1,A, 12.6.0.1 +608.80.24.1,N, 3608.80.24.1.8 +608.80.24.1,N, 3608.80.24.1.8 +135.0.0.22,B, 135.0.0.22.118 +135.0.0.22,B, 135.0.0.22.118 (206072690) +135.0.0.22,B, 135.0.0.22.118 (206072521) +10.5.0.1,A, http://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js +12.6.0.1,A, 12.6.0.1 +4.1.1.0,A, cmn-CHN-4.1.1.0-20160421 +4.1.1.5,A, eng-USA-4.1.1.5-20160919 +4.1.1.5,A, eng-USA-4.1.1.5-20160919 +4.1.0.2,A, spa-USA-4.1.0.2-20160424 +4.1.0.2,A, 
rus-RUS-4.1.0.2-20160422 +4.1.0.3,A, tur-TUR-4.1.0.3-20160422 +6.0.0.2,A, eng-USA-OfflineIOS-6.0.0.2-20191003 +4.1.1.0,A, cmn-CHN-4.1.1.0-20160421 +4.1.0.2,A, yue-HKG-4.1.0.2-20160424 +5.2.0.2,A, eng-USA-5.2.0.2-20170705 +5.0.0.1,A, spa-ESP-5.0.0.1-20170310 +5.3.0.3,A, eng-USA-5.3.0.3-20180129 +5.0.0.1,A, cmn-CHN-OfflineIOS-5.0.0.1-20180624 +5.3.0.1,A, eng-USA-OfflineIOS-5.3.0.1-20190209 +5.3.0.1,A, eng-USA-OfflineIOS-5.3.0.1-20190530 +5.0.0.1,A, cmn-CHN-OfflineIOS-5.0.0.1-20180925 +4.1.0.1,A, eng-GBR-4.1.0.1-20160422 +4.1.0.1,A, ita-ITA-4.1.0.1-20160422 +4.1.0.4,A, por-BRA-4.1.0.4-20160424 +4.1.0.1,A, eng-IND-4.1.0.1-20160424 +6.0.0.4,A, fra-FRA-OfflineIOS-6.0.0.4-20191114 +6.0.0.3,A, deu-DEU-OfflineIOS-6.0.0.3-20191114 +6.0.0.1,A, eng-AUS-OfflineIOS-6.0.0.1-20191215 +5.0.0.3,A, kor-KOR-OfflineIOS-5.0.0.3-20191215 +6.0.0.2,A, spa-ESP-OfflineIOS-6.0.0.2-20191120 +6.0.0.3,A, spa-USA-OfflineIOS-6.0.0.3-20190831 +6.0.0.0,A, cmn-CHN-OfflineIOS-6.0.0.0-20191009 +6.0.0.3,A, jpn-JPN-OfflineIOS-6.0.0.3-20191024 +5.0.0.4,A, rus-RUS-OfflineIOS-5.0.0.4-20190930 +5.0.0.1,A, tur-TUR-OfflineIOS-5.0.0.1-20190928 +6.0.0.2,A, yue-HKG-OfflineIOS-6.0.0.2-20190901 +6.0.0.1,A, eng-GBR-6.0.0.1-20190729 +5.1.1.0,A, ita-ITA-5.1.1.0-20190729 +5.0.0.2,A, por-BRA-5.0.0.2-20190729 +6.0.0.5,A, eng-IND-6.0.0.5-20190907 +6.0.0.0,A, ara-XWW-OfflineIOS-6.0.0.0-20200227 +5.0.0.1,A, tur-TUR-OfflineIOS-5.0.0.1-20200229 +7.0.0.3,A, deu-DEU-OfflineIOS-7.0.0.3-20200323 +7.0.0.3,A, eng-GBR-OfflineIOS-7.0.0.3-20200323 +7.0.0.4,A, eng-USA-OfflineIOS-7.0.0.4-20200319 +7.0.0.3,A, spa-ESP-OfflineIOS-7.0.0.3-20200323 +7.0.0.3,A, fra-FRA-OfflineIOS-7.0.0.3-20200323 +7.0.0.3,A, jpn-JPN-OfflineIOS-7.0.0.3-20200323 +7.0.0.3,A, cmn-CHN-OfflineIOS-7.0.0.3-20200323 +10.78.6.0,A, Snapchat/10.78.6.0 (iPhone8_4; iOS 13.4.1; gzip) +10.78.6.0,A, 10.78.6.0 +10.22.22.1,A, 10.22.22.1 +10.68.68.22,A, 10.68.68.22 +10.22.22.1,A, 10.22.22.1 +216.239.36.126,A, 216.239.36.126 +10.78.6.0,A, 10.78.6.0 +10.78.6.0,A, 
Snapchat/10.78.6.0 (iPhone8_4; iOS 13.4.1; gzip) +216.239.36.126,A, 216.239.36.126 +52.207.89.96,A, 52.207.89.96_52.207.21.88_52.4.205.231_52.5.37.1_52.4.79.247_52.22.144.146_52.3.185.35_52.21.187.77 +52.207.21.88,A, 52.207.89.96_52.207.21.88_52.4.205.231_52.5.37.1_52.4.79.247_52.22.144.146_52.3.185.35_52.21.187.77 +52.4.205.231,A, 52.207.89.96_52.207.21.88_52.4.205.231_52.5.37.1_52.4.79.247_52.22.144.146_52.3.185.35_52.21.187.77 +52.5.37.1,A, 52.207.89.96_52.207.21.88_52.4.205.231_52.5.37.1_52.4.79.247_52.22.144.146_52.3.185.35_52.21.187.77 +52.4.79.247,A, 52.207.89.96_52.207.21.88_52.4.205.231_52.5.37.1_52.4.79.247_52.22.144.146_52.3.185.35_52.21.187.77 +52.22.144.146,A, 52.207.89.96_52.207.21.88_52.4.205.231_52.5.37.1_52.4.79.247_52.22.144.146_52.3.185.35_52.21.187.77 +52.3.185.35,A, 52.207.89.96_52.207.21.88_52.4.205.231_52.5.37.1_52.4.79.247_52.22.144.146_52.3.185.35_52.21.187.77 +52.21.187.77,A, 52.207.89.96_52.207.21.88_52.4.205.231_52.5.37.1_52.4.79.247_52.22.144.146_52.3.185.35_52.21.187.77 +216.239.36.126,A, 216.239.36.126 +35.186.197.135,A, 35.186.197.135 +35.190.43.134,A, 35.190.43.134 +35.186.197.135,A, 35.186.197.135 +35.186.197.135,A, 35.186.197.135 +3.213.65.77,A, 3.213.65.77 +3.94.245.242,A, 3.94.245.242 +172.217.8.20,B, 172.217.8.20 +3.94.245.242,A, 3.94.245.242 +216.239.36.126,A, 216.239.36.126 +172.217.8.20,B, 172.217.8.20 +216.239.36.126,A, 216.239.36.126 +15.21.2.145,A, 15.21.2.14594 +8.58.0.93,A, 8.58.0.93 +10.5.0.1,A, http://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js +8.58.0.93,A, 8.58.0.93 +2.20.31.4,A, 2.20.31.4 +10.78.6.0,A, 10.78.6.0 +10.5.0.1,A, http://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js +255.255.255.255,E, 255.255.255.255 +0.0.0.0,N, 0.0.0.0 +255.255.255.255,E, 255.255.255.255 +0.0.0.0,N, 0.0.0.0 +3.6.1.4,A, 3.6.1.4 +609.1.20.0,N, 609.1.20.0.9 +12.6.0.1,A, 12.6.0.1 +12.6.0.1,A, 12.6.0.1 +12.6.0.1,A, 12.6.0.1 +6.9.4.0,A, MoPub-6.9.4.0 +7.4.2.6,A, 7.4.2.6 +135.0.0.22,B, 
135.0.0.22.118 +2.20.31.4,A, 2.20.31.4 +0.5.1.3,N, 0.5.1.3 +0.6.7.8,N, 0.6.7.8 +10.78.6.0,A, 10.78.6.0 +8.58.0.93,A, 8.58.0.93 +12.6.0.1,A, 12.6.0.1 +20.10.0.8,A, 20.10.0.8 +2.20.31.4,A, 2.20.31.4 +15.21.2.145,A, 15.21.2.14594 +10.78.6.0,A, 10.78.6.0 +3.6.1.4,A, 3.6.1.4 +1.0.0.1,A, 1.0.0.1 +1.0.0.1,A, 1.0.0.1 +1.0.0.1,A, 1.0.0.1 +1.1.1.1,A, 1.1.1.1 +1.1.1.1,A, 1.1.1.1 +1.1.1.1,A, 1.1.1.1 +208.180.231.139,A, http://abuseipdb.com/check/208.180.231.139 +69.50.232.54,A, http://www.abuseipdb.com/whois/69.50.232.54 +4.3.0.49,A, http://advantage1.checkm8.com/AdminServer/swf/admin-release.swf?version=4.3.0.49 +0.0.0.392,N, http://d3bn78kc7qbjb6.cloudfront.net/rc-0.0.0.392/views/logs/logs.html +3.2.108.2,A, http://fcs.dell.com/fcs/fcs/UIFramework/3.2.108.2/res/Image?path=sharedcontrols%2Fcontainers%2Fvalidation_messaging_left.png +1.7.0.0,A, http://ui1.img.digitalrivercontent.net/drui/1.7.0.0.7.1/css/dr-stylesheet-ff-compressed.css +2.15.134.0,A, http://fast.fonts.com/FontsCom/Live/static/2.15.134.0/img/apple-touch-icon-57x57.png +0.0.798.798,N, http://tva1.sinaimg.cn/crop.0.0.798.798.180/6462d00fgw1egno4hgy6wj20m80m8mxn.jpg +0.9.9.3,N, http://netix.dl.sourceforge.net/project/kreogist-mu/Releases/0.9.9.3/Windows/mu_0.9.9.3_win64_intel_d9b5f2d.zip +0.9.9.3,N, http://netix.dl.sourceforge.net/project/kreogist-mu/Releases/0.9.9.3/Windows/mu_0.9.9.3_win64_intel_d9b5f2d.zip +5.6.1.892,N, http://sm.wdjcdn.com/release/files/jupiter/5.6.1.8925/wandoujia-wandoujia_web.apk +0.21.685.685,N, http://tvassl.weibo.cn/crop.0.21.685.685.1024/6b84272bgw1euxxzxrndpj20k00k0adc.jpg +1.4.4.02,A, http://novel-cdn.kuangxiangit.com/custom/download/1.4.4.02/Novel-release_14402_jiagu_sign-1109-2-PCdownload.apk +10.5.0.1,A, http://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js +608.80.24.1,N, 3608.80.24.1.8 +2.20.31.4,A, 2.20.31.4 +180.0.720.720,B, 
https://scontent-iad3-1.cdninstagram.com/v/t51.2885-15/e15/c180.0.720.720a/s240x240/93425170_1848465348622192_2718028079642416718_n.jpg?cachebreaker=9823&_nc_ht=scontent-iad3-1.cdninstagram.com&_nc_cat=1&_nc_ohc=MteGbc14-xIAX9Oom5c&oh=bce805572a79dd6ed9fbedbbf1b198ee&oe=5EC259EB&ig_cache_key=MjI4ODMyOTY2NzQ3OTg3Nzk0Ng%3D%3D.2 +8.5.51.877,N, 8.5.51.877.g6cea6b5 +8.5.51.877,N, 8.5.51.877 +1.000.000.000,A, 1.000.000.000+ +2.20.31.4,A, 2.20.31.4 +257.0.1.38,N, 257.0.1.38.116 +7.0.0.4,A, /private/var/MobileAsset/AssetsV2/com_apple_MobileAsset_EmbeddedSpeech/479991bc60161e4545ff833d6117f264954cb6b3.asset/AssetData/eng-USA-OfflineIOS-7.0.0.4-20200319 +608.80.24.1,N, 3608.80.24.1.8 +10.5.0.1,A, http://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js +10.78.6.0,A, 10.78.6.0 +608.80.24.1,N, 3608.80.24.1.8 +2.20.31.4,A, 2.20.31.4 +13.3.1.17,A, 13.3.1.17D50 +13.3.1.17,A, 13.3.1.17D50 +104.36.225.166,A, 104.36.225.166 +104.36.225.154,A, 104.36.225.154 +104.36.225.179,A, 104.36.225.179 +104.36.225.142,A, 104.36.225.142 +10.5.0.1,A, http://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js +20.10.0.8,A, 20.10.0.8 +135.0.0.22,B, 135.0.0.22.118 +34.227.164.166,A, 34.227.164.166 +10.5.0.1,A, http://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js +7.0.0.4,A, eng-USA-OfflineIOS-7.0.0.4-20200319 +7.4.1.2,A, 7.4.1.2 +64.98.122.70,A, 64.98.122.70 +20.10.0.8,A, 20.10.0.8 +12.6.0.1,A, 12.6.0.1 +12.6.0.1,A, 12.6.0.1 +257.0.1.38,N, 257.0.1.38.116 +257.0.1.38,N, 257.0.1.38.116 +256.0.1.26,N, tincan_register_device_256.0.1.26.113 +257.0.1.38,N, tincan_register_device_257.0.1.38.116 +257.0.1.38,N, 257.0.1.38.116 +257.0.1.38,N, 257.0.1.38.116 +135.0.0.22,B, 135.0.0.22.118 (206072690) +2.20.31.4,A, 2.20.31.4 +3.6.1.4,A, 3.6.1.4 +10.5.0.1,A, http://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js +12.6.0.1,A, 12.6.0.1 +10.5.0.1,A, http://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js 
+12.6.0.1,A, 12.6.0.1 +15.21.2.145,A, 15.21.2.14594 +10.5.0.1,A, http://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js +608.80.24.1,N, 3608.80.24.1.8 +5.12.20.4,A, 5.12.20.4.10.18.5.5.2 +10.18.5.5,A, 5.12.20.4.10.18.5.5.2 +608.80.24.1,N, 3608.80.24.1.8 +12.6.0.1,A, 12.6.0.1 +10.5.0.1,A, http://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js +2.20.31.4,A, 2.20.31.4 +608.80.24.1,N, 3608.80.24.1.8 +255.255.255.255,E, 255.255.255.255 +0.0.0.0,N, 0.0.0.0 +255.255.255.255,E, 255.255.255.255 +0.0.0.0,N, 0.0.0.0 diff --git a/IP_Extraction_Fine_Tuning/code/dataset_test.jsonl b/IP_Extraction_Fine_Tuning/code/dataset_test.jsonl new file mode 100644 index 0000000..82cd667 --- /dev/null +++ b/IP_Extraction_Fine_Tuning/code/dataset_test.jsonl @@ -0,0 +1,46 @@ +{"promote": "\t3608.80.24.1.8;608.80.24.1", "completion": "negtive"} +{"promote": "\t2.20.31.4;2.20.31.4", "completion": "positive"} +{"promote": "\t\t13.3.1.17D50;13.3.1.17", "completion": "positive"} +{"promote": "\t\t13.3.1.17D50;13.3.1.17", "completion": "positive"} +{"promote": "\t\t104.36.225.166;104.36.225.166", "completion": "positive"} +{"promote": "\t\t104.36.225.154;104.36.225.154", "completion": "positive"} +{"promote": "\t\t104.36.225.179;104.36.225.179", "completion": "positive"} +{"promote": "\t\t104.36.225.142;104.36.225.142", "completion": "positive"} +{"promote": "\t\t\thttp://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js;10.5.0.1", "completion": "positive"} +{"promote": "\t20.10.0.8;20.10.0.8", "completion": "positive"} +{"promote": "\t135.0.0.22.118;135.0.0.22", "completion": "positive"} +{"promote": "\t\t34.227.164.166;34.227.164.166", "completion": "positive"} +{"promote": "\t\t\thttp://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js;10.5.0.1", "completion": "positive"} +{"promote": "\t\teng-USA-OfflineIOS-7.0.0.4-20200319;7.0.0.4", "completion": "positive"} +{"promote": "\t7.4.1.2;7.4.1.2", 
"completion": "positive"} +{"promote": "\t64.98.122.70;64.98.122.70", "completion": "positive"} +{"promote": "\t20.10.0.8;20.10.0.8", "completion": "positive"} +{"promote": "\t12.6.0.1;12.6.0.1", "completion": "positive"} +{"promote": "\t12.6.0.1;12.6.0.1", "completion": "positive"} +{"promote": "\t257.0.1.38.116;257.0.1.38", "completion": "negtive"} +{"promote": "\t257.0.1.38.116;257.0.1.38", "completion": "negtive"} +{"promote": "\t\t\ttincan_register_device_256.0.1.26.113;256.0.1.26", "completion": "negtive"} +{"promote": "\t\t\ttincan_register_device_257.0.1.38.116;257.0.1.38", "completion": "negtive"} +{"promote": "\t257.0.1.38.116;257.0.1.38", "completion": "negtive"} +{"promote": "\t257.0.1.38.116;257.0.1.38", "completion": "negtive"} +{"promote": "\t135.0.0.22.118 (206072690);135.0.0.22", "completion": "positive"} +{"promote": "\t2.20.31.4;2.20.31.4", "completion": "positive"} +{"promote": "\t3.6.1.4;3.6.1.4", "completion": "positive"} +{"promote": "\t\t\thttp://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js;10.5.0.1", "completion": "positive"} +{"promote": "\t12.6.0.1;12.6.0.1", "completion": "positive"} +{"promote": "\t\t\thttp://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js;10.5.0.1", "completion": "positive"} +{"promote": "\t12.6.0.1;12.6.0.1", "completion": "positive"} +{"promote": "\t15.21.2.14594;15.21.2.145", "completion": "positive"} +{"promote": "\t\t\thttp://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js;10.5.0.1", "completion": "positive"} +{"promote": "\t3608.80.24.1.8;608.80.24.1", "completion": "negtive"} +{"promote": "\t5.12.20.4.10.18.5.5.2;5.12.20.4", "completion": "positive"} +{"promote": "\t5.12.20.4.10.18.5.5.2;10.18.5.5", "completion": "positive"} +{"promote": "\t3608.80.24.1.8;608.80.24.1", "completion": "negtive"} +{"promote": "\t12.6.0.1;12.6.0.1", "completion": "positive"} +{"promote": 
"\t\t\thttp://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js;10.5.0.1", "completion": "positive"} +{"promote": "\t2.20.31.4;2.20.31.4", "completion": "positive"} +{"promote": "\t3608.80.24.1.8;608.80.24.1", "completion": "negtive"} +{"promote": "\t\t\t\t\t\t255.255.255.255;255.255.255.255", "completion": "positive"} +{"promote": "\t\t\t\t\t\t0.0.0.0;0.0.0.0", "completion": "negtive"} +{"promote": "\t\t\t\t\t\t255.255.255.255;255.255.255.255", "completion": "positive"} +{"promote": "\t\t\t\t\t\t0.0.0.0;0.0.0.0", "completion": "negtive"} diff --git a/IP_Extraction_Fine_Tuning/code/dataset_train.jsonl b/IP_Extraction_Fine_Tuning/code/dataset_train.jsonl new file mode 100644 index 0000000..4aeea86 --- /dev/null +++ b/IP_Extraction_Fine_Tuning/code/dataset_train.jsonl @@ -0,0 +1,183 @@ +{"promote": "\t1.000.000.000+;1.000.000.000", "completion": "positive"} +{"promote": "\t1.0.0.1;1.0.0.1", "completion": "positive"} +{"promote": "\t1.0.0.1;1.0.0.1", "completion": "positive"} +{"promote": "\t1.1.1.1;1.1.1.1", "completion": "positive"} +{"promote": "\t1.1.1.1;1.1.1.1", "completion": "positive"} +{"promote": "\t3608.80.24.1.8;608.80.24.1", "completion": "negtive"} +{"promote": "\t\t\teng-USA-1.0.0.14-20170731;1.0.0.14", "completion": "positive"} +{"promote": "\t\t\tcmn-CHN-1.0.0.12-20170804;1.0.0.12", "completion": "positive"} +{"promote": "\t3.6.1.4;3.6.1.4", "completion": "positive"} +{"promote": "\t3608.80.24.1.8;608.80.24.1", "completion": "negtive"} +{"promote": "\t\t12.6.0.1;12.6.0.1", "completion": "positive"} +{"promote": "\t100.73.156.224;100.73.156.224", "completion": "positive"} +{"promote": "\tobfs4 192.95.36.142:443 CDF2E852BF539B82BD10E27E9115A31734E378C2 cert=qUVQ0srL1JI/vO6V6m/24anYXiJD3QP2HgzUKQtQ7GRqqUvs7P+tG43RtAqdhLOALP7DJQ iat-mode=1;192.95.36.142", "completion": "positive"} +{"promote": "\tobfs4 38.229.1.78:80 C8CBDB2464FC9804A69531437BCF2BE31FDD2EE4 cert=Hmyfd2ev46gGY7NoVxA9ngrPF2zCZtzskRTzoWXbxNkzeVnGFPWmrTtILRyqCTjHR+s9dg 
iat-mode=1;38.229.1.78", "completion": "positive"} +{"promote": "\tobfs4 38.229.33.83:80 0BAC39417268B96B9F514E7F63FA6FBA1A788955 cert=VwEFpk9F/UN9JED7XpG1XOjm/O8ZCXK80oPecgWnNDZDv5pdkhq1OpbAH0wNqOT6H6BmRQ iat-mode=1;38.229.33.83", "completion": "positive"} +{"promote": "\tobfs4 37.218.240.34:40035 88CD36D45A35271963EF82E511C8827A24730913 cert=eGXYfWODcgqIdPJ+rRupg4GGvVGfh25FWaIXZkit206OSngsp7GAIiGIXOJJROMxEqFKJg iat-mode=1;37.218.240.34", "completion": "positive"} +{"promote": "\tobfs4 37.218.245.14:38224 D9A82D2F9C2F65A18407B1D2B764F130847F8B5D cert=bjRaMrr1BRiAW8IE9U5z27fQaYgOhX1UCmOpg2pFpoMvo6ZgQMzLsaTzzQNTlm7hNcb+Sg iat-mode=0;37.218.245.14", "completion": "positive"} +{"promote": "\tobfs4 85.31.186.98:443 011F2599C0E9B27EE74B353155E244813763C3E5 cert=ayq0XzCwhpdysn5o0EyDUbmSOx3X/oTEbzDMvczHOdBJKlvIdHHLJGkZARtT4dcBFArPPg iat-mode=0;85.31.186.98", "completion": "positive"} +{"promote": "\tobfs4 85.31.186.26:443 91A6354697E6B02A386312F68D82CF86824D3606 cert=PBwr+S8JTVZo6MPdHnkTwXJPILWADLqfMGoVvhZClMq/Urndyd42BwX9YFJHZnBB3H0XCw iat-mode=0;85.31.186.26", "completion": "positive"} +{"promote": "\tobfs4 216.252.162.21:46089 0DB8799466902192B6C7576D58D4F7F714EC87C1 cert=XPUwcQPxEXExHfJYX58gZXN7mYpos7VNAHbkgERNFg+FCVNzuYo1Wp+uMscl3aR9hO2DRQ iat-mode=0;216.252.162.21", "completion": "positive"} +{"promote": "\tobfs4 144.217.20.138:80 FB70B257C162BF1038CA669D568D76F5B7F0BABB cert=vYIV5MgrghGQvZPIi1tJwnzorMgqgmlKaB77Y3Z9Q/v94wZBOAXkW+fdx4aSxLVnKO+xNw iat-mode=0;144.217.20.138", "completion": "positive"} +{"promote": "\tobfs4 193.11.166.194:27015 2D82C2E354D531A68469ADF7F878FA6060C6BACA cert=4TLQPJrTSaDffMK7Nbao6LC7G9OW/NHkUwIdjLSS3KYf0Nv4/nQiiI8dY2TcsQx01NniOg iat-mode=0;193.11.166.194", "completion": "positive"} +{"promote": "\tobfs4 193.11.166.194:27020 86AC7B8D430DAC4117E9F42C9EAED18133863AAF cert=0LDeJH4JzMDtkJJrFphJCiPqKx7loozKN7VNfuukMGfHO0Z8OGdzHVkhVAOfo1mUdv9cMg iat-mode=0;193.11.166.194", "completion": "positive"} +{"promote": "\tobfs4 193.11.166.194:27025 
1AE2C08904527FEA90C4C4F8C1083EA59FBC6FAF cert=ItvYZzW5tn6v3G4UnQa6Qz04Npro6e81AP70YujmK/KXwDFPTs3aHXcHp4n8Vt6w/bv8cA iat-mode=0;193.11.166.194", "completion": "positive"} +{"promote": "\tobfs4 209.148.46.65:443 74FAD13168806246602538555B5521A0383A1875 cert=ssH+9rP8dG2NLDN2XuFw63hIO/9MNNinLmxQDpVa+7kTOa9/m+tGWT1SmSYpQ9uTBGa6Hw iat-mode=0;209.148.46.65", "completion": "positive"} +{"promote": "\tobfs4 146.57.248.225:22 10A6CD36A537FCE513A322361547444B393989F0 cert=K1gDtDAIcUfeLqbstggjIw2rtgIKqdIhUlHp82XRqNSq/mtAjp1BIC9vHKJ2FAEpGssTPw iat-mode=0;146.57.248.225", "completion": "positive"} +{"promote": "\t3.3.7.0;3.3.7.0", "completion": "positive"} +{"promote": "\t\t\t\t\thttps://pm-gateway.supersonicads.com/auction?appKey=67052255&SDKVersion=6.8.4.2&initCountry=US&platform=iphone&advId=0D99ABE9-B1D0-41C1-8C45-2681A498AD97;6.8.4.2", "completion": "positive"} +{"promote": "\t3.3.7.0;3.3.7.0", "completion": "positive"} +{"promote": "\t\t\t6.8.4.2;6.8.4.2", "completion": "positive"} +{"promote": "\t\t\t6.8.4.2;6.8.4.2", "completion": "positive"} +{"promote": "\t\tSticker_2.19.40.12;2.19.40.12", "completion": "positive"} +{"promote": "\t\tSticker_2.19.20.2;2.19.20.2", "completion": "positive"} +{"promote": "\t\tSticker_2.19.40.24;2.19.40.24", "completion": "positive"} +{"promote": "\tSticker_2.19.40.24;2.19.40.24", "completion": "positive"} +{"promote": "\t\t\t17.57.144.52;17.57.144.52", "completion": "positive"} +{"promote": "\t1.0.0.1;1.0.0.1", "completion": "positive"} +{"promote": "\t\t\t\t12.6.0.1;12.6.0.1", "completion": "positive"} +{"promote": "\t\t\t12.6.0.1;12.6.0.1", "completion": "positive"} +{"promote": "\t135.0.0.22.118 (206072521);135.0.0.22", "completion": "positive"} +{"promote": "\t12.6.0.1;12.6.0.1", "completion": "positive"} +{"promote": "\t\t3608.80.24.1.8;608.80.24.1", "completion": "negtive"} +{"promote": "\t\t3608.80.24.1.8;608.80.24.1", "completion": "negtive"} +{"promote": "\t135.0.0.22.118;135.0.0.22", "completion": "positive"} +{"promote": 
"\t135.0.0.22.118 (206072690);135.0.0.22", "completion": "positive"} +{"promote": "\t135.0.0.22.118 (206072521);135.0.0.22", "completion": "positive"} +{"promote": "\t\t\thttp://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js;10.5.0.1", "completion": "positive"} +{"promote": "\t\t12.6.0.1;12.6.0.1", "completion": "positive"} +{"promote": "\t\t\tcmn-CHN-4.1.1.0-20160421;4.1.1.0", "completion": "positive"} +{"promote": "\t\t\teng-USA-4.1.1.5-20160919;4.1.1.5", "completion": "positive"} +{"promote": "\t\t\teng-USA-4.1.1.5-20160919;4.1.1.5", "completion": "positive"} +{"promote": "\t\t\tspa-USA-4.1.0.2-20160424;4.1.0.2", "completion": "positive"} +{"promote": "\t\t\trus-RUS-4.1.0.2-20160422;4.1.0.2", "completion": "positive"} +{"promote": "\t\t\ttur-TUR-4.1.0.3-20160422;4.1.0.3", "completion": "positive"} +{"promote": "\t\t\teng-USA-OfflineIOS-6.0.0.2-20191003;6.0.0.2", "completion": "positive"} +{"promote": "\t\t\tcmn-CHN-4.1.1.0-20160421;4.1.1.0", "completion": "positive"} +{"promote": "\t\t\tyue-HKG-4.1.0.2-20160424;4.1.0.2", "completion": "positive"} +{"promote": "\t\t\teng-USA-5.2.0.2-20170705;5.2.0.2", "completion": "positive"} +{"promote": "\t\t\tspa-ESP-5.0.0.1-20170310;5.0.0.1", "completion": "positive"} +{"promote": "\t\t\teng-USA-5.3.0.3-20180129;5.3.0.3", "completion": "positive"} +{"promote": "\t\t\tcmn-CHN-OfflineIOS-5.0.0.1-20180624;5.0.0.1", "completion": "positive"} +{"promote": "\t\t\teng-USA-OfflineIOS-5.3.0.1-20190209;5.3.0.1", "completion": "positive"} +{"promote": "\t\t\teng-USA-OfflineIOS-5.3.0.1-20190530;5.3.0.1", "completion": "positive"} +{"promote": "\t\t\tcmn-CHN-OfflineIOS-5.0.0.1-20180925;5.0.0.1", "completion": "positive"} +{"promote": "\t\t\teng-GBR-4.1.0.1-20160422;4.1.0.1", "completion": "positive"} +{"promote": "\t\t\tita-ITA-4.1.0.1-20160422;4.1.0.1", "completion": "positive"} +{"promote": "\t\t\tpor-BRA-4.1.0.4-20160424;4.1.0.4", "completion": "positive"} +{"promote": "\t\t\teng-IND-4.1.0.1-20160424;4.1.0.1", 
"completion": "positive"} +{"promote": "\t\t\tfra-FRA-OfflineIOS-6.0.0.4-20191114;6.0.0.4", "completion": "positive"} +{"promote": "\t\t\tdeu-DEU-OfflineIOS-6.0.0.3-20191114;6.0.0.3", "completion": "positive"} +{"promote": "\t\t\teng-AUS-OfflineIOS-6.0.0.1-20191215;6.0.0.1", "completion": "positive"} +{"promote": "\t\t\tkor-KOR-OfflineIOS-5.0.0.3-20191215;5.0.0.3", "completion": "positive"} +{"promote": "\t\t\tspa-ESP-OfflineIOS-6.0.0.2-20191120;6.0.0.2", "completion": "positive"} +{"promote": "\t\t\tspa-USA-OfflineIOS-6.0.0.3-20190831;6.0.0.3", "completion": "positive"} +{"promote": "\t\t\tcmn-CHN-OfflineIOS-6.0.0.0-20191009;6.0.0.0", "completion": "positive"} +{"promote": "\t\t\tjpn-JPN-OfflineIOS-6.0.0.3-20191024;6.0.0.3", "completion": "positive"} +{"promote": "\t\t\trus-RUS-OfflineIOS-5.0.0.4-20190930;5.0.0.4", "completion": "positive"} +{"promote": "\t\t\ttur-TUR-OfflineIOS-5.0.0.1-20190928;5.0.0.1", "completion": "positive"} +{"promote": "\t\t\tyue-HKG-OfflineIOS-6.0.0.2-20190901;6.0.0.2", "completion": "positive"} +{"promote": "\t\t\teng-GBR-6.0.0.1-20190729;6.0.0.1", "completion": "positive"} +{"promote": "\t\t\tita-ITA-5.1.1.0-20190729;5.1.1.0", "completion": "positive"} +{"promote": "\t\t\tpor-BRA-5.0.0.2-20190729;5.0.0.2", "completion": "positive"} +{"promote": "\t\t\teng-IND-6.0.0.5-20190907;6.0.0.5", "completion": "positive"} +{"promote": "\t\t\tara-XWW-OfflineIOS-6.0.0.0-20200227;6.0.0.0", "completion": "positive"} +{"promote": "\t\t\ttur-TUR-OfflineIOS-5.0.0.1-20200229;5.0.0.1", "completion": "positive"} +{"promote": "\t\t\tdeu-DEU-OfflineIOS-7.0.0.3-20200323;7.0.0.3", "completion": "positive"} +{"promote": "\t\t\teng-GBR-OfflineIOS-7.0.0.3-20200323;7.0.0.3", "completion": "positive"} +{"promote": "\t\t\teng-USA-OfflineIOS-7.0.0.4-20200319;7.0.0.4", "completion": "positive"} +{"promote": "\t\t\tspa-ESP-OfflineIOS-7.0.0.3-20200323;7.0.0.3", "completion": "positive"} +{"promote": "\t\t\tfra-FRA-OfflineIOS-7.0.0.3-20200323;7.0.0.3", "completion": 
"positive"} +{"promote": "\t\t\tjpn-JPN-OfflineIOS-7.0.0.3-20200323;7.0.0.3", "completion": "positive"} +{"promote": "\t\t\tcmn-CHN-OfflineIOS-7.0.0.3-20200323;7.0.0.3", "completion": "positive"} +{"promote": "\t\tSnapchat/10.78.6.0 (iPhone8_4; iOS 13.4.1; gzip);10.78.6.0", "completion": "positive"} +{"promote": "\t\t10.78.6.0;10.78.6.0", "completion": "positive"} +{"promote": "\t\t10.22.22.1;10.22.22.1", "completion": "positive"} +{"promote": "\t\t10.68.68.22;10.68.68.22", "completion": "positive"} +{"promote": "\t\t10.22.22.1;10.22.22.1", "completion": "positive"} +{"promote": "\t\t\t216.239.36.126;216.239.36.126", "completion": "positive"} +{"promote": "\t\t10.78.6.0;10.78.6.0", "completion": "positive"} +{"promote": "\t\tSnapchat/10.78.6.0 (iPhone8_4; iOS 13.4.1; gzip);10.78.6.0", "completion": "positive"} +{"promote": "\t\t\t216.239.36.126;216.239.36.126", "completion": "positive"} +{"promote": "\t\t\t52.207.89.96_52.207.21.88_52.4.205.231_52.5.37.1_52.4.79.247_52.22.144.146_52.3.185.35_52.21.187.77;52.207.89.96", "completion": "positive"} +{"promote": "\t\t\t52.207.89.96_52.207.21.88_52.4.205.231_52.5.37.1_52.4.79.247_52.22.144.146_52.3.185.35_52.21.187.77;52.207.21.88", "completion": "positive"} +{"promote": "\t\t\t52.207.89.96_52.207.21.88_52.4.205.231_52.5.37.1_52.4.79.247_52.22.144.146_52.3.185.35_52.21.187.77;52.4.205.231", "completion": "positive"} +{"promote": "\t\t\t52.207.89.96_52.207.21.88_52.4.205.231_52.5.37.1_52.4.79.247_52.22.144.146_52.3.185.35_52.21.187.77;52.5.37.1", "completion": "positive"} +{"promote": "\t\t\t52.207.89.96_52.207.21.88_52.4.205.231_52.5.37.1_52.4.79.247_52.22.144.146_52.3.185.35_52.21.187.77;52.4.79.247", "completion": "positive"} +{"promote": "\t\t\t52.207.89.96_52.207.21.88_52.4.205.231_52.5.37.1_52.4.79.247_52.22.144.146_52.3.185.35_52.21.187.77;52.22.144.146", "completion": "positive"} +{"promote": "\t\t\t52.207.89.96_52.207.21.88_52.4.205.231_52.5.37.1_52.4.79.247_52.22.144.146_52.3.185.35_52.21.187.77;52.3.185.35", 
"completion": "positive"} +{"promote": "\t\t\t52.207.89.96_52.207.21.88_52.4.205.231_52.5.37.1_52.4.79.247_52.22.144.146_52.3.185.35_52.21.187.77;52.21.187.77", "completion": "positive"} +{"promote": "\t\t\t216.239.36.126;216.239.36.126", "completion": "positive"} +{"promote": "\t\t\t35.186.197.135;35.186.197.135", "completion": "positive"} +{"promote": "\t\t\t35.190.43.134;35.190.43.134", "completion": "positive"} +{"promote": "\t\t\t35.186.197.135;35.186.197.135", "completion": "positive"} +{"promote": "\t\t\t35.186.197.135;35.186.197.135", "completion": "positive"} +{"promote": "\t\t\t3.213.65.77;3.213.65.77", "completion": "positive"} +{"promote": "\t\t\t3.94.245.242;3.94.245.242", "completion": "positive"} +{"promote": "\t\t\t172.217.8.20;172.217.8.20", "completion": "positive"} +{"promote": "\t\t\t3.94.245.242;3.94.245.242", "completion": "positive"} +{"promote": "\t\t\t216.239.36.126;216.239.36.126", "completion": "positive"} +{"promote": "\t\t\t172.217.8.20;172.217.8.20", "completion": "positive"} +{"promote": "\t\t\t216.239.36.126;216.239.36.126", "completion": "positive"} +{"promote": "\t15.21.2.14594;15.21.2.145", "completion": "positive"} +{"promote": "\t8.58.0.93;8.58.0.93", "completion": "positive"} +{"promote": "\t\t\thttp://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js;10.5.0.1", "completion": "positive"} +{"promote": "\t8.58.0.93;8.58.0.93", "completion": "positive"} +{"promote": "\t\t2.20.31.4;2.20.31.4", "completion": "positive"} +{"promote": "\t10.78.6.0;10.78.6.0", "completion": "positive"} +{"promote": "\t\t\thttp://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js;10.5.0.1", "completion": "positive"} +{"promote": "\t\t\t\t\t\t255.255.255.255;255.255.255.255", "completion": "positive"} +{"promote": "\t\t\t\t\t\t0.0.0.0;0.0.0.0", "completion": "negtive"} +{"promote": "\t\t\t\t\t\t255.255.255.255;255.255.255.255", "completion": "positive"} +{"promote": "\t\t\t\t\t\t0.0.0.0;0.0.0.0", "completion": 
"negtive"} +{"promote": "\t\t3.6.1.4;3.6.1.4", "completion": "positive"} +{"promote": "\t609.1.20.0.9;609.1.20.0", "completion": "negtive"} +{"promote": "\t12.6.0.1;12.6.0.1", "completion": "positive"} +{"promote": "\t12.6.0.1;12.6.0.1", "completion": "positive"} +{"promote": "\t12.6.0.1;12.6.0.1", "completion": "positive"} +{"promote": "\t\tMoPub-6.9.4.0;6.9.4.0", "completion": "positive"} +{"promote": "\t7.4.2.6;7.4.2.6", "completion": "positive"} +{"promote": "\t135.0.0.22.118;135.0.0.22", "completion": "positive"} +{"promote": "\t2.20.31.4;2.20.31.4", "completion": "positive"} +{"promote": "\t\t0.5.1.3;0.5.1.3", "completion": "negtive"} +{"promote": "\t\t0.6.7.8;0.6.7.8", "completion": "negtive"} +{"promote": "\t10.78.6.0;10.78.6.0", "completion": "positive"} +{"promote": "\t\t8.58.0.93;8.58.0.93", "completion": "positive"} +{"promote": "\t\t12.6.0.1;12.6.0.1", "completion": "positive"} +{"promote": "\t\t20.10.0.8;20.10.0.8", "completion": "positive"} +{"promote": "\t\t2.20.31.4;2.20.31.4", "completion": "positive"} +{"promote": "\t\t15.21.2.14594;15.21.2.145", "completion": "positive"} +{"promote": "\t\t10.78.6.0;10.78.6.0", "completion": "positive"} +{"promote": "\t\t3.6.1.4;3.6.1.4", "completion": "positive"} +{"promote": "\t1.0.0.1;1.0.0.1", "completion": "positive"} +{"promote": "\t\t\t1.0.0.1;1.0.0.1", "completion": "positive"} +{"promote": "\t\t\t\t1.0.0.1;1.0.0.1", "completion": "positive"} +{"promote": "\t1.1.1.1;1.1.1.1", "completion": "positive"} +{"promote": "\t\t\t1.1.1.1;1.1.1.1", "completion": "positive"} +{"promote": "\t\t\t\t1.1.1.1;1.1.1.1", "completion": "positive"} +{"promote": "\t\t\t\t\thttp://abuseipdb.com/check/208.180.231.139;208.180.231.139", "completion": "positive"} +{"promote": "\t\t\t\t\thttp://www.abuseipdb.com/whois/69.50.232.54;69.50.232.54", "completion": "positive"} +{"promote": "\t\t\t\t\thttp://advantage1.checkm8.com/AdminServer/swf/admin-release.swf?version=4.3.0.49;4.3.0.49", "completion": "positive"} +{"promote": 
"\t\t\t\t\thttp://d3bn78kc7qbjb6.cloudfront.net/rc-0.0.0.392/views/logs/logs.html;0.0.0.392", "completion": "negtive"} +{"promote": "\t\t\t\t\thttp://fcs.dell.com/fcs/fcs/UIFramework/3.2.108.2/res/Image?path=sharedcontrols%2Fcontainers%2Fvalidation_messaging_left.png;3.2.108.2", "completion": "positive"} +{"promote": "\t\t\t\thttp://ui1.img.digitalrivercontent.net/drui/1.7.0.0.7.1/css/dr-stylesheet-ff-compressed.css;1.7.0.0", "completion": "positive"} +{"promote": "\t\t\t\thttp://fast.fonts.com/FontsCom/Live/static/2.15.134.0/img/apple-touch-icon-57x57.png;2.15.134.0", "completion": "positive"} +{"promote": "\t\t\t\t\thttp://tva1.sinaimg.cn/crop.0.0.798.798.180/6462d00fgw1egno4hgy6wj20m80m8mxn.jpg;0.0.798.798", "completion": "negtive"} +{"promote": "\t\t\t\t\thttp://netix.dl.sourceforge.net/project/kreogist-mu/Releases/0.9.9.3/Windows/mu_0.9.9.3_win64_intel_d9b5f2d.zip;0.9.9.3", "completion": "negtive"} +{"promote": "\t\t\t\t\thttp://netix.dl.sourceforge.net/project/kreogist-mu/Releases/0.9.9.3/Windows/mu_0.9.9.3_win64_intel_d9b5f2d.zip;0.9.9.3", "completion": "negtive"} +{"promote": "\t\t\t\t\thttp://sm.wdjcdn.com/release/files/jupiter/5.6.1.8925/wandoujia-wandoujia_web.apk;5.6.1.892", "completion": "negtive"} +{"promote": "\t\t\t\t\thttp://tvassl.weibo.cn/crop.0.21.685.685.1024/6b84272bgw1euxxzxrndpj20k00k0adc.jpg;0.21.685.685", "completion": "negtive"} +{"promote": "\t\t\t\t\thttp://novel-cdn.kuangxiangit.com/custom/download/1.4.4.02/Novel-release_14402_jiagu_sign-1109-2-PCdownload.apk;1.4.4.02", "completion": "positive"} +{"promote": "\t\t\thttp://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js;10.5.0.1", "completion": "positive"} +{"promote": "\t3608.80.24.1.8;608.80.24.1", "completion": "negtive"} +{"promote": "\t2.20.31.4;2.20.31.4", "completion": "positive"} +{"promote": 
"\t\thttps://scontent-iad3-1.cdninstagram.com/v/t51.2885-15/e15/c180.0.720.720a/s240x240/93425170_1848465348622192_2718028079642416718_n.jpg?cachebreaker=9823&_nc_ht=scontent-iad3-1.cdninstagram.com&_nc_cat=1&_nc_ohc=MteGbc14-xIAX9Oom5c&oh=bce805572a79dd6ed9fbedbbf1b198ee&oe=5EC259EB&ig_cache_key=MjI4ODMyOTY2NzQ3OTg3Nzk0Ng%3D%3D.2;180.0.720.720", "completion": "positive"} +{"promote": "\t8.5.51.877.g6cea6b5;8.5.51.877", "completion": "negtive"} +{"promote": "\t8.5.51.877;8.5.51.877", "completion": "negtive"} +{"promote": "\t1.000.000.000+;1.000.000.000", "completion": "positive"} +{"promote": "\t2.20.31.4;2.20.31.4", "completion": "positive"} +{"promote": "\t257.0.1.38.116;257.0.1.38", "completion": "negtive"} +{"promote": "\t/private/var/MobileAsset/AssetsV2/com_apple_MobileAsset_EmbeddedSpeech/479991bc60161e4545ff833d6117f264954cb6b3.asset/AssetData/eng-USA-OfflineIOS-7.0.0.4-20200319;7.0.0.4", "completion": "positive"} +{"promote": "\t3608.80.24.1.8;608.80.24.1", "completion": "negtive"} +{"promote": "\t\t\thttp://en-us-support.belkin.com/euf/rightnow/js/10.5.0.1.6.137/min/RightNow.js;10.5.0.1", "completion": "positive"} +{"promote": "\t10.78.6.0;10.78.6.0", "completion": "positive"} diff --git a/IP_Extraction_Fine_Tuning/code/preprocess_dataset.py b/IP_Extraction_Fine_Tuning/code/preprocess_dataset.py new file mode 100644 index 0000000..cba65a2 --- /dev/null +++ b/IP_Extraction_Fine_Tuning/code/preprocess_dataset.py @@ -0,0 +1,59 @@ +import pandas as pd +import json +import os + + +# Read the CSV file into a DataFrame +file_path = os.path.join(os.getcwd(), "./openAI/dataset_small.csv") + +data = pd.read_csv(file_path) + +# Create a list to store the converted data +json_list = [] + +# Iterate through each row and create the JSON format +for index, row in data.iterrows(): + promote_text = ( + row["text"] + ";" + row["ip"] + ) # Replace with the actual column name for "promote" text + completion_text = ( + "positive" if row["class"] != "N" else "negtive" + ) # 
"""Convert ``dataset_small.csv`` into train/test JSONL files.

Each CSV row (columns ``ip``, ``class``, ``text``) becomes one JSON object
with a ``"promote"`` field (``text;ip``) and a ``"completion"`` label.
The records are split 80/20, in file order, into ``dataset_train.jsonl``
and ``dataset_test.jsonl`` under ``./openAI`` relative to the current
working directory.
"""
import json
import os

import pandas as pd

# Fraction of the records (taken from the top of the CSV) used for training.
TRAIN_FRACTION = 0.8

# Rows whose ``class`` column equals this value are labelled as negatives.
NEGATIVE_CLASS = "N"


def build_records(data):
    """Return one ``{"promote", "completion"}`` dict per row of *data*.

    *data* only needs to support ``data["text"]``, ``data["ip"]`` and
    ``data["class"]`` as equally long iterables (a pandas DataFrame does).

    NOTE(review): the key "promote" and the label "negtive" look like
    misspellings of "prompt" and "negative". They are kept unchanged because
    the already generated dataset files use exactly these strings; confirm
    against whatever consumes the JSONL before renaming.
    """
    return [
        {
            "promote": text + ";" + ip,
            "completion": "positive" if label != NEGATIVE_CLASS else "negtive",
        }
        for text, ip, label in zip(data["text"], data["ip"], data["class"])
    ]


def split_records(records, train_fraction=TRAIN_FRACTION):
    """Split *records* into ``(train, test)`` without shuffling.

    The first ``int(train_fraction * len(records))`` items form the training
    part; the remainder is the test part.
    """
    cut = int(train_fraction * len(records))
    return records[:cut], records[cut:]


def write_jsonl(records, path):
    """Write *records* to *path*, one JSON document per line."""
    with open(path, "w", encoding="utf-8") as handle:
        handle.writelines(json.dumps(record) + "\n" for record in records)


def format_summary(train_path, test_path):
    """Return the completion message naming the files actually written."""
    return (
        "JSON objects have been split (80% and 20%) and written to "
        f"{os.path.basename(train_path)} and {os.path.basename(test_path)}"
    )


def main():
    """Read the CSV, build the records, and write the train/test files."""
    base_dir = os.getcwd()
    data = pd.read_csv(os.path.join(base_dir, "./openAI/dataset_small.csv"))

    train_data, test_data = split_records(build_records(data))

    train_file_path = os.path.join(base_dir, "./openAI/dataset_train.jsonl")
    test_file_path = os.path.join(base_dir, "./openAI/dataset_test.jsonl")
    write_jsonl(train_data, train_file_path)
    write_jsonl(test_data, test_file_path)

    print(format_summary(train_file_path, test_file_path))


if __name__ == "__main__":
    main()