diff --git a/CMakeLists.txt b/CMakeLists.txt index 55da6cb..d4bb620 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,16 +46,22 @@ project (uri DESCRIPTION "Lightweight header-only C++20 URI parser" VERSION 1.3.1 ) - include(FetchContent) -message(STATUS "Downloading Criterion...") -FetchContent_Declare(Criterion - GIT_REPOSITORY https://github.com/p-ranav/criterion.git - GIT_SHALLOW ON -) + +# to disable building benchmarking tests: +# cmake -DBUILD_BENCHMARKS=false .. +option(BUILD_BENCHMARKS "enable building benchmarking tests" true) +message("-- Build benchmarking tests: ${BUILD_BENCHMARKS}") +if(BUILD_BENCHMARKS) + message(STATUS "Downloading Criterion...") + FetchContent_Declare(Criterion + GIT_REPOSITORY https://github.com/p-ranav/criterion.git + GIT_SHALLOW ON + ) + FetchContent_MakeAvailable(Criterion) +endif() message(STATUS "Downloading Catch2...") -FetchContent_MakeAvailable(Criterion) FetchContent_Declare(Catch2 GIT_REPOSITORY https://github.com/catchorg/Catch2.git GIT_SHALLOW ON @@ -64,12 +70,19 @@ FetchContent_Declare(Catch2 FetchContent_MakeAvailable(Catch2) list(APPEND CMAKE_MODULE_PATH ${catch2_SOURCE_DIR}/extras) -foreach(x IN ITEMS uritest.cpp unittests.cpp benchmarks.cpp) +set(files uritest.cpp unittests.cpp) +if(BUILD_BENCHMARKS) + list(APPEND files benchmarks.cpp) +endif() +foreach(x IN LISTS files) cmake_path(GET x STEM LAST_ONLY target) add_executable(${target} examples/${x}) set_target_properties(${target} PROPERTIES CXX_STANDARD 20 CXX_STANDARD_REQUIRED true) target_include_directories(${target} PRIVATE include examples) - target_link_libraries(${target} PRIVATE Catch2::Catch2WithMain Criterion) + target_link_libraries(${target} PRIVATE Catch2::Catch2WithMain) + if(BUILD_BENCHMARKS) + target_link_libraries(${target} PRIVATE Criterion) + endif() cmake_path(GET x FILENAME fname) get_target_property(cppstd ${target} CXX_STANDARD) message("-- adding ${fname} cxx std: C++${cppstd}") diff --git a/examples/basiclist.hpp b/examples/basiclist.hpp index 54009af..72ed112 100644 --- a/examples/basiclist.hpp +++ b/examples/basiclist.hpp @@ -30,1003 +30,1003 @@ // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER // DEALINGS IN THE SOFTWARE. //---------------------------------------------------------------------------------------- -{ "https://telegraph.co.uk/index.html" }, -{ "https://bp.blogspot.com/index.html" }, -{ "https://17ok.com/index.html" }, -{ "https://lemonde.fr/index.html" }, -{ "https://google.no/index.html" }, -{ "https://yandex.ru/index.html" }, -{ "https://bandcamp.com/index.html" }, -{ "https://att.com/index.html" }, -{ "https://hawaaworld.com/index.html" }, -{ "https://siteadvisor.com/index.html" }, -{ "https://hatenablog.com/index.html" }, -{ "https://wiley.com/index.html" }, -{ "https://gitter.im/t1gor/Robots.txt-Parser-Class/archives" }, -{ "https://support.google.com/webmasters/answer/6062598?hl=en" }, -{ "https://gstatic.com/index.html" }, -{ "https://wix.com/index.html" }, -{ "https://sohu.com/index.html" }, -{ "https://salesforce.com/index.html" }, -{ "https://op.gg/index.html" }, -{ "https://patch.com/index.html" }, -{ "https://premierleague.com/index.html" }, -{ "https://givemesport.com/index.html" }, -{ "https://rambler.ru/index.html" }, -{ "https://coursera.org/index.html" }, -{ "https://sciencedirect.com/index.html" }, -{ "https://newstrend.news/index.html" }, -{ "https://liputan6.com/index.html" }, -{ "https://cbsnews.com/index.html" }, -{ "https://blog.jp/index.html" }, -{ "https://atlassian.net/index.html" }, -{ "https://livejasmin.com/index.html" }, -{ "https://hespress.com/index.html" }, -{ "https://google.com.tw/index.html" }, -{ "https://amazon.de/index.html" }, -{ "https://tube8.com/index.html" }, -{ "https://wikihow.com/index.html" }, -{ "https://books.google.de/books?id=hO7sBQAAQBAJ&pg=PA282&lpg=PA282&dq=robotstxt+parser&source=bl&ots=BJJf9p47U6&sig=w-uc3hT94YumAO1fL8GipBEkhE0&hl=de&sa=X&redir_esc=y#v=onepage&q=robotstxt%20parser&f=false" }, -{ "https://spotscenered.info/index.html" }, -{ "https://google.co.jp/index.html" }, -{ "https://adexchangegate.com/index.html" }, -{ "https://wordpress.org/index.html" }, -{ "http://logparserplus.com/examples/32/" }, -{ "https://google.com.gt/index.html" }, -{ "https://food.tmall.com/index.html" }, -{ "https://unity3d.com/index.html" }, -{ "https://onet.pl/index.html" }, -{ "https://slickdeals.net/index.html" }, -{ "https://bongacams.com/index.html" }, -{ "https://popcash.net/index.html" }, -{ "https://google.com.sg/index.html" }, -{ "https://hola.com/index.html" }, -{ "https://daikynguyenvn.com/index.html" }, -{ "https://chip.de/index.html" }, -{ "https://subscene.com/index.html" }, -{ "https://douyu.com/index.html" }, -{ "https://telegram.org/index.html" }, -{ "https://google.sk/index.html" }, -{ "https://www.webmasterworld.com/forum93/37.htm" }, -{ "https://twitter.com/index.html" }, -{ "https://gogoanime.io/index.html" }, -{ "https://academia.edu/index.html" }, -{ "https://ci123.com/index.html" }, -{ "https://google.co.kr/index.html" }, -{ "https://dianping.com/index.html" }, -{ "https://porn.com/index.html" }, -{ "https://google.com.ly/index.html" }, -{ "https://convert2mp3.net/index.html" }, -{ "https://ouedkniss.com/index.html" }, -{ "https://uzone.id/index.html" }, -{ "https://books.google.de/books?id=vk5ODAAAQBAJ&pg=PA63&lpg=PA63&dq=robotstxt+parser&source=bl&ots=lXXaGw820W&sig=7Mtcu5ABrTTw1kFfeTReh3YuvFU&hl=de&sa=X&redir_esc=y#v=onepage&q=robotstxt%20parser&f=false" }, -{ "https://youdao.com/index.html" }, -{ "https://mit.edu/index.html" }, -{ "https://kinokrad.co/index.html" }, -{ "https://urbandictionary.com/index.html" }, -{ "https://usatoday.com/index.html" }, -{ "https://weibo.com/index.html" }, -{ "https://rakuten.co.jp/index.html" }, -{ "https://rumblinggoodies.com/au/index.html" }, -{ "https://samsung.com/index.html" }, -{ "https://momoshop.com.tw/index.html" }, -{ "https://chron.com/index.html" }, -{ "https://intel.com/index.html" }, -{ "https://kaskus.co.id/index.html" }, -{ "https://google.com/index.html" }, -{ "https://homedepot.com/index.html" }, -{ "https://cqnews.net/index.html" }, -{ "https://free.fr/index.html" }, -{ "https://uol.com.br/index.html" }, -{ "https://google.com.kw/index.html" }, -{ "https://pipeschannels.com/index.html" }, -{ "https://kotaku.com/index.html" }, -{ "https://outbrain.com/index.html" }, -{ "https://moneycontrol.com/index.html" }, -{ "https://gearbest.com/index.html" }, -{ "https://gizmodo.com/index.html" }, -{ "https://myway.com/index.html" }, -{ "https://theatlantic.com/index.html" }, -{ "https://google.co.ve/index.html" }, -{ "https://nasa.gov/index.html" }, -{ "https://google.co.ao/index.html" }, -{ "https://mobile01.com/index.html" }, -{ "https://hackage.haskell.org/package/robots-txt" }, -{ "https://sberbank.ru/index.html" }, -{ "https://livescore.com/index.html" }, -{ "https://ebay.com.au/index.html" }, -{ "https://google.at/index.html" }, -{ "https://prothom-alo.com/index.html" }, -{ "https://thesaurus.com/index.html" }, -{ "https://google.hu/index.html" }, -{ "https://blogfa.com/index.html" }, -{ "https://lifewire.com/index.html" }, -{ "https://deviantart.com/index.html" }, -{ "https://milliyet.com.tr/index.html" }, -{ "https://cisco.com/index.html" }, -{ "https://naij.com/index.html" }, -{ "https://google.com.mx/index.html" }, -{ "https://digitaldsp.com/index.html" }, -{ "https://exosrv.com/index.html" }, -{ "https://citi.com/index.html" }, -{ "https://4dsply.com/index.html" }, -{ "https://netflix.com/index.html" }, -{ "https://ettoday.net/index.html" }, -{ "https://zhihu.com/index.html" }, -{ "https://ameblo.jp/index.html" }, -{ "https://eyny.com/index.html" }, -{ "https://jiameng.com/index.html" }, -{ "https://gfycat.com/index.html" }, -{ "https://cnbc.com/index.html" }, -{ "https://ccm.net/index.html" }, -{ "https://sabq.org/index.html" }, -{ "https://iwanttodeliver.com/index.html" }, -{ "https://t-online.de/index.html" }, -{ "https://fiverr.com/index.html" }, -{ "https://1688.com/index.html" }, -{ "https://blastingnews.com/index.html" }, -{ "https://cpm10.com/index.html" }, -{ "https://nextlnk2.com/index.html" }, -{ "https://uptobox.com/index.html" }, -{ "https://commentcamarche.net/index.html" }, -{ "https://google.fr/index.html" }, -{ "https://viva.co.id/index.html" }, -{ "https://docs.python.org/2/library/robotparser.html" }, -{ "https://bankofamerica.com/index.html" }, -{ "https://nih.gov/index.html" }, -{ "https://epochtimes.com/index.html" }, -{ "https://hdzog.com/index.html" }, -{ "https://google.co.in/index.html" }, -{ "https://giphy.com/index.html" }, -{ "https://mediafire.com/index.html" }, -{ "https://speedtest.net/index.html" }, -{ "https://investing.com/index.html" }, -{ "https://oschina.net/index.html" }, -{ "https://liveadexchanger.com/index.html" }, -{ "https://google.ae/index.html" }, -{ "https://seasonvar.ru/index.html" }, -{ "https://avast.com/index.html" }, -{ "https://webex.com/index.html" }, -{ "https://buyma.com/index.html" }, -{ "https://google.iq/index.html" }, -{ "https://khanacademy.org/index.html" }, -{ "https://theepochtimes.com/index.html" }, -{ "https://doc.scrapy.org/en/latest/topics/spiders.html" }, -{ "https://friv.com/index.html" }, -{ "https://yahoo.com/index.html" }, -{ "https://codepen.io/index.html" }, -{ "https://mama.cn/index.html" }, -{ "http://www.rubydoc.info/gems/robotstxt-parser/0.1.1" }, -{ "https://wsj.com/index.html" }, -{ "https://gotporn.com/index.html" }, -{ "https://drom.ru/index.html" }, -{ "https://rarbg.is/index.html" }, -{ "https://google.com.ng/index.html" }, -{ "https://t.co/index.html" }, -{ "https://google.ie/index.html" }, -{ "https://google.com.ar/index.html" }, -{ "https://google.es/index.html" }, -{ "https://adhoc2.net/index.html" }, -{ "https://crunchyroll.com/index.html" }, -{ "https://leboncoin.fr/index.html" }, -{ "https://google.com.pk/index.html" }, -{ "https://bukalapak.com/index.html" }, -{ "https://forbes.com/index.html" }, -{ "https://douban.com/index.html" }, -{ "https://indiatimes.com/index.html" }, -{ "https://asus.com/index.html" }, -{ "https://olx.ua/index.html" }, -{ "https://interia.pl/index.html" }, -{ "https://hao123.com/index.html" }, -{ "https://doubleclick.net/index.html" }, -{ "https://www.versioneye.com/php/webignition:robots-txt-parser/1.0.2" }, -{ "https://wordpress.org/plugins/multipart-robotstxt-editor/" }, -{ "https://newtabtv.com/index.html" }, -{ "https://lenovo.com/index.html" }, -{ "https://gamespot.com/index.html" }, -{ "https://kompas.com/index.html" }, -{ "https://themeforest.net/index.html" }, -{ "https://coinmarketcap.com/index.html" }, -{ "https://google.fi/index.html" }, -{ "https://soso.com/index.html" }, -{ "https://cs.nyu.edu/courses/fall02/G22.3033-008/WebCrawler.java" }, -{ "https://piet2eix3l.com/index.html" }, -{ "https://google.com.do/index.html" }, -{ "https://gmw.cn/index.html" }, -{ "https://heavy.com/index.html" }, -{ "https://playstation.com/index.html" }, -{ "https://oracle.com/index.html" }, -{ "https://fbsbx.com/index.html" }, -{ "https://rednet.cn/index.html" }, -{ "https://sportbible.com/index.html" }, -{ "https://ck101.com/index.html" }, -{ "https://facebook.com/index.html" }, -{ "https://yandex.com/support/webmaster/indexing-options/robots-txt-analyzer.html" }, -{ "https://diamongs.com/index.html" }, -{ "https://nikkeibp.co.jp/index.html" }, -{ "https://9gag.com/index.html" }, -{ "https://aliyun.com/index.html" }, -{ "https://hicpm5.com/index.html" }, -{ "https://hilltopads.net/index.html" }, -{ "https://duolingo.com/index.html" }, -{ "https://google.tn/index.html" }, -{ "https://weblio.jp/index.html" }, -{ "https://wittyfeed.com/index.html" }, -{ "https://grid.id/index.html" }, -{ "https://google.ro/index.html" }, -{ "https://weebly.com/index.html" }, -{ "https://livedoor.com/index.html" }, -{ "https://gsmarena.com/index.html" }, -{ "https://detik.com/index.html" }, -{ "https://mercadolivre.com.br/index.html" }, -{ "https://ea.com/index.html" }, -{ "https://hootsuite.com/index.html" }, -{ "https://gismeteo.ru/index.html" }, -{ "https://tokopedia.com/index.html" }, -{ "https://ukr.net/index.html" }, -{ "https://mercadolibre.com.ar/index.html" }, -{ "https://wowhead.com/index.html" }, -{ "https://adf.ly/index.html" }, -{ "https://zhanqi.tv/index.html" }, -{ "https://fatosdesconhecidos.com.br/index.html" }, -{ "https://shopify.com/index.html" }, -{ "https://exoclick.com/index.html" }, -{ "https://ewatchseries.to/index.html" }, -{ "https://onoticioso.com/index.html" }, -{ "https://youboy.com/index.html" }, -{ "https://webmd.com/index.html" }, -{ "https://youtube.com/index.html" }, -{ "https://zoom.us/index.html" }, -{ "https://trackingclick.net/index.html" }, -{ "https://tomsguide.com/index.html" }, -{ "http://sitemaps.blogspot.de/2006/02/using-robotstxt-file.html" }, -{ "https://capitalone.com/index.html" }, -{ "https://bytes.com/topic/c-sharp/answers/229379-robots-txt-parser" }, -{ "https://daum.net/index.html" }, -{ "https://nocookie.net/index.html" }, -{ "https://issuu.com/index.html" }, -{ "https://searchengineland.com/a-deeper-look-at-robotstxt-17573" }, -{ "https://prnt.sc/index.html" }, -{ "https://google.tm/index.html" }, -{ "https://torrentz2.eu/index.html" }, -{ "https://nature.com/index.html" }, -{ "https://crates.io/keywords/spider" }, -{ "https://bitly.com/index.html" }, -{ "https://conservativetribune.com/index.html" }, -{ "https://thebalance.com/index.html" }, -{ "https://time.com/index.html" }, -{ "https://discover.com/index.html" }, -{ "https://alicdn.com/index.html" }, -{ "https://newegg.com/index.html" }, -{ "https://ign.com/index.html" }, -{ "https://google.com.co/index.html" }, -{ "https://yandex.ua/index.html" }, -{ "https://godaddy.com/index.html" }, -{ "https://asos.com/index.html" }, -{ "https://huffingtonpost.com/index.html" }, -{ "https://blibli.com/index.html" }, -{ "https://coccoc.com/index.html" }, -{ "https://amazon.it/index.html" }, -{ "https://mlb.com/index.html" }, -{ "https://politico.com/index.html" }, -{ "https://ptt.cc/index.html" }, -{ "https://bitauto.com/index.html" }, -{ "https://google.pl/index.html" }, -{ "https://steamcommunity.com/index.html" }, -{ "https://google.ca/index.html" }, -{ "https://subject.tmall.com/index.html" }, -{ "https://cam4.com/index.html" }, -{ "https://sourceforge.net/index.html" }, -{ "https://pantip.com/index.html" }, -{ "https://google.com.hk/index.html" }, -{ "https://reimageplus.com/index.html" }, -{ "https://metropcs.mobi/index.html" }, -{ "https://dropbox.com/index.html" }, -{ "https://donga.com/index.html" }, -{ "https://elmundo.es/index.html" }, -{ "https://wp.pl/index.html" }, -{ "https://quora.com/index.html" }, -{ "https://zendesk.com/index.html" }, -{ "https://www.nuget.org/packages?q=Tags%3A%22Robots.txt%22" }, -{ "https://bhphotovideo.com/index.html" }, -{ "https://yalla-shoot.com/index.html" }, -{ "https://youm7.com/index.html" }, -{ "https://hm.com/index.html" }, -{ "https://banvenez.com/index.html" }, -{ "https://icloud.com/index.html" }, -{ "https://popads.net/index.html" }, -{ "https://rutube.ru/index.html" }, -{ "https://zara.com/index.html" }, -{ "https://espncricinfo.com/index.html" }, -{ "https://smallpdf.com/index.html" }, -{ "https://kinopoisk.ru/index.html" }, -{ "https://focus.de/index.html" }, -{ "https://codeonclick.com/index.html" }, -{ "https://digikala.com/index.html" }, -{ "https://brilio.net/index.html" }, -{ "https://welt.de/index.html" }, -{ "https://nytimes.com/index.html" }, -{ "https://usnews.com/index.html" }, -{ "https://washingtonpost.com/index.html" }, -{ "https://nike.com/index.html" }, -{ "https://realtor.com/index.html" }, -{ "https://artstation.com/index.html" }, -{ "https://line.me/index.html" }, -{ "https://allrecipes.com/index.html" }, -{ "https://blog.stapps.io/" }, -{ "https://espn.com/index.html" }, -{ "https://babytree.com/index.html" }, -{ "https://hotels.com/index.html" }, -{ "https://express.co.uk/index.html" }, -{ "https://dailymotion.com/index.html" }, -{ "https://flickr.com/index.html" }, -{ "https://naukri.com/index.html" }, -{ "https://rottentomatoes.com/index.html" }, -{ "https://xfinity.com/index.html" }, -{ "https://messenger.com/index.html" }, -{ "https://alibaba.com/index.html" }, -{ "https://iqoption.com/index.html" }, -{ "https://informationvine.com/index.html" }, -{ "https://kijiji.ca/index.html" }, -{ "https://savefrom.net/index.html" }, -{ "https://moz.com/blog/interactive-guide-to-robots-txt" }, -{ "https://metacpan.org/pod/WWW::RobotRules::Parser" }, -{ "https://onedio.com/index.html" }, -{ "https://inven.co.kr/index.html" }, -{ "https://aliexpress.com/index.html" }, -{ "https://pchome.com.tw/index.html" }, -{ "https://southwest.com/index.html" }, -{ "https://web.de/index.html" }, -{ "https://google.co.il/index.html" }, -{ "https://souq.com/index.html" }, -{ "https://shutterstock.com/index.html" }, -{ "https://okta.com/index.html" }, -{ "https://uidai.gov.in/index.html" }, -{ "https://ytimg.com/index.html" }, -{ "https://skype.com/index.html" }, -{ "https://technicalseo.com/seo-tools/robots-txt/" }, -{ "https://freejobalert.com/index.html" }, -{ "https://axzsd.pro/index.html" }, -{ "https://gmanetwork.com/index.html" }, -{ "https://walmart.com/index.html" }, -{ "https://xnxx.com/index.html" }, -{ "https://4pda.ru/index.html" }, -{ "https://getadblock.com/index.html" }, -{ "https://reuters.com/index.html" }, -{ "https://cloudfront.net/index.html" }, -{ "https://support.google.com/webmasters/answer/6062598?hl=en" }, -{ "https://de.wikipedia.org/robots.txt" }, -{ "https://visualstudio.com/index.html" }, -{ "https://sogou.com/index.html" }, -{ "https://onlinevideoconverter.com/index.html" }, -{ "https://pinimg.com/index.html" }, -{ "https://vidzi.tv/index.html" }, -{ "https://reverso.net/index.html" }, -{ "https://xhamster.com/index.html" }, -{ "http://www.robotstxt.org/orig.html" }, -{ "https://jimdo.com/index.html" }, -{ "https://maka.im/index.html" }, -{ "https://yadi.sk/index.html" }, -{ "https://icicibank.com/index.html" }, -{ "https://okcupid.com/index.html" }, -{ "https://kooora.com/index.html" }, -{ "https://tripadvisor.com/index.html" }, -{ "https://google.com.au/index.html" }, -{ "https://google.si/index.html" }, -{ "https://bloomberg.com/index.html" }, -{ "https://harvard.edu/index.html" }, -{ "https://office.com/index.html" }, -{ "https://discordapp.com/index.html" }, -{ "https://4chan.org/index.html" }, -{ "https://github.com/pandzel/RobotsTxt" }, -{ "https://timeanddate.com/index.html" }, -{ "https://tumblr.com/index.html" }, -{ "https://videoyoum7.com/index.html" }, -{ "https://zippyshare.com/index.html" }, -{ "https://fanpage.gr/index.html" }, -{ "https://tutorialspoint.com/index.html" }, -{ "https://google.com.ph/index.html" }, -{ "https://sarkariresult.com/index.html" }, -{ "https://duckduckgo.com/index.html" }, -{ "https://goo.gl/index.html" }, -{ "https://investopedia.com/index.html" }, -{ "https://tistory.com/index.html" }, -{ "https://rbc.ru/index.html" }, -{ "https://autodesk.com/index.html" }, -{ "https://list-manage.com/index.html" }, -{ "https://bestbuy.com/index.html" }, -{ "https://spiegel.de/index.html" }, -{ "https://drive2.ru/index.html" }, -{ "https://cnzz.com/index.html" }, -{ "https://jd.com/index.html" }, -{ "https://104.com.tw/index.html" }, -{ "https://ca.gov/index.html" }, -{ "https://videodownloadconverter.com/index.html" }, -{ "https://google.nl/index.html" }, -{ "https://kapanlagi.com/index.html" }, -{ "https://healthline.com/index.html" }, -{ "https://bet9ja.com/index.html" }, -{ "https://discogs.com/index.html" }, -{ "https://livedoor.jp/index.html" }, -{ "https://canva.com/index.html" }, -{ "https://ibm.com/index.html" }, -{ "https://cricbuzz.com/index.html" }, -{ "https://eastday.com/index.html" }, -{ "https://ebc.net.tw/index.html" }, -{ "https://boredpanda.com/index.html" }, -{ "https://w3schools.com/index.html" }, -{ "https://box.com/index.html" }, -{ "https://airbnb.com/index.html" }, -{ "https://github.com/t1gor/Robots.txt-Parser-Class" }, -{ "https://jeuxvideo.com/index.html" }, -{ "https://www.youtube.com/watch?v=8ZZSd0cdymo" }, -{ "https://google.com.my/index.html" }, -{ "https://blogspot.com/index.html" }, -{ "https://focuusing.com/index.html" }, -{ "https://hotstar.com/index.html" }, -{ "https://dailymail.co.uk/index.html" }, -{ "https://4shared.com/index.html" }, -{ "https://foxnews.com/index.html" }, -{ "https://vimeo.com/index.html" }, -{ "https://office365.com/index.html" }, -{ "https://dell.com/index.html" }, -{ "https://instructure.com/index.html" }, -{ "https://amazonaws.com/index.html" }, -{ "https://mellowads.com/index.html" }, -{ "https://detail.tmall.com/index.html" }, -{ "https://www.quora.com/How-do-I-make-my-web-crawler-follow-robots-txt" }, -{ "https://rdrr.io/cran/robotstxt/" }, -{ "https://flvto.biz/index.html" }, -{ "https://google.com.pe/index.html" }, -{ "https://howtogeek.com/index.html" }, -{ "https://freepik.com/index.html" }, -{ "https://google.co.uk/index.html" }, -{ "https://bet365.com/index.html" }, -{ "https://azlyrics.com/index.html" }, -{ "https://chatwork.com/index.html" }, -{ "https://offertogo.online/index.html" }, -{ "https://pinterest.co.uk/index.html" }, -{ "https://udn.com/index.html" }, -{ "https://rapidgator.net/index.html" }, -{ "https://ask.fm/index.html" }, -{ "https://latimes.com/index.html" }, -{ "https://roblox.com/index.html" }, -{ "https://zz08047.com/index.html" }, -{ "https://ebay-kleinanzeigen.de/index.html" }, -{ "https://metropoles.com/index.html" }, -{ "https://haber7.com/index.html" }, -{ "https://bittrex.com/index.html" }, -{ "https://npm.runkit.com/robots-txt-parser" }, -{ "https://go.com/index.html" }, -{ "https://filehippo.com/index.html" }, -{ "https://google.be/index.html" }, -{ "https://pulseonclick.com/index.html" }, -{ "https://kissanime.ru/index.html" }, -{ "https://taboola.com/index.html" }, -{ "https://rubygems.org/gems/robotstxt-parser/versions/0.1.1" }, -{ "https://google.az/index.html" }, -{ "https://dingit.tv/index.html" }, -{ "https://fromdoctopdf.com/index.html" }, -{ "https://tianya.cn/index.html" }, -{ "https://scribd.com/index.html" }, -{ "http://socoder.net/?Snippet=23824" }, -{ "https://eatyellowmango.com/index.html" }, -{ "https://gamepedia.com/index.html" }, -{ "https://qiita.com/index.html" }, -{ "https://kinogo.club/index.html" }, -{ "https://nfl.com/index.html" }, -{ "https://stackoverflow.com/questions/3141031/robots-txt-parser-java" }, -{ "https://nikkei.com/index.html" }, -{ "https://gazzetta.it/index.html" }, -{ "https://appledaily.com.tw/index.html" }, -{ "https://jabong.com/index.html" }, -{ "https://ebay.it/index.html" }, -{ "https://yahoo.co.jp/index.html" }, -{ "https://ask.com/index.html" }, -{ "https://eventbrite.com/index.html" }, -{ "https://banggood.com/index.html" }, -{ "https://hp.com/index.html" }, -{ "https://adp.com/index.html" }, -{ "https://lavanguardia.com/index.html" }, -{ "https://wish.com/index.html" }, -{ "https://shaparak.ir/index.html" }, -{ "https://idnes.cz/index.html" }, -{ "https://biobiochile.cl/index.html" }, -{ "https://qingdaonews.com/index.html" }, -{ "https://nametests.com/index.html" }, -{ "https://speakol.com/index.html" }, -{ "https://hclips.com/index.html" }, -{ "https://imgur.com/index.html" }, -{ "https://ssl-images-amazon.com/index.html" }, -{ "https://thestartmagazine.com/index.html" }, -{ "https://yelp.com/index.html" }, -{ "https://google.co.id/index.html" }, -{ "https://cdiscount.com/index.html" }, -{ "https://redonetype.com/index.html" }, -{ "https://pinterest.com/index.html" }, -{ "https://yourporn.sexy/index.html" }, -{ "https://google.com.eg/index.html" }, -{ "https://wixsite.com/index.html" }, -{ "https://quizlet.com/index.html" }, -{ "https://redd.it/index.html" }, -{ "https://leagueoflegends.com/index.html" }, -{ "https://baidu.com/index.html" }, -{ "https://redtube.com/index.html" }, -{ "https://amazon.co.uk/index.html" }, -{ "https://www.jugglingedge.com/help/creatingagoodbot.php" }, -{ "https://drtuber.com/index.html" }, -{ "https://google.com.sa/index.html" }, -{ "https://lifehacker.com/index.html" }, -{ "https://ndtv.com/index.html" }, -{ "https://elmogaz.com/index.html" }, -{ "https://python.org/index.html" }, -{ "https://ths9j89.com/index.html" }, -{ "https://japanpost.jp/index.html" }, -{ "https://verizonwireless.com/index.html" }, -{ "https://gosuslugi.ru/index.html" }, -{ "https://thefreedictionary.com/index.html" }, -{ "https://sinoptik.ua/index.html" }, -{ "https://behance.net/index.html" }, -{ "https://fc2.com/index.html" }, -{ "https://thehill.com/index.html" }, -{ "https://innfrad.com/index.html" }, -{ "https://bbc.co.uk/index.html" }, -{ "https://norton.com/index.html" }, -{ "https://bild.de/index.html" }, -{ "https://frtyg.com/index.html" }, -{ "https://livejournal.com/index.html" }, -{ "https://sex.com/index.html" }, -{ "https://csdn.net/index.html" }, -{ "https://billdesk.com/index.html" }, -{ "https://segmentfault.com/index.html" }, -{ "https://elpais.com/index.html" }, -{ "https://squarespace.com/index.html" }, -{ "https://genius.com/index.html" }, -{ "https://blueseek.com/index.html" }, -{ "https://patreon.com/index.html" }, -{ "https://xe.com/index.html" }, -{ "https://indeed.com/index.html" }, -{ "https://independent.co.uk/index.html" }, -{ "https://dmm.com/index.html" }, -{ "https://ebay.de/index.html" }, -{ "https://spankbang.com/index.html" }, -{ "https://merriam-webster.com/index.html" }, -{ "https://comcast.net/index.html" }, -{ "https://google.cl/index.html" }, -{ "https://fanfiction.net/index.html" }, -{ "https://bitbucket.org/index.html" }, -{ "https://google.lt/index.html" }, -{ "https://list.tmall.com/index.html" }, -{ "https://huanqiu.com/index.html" }, -{ "https://hamariweb.com/index.html" }, -{ "https://target.com/index.html" }, -{ "https://youth.cn/index.html" }, -{ "https://beytoote.com/index.html" }, -{ "https://americanexpress.com/index.html" }, -{ "https://hurriyet.com.tr/index.html" }, -{ "https://uptodown.com/index.html" }, -{ "https://primevideo.com/index.html" }, -{ "https://camdolls.com/index.html" }, -{ "https://paypal.com/index.html" }, -{ "https://bookmyshow.com/index.html" }, -{ "https://audisto.com/insights/guides/4/" }, -{ "https://rumble.com/index.html" }, -{ "https://github.com/index.html" }, -{ "https://gyazo.com/index.html" }, -{ "https://appspot.com/index.html" }, -{ "https://y8.com/index.html" }, -{ "https://wellsfargo.com/index.html" }, -{ "https://google.com.vn/index.html" }, -{ "http://www.zaproxy.org/2.5/javadocs/org/zaproxy/zap/spider/parser/SpiderRobotstxtParser.html" }, -{ "https://rt.com/index.html" }, -{ "https://mailchimp.com/index.html" }, -{ "https://el-nacional.com/index.html" }, -{ "https://ltn.com.tw/index.html" }, -{ "https://vice.com/index.html" }, -{ "https://apple.com/index.html" }, -{ "https://orange.fr/index.html" }, -{ "https://dmm.co.jp/index.html" }, -{ "https://europa.eu/index.html" }, -{ "https://nicovideo.jp/index.html" }, -{ "https://lazada.com.my/index.html" }, -{ "https://nownews.com/index.html" }, -{ "https://adobe.com/index.html" }, -{ "https://blogger.com/index.html" }, -{ "https://sabah.com.tr/index.html" }, -{ "https://streamable.com/index.html" }, -{ "https://cnn.com/index.html" }, -{ "http://tools.seochat.com/tools/robots-txt-validator/#sthash.xiB29oJK.dpbs" }, -{ "https://primosearch.com/index.html" }, -{ "https://gongchang.com/index.html" }, -{ "https://tribunnews.com/index.html" }, -{ "https://wordpress.com/index.html" }, -{ "https://dafont.com/index.html" }, -{ "https://perfectgirls.net/index.html" }, -{ "https://quizzstar.com/index.html" }, -{ "https://disqus.com/index.html" }, -{ "https://ntd.tv/index.html" }, -{ "https://google.rs/index.html" }, -{ "https://mail.ru/index.html" }, -{ "https://performanceadexchange.com/index.html" }, -{ "https://doublepimpssl.com/index.html" }, -{ "https://providr.com/index.html" }, -{ "https://wp.com/index.html" }, -{ "https://dashbo15myapp.com/index.html" }, -{ "https://amazon.ca/index.html" }, -{ "https://youjizz.com/index.html" }, -{ "https://shink.in/index.html" }, -{ "https://cpm20.com/index.html" }, -{ "https://gmx.net/index.html" }, -{ "https://google.dk/index.html" }, -{ "https://pixabay.com/index.html" }, -{ "https://getpocket.com/index.html" }, -{ "https://naver.jp/index.html" }, -{ "https://neobux.com/index.html" }, -{ "https://kizlarsoruyor.com/index.html" }, -{ "https://playmediacenter.com/index.html" }, -{ "https://amazon.in/index.html" }, -{ "https://diply.com/index.html" }, -{ "https://hulu.com/index.html" }, -{ "https://etsy.com/index.html" }, -{ "https://google.cz/index.html" }, -{ "https://baike.com/index.html" }, -{ "https://nextlnk1.com/index.html" }, -{ "https://booking.com/index.html" }, -{ "https://jianshu.com/index.html" }, -{ "https://uploaded.net/index.html" }, -{ "https://cbssports.com/index.html" }, -{ "https://mozilla.org/index.html" }, -{ "https://usps.com/index.html" }, -{ "https://hubspot.com/index.html" }, -{ "https://westernjournalism.com/index.html" }, -{ "https://wattpad.com/index.html" }, -{ "https://ecollege.com/index.html" }, -{ "https://elbalad.news/index.html" }, -{ "https://udemy.com/index.html" }, -{ "https://blackboard.com/index.html" }, -{ "https://cran.r-project.org/web/packages/robotstxt/robotstxt.pdf" }, -{ "https://inadequal.com/index.html" }, -{ "https://softonic.com/index.html" }, -{ "https://marca.com/index.html" }, -{ "https://gamefaqs.com/index.html" }, -{ "https://goo.ne.jp/index.html" }, -{ "https://chaturbate.com/index.html" }, -{ "https://adexchangeperformance.com/index.html" }, -{ "https://360.cn/index.html" }, -{ "https://redirectvoluum.com/index.html" }, -{ "https://amazon.cn/index.html" }, -{ "https://iqiyi.com/index.html" }, -{ "https://namnak.com/index.html" }, -{ "https://ikea.com/index.html" }, -{ "https://doublepimp.com/index.html" }, -{ "https://google.com.ua/index.html" }, -{ "https://vnexpress.net/index.html" }, -{ "https://jrj.com.cn/index.html" }, -{ "https://rutracker.org/index.html" }, -{ "https://bilibili.com/index.html" }, -{ "https://tabelog.com/index.html" }, -{ "https://51sole.com/index.html" }, -{ "https://bankmellat.ir/index.html" }, -{ "https://tmall.com/index.html" }, -{ "https://evernote.com/index.html" }, -{ "https://dictionary.com/index.html" }, -{ "https://bitmedianetwork.com/index.html" }, -{ "https://jw.org/index.html" }, -{ "https://vporn.com/index.html" }, -{ "https://1337x.to/index.html" }, -{ "https://ecosia.org/index.html" }, -{ "https://people.com.cn/index.html" }, -{ "https://coinbase.com/index.html" }, -{ "http://search.cpan.org/~gaas/WWW-RobotRules-6.02/lib/WWW/RobotRules.pm" }, -{ "https://discuss.com.hk/index.html" }, -{ "https://infusionsoft.com/index.html" }, -{ "https://twimg.com/index.html" }, -{ "https://cnblogs.com/index.html" }, -{ "https://seznam.cz/index.html" }, -{ "https://soundcloud.com/index.html" }, -{ "https://yespornplease.com/index.html" }, -{ "https://www.npmjs.com/package/robots-txt-parse" }, -{ "https://www.promptcloud.com/blog/robots.txt-file-how-to-read-web-crawling" }, -{ "https://blpmovies.com/index.html" }, -{ "https://myanimelist.net/index.html" }, -{ "https://google.com.mm/index.html" }, -{ "https://google.se/index.html" }, -{ "https://grammarly.com/index.html" }, -{ "https://ouo.io/index.html" }, -{ "https://gmarket.co.kr/index.html" }, -{ "https://ebay.com/index.html" }, -{ "https://ebay.in/index.html" }, -{ "https://pandora.com/index.html" }, -{ "https://wikipedia.org/index.html" }, -{ "https://sina.com.cn/index.html" }, -{ "https://slideshare.net/index.html" }, -{ "https://umblr.com/index.html" }, -{ "https://asana.com/index.html" }, -{ "https://zz08037.com/index.html" }, -{ "https://almasryalyoum.com/index.html" }, -{ "https://kickstarter.com/index.html" }, -{ "https://banesconline.com/index.html" }, -{ "https://zillow.com/index.html" }, -{ "https://chinadaily.com.cn/index.html" }, -{ "https://twitch.tv/index.html" }, -{ "https://onclkds.com/index.html" }, -{ "https://google.co.nz/index.html" }, -{ "https://paytm.com/index.html" }, -{ "https://pornhub.com/index.html" }, -{ "https://39.net/index.html" }, -{ "https://alodokter.com/index.html" }, -{ "https://alipay.com/index.html" }, -{ "https://manoramaonline.com/index.html" }, -{ "https://fedex.com/index.html" }, -{ "https://stackexchange.com/index.html" }, -{ "https://vidio.com/index.html" }, -{ "https://varzesh3.com/index.html" }, -{ "https://microsoftonline.com/index.html" }, -{ "https://google.by/index.html" }, -{ "https://help.sumologic.com/Search/Get-Started-with-Search/Suggested-Searches/Suggested-Searches-for-the-Apache-Access-Parser" }, -{ "https://sahibinden.com/index.html" }, -{ "https://subito.it/index.html" }, -{ "https://utorrent.com/index.html" }, -{ "https://medium.com/index.html" }, -{ "https://perfecttoolmedia.com/index.html" }, -{ "https://easypdfcombine.com/index.html" }, -{ "https://adexchangeprediction.com/index.html" }, -{ "https://debate.com.mx/index.html" }, -{ "https://yts.ag/index.html" }, -{ "https://cnet.com/index.html" }, -{ "https://python.readthedocs.io/en/stable/library/urllib.robotparser.html" }, -{ "https://golanglibs.com/top?q=robots.txt-go" }, -{ "https://reallifecam.com/index.html" }, -{ "https://theverge.com/index.html" }, -{ "https://wetransfer.com/index.html" }, -{ "https://onlinesbi.com/index.html" }, -{ "https://amazon.com/index.html" }, -{ "https://yaplakal.com/index.html" }, -{ "https://libraries.io/search?keywords=robots.txt&platforms=NPM" }, -{ "https://noaa.gov/index.html" }, -{ "https://glassdoor.com/index.html" }, -{ "https://live.com/index.html" }, -{ "https://macys.com/index.html" }, -{ "https://bodybuilding.com/index.html" }, -{ "https://trello.com/index.html" }, -{ "https://goal.com/index.html" }, -{ "https://pikabu.ru/index.html" }, -{ "https://eksisozluk.com/index.html" }, -{ "https://googlevideo.com/index.html" }, -{ "https://ups.com/index.html" }, -{ "https://avg.com/index.html" }, -{ "https://google.bg/index.html" }, -{ "https://whatsapp.com/index.html" }, -{ "https://hdfcbank.com/index.html" }, -{ "https://openload.co/index.html" }, -{ "https://onclickmax.com/index.html" }, -{ "https://groupon.com/index.html" }, -{ "https://feedly.com/index.html" }, -{ "https://nur.kz/index.html" }, -{ "https://rarbg.to/index.html" }, -{ "https://taringa.net/index.html" }, -{ "https://mathrubhumi.com/index.html" }, -{ "https://youporn.com/index.html" }, -{ "https://solarmoviez.to/index.html" }, -{ "https://imdb.com/index.html" }, -{ "https://reddit.com/index.html" }, -{ "https://qq.com/index.html" }, -{ "https://bestadbid.com/index.html" }, -{ "https://google.com.ec/index.html" }, -{ "http://nikitathespider.com/python/rerp/" }, -{ "https://mirror.co.uk/index.html" }, -{ "https://gamer.com.tw/index.html" }, -{ "https://instructables.com/index.html" }, -{ "https://techradar.com/index.html" }, -{ "https://indoxxi.net/index.html" }, -{ "https://aparat.com/index.html" }, -{ "https://repubblica.it/index.html" }, -{ "https://linkshrink.net/index.html" }, -{ "https://naver.com/index.html" }, -{ "https://lapatilla.com/index.html" }, -{ "https://en.wikipedia.org/robots.txt" }, -{ "https://breitbart.com/index.html" }, -{ "http://www.the-art-of-web.com/php/parse-robots/" }, -{ "https://vk.com/index.html" }, -{ "https://engadget.com/index.html" }, -{ "https://irctc.co.in/index.html" }, -{ "https://files.wordpress.com/index.html" }, -{ "https://thewhizmarketing.com/index.html" }, -{ "https://craigslist.org/index.html" }, -{ "https://theguardian.com/index.html" }, -{ "https://slack.com/index.html" }, -{ "https://gov.uk/index.html" }, -{ "https://nordstrom.com/index.html" }, -{ "https://chegg.com/index.html" }, -{ "https://coderwall.com/p/pu2clg/robots-txt-parser-online" }, -{ "https://humblebundle.com/index.html" }, -{ "https://businessinsider.com/index.html" }, -{ "https://fwbntw.com/index.html" }, -{ "https://inquirer.net/index.html" }, -{ "https://google.gr/index.html" }, -{ "https://state.gov/index.html" }, -{ "https://google.cn/index.html" }, -{ "https://scribol.com/index.html" }, -{ "https://tfetimes.com/index.html" }, -{ "https://motherless.com/index.html" }, -{ "https://msn.com/index.html" }, -{ "https://snapdeal.com/index.html" }, -{ "https://spotify.com/index.html" }, -{ "https://mercadolibre.com.mx/index.html" }, -{ "https://t.me/index.html" }, -{ "https://so.com/index.html" }, -{ "https://china.com.cn/index.html" }, -{ "https://wtoip.com/index.html" }, -{ "https://as.com/index.html" }, -{ "https://corriere.it/index.html" }, -{ "https://beeg.com/index.html" }, -{ "https://gostream.is/index.html" }, -{ "https://google.it/index.html" }, -{ "https://myanmarload.com/index.html" }, -{ "https://world.tmall.com/index.html" }, -{ "https://voyeurhit.com/index.html" }, -{ "https://researchgate.net/index.html" }, -{ "https://amazon.co.jp/index.html" }, -{ "https://lenta.ru/index.html" }, -{ "https://google.dz/index.html" }, -{ "https://prezi.com/index.html" }, -{ "https://hh.ru/index.html" }, -{ "https://olx.in/index.html" }, -{ "https://lefigaro.fr/index.html" }, -{ "https://kompasiana.com/index.html" }, -{ "https://chase.com/index.html" }, -{ "https://stanford.edu/index.html" }, -{ "https://rediff.com/index.html" }, -{ "https://allegro.pl/index.html" }, -{ "http://pythonicprose.blogspot.de/2009/10/python-read-robotstxt-files-with-ease.html" }, -{ "https://fbcdn.net/index.html" }, -{ "https://163.com/index.html" }, -{ "https://xinhuanet.com/index.html" }, -{ "https://lazada.co.id/index.html" }, -{ "https://mercadolibre.com.ve/index.html" }, -{ "https://txxx.com/index.html" }, -{ "https://animeflv.net/index.html" }, -{ "https://fidelity.com/index.html" }, -{ "https://cambridge.org/index.html" }, -{ "https://libero.it/index.html" }, -{ "https://yenisafak.com/index.html" }, -{ "https://ebay.fr/index.html" }, -{ "https://ticketmaster.com/index.html" }, -{ "https://mashable.com/index.html" }, -{ "https://ieee.org/index.html" }, -{ "https://bleacherreport.com/index.html" }, -{ "https://intoli.com/blog/analyzing-one-million-robots-txt-files/" }, -{ "https://mobile.de/index.html" }, -{ "http://www.arewewebyet.org/topics/utils/" }, -{ "https://springer.com/index.html" }, -{ "https://chaoshi.tmall.com/index.html" }, -{ "https://ioredi.com/index.html" }, -{ "https://eskimi.com/index.html" }, -{ "https://bing.com/index.html" }, -{ "https://thepiratebay.org/index.html" }, -{ "https://u1trkqf.com/index.html" }, -{ "https://surveymonkey.com/index.html" }, -{ "https://deviantart.net/index.html" }, -{ "https://intuit.com/index.html" }, -{ "http://www.juust.org/index.php/php-classes/robots-txt-php-parser-class/" }, -{ "https://zone-telechargement.ws/index.html" }, -{ "https://olx.com.br/index.html" }, -{ "https://emol.com/index.html" }, -{ "https://namu.wiki/index.html" }, -{ "https://fmovies.is/index.html" }, -{ "https://flirt4free.com/index.html" }, -{ "https://zapmeta.ws/index.html" }, -{ "https://weather.com/index.html" }, -{ "https://drudgereport.com/index.html" }, -{ "https://caijing.com.cn/index.html" }, -{ "https://userapi.com/index.html" }, -{ "https://wunderground.com/index.html" }, -{ "https://ebay.co.uk/index.html" }, -{ "https://thesun.co.uk/index.html" }, -{ "https://sputniknews.com/index.html" }, -{ "https://elfagr.org/index.html" }, -{ "https://allocine.fr/index.html" }, -{ "https://ria.ru/index.html" }, -{ "https://telewebion.com/index.html" }, -{ "https://putrr18.com/index.html" }, -{ "https://zipnoticias.com/index.html" }, -{ "https://buzzfeed.com/index.html" }, -{ "https://microsoft.com/index.html" }, -{ "https://myfreecams.com/index.html" }, -{ "https://google.ch/index.html" }, -{ "https://olx.pl/index.html" }, -{ "https://upwork.com/index.html" }, -{ "https://doorblog.jp/index.html" }, -{ "https://chinaz.com/index.html" }, -{ "https://google.hr/index.html" }, -{ "https://wordreference.com/index.html" }, -{ "https://google.com.tr/index.html" }, -{ "https://tomshardware.com/index.html" }, -{ "https://channel1vids.com/index.html" }, -{ "https://instagram.com/index.html" }, -{ "https://php.net/index.html" }, -{ "https://accuweather.com/index.html" }, -{ "https://google.de/index.html" }, -{ "https://suning.com/index.html" }, -{ "https://sapo.pt/index.html" }, -{ "https://superuser.com/index.html" }, -{ "https://google.co.th/index.html" }, -{ "https://divar.ir/index.html" }, -{ "https://nbcnews.com/index.html" }, -{ "https://badoo.com/index.html" }, -{ "https://google.kz/index.html" }, -{ "https://caliente.mx/index.html" }, -{ "https://google.lk/index.html" }, -{ "https://ladbible.com/index.html" }, -{ "https://google.com.af/index.html" }, -{ "https://mi.com/index.html" }, -{ "https://xvideos.com/index.html" }, -{ "https://linkedin.com/index.html" }, -{ "https://pixnet.net/index.html" }, -{ "https://techcrunch.com/index.html" }, -{ "https://expedia.com/index.html" }, -{ "https://wiktionary.org/index.html" }, -{ "https://amazon.fr/index.html" }, -{ "https://secureserver.net/index.html" }, -{ "https://www.openhub.net/p/robots-txt" }, -{ "https://2ch.net/index.html" }, -{ "https://intoday.in/index.html" }, -{ "https://ruten.com.tw/index.html" }, -{ "https://merdeka.com/index.html" }, -{ "https://mega.nz/index.html" }, -{ "https://books.google.de/books?id=wwmbDgAAQBAJ&pg=PT25&lpg=PT25&dq=robotstxt+parser&source=bl&ots=44MiCBAKbi&sig=g-xgZZO0qYVjC42FuwpoA4EdWs4&hl=de&sa=X&redir_esc=y#v=onepage&q=robotstxt%20parser&f=false" }, -{ "https://translationbuddy.com/index.html" }, -{ "https://mmofreegames.online/index.html" }, -{ "https://battle.net/index.html" }, -{ "https://pages.tmall.com/index.html" }, -{ "https://taleo.net/index.html" }, -{ "https://archive.org/index.html" }, -{ "https://sakura.ne.jp/index.html" }, -{ "https://voc.com.cn/index.html" }, -{ "https://abs-cbn.com/index.html" }, -{ "https://amazon.es/index.html" }, -{ "https://wikimedia.org/index.html" }, -{ "https://google.co.za/index.html" }, -{ "https://kakaku.com/index.html" }, -{ "https://elsevier.com/index.html" }, -{ "https://steampowered.com/index.html" }, -{ "https://aol.com/index.html" }, -{ "https://ultimate-guitar.com/index.html" }, -{ "https://globo.com/index.html" }, -{ "https://google.com.br/index.html" }, -{ "https://goodreads.com/index.html" }, -{ "https://stackoverflow.com/index.html" }, -{ "https://meetup.com/index.html" }, -{ "https://media.tumblr.com/index.html" }, -{ "https://bbc.com/index.html" }, -{ "https://okdiario.com/index.html" }, -{ "https://google.ru/index.html" }, -{ "https://zoho.com/index.html" }, -{ "https://pymotw.com/2/robotparser/" }, -{ "https://ok.ru/index.html" }, -{ "https://otvfoco.com.br/index.html" }, -{ "https://xda-developers.com/index.html" }, -{ "https://issues.apache.org/jira/browse/NUTCH-1031" }, -{ "https://avito.ru/index.html" }, -{ "https://addthis.com/index.html" }, -{ "https://clipconverter.cc/index.html" }, -{ "https://hatena.ne.jp/index.html" }, -{ "https://stockstar.com/index.html" }, -{ "https://nypost.com/index.html" }, -{ "https://setn.com/index.html" }, -{ "https://xtube.com/index.html" }, -{ "https://wikia.com/index.html" }, -{ "https://www.yakaferci.io/robots-txt/" }, -{ "https://force.com/index.html" }, -{ "https://kayak.com/index.html" }, -{ "https://npr.org/index.html" }, -{ "https://google.pt/index.html" }, -{ "https://tvbs.com.tw/index.html" }, -{ "https://taobao.com/index.html" }, -{ "https://alsbbora.com/index.html" }, -{ "https://china.com/index.html" }, -{ "https://abcnews.go.com/index.html" }, -{ "https://albawabhnews.com/index.html" }, -{ "https://flipkart.com/index.html" }, -{ "https://android.com/index.html" }, -{ "https://blog.me/index.html" } +"https://telegraph.co.uk/index.html", +"https://bp.blogspot.com/index.html", +"https://17ok.com/index.html", +"https://lemonde.fr/index.html", +"https://google.no/index.html", +"https://yandex.ru/index.html", +"https://bandcamp.com/index.html", +"https://att.com/index.html", +"https://hawaaworld.com/index.html", +"https://siteadvisor.com/index.html", +"https://hatenablog.com/index.html", +"https://wiley.com/index.html", +"https://gitter.im/t1gor/Robots.txt-Parser-Class/archives", +"https://support.google.com/webmasters/answer/6062598?hl=en", +"https://gstatic.com/index.html", +"https://wix.com/index.html", +"https://sohu.com/index.html", +"https://salesforce.com/index.html", +"https://op.gg/index.html", +"https://patch.com/index.html", +"https://premierleague.com/index.html", +"https://givemesport.com/index.html", +"https://rambler.ru/index.html", +"https://coursera.org/index.html", +"https://sciencedirect.com/index.html", +"https://newstrend.news/index.html", +"https://liputan6.com/index.html", +"https://cbsnews.com/index.html", +"https://blog.jp/index.html", +"https://atlassian.net/index.html", +"https://livejasmin.com/index.html", +"https://hespress.com/index.html", +"https://google.com.tw/index.html", +"https://amazon.de/index.html", +"https://tube8.com/index.html", +"https://wikihow.com/index.html", +"https://books.google.de/books?id=hO7sBQAAQBAJ&pg=PA282&lpg=PA282&dq=robotstxt+parser&source=bl&ots=BJJf9p47U6&sig=w-uc3hT94YumAO1fL8GipBEkhE0&hl=de&sa=X&redir_esc=y#v=onepage&q=robotstxt%20parser&f=false", +"https://spotscenered.info/index.html", +"https://google.co.jp/index.html", +"https://adexchangegate.com/index.html", +"https://wordpress.org/index.html", +"http://logparserplus.com/examples/32/", +"https://google.com.gt/index.html", +"https://food.tmall.com/index.html", +"https://unity3d.com/index.html", +"https://onet.pl/index.html", +"https://slickdeals.net/index.html", +"https://bongacams.com/index.html", +"https://popcash.net/index.html", +"https://google.com.sg/index.html", +"https://hola.com/index.html", +"https://daikynguyenvn.com/index.html", +"https://chip.de/index.html", +"https://subscene.com/index.html", +"https://douyu.com/index.html", +"https://telegram.org/index.html", +"https://google.sk/index.html", +"https://www.webmasterworld.com/forum93/37.htm", +"https://twitter.com/index.html", +"https://gogoanime.io/index.html", +"https://academia.edu/index.html", +"https://ci123.com/index.html", +"https://google.co.kr/index.html", +"https://dianping.com/index.html", +"https://porn.com/index.html", +"https://google.com.ly/index.html", +"https://convert2mp3.net/index.html", +"https://ouedkniss.com/index.html", +"https://uzone.id/index.html", +"https://books.google.de/books?id=vk5ODAAAQBAJ&pg=PA63&lpg=PA63&dq=robotstxt+parser&source=bl&ots=lXXaGw820W&sig=7Mtcu5ABrTTw1kFfeTReh3YuvFU&hl=de&sa=X&redir_esc=y#v=onepage&q=robotstxt%20parser&f=false", +"https://youdao.com/index.html", +"https://mit.edu/index.html", +"https://kinokrad.co/index.html", +"https://urbandictionary.com/index.html", +"https://usatoday.com/index.html", +"https://weibo.com/index.html", +"https://rakuten.co.jp/index.html", +"https://rumblinggoodies.com/au/index.html", +"https://samsung.com/index.html", +"https://momoshop.com.tw/index.html", +"https://chron.com/index.html", +"https://intel.com/index.html", +"https://kaskus.co.id/index.html", +"https://google.com/index.html", +"https://homedepot.com/index.html", +"https://cqnews.net/index.html", +"https://free.fr/index.html", +"https://uol.com.br/index.html", +"https://google.com.kw/index.html", +"https://pipeschannels.com/index.html", +"https://kotaku.com/index.html", +"https://outbrain.com/index.html", +"https://moneycontrol.com/index.html", +"https://gearbest.com/index.html", +"https://gizmodo.com/index.html", +"https://myway.com/index.html", +"https://theatlantic.com/index.html", +"https://google.co.ve/index.html", +"https://nasa.gov/index.html", +"https://google.co.ao/index.html", +"https://mobile01.com/index.html", +"https://hackage.haskell.org/package/robots-txt", +"https://sberbank.ru/index.html", +"https://livescore.com/index.html", +"https://ebay.com.au/index.html", +"https://google.at/index.html", +"https://prothom-alo.com/index.html", +"https://thesaurus.com/index.html", +"https://google.hu/index.html", +"https://blogfa.com/index.html", +"https://lifewire.com/index.html", +"https://deviantart.com/index.html", +"https://milliyet.com.tr/index.html", +"https://cisco.com/index.html", +"https://naij.com/index.html", +"https://google.com.mx/index.html", +"https://digitaldsp.com/index.html", +"https://exosrv.com/index.html", +"https://citi.com/index.html", +"https://4dsply.com/index.html", +"https://netflix.com/index.html", +"https://ettoday.net/index.html", +"https://zhihu.com/index.html", +"https://ameblo.jp/index.html", +"https://eyny.com/index.html", +"https://jiameng.com/index.html", +"https://gfycat.com/index.html", +"https://cnbc.com/index.html", +"https://ccm.net/index.html", +"https://sabq.org/index.html", +"https://iwanttodeliver.com/index.html", +"https://t-online.de/index.html", +"https://fiverr.com/index.html", +"https://1688.com/index.html", +"https://blastingnews.com/index.html", +"https://cpm10.com/index.html", +"https://nextlnk2.com/index.html", +"https://uptobox.com/index.html", +"https://commentcamarche.net/index.html", +"https://google.fr/index.html", +"https://viva.co.id/index.html", +"https://docs.python.org/2/library/robotparser.html", +"https://bankofamerica.com/index.html", +"https://nih.gov/index.html", +"https://epochtimes.com/index.html", +"https://hdzog.com/index.html", +"https://google.co.in/index.html", +"https://giphy.com/index.html", +"https://mediafire.com/index.html", +"https://speedtest.net/index.html", +"https://investing.com/index.html", +"https://oschina.net/index.html", +"https://liveadexchanger.com/index.html", +"https://google.ae/index.html", +"https://seasonvar.ru/index.html", +"https://avast.com/index.html", +"https://webex.com/index.html", +"https://buyma.com/index.html", +"https://google.iq/index.html", +"https://khanacademy.org/index.html", +"https://theepochtimes.com/index.html", +"https://doc.scrapy.org/en/latest/topics/spiders.html", +"https://friv.com/index.html", +"https://yahoo.com/index.html", +"https://codepen.io/index.html", +"https://mama.cn/index.html", +"http://www.rubydoc.info/gems/robotstxt-parser/0.1.1", +"https://wsj.com/index.html", +"https://gotporn.com/index.html", +"https://drom.ru/index.html", +"https://rarbg.is/index.html", +"https://google.com.ng/index.html", +"https://t.co/index.html", +"https://google.ie/index.html", +"https://google.com.ar/index.html", +"https://google.es/index.html", +"https://adhoc2.net/index.html", +"https://crunchyroll.com/index.html", +"https://leboncoin.fr/index.html", +"https://google.com.pk/index.html", +"https://bukalapak.com/index.html", +"https://forbes.com/index.html", +"https://douban.com/index.html", +"https://indiatimes.com/index.html", +"https://asus.com/index.html", +"https://olx.ua/index.html", +"https://interia.pl/index.html", +"https://hao123.com/index.html", +"https://doubleclick.net/index.html", +"https://www.versioneye.com/php/webignition:robots-txt-parser/1.0.2", +"https://wordpress.org/plugins/multipart-robotstxt-editor/", +"https://newtabtv.com/index.html", +"https://lenovo.com/index.html", +"https://gamespot.com/index.html", +"https://kompas.com/index.html", +"https://themeforest.net/index.html", +"https://coinmarketcap.com/index.html", +"https://google.fi/index.html", +"https://soso.com/index.html", +"https://cs.nyu.edu/courses/fall02/G22.3033-008/WebCrawler.java", +"https://piet2eix3l.com/index.html", +"https://google.com.do/index.html", +"https://gmw.cn/index.html", +"https://heavy.com/index.html", +"https://playstation.com/index.html", +"https://oracle.com/index.html", +"https://fbsbx.com/index.html", +"https://rednet.cn/index.html", +"https://sportbible.com/index.html", +"https://ck101.com/index.html", +"https://facebook.com/index.html", +"https://yandex.com/support/webmaster/indexing-options/robots-txt-analyzer.html", +"https://diamongs.com/index.html", +"https://nikkeibp.co.jp/index.html", +"https://9gag.com/index.html", +"https://aliyun.com/index.html", +"https://hicpm5.com/index.html", +"https://hilltopads.net/index.html", +"https://duolingo.com/index.html", +"https://google.tn/index.html", +"https://weblio.jp/index.html", +"https://wittyfeed.com/index.html", +"https://grid.id/index.html", +"https://google.ro/index.html", +"https://weebly.com/index.html", +"https://livedoor.com/index.html", +"https://gsmarena.com/index.html", +"https://detik.com/index.html", +"https://mercadolivre.com.br/index.html", +"https://ea.com/index.html", +"https://hootsuite.com/index.html", +"https://gismeteo.ru/index.html", +"https://tokopedia.com/index.html", +"https://ukr.net/index.html", +"https://mercadolibre.com.ar/index.html", +"https://wowhead.com/index.html", +"https://adf.ly/index.html", +"https://zhanqi.tv/index.html", +"https://fatosdesconhecidos.com.br/index.html", +"https://shopify.com/index.html", +"https://exoclick.com/index.html", +"https://ewatchseries.to/index.html", +"https://onoticioso.com/index.html", +"https://youboy.com/index.html", +"https://webmd.com/index.html", +"https://youtube.com/index.html", +"https://zoom.us/index.html", +"https://trackingclick.net/index.html", +"https://tomsguide.com/index.html", +"http://sitemaps.blogspot.de/2006/02/using-robotstxt-file.html", +"https://capitalone.com/index.html", +"https://bytes.com/topic/c-sharp/answers/229379-robots-txt-parser", +"https://daum.net/index.html", +"https://nocookie.net/index.html", +"https://issuu.com/index.html", +"https://searchengineland.com/a-deeper-look-at-robotstxt-17573", +"https://prnt.sc/index.html", +"https://google.tm/index.html", +"https://torrentz2.eu/index.html", +"https://nature.com/index.html", +"https://crates.io/keywords/spider", +"https://bitly.com/index.html", +"https://conservativetribune.com/index.html", +"https://thebalance.com/index.html", +"https://time.com/index.html", +"https://discover.com/index.html", +"https://alicdn.com/index.html", +"https://newegg.com/index.html", +"https://ign.com/index.html", +"https://google.com.co/index.html", +"https://yandex.ua/index.html", +"https://godaddy.com/index.html", +"https://asos.com/index.html", +"https://huffingtonpost.com/index.html", +"https://blibli.com/index.html", +"https://coccoc.com/index.html", +"https://amazon.it/index.html", +"https://mlb.com/index.html", +"https://politico.com/index.html", +"https://ptt.cc/index.html", +"https://bitauto.com/index.html", +"https://google.pl/index.html", +"https://steamcommunity.com/index.html", +"https://google.ca/index.html", +"https://subject.tmall.com/index.html", +"https://cam4.com/index.html", +"https://sourceforge.net/index.html", +"https://pantip.com/index.html", +"https://google.com.hk/index.html", +"https://reimageplus.com/index.html", +"https://metropcs.mobi/index.html", +"https://dropbox.com/index.html", +"https://donga.com/index.html", +"https://elmundo.es/index.html", +"https://wp.pl/index.html", +"https://quora.com/index.html", +"https://zendesk.com/index.html", +"https://www.nuget.org/packages?q=Tags%3A%22Robots.txt%22", +"https://bhphotovideo.com/index.html", +"https://yalla-shoot.com/index.html", +"https://youm7.com/index.html", +"https://hm.com/index.html", +"https://banvenez.com/index.html", +"https://icloud.com/index.html", +"https://popads.net/index.html", +"https://rutube.ru/index.html", +"https://zara.com/index.html", +"https://espncricinfo.com/index.html", +"https://smallpdf.com/index.html", +"https://kinopoisk.ru/index.html", +"https://focus.de/index.html", +"https://codeonclick.com/index.html", +"https://digikala.com/index.html", +"https://brilio.net/index.html", +"https://welt.de/index.html", +"https://nytimes.com/index.html", +"https://usnews.com/index.html", +"https://washingtonpost.com/index.html", +"https://nike.com/index.html", +"https://realtor.com/index.html", +"https://artstation.com/index.html", +"https://line.me/index.html", +"https://allrecipes.com/index.html", +"https://blog.stapps.io/", +"https://espn.com/index.html", +"https://babytree.com/index.html", +"https://hotels.com/index.html", +"https://express.co.uk/index.html", +"https://dailymotion.com/index.html", +"https://flickr.com/index.html", +"https://naukri.com/index.html", +"https://rottentomatoes.com/index.html", +"https://xfinity.com/index.html", +"https://messenger.com/index.html", +"https://alibaba.com/index.html", +"https://iqoption.com/index.html", +"https://informationvine.com/index.html", +"https://kijiji.ca/index.html", +"https://savefrom.net/index.html", +"https://moz.com/blog/interactive-guide-to-robots-txt", +"https://metacpan.org/pod/WWW::RobotRules::Parser", +"https://onedio.com/index.html", +"https://inven.co.kr/index.html", +"https://aliexpress.com/index.html", +"https://pchome.com.tw/index.html", +"https://southwest.com/index.html", +"https://web.de/index.html", +"https://google.co.il/index.html", +"https://souq.com/index.html", +"https://shutterstock.com/index.html", +"https://okta.com/index.html", +"https://uidai.gov.in/index.html", +"https://ytimg.com/index.html", +"https://skype.com/index.html", +"https://technicalseo.com/seo-tools/robots-txt/", +"https://freejobalert.com/index.html", +"https://axzsd.pro/index.html", +"https://gmanetwork.com/index.html", +"https://walmart.com/index.html", +"https://xnxx.com/index.html", +"https://4pda.ru/index.html", +"https://getadblock.com/index.html", +"https://reuters.com/index.html", +"https://cloudfront.net/index.html", +"https://support.google.com/webmasters/answer/6062598?hl=en", +"https://de.wikipedia.org/robots.txt", +"https://visualstudio.com/index.html", +"https://sogou.com/index.html", +"https://onlinevideoconverter.com/index.html", +"https://pinimg.com/index.html", +"https://vidzi.tv/index.html", +"https://reverso.net/index.html", +"https://xhamster.com/index.html", +"http://www.robotstxt.org/orig.html", +"https://jimdo.com/index.html", +"https://maka.im/index.html", +"https://yadi.sk/index.html", +"https://icicibank.com/index.html", +"https://okcupid.com/index.html", +"https://kooora.com/index.html", +"https://tripadvisor.com/index.html", +"https://google.com.au/index.html", +"https://google.si/index.html", +"https://bloomberg.com/index.html", +"https://harvard.edu/index.html", +"https://office.com/index.html", +"https://discordapp.com/index.html", +"https://4chan.org/index.html", +"https://github.com/pandzel/RobotsTxt", +"https://timeanddate.com/index.html", +"https://tumblr.com/index.html", +"https://videoyoum7.com/index.html", +"https://zippyshare.com/index.html", +"https://fanpage.gr/index.html", +"https://tutorialspoint.com/index.html", +"https://google.com.ph/index.html", +"https://sarkariresult.com/index.html", +"https://duckduckgo.com/index.html", +"https://goo.gl/index.html", +"https://investopedia.com/index.html", +"https://tistory.com/index.html", +"https://rbc.ru/index.html", +"https://autodesk.com/index.html", +"https://list-manage.com/index.html", +"https://bestbuy.com/index.html", +"https://spiegel.de/index.html", +"https://drive2.ru/index.html", +"https://cnzz.com/index.html", +"https://jd.com/index.html", +"https://104.com.tw/index.html", +"https://ca.gov/index.html", +"https://videodownloadconverter.com/index.html", +"https://google.nl/index.html", +"https://kapanlagi.com/index.html", +"https://healthline.com/index.html", +"https://bet9ja.com/index.html", +"https://discogs.com/index.html", +"https://livedoor.jp/index.html", +"https://canva.com/index.html", +"https://ibm.com/index.html", +"https://cricbuzz.com/index.html", +"https://eastday.com/index.html", +"https://ebc.net.tw/index.html", +"https://boredpanda.com/index.html", +"https://w3schools.com/index.html", +"https://box.com/index.html", +"https://airbnb.com/index.html", +"https://github.com/t1gor/Robots.txt-Parser-Class", +"https://jeuxvideo.com/index.html", +"https://www.youtube.com/watch?v=8ZZSd0cdymo", +"https://google.com.my/index.html", +"https://blogspot.com/index.html", +"https://focuusing.com/index.html", +"https://hotstar.com/index.html", +"https://dailymail.co.uk/index.html", +"https://4shared.com/index.html", +"https://foxnews.com/index.html", +"https://vimeo.com/index.html", +"https://office365.com/index.html", +"https://dell.com/index.html", +"https://instructure.com/index.html", +"https://amazonaws.com/index.html", +"https://mellowads.com/index.html", +"https://detail.tmall.com/index.html", +"https://www.quora.com/How-do-I-make-my-web-crawler-follow-robots-txt", +"https://rdrr.io/cran/robotstxt/", +"https://flvto.biz/index.html", +"https://google.com.pe/index.html", +"https://howtogeek.com/index.html", +"https://freepik.com/index.html", +"https://google.co.uk/index.html", +"https://bet365.com/index.html", +"https://azlyrics.com/index.html", +"https://chatwork.com/index.html", +"https://offertogo.online/index.html", +"https://pinterest.co.uk/index.html", +"https://udn.com/index.html", +"https://rapidgator.net/index.html", +"https://ask.fm/index.html", +"https://latimes.com/index.html", +"https://roblox.com/index.html", +"https://zz08047.com/index.html", +"https://ebay-kleinanzeigen.de/index.html", +"https://metropoles.com/index.html", +"https://haber7.com/index.html", +"https://bittrex.com/index.html", +"https://npm.runkit.com/robots-txt-parser", +"https://go.com/index.html", +"https://filehippo.com/index.html", +"https://google.be/index.html", +"https://pulseonclick.com/index.html", +"https://kissanime.ru/index.html", +"https://taboola.com/index.html", +"https://rubygems.org/gems/robotstxt-parser/versions/0.1.1", +"https://google.az/index.html", +"https://dingit.tv/index.html", +"https://fromdoctopdf.com/index.html", +"https://tianya.cn/index.html", +"https://scribd.com/index.html", +"http://socoder.net/?Snippet=23824", +"https://eatyellowmango.com/index.html", +"https://gamepedia.com/index.html", +"https://qiita.com/index.html", +"https://kinogo.club/index.html", +"https://nfl.com/index.html", +"https://stackoverflow.com/questions/3141031/robots-txt-parser-java", +"https://nikkei.com/index.html", +"https://gazzetta.it/index.html", +"https://appledaily.com.tw/index.html", +"https://jabong.com/index.html", +"https://ebay.it/index.html", +"https://yahoo.co.jp/index.html", +"https://ask.com/index.html", +"https://eventbrite.com/index.html", +"https://banggood.com/index.html", +"https://hp.com/index.html", +"https://adp.com/index.html", +"https://lavanguardia.com/index.html", +"https://wish.com/index.html", +"https://shaparak.ir/index.html", +"https://idnes.cz/index.html", +"https://biobiochile.cl/index.html", +"https://qingdaonews.com/index.html", +"https://nametests.com/index.html", +"https://speakol.com/index.html", +"https://hclips.com/index.html", +"https://imgur.com/index.html", +"https://ssl-images-amazon.com/index.html", +"https://thestartmagazine.com/index.html", +"https://yelp.com/index.html", +"https://google.co.id/index.html", +"https://cdiscount.com/index.html", +"https://redonetype.com/index.html", +"https://pinterest.com/index.html", +"https://yourporn.sexy/index.html", +"https://google.com.eg/index.html", +"https://wixsite.com/index.html", +"https://quizlet.com/index.html", +"https://redd.it/index.html", +"https://leagueoflegends.com/index.html", +"https://baidu.com/index.html", +"https://redtube.com/index.html", +"https://amazon.co.uk/index.html", +"https://www.jugglingedge.com/help/creatingagoodbot.php", +"https://drtuber.com/index.html", +"https://google.com.sa/index.html", +"https://lifehacker.com/index.html", +"https://ndtv.com/index.html", +"https://elmogaz.com/index.html", +"https://python.org/index.html", +"https://ths9j89.com/index.html", +"https://japanpost.jp/index.html", +"https://verizonwireless.com/index.html", +"https://gosuslugi.ru/index.html", +"https://thefreedictionary.com/index.html", +"https://sinoptik.ua/index.html", +"https://behance.net/index.html", +"https://fc2.com/index.html", +"https://thehill.com/index.html", +"https://innfrad.com/index.html", +"https://bbc.co.uk/index.html", +"https://norton.com/index.html", +"https://bild.de/index.html", +"https://frtyg.com/index.html", +"https://livejournal.com/index.html", +"https://sex.com/index.html", +"https://csdn.net/index.html", +"https://billdesk.com/index.html", +"https://segmentfault.com/index.html", +"https://elpais.com/index.html", +"https://squarespace.com/index.html", +"https://genius.com/index.html", +"https://blueseek.com/index.html", +"https://patreon.com/index.html", +"https://xe.com/index.html", +"https://indeed.com/index.html", +"https://independent.co.uk/index.html", +"https://dmm.com/index.html", +"https://ebay.de/index.html", +"https://spankbang.com/index.html", +"https://merriam-webster.com/index.html", +"https://comcast.net/index.html", +"https://google.cl/index.html", +"https://fanfiction.net/index.html", +"https://bitbucket.org/index.html", +"https://google.lt/index.html", +"https://list.tmall.com/index.html", +"https://huanqiu.com/index.html", +"https://hamariweb.com/index.html", +"https://target.com/index.html", +"https://youth.cn/index.html", +"https://beytoote.com/index.html", +"https://americanexpress.com/index.html", +"https://hurriyet.com.tr/index.html", +"https://uptodown.com/index.html", +"https://primevideo.com/index.html", +"https://camdolls.com/index.html", +"https://paypal.com/index.html", +"https://bookmyshow.com/index.html", +"https://audisto.com/insights/guides/4/", +"https://rumble.com/index.html", +"https://github.com/index.html", +"https://gyazo.com/index.html", +"https://appspot.com/index.html", +"https://y8.com/index.html", +"https://wellsfargo.com/index.html", +"https://google.com.vn/index.html", +"http://www.zaproxy.org/2.5/javadocs/org/zaproxy/zap/spider/parser/SpiderRobotstxtParser.html", +"https://rt.com/index.html", +"https://mailchimp.com/index.html", +"https://el-nacional.com/index.html", +"https://ltn.com.tw/index.html", +"https://vice.com/index.html", +"https://apple.com/index.html", +"https://orange.fr/index.html", +"https://dmm.co.jp/index.html", +"https://europa.eu/index.html", +"https://nicovideo.jp/index.html", +"https://lazada.com.my/index.html", +"https://nownews.com/index.html", +"https://adobe.com/index.html", +"https://blogger.com/index.html", +"https://sabah.com.tr/index.html", +"https://streamable.com/index.html", +"https://cnn.com/index.html", +"http://tools.seochat.com/tools/robots-txt-validator/#sthash.xiB29oJK.dpbs", +"https://primosearch.com/index.html", +"https://gongchang.com/index.html", +"https://tribunnews.com/index.html", +"https://wordpress.com/index.html", +"https://dafont.com/index.html", +"https://perfectgirls.net/index.html", +"https://quizzstar.com/index.html", +"https://disqus.com/index.html", +"https://ntd.tv/index.html", +"https://google.rs/index.html", +"https://mail.ru/index.html", +"https://performanceadexchange.com/index.html", +"https://doublepimpssl.com/index.html", +"https://providr.com/index.html", +"https://wp.com/index.html", +"https://dashbo15myapp.com/index.html", +"https://amazon.ca/index.html", +"https://youjizz.com/index.html", +"https://shink.in/index.html", +"https://cpm20.com/index.html", +"https://gmx.net/index.html", +"https://google.dk/index.html", +"https://pixabay.com/index.html", +"https://getpocket.com/index.html", +"https://naver.jp/index.html", +"https://neobux.com/index.html", +"https://kizlarsoruyor.com/index.html", +"https://playmediacenter.com/index.html", +"https://amazon.in/index.html", +"https://diply.com/index.html", +"https://hulu.com/index.html", +"https://etsy.com/index.html", +"https://google.cz/index.html", +"https://baike.com/index.html", +"https://nextlnk1.com/index.html", +"https://booking.com/index.html", +"https://jianshu.com/index.html", +"https://uploaded.net/index.html", +"https://cbssports.com/index.html", +"https://mozilla.org/index.html", +"https://usps.com/index.html", +"https://hubspot.com/index.html", +"https://westernjournalism.com/index.html", +"https://wattpad.com/index.html", +"https://ecollege.com/index.html", +"https://elbalad.news/index.html", +"https://udemy.com/index.html", +"https://blackboard.com/index.html", +"https://cran.r-project.org/web/packages/robotstxt/robotstxt.pdf", +"https://inadequal.com/index.html", +"https://softonic.com/index.html", +"https://marca.com/index.html", +"https://gamefaqs.com/index.html", +"https://goo.ne.jp/index.html", +"https://chaturbate.com/index.html", +"https://adexchangeperformance.com/index.html", +"https://360.cn/index.html", +"https://redirectvoluum.com/index.html", +"https://amazon.cn/index.html", +"https://iqiyi.com/index.html", +"https://namnak.com/index.html", +"https://ikea.com/index.html", +"https://doublepimp.com/index.html", +"https://google.com.ua/index.html", +"https://vnexpress.net/index.html", +"https://jrj.com.cn/index.html", +"https://rutracker.org/index.html", +"https://bilibili.com/index.html", +"https://tabelog.com/index.html", +"https://51sole.com/index.html", +"https://bankmellat.ir/index.html", +"https://tmall.com/index.html", +"https://evernote.com/index.html", +"https://dictionary.com/index.html", +"https://bitmedianetwork.com/index.html", +"https://jw.org/index.html", +"https://vporn.com/index.html", +"https://1337x.to/index.html", +"https://ecosia.org/index.html", +"https://people.com.cn/index.html", +"https://coinbase.com/index.html", +"http://search.cpan.org/~gaas/WWW-RobotRules-6.02/lib/WWW/RobotRules.pm", +"https://discuss.com.hk/index.html", +"https://infusionsoft.com/index.html", +"https://twimg.com/index.html", +"https://cnblogs.com/index.html", +"https://seznam.cz/index.html", +"https://soundcloud.com/index.html", +"https://yespornplease.com/index.html", +"https://www.npmjs.com/package/robots-txt-parse", +"https://www.promptcloud.com/blog/robots.txt-file-how-to-read-web-crawling", +"https://blpmovies.com/index.html", +"https://myanimelist.net/index.html", +"https://google.com.mm/index.html", +"https://google.se/index.html", +"https://grammarly.com/index.html", +"https://ouo.io/index.html", +"https://gmarket.co.kr/index.html", +"https://ebay.com/index.html", +"https://ebay.in/index.html", +"https://pandora.com/index.html", +"https://wikipedia.org/index.html", +"https://sina.com.cn/index.html", +"https://slideshare.net/index.html", +"https://umblr.com/index.html", +"https://asana.com/index.html", +"https://zz08037.com/index.html", +"https://almasryalyoum.com/index.html", +"https://kickstarter.com/index.html", +"https://banesconline.com/index.html", +"https://zillow.com/index.html", +"https://chinadaily.com.cn/index.html", +"https://twitch.tv/index.html", +"https://onclkds.com/index.html", +"https://google.co.nz/index.html", +"https://paytm.com/index.html", +"https://pornhub.com/index.html", +"https://39.net/index.html", +"https://alodokter.com/index.html", +"https://alipay.com/index.html", +"https://manoramaonline.com/index.html", +"https://fedex.com/index.html", +"https://stackexchange.com/index.html", +"https://vidio.com/index.html", +"https://varzesh3.com/index.html", +"https://microsoftonline.com/index.html", +"https://google.by/index.html", +"https://help.sumologic.com/Search/Get-Started-with-Search/Suggested-Searches/Suggested-Searches-for-the-Apache-Access-Parser", +"https://sahibinden.com/index.html", +"https://subito.it/index.html", +"https://utorrent.com/index.html", +"https://medium.com/index.html", +"https://perfecttoolmedia.com/index.html", +"https://easypdfcombine.com/index.html", +"https://adexchangeprediction.com/index.html", +"https://debate.com.mx/index.html", +"https://yts.ag/index.html", +"https://cnet.com/index.html", +"https://python.readthedocs.io/en/stable/library/urllib.robotparser.html", +"https://golanglibs.com/top?q=robots.txt-go", +"https://reallifecam.com/index.html", +"https://theverge.com/index.html", +"https://wetransfer.com/index.html", +"https://onlinesbi.com/index.html", +"https://amazon.com/index.html", +"https://yaplakal.com/index.html", +"https://libraries.io/search?keywords=robots.txt&platforms=NPM", +"https://noaa.gov/index.html", +"https://glassdoor.com/index.html", +"https://live.com/index.html", +"https://macys.com/index.html", +"https://bodybuilding.com/index.html", +"https://trello.com/index.html", +"https://goal.com/index.html", +"https://pikabu.ru/index.html", +"https://eksisozluk.com/index.html", +"https://googlevideo.com/index.html", +"https://ups.com/index.html", +"https://avg.com/index.html", +"https://google.bg/index.html", +"https://whatsapp.com/index.html", +"https://hdfcbank.com/index.html", +"https://openload.co/index.html", +"https://onclickmax.com/index.html", +"https://groupon.com/index.html", +"https://feedly.com/index.html", +"https://nur.kz/index.html", +"https://rarbg.to/index.html", +"https://taringa.net/index.html", +"https://mathrubhumi.com/index.html", +"https://youporn.com/index.html", +"https://solarmoviez.to/index.html", +"https://imdb.com/index.html", +"https://reddit.com/index.html", +"https://qq.com/index.html", +"https://bestadbid.com/index.html", +"https://google.com.ec/index.html", +"http://nikitathespider.com/python/rerp/", +"https://mirror.co.uk/index.html", +"https://gamer.com.tw/index.html", +"https://instructables.com/index.html", +"https://techradar.com/index.html", +"https://indoxxi.net/index.html", +"https://aparat.com/index.html", +"https://repubblica.it/index.html", +"https://linkshrink.net/index.html", +"https://naver.com/index.html", +"https://lapatilla.com/index.html", +"https://en.wikipedia.org/robots.txt", +"https://breitbart.com/index.html", +"http://www.the-art-of-web.com/php/parse-robots/", +"https://vk.com/index.html", +"https://engadget.com/index.html", +"https://irctc.co.in/index.html", +"https://files.wordpress.com/index.html", +"https://thewhizmarketing.com/index.html", +"https://craigslist.org/index.html", +"https://theguardian.com/index.html", +"https://slack.com/index.html", +"https://gov.uk/index.html", +"https://nordstrom.com/index.html", +"https://chegg.com/index.html", +"https://coderwall.com/p/pu2clg/robots-txt-parser-online", +"https://humblebundle.com/index.html", +"https://businessinsider.com/index.html", +"https://fwbntw.com/index.html", +"https://inquirer.net/index.html", +"https://google.gr/index.html", +"https://state.gov/index.html", +"https://google.cn/index.html", +"https://scribol.com/index.html", +"https://tfetimes.com/index.html", +"https://motherless.com/index.html", +"https://msn.com/index.html", +"https://snapdeal.com/index.html", +"https://spotify.com/index.html", +"https://mercadolibre.com.mx/index.html", +"https://t.me/index.html", +"https://so.com/index.html", +"https://china.com.cn/index.html", +"https://wtoip.com/index.html", +"https://as.com/index.html", +"https://corriere.it/index.html", +"https://beeg.com/index.html", +"https://gostream.is/index.html", +"https://google.it/index.html", +"https://myanmarload.com/index.html", +"https://world.tmall.com/index.html", +"https://voyeurhit.com/index.html", +"https://researchgate.net/index.html", +"https://amazon.co.jp/index.html", +"https://lenta.ru/index.html", +"https://google.dz/index.html", +"https://prezi.com/index.html", +"https://hh.ru/index.html", +"https://olx.in/index.html", +"https://lefigaro.fr/index.html", +"https://kompasiana.com/index.html", +"https://chase.com/index.html", +"https://stanford.edu/index.html", +"https://rediff.com/index.html", +"https://allegro.pl/index.html", +"http://pythonicprose.blogspot.de/2009/10/python-read-robotstxt-files-with-ease.html", +"https://fbcdn.net/index.html", +"https://163.com/index.html", +"https://xinhuanet.com/index.html", +"https://lazada.co.id/index.html", +"https://mercadolibre.com.ve/index.html", +"https://txxx.com/index.html", +"https://animeflv.net/index.html", +"https://fidelity.com/index.html", +"https://cambridge.org/index.html", +"https://libero.it/index.html", +"https://yenisafak.com/index.html", +"https://ebay.fr/index.html", +"https://ticketmaster.com/index.html", +"https://mashable.com/index.html", +"https://ieee.org/index.html", +"https://bleacherreport.com/index.html", +"https://intoli.com/blog/analyzing-one-million-robots-txt-files/", +"https://mobile.de/index.html", +"http://www.arewewebyet.org/topics/utils/", +"https://springer.com/index.html", +"https://chaoshi.tmall.com/index.html", +"https://ioredi.com/index.html", +"https://eskimi.com/index.html", +"https://bing.com/index.html", +"https://thepiratebay.org/index.html", +"https://u1trkqf.com/index.html", +"https://surveymonkey.com/index.html", +"https://deviantart.net/index.html", +"https://intuit.com/index.html", +"http://www.juust.org/index.php/php-classes/robots-txt-php-parser-class/", +"https://zone-telechargement.ws/index.html", +"https://olx.com.br/index.html", +"https://emol.com/index.html", +"https://namu.wiki/index.html", +"https://fmovies.is/index.html", +"https://flirt4free.com/index.html", +"https://zapmeta.ws/index.html", +"https://weather.com/index.html", +"https://drudgereport.com/index.html", +"https://caijing.com.cn/index.html", +"https://userapi.com/index.html", +"https://wunderground.com/index.html", +"https://ebay.co.uk/index.html", +"https://thesun.co.uk/index.html", +"https://sputniknews.com/index.html", +"https://elfagr.org/index.html", +"https://allocine.fr/index.html", +"https://ria.ru/index.html", +"https://telewebion.com/index.html", +"https://putrr18.com/index.html", +"https://zipnoticias.com/index.html", +"https://buzzfeed.com/index.html", +"https://microsoft.com/index.html", +"https://myfreecams.com/index.html", +"https://google.ch/index.html", +"https://olx.pl/index.html", +"https://upwork.com/index.html", +"https://doorblog.jp/index.html", +"https://chinaz.com/index.html", +"https://google.hr/index.html", +"https://wordreference.com/index.html", +"https://google.com.tr/index.html", +"https://tomshardware.com/index.html", +"https://channel1vids.com/index.html", +"https://instagram.com/index.html", +"https://php.net/index.html", +"https://accuweather.com/index.html", +"https://google.de/index.html", +"https://suning.com/index.html", +"https://sapo.pt/index.html", +"https://superuser.com/index.html", +"https://google.co.th/index.html", +"https://divar.ir/index.html", +"https://nbcnews.com/index.html", +"https://badoo.com/index.html", +"https://google.kz/index.html", +"https://caliente.mx/index.html", +"https://google.lk/index.html", +"https://ladbible.com/index.html", +"https://google.com.af/index.html", +"https://mi.com/index.html", +"https://xvideos.com/index.html", +"https://linkedin.com/index.html", +"https://pixnet.net/index.html", +"https://techcrunch.com/index.html", +"https://expedia.com/index.html", +"https://wiktionary.org/index.html", +"https://amazon.fr/index.html", +"https://secureserver.net/index.html", +"https://www.openhub.net/p/robots-txt", +"https://2ch.net/index.html", +"https://intoday.in/index.html", +"https://ruten.com.tw/index.html", +"https://merdeka.com/index.html", +"https://mega.nz/index.html", +"https://books.google.de/books?id=wwmbDgAAQBAJ&pg=PT25&lpg=PT25&dq=robotstxt+parser&source=bl&ots=44MiCBAKbi&sig=g-xgZZO0qYVjC42FuwpoA4EdWs4&hl=de&sa=X&redir_esc=y#v=onepage&q=robotstxt%20parser&f=false", +"https://translationbuddy.com/index.html", +"https://mmofreegames.online/index.html", +"https://battle.net/index.html", +"https://pages.tmall.com/index.html", +"https://taleo.net/index.html", +"https://archive.org/index.html", +"https://sakura.ne.jp/index.html", +"https://voc.com.cn/index.html", +"https://abs-cbn.com/index.html", +"https://amazon.es/index.html", +"https://wikimedia.org/index.html", +"https://google.co.za/index.html", +"https://kakaku.com/index.html", +"https://elsevier.com/index.html", +"https://steampowered.com/index.html", +"https://aol.com/index.html", +"https://ultimate-guitar.com/index.html", +"https://globo.com/index.html", +"https://google.com.br/index.html", +"https://goodreads.com/index.html", +"https://stackoverflow.com/index.html", +"https://meetup.com/index.html", +"https://media.tumblr.com/index.html", +"https://bbc.com/index.html", +"https://okdiario.com/index.html", +"https://google.ru/index.html", +"https://zoho.com/index.html", +"https://pymotw.com/2/robotparser/", +"https://ok.ru/index.html", +"https://otvfoco.com.br/index.html", +"https://xda-developers.com/index.html", +"https://issues.apache.org/jira/browse/NUTCH-1031", +"https://avito.ru/index.html", +"https://addthis.com/index.html", +"https://clipconverter.cc/index.html", +"https://hatena.ne.jp/index.html", +"https://stockstar.com/index.html", +"https://nypost.com/index.html", +"https://setn.com/index.html", +"https://xtube.com/index.html", +"https://wikia.com/index.html", +"https://www.yakaferci.io/robots-txt/", +"https://force.com/index.html", +"https://kayak.com/index.html", +"https://npr.org/index.html", +"https://google.pt/index.html", +"https://tvbs.com.tw/index.html", +"https://taobao.com/index.html", +"https://alsbbora.com/index.html", +"https://china.com/index.html", +"https://abcnews.go.com/index.html", +"https://albawabhnews.com/index.html", +"https://flipkart.com/index.html", +"https://android.com/index.html", +"https://blog.me/index.html" diff --git a/examples/benchmarks.cpp b/examples/benchmarks.cpp index aaae2f2..44e57e3 100644 --- a/examples/benchmarks.cpp +++ b/examples/benchmarks.cpp @@ -51,7 +51,7 @@ BENCHMARK(basic_uri_1000) for (const auto& pp : uris) { basic_uri a1{pp}; - auto hs { a1.get_component(uri::host) }; + auto hs { a1.get_component() }; } // TEARDOWN_BENCHMARK() @@ -64,7 +64,7 @@ BENCHMARK(uri_1000) for (const auto& pp : uris) { uri a1{pp}; - auto hs { a1.get_component(uri::host) }; + auto hs { a1.get_component() }; } //TEARDOWN_BENCHMARK() @@ -76,8 +76,8 @@ BENCHMARK(uri_static_1000) for (const auto& pp : uris) { - uri_static a1{pp}; - auto hs { a1.get_component(uri::host) }; + uri_static<> a1{pp}; + auto hs { a1.get_component() }; } //TEARDOWN_BENCHMARK() diff --git a/examples/unittests.cpp b/examples/unittests.cpp index ad59fea..5e0c4da 100644 --- a/examples/unittests.cpp +++ b/examples/unittests.cpp @@ -54,8 +54,8 @@ TEST_CASE("get component") const uri u1{tests[0].first}; REQUIRE_NOTHROW(u1.get_component(host)); REQUIRE(u1.get_component(host) == "www.blah.com"); - REQUIRE(u1.get(host) == "www.blah.com"); - REQUIRE(u1.get(fragment) == ""); + REQUIRE(u1.get_component() == "www.blah.com"); + REQUIRE(u1.get_component() == ""); REQUIRE(u1.get_component(countof) == ""); } @@ -63,7 +63,7 @@ TEST_CASE("get component") TEST_CASE("subscript operator") { uri u1{tests[0].first}; - REQUIRE(u1.test()); + REQUIRE(u1.test()); const auto [tag,value] { u1[host] }; REQUIRE(tag == 8); REQUIRE(value == 12); @@ -74,14 +74,14 @@ TEST_CASE("bitset") { uri u1{tests[0].first}; REQUIRE(u1.get_present() == 0b0010100011); - u1.clear(); + u1.clear(); REQUIRE(u1.get_present() == 0); - u1.set(uri::countof); + u1.set(); REQUIRE(u1.get_present() == 0b1111111111); basic_uri b1{0b1111111111}; - REQUIRE (b1.get_component(scheme) == ""); - REQUIRE (b1.get_component(host) == ""); - b1.clear(scheme); + REQUIRE(b1.get_component(scheme) == ""); + REQUIRE(b1.get_component(host) == ""); + b1.clear(); REQUIRE(b1.get_present() == 0b1111111110); } @@ -89,26 +89,64 @@ TEST_CASE("bitset") TEST_CASE("get name") { REQUIRE_NOTHROW(uri::get_name(host)); - REQUIRE(uri::get_name(host) == "host"); + REQUIRE(uri::get_name() == "host"); + REQUIRE(uri::get_name(scheme) == "scheme"); REQUIRE(uri::get_name(countof) == ""); } +//----------------------------------------------------------------------------------------- +TEST_CASE("in range") +{ + const uri u1{"https://user:password@example.com:8080/path?search=1#frag"}; + // 0 1 2 3 4 5 + REQUIRE(u1.in_range(1) == uri::bitsum()); + REQUIRE(u1.in_range(9) == uri::bitsum()); + REQUIRE(u1.in_range(13) == uri::bitsum()); + REQUIRE(u1.in_range(22) == uri::bitsum()); + REQUIRE(u1.in_range(34) == uri::bitsum()); + REQUIRE(u1.in_range(39) == uri::bitsum()); + REQUIRE(u1.in_range(44) == uri::bitsum()); + REQUIRE(u1.in_range(53) == uri::bitsum()); +} + +//----------------------------------------------------------------------------------------- +TEST_CASE("test any/all range") +{ + const uri u1{"https://example.com/path?search=1"}; + REQUIRE(!u1.test_any()); + REQUIRE(u1.test_all()); + REQUIRE(u1.test_all()); + REQUIRE(!u1.test_all()); + REQUIRE(!u1.test_all()); +} + +//----------------------------------------------------------------------------------------- +TEST_CASE("clear/set all range") +{ + uri u1{"https://example.com/path?search=1"}; + u1.clear_all(); + REQUIRE(u1.test_all()); + REQUIRE(!u1.test_all()); + u1.set_all(); + REQUIRE(u1.test_all()); +} + //----------------------------------------------------------------------------------------- void run_test_comp(int id, const auto& ui) { const auto& vec { tests[id].second }; INFO("uri: " << id); // << ' ' << uri{u1}); - REQUIRE (ui.count() == vec.size()); + REQUIRE(ui.count() == vec.size()); for (const auto& [comp,str] : vec) { INFO("component: " << comp); - REQUIRE (ui.get_component(comp) == str); + REQUIRE(ui.get_component(comp) == str); } } TEST_CASE("uri component validations") { - static const std::unordered_set decode1st {12, 19, 26, 29, 30, 35}; + static const std::unordered_set decode1st {12, 19, 26, 29, 30, 31, 35}; for (int ii{}; ii < tests.size(); ++ii) { auto str{decode1st.contains(ii) ? uri::decode_hex(tests[ii].first, false) : tests[ii].first}; @@ -119,7 +157,8 @@ TEST_CASE("uri component validations") } //----------------------------------------------------------------------------------------- -#define testfuncs(var,x) (var.has_##x() == var.test(x) && var.get_##x() == var.get(x)) +#define testfuncs(var,x) (var.has_##x() == var.test(x) \ + && var.get_##x() == var.get_component() && var.get_##x() == var.get_component(x)) TEST_CASE("uri has/get") { @@ -139,6 +178,24 @@ TEST_CASE("uri has/get") } } +//----------------------------------------------------------------------------------------- +TEST_CASE("has_(special cases)") +{ + const uri u1{tests[0].first}; + REQUIRE(u1.has_any()); + REQUIRE(u1.has_any_authority()); + REQUIRE(!u1.has_any_userinfo()); + const uri u2{tests[3].first}; + REQUIRE(u2.has_any()); + REQUIRE(u2.has_any_authority()); + REQUIRE(u2.has_any_userinfo()); + const uri u3{tests[33].first}; + REQUIRE(!u3.has_any()); + REQUIRE(!u3); + REQUIRE(!u3.has_any_authority()); + REQUIRE(!u3.has_any_userinfo()); +} + //----------------------------------------------------------------------------------------- TEST_CASE("replace") { @@ -170,7 +227,7 @@ TEST_CASE("storage") //----------------------------------------------------------------------------------------- TEST_CASE("invalid uri") { - static constexpr const auto baduris + static constexpr auto baduris { std::to_array ({ @@ -199,6 +256,8 @@ TEST_CASE("limits") REQUIRE(u1.get_error() == uri::error::too_long); uri_static<> u2{buff}; // too long REQUIRE(u2.get_uri() == ""); + uri_static<64> u3{tests[35].first}; + REQUIRE(!u3); } //----------------------------------------------------------------------------------------- @@ -222,7 +281,7 @@ TEST_CASE("ports") //----------------------------------------------------------------------------------------- TEST_CASE("normalization") { - static constexpr const std::array uris + static constexpr std::array uris { std::to_array> ({ @@ -233,25 +292,48 @@ TEST_CASE("normalization") { "https://www.buyexample.com/.././.././"sv, "https://www.buyexample.com/"sv }, { "https://www.test.com"sv, "https://www.test.com/"sv }, { "https://www.nochange.com/"sv, "https://www.nochange.com/"sv }, - { "https://www.boost.org/doc/../index.html"sv, "https://www.boost.org/index.html"sv }, - { "http://www.boost.org:80/doc/../index.html"sv, "http://www.boost.org/index.html"sv }, - { "https://www.boost.org:443/doc/../index.html"sv, "https://www.boost.org/index.html"sv }, - { "https://www.boost.org:8080/doc/../index.html"sv, "https://www.boost.org:8080/index.html"sv }, - { "https://www.boost.org/doc/../%69%6e%64%65%78%20file.html"sv, "https://www.boost.org/index%20file.html"sv }, + { "https://www.hello.com/doc/../index.html"sv, "https://www.hello.com/index.html"sv }, + { "http://www.hello.com:80/doc/../index.html"sv, "http://www.hello.com/index.html"sv }, + { "https://www.hello.com:443/doc/../index.html"sv, "https://www.hello.com/index.html"sv }, + { "https://www.hello.com:8080/doc/../index.html"sv, "https://www.hello.com:8080/index.html"sv }, + { "https://www.hello.com/doc/../%69%6e%64%65%78%20file.html"sv, "https://www.hello.com/index%20file.html"sv }, }) }; - for (const auto& [before, after] : uris) + for (const auto [before, after] : uris) { if (before != after) REQUIRE(basic_uri(before) != basic_uri(after)); - REQUIRE(uri::normalize_http(before) == uri(after)); + REQUIRE(uri(uri::normalize_http_str(before)) == uri(after)); + uri u1{before}; + REQUIRE(u1.normalize_http() == before); + REQUIRE(u1.get_uri() == after); + } +} + +//----------------------------------------------------------------------------------------- +TEST_CASE("normalization_http") +{ + static constexpr std::array uris + { + "https://www.test.com/"sv, // all should normalize to this one + "https://www.test.com"sv, + "https://www.test.com:/"sv, + "https://www.test.com:443/"sv, + }; + for (const auto control{uris[0]}; const auto pp : uris) + { + uri u1{pp}; + u1.normalize_http(); + REQUIRE(u1.get_uri() == control); // basic_uri equivalence operator + uri u2{pp}, u3{control}; + REQUIRE(u2 % u3); // uri normalize_http equivalence operator } } //----------------------------------------------------------------------------------------- TEST_CASE("print") { - static constexpr const auto str + static constexpr auto str { R"(uri http://nodejs.org:89/docs/latest/api/foo/bar/qua/13949281/0f28b/5d49/b3020/url.html?payload1=true&payload2=false&test=1&benchmark=3&foo=38.38.011.293&bar=1234834910480&test=19299&3992&key=f5c65e1e98fe07e648249ad41e1cfdb0#test scheme http @@ -292,17 +374,14 @@ fragment test //----------------------------------------------------------------------------------------- TEST_CASE("decode hex") { - static constexpr const auto uris + static constexpr std::array uris { - std::to_array - ({ - { "https://www.netmeister.org/%62%6C%6F%67/%75%72%6C%73.%68%74%6D%6C?!@#$%25=+_)(*&^#top%3C" }, - { "https://www.netmeister.org/blog/urls.html?!@#$%=+_)(*&^#top<" }, - { "https://www.netmeister.org/path#top%3" }, - { "https://www.netmeister.org/%%62" }, - { "https://www.netmeister.org/blog/urls.html?!@#$%=+_)(*&^#top<" }, - { "https://www.netmeister.org/%62%6c%6f%67/%75%72%6c%73.%68%74%6d%6c?!@#$%25=+_)(*&^#top%3C" }, - }) + "https://www.netmeister.org/%62%6C%6F%67/%75%72%6C%73.%68%74%6D%6C?!@#$%25=+_)(*&^#top%3C"sv, + "https://www.netmeister.org/blog/urls.html?!@#$%=+_)(*&^#top<"sv, + "https://www.netmeister.org/path#top%3"sv, + "https://www.netmeister.org/%%62"sv, + "https://www.netmeister.org/blog/urls.html?!@#$%=+_)(*&^#top<"sv, + "https://www.netmeister.org/%62%6c%6f%67/%75%72%6c%73.%68%74%6d%6c?!@#$%25=+_)(*&^#top%3C"sv, }; REQUIRE(uri::has_hex(uris[0])); diff --git a/examples/uritest.cpp b/examples/uritest.cpp index f296dad..b294654 100644 --- a/examples/uritest.cpp +++ b/examples/uritest.cpp @@ -52,7 +52,7 @@ using namespace std::literals::string_view_literals; int main(int argc, char *argv[]) { static constexpr const char *optstr{"t:T:d:hlasxf:"}; - static constexpr const auto long_options + static constexpr auto long_options { std::to_array