Skip to content

Commit

Permalink
fix github projects recipe
Browse files Browse the repository at this point in the history
  • Loading branch information
dmitry committed Oct 16, 2014
1 parent 2292694 commit b6b8095
Showing 1 changed file with 24 additions and 20 deletions.
44 changes: 24 additions & 20 deletions recipes/github_projects.rb
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
# encoding: utf-8

# Instructions:
# The README pages for this recipe are scraped from
# https://github.com/danchoi/kindlefodder/wiki/Github-READMEs-and-pages-for-the-GitHub-project-docs-recipe
# You can edit that wiki page to add more popular repositories that you would
# like to have scraped and generated into mobi format.

require 'shellwords'

require 'kindlefodder'

class GithubProjects < Kindlefodder

# URL of the wiki page that get_source_files downloads; its "#wiki-body h2"
# headings and the list items following each heading supply the section
# titles and page URLs.
WIKIPAGE = 'https://github.com/danchoi/kindlefodder/wiki/Github-READMEs-for-the-github_projects.rb-recipe'.freeze

def get_source_files
@urls = Nokogiri::HTML(`curl -Ls "#{WIKIPAGE}"`).search("#wiki-body h2").inject({}) {|m, h2|
m[h2.inner_text] = h2.xpath("./following-sibling::ul[1]/li").map {|li| li.inner_text}
m
}
@urls = Nokogiri::HTML(`curl -Ls "#{WIKIPAGE}"`).
search("#wiki-body h2").
inject({}) do |m, h2|
m[h2.inner_text] = h2.xpath("./following-sibling::ul[1]/li").map { |li| li.inner_text }
m
end
puts @urls.to_yaml
sections = extract_sections
puts sections.inspect
Expand All @@ -31,33 +34,34 @@ def document
end

# Builds the section list from @urls, a Hash of section title => Array of
# page URLs.
#
# Each URL is fetched with curl through run_shell_command and parsed with
# Nokogiri. The article title is the first space-delimited token of the page's
# <title>, and the '#readme' node (or '#wiki-wrapper' when there is no
# '#readme') is written out via save_article_and_return_path.
#
# Returns an Array of hashes shaped like
#   { title: section_title, articles: [{ title: ..., path: ... }, ...] }
def extract_sections
  @urls.map do |(section_title, urls)|
    articles = urls.map do |url|
      # The URLs come from a wiki page that anyone can edit, so escape them
      # before interpolating into a shell command line.
      html = run_shell_command("curl -s #{Shellwords.escape(url.strip)}")
      html = html.force_encoding(Encoding::UTF_8)
      doc = Nokogiri::HTML(html)
      # Use a distinct local so the section title from the outer block is not
      # overwritten by the article title.
      article_title = doc.at('title').inner_text.match(/([^ ]+)/)[0]
      $stderr.puts article_title
      readme = doc.at('#readme') || doc.at('#wiki-wrapper')
      {
        title: article_title,
        path: save_article_and_return_path(readme, article_title)
      }
    end

    { title: section_title, articles: articles }
  end
end

# Deliberate no-op: accepts the parsed document and leaves it untouched.
# NOTE(review): presumably this overrides a fixup_html! hook inherited from
# Kindlefodder — confirm against the base class.
def fixup_html!(doc)
  # stub this out because it causes encoding issues with UTF characters like em-dash
  # (investigate this later)
end

# Writes the inner HTML of +readme+ to a file under output_dir and returns the
# file's path relative to output_dir.
#
# readme - a node responding to inner_html.
# title  - String used to build the file name; every non-word character is
#          replaced with '-'.
#
# Returns the relative path String, e.g. "articles/user-repo".
# The "articles" directory is not created here, so it must already exist
# under output_dir.
def save_article_and_return_path(readme, title)
  path = "articles/#{title.gsub(/\W/, '-')}"
  File.open("#{output_dir}/#{path}", 'w') { |f| f.puts(readme.inner_html) }
  path
end
end
Expand Down

0 comments on commit b6b8095

Please sign in to comment.