I've fubar'd up my code, please help

Here's how I would do it.

# A heavily annotated version of this script follows below.

module EachWindow
  def each_window(n, &block)
    if self.length < n
      raise "Length must be greater than window size"
    end

    if block_given?
      (0..self.size-n).each do |ix|
        yield self.slice(ix, ix + n)
      end
    else
      to_enum(__method__, n) do
        (self.size - n + 1)
      end
    end
  end
end

Array.include(EachWindow)

def sliceify(words, n)

  words.
    reject { |word| word.length < n }.
    map    { |word| word.
             chars.
             each_window(n).
             each_with_object(word).
             map { |slice, word|
               [slice.join(""), word]
             }}.
    flatten(1).
    sort_by { |(slice, word)| slice.downcase }
end

in_filename  = ARGV[1] || "hello_world.in"
out_filename = ARGV[2] || "hello_world.out"

begin
  infile = File.open(in_filename, "r")
  outfile = File.open(out_filename, "w+")

  words = infile.each_line.map(&:chomp)

  sliceify(words, 4).each do |(slice, word)|
    outfile.puts "#{slice} #{word}"
  end
ensure
  infile.close
  outfile.close
end

# -------------------------------------------------------------------------
# First, let's implement the idea of 'sliding' a window across a sequence 
# of elements. The Ruby way is to ask 'what methods does this object respond
# to' rather than 'what is the Class of this object'- therefore, we implement
# this as a Module which can be included into any Object which responds to
# the #each, #size, and #slice method calls.
#
# You have to trust that this isn't too much like the relevant XKCD (974)- it's
# more robust to implement this as a standalone idea. The reason? Because
# we're not relying on assumptions about *what the object is*- we're relying on 
# assumptions about *how the object behaves*.

module EachWindow

  # This method is an 'iterator-style' method, which can be chained
  # with other iterators like #each, #inject, and #map.
  #
  # Its first argument is the size of the window we would like to
  # iterate over. It yields each 'window' to the block it's given.
  def each_window(n, &block)

    # If we tried to chop a string of length 4 out of, say, "Foo",
    # we would get non-sensical results.  So we implement a guard
    # to prevent Weird Shit like that from happening.
    if self.length < n
      raise "Length must be greater than window size"
    end

    if block_given?

      # Here's the meat of the method. Suppose we're sliding a window
      # of length 2 over this Array:
      #
      # index:      0    1    2
      # array:    ['a', 'b', 'c'] (size is 3)
      # slice 1:  ['a', 'b']
      # slice 2:  [     'b', 'c']
      #
      # The size of the array itself is 3, and the size of our window
      # is 2. Simple reasoning shows that we can only *start* our slices 
      # at the indices 0 and 1.

      (0..self.size-n).each do |ix|
        # At each point, we extract the "slice" of the Array and yield it
        # to the block provided by the calling code.
        yield self.slice(ix, ix + n)
      end

    else
      # On the other hand, if we weren't given a block to yield our slices
      # to, we use the Kernel#to_enum method to create an Enumerator object
      # which we can chain other enumerator-style methods onto. We'll see
      # this in action later.
      to_enum(__method__, n) do
        (self.size - n + 1)
      end
    end
  end
end

# Because Array includes #each, #size, and #slice, we can "mix in" the module
# defined above, which means that any instance of Array will get a copy of the
# #each_window method. Score!

Array.include(EachWindow)

# Now, we implement the actual algorithm described in the problem.  It takes
# an array and a window size. Again, we don't want to abstract *too* much, but
# we have an invariant that we want to remove words which are shorter than
# our window size.
def sliceify(words, n)

  # First, we take the words we were given. For the sake of example, let's
  # use the array ['pooba', 'baz', 'quux'], with a window size of 4.
  words.

    # Now, we filter out words less than 4 characters long.  The return
    # value of #reject is ['pooba', 'quux'].
    reject { |word| word.length < n }.

    # Now, we are building a new array, whose elements represent each 
    # element of ['pooba', 'quux'] with a transformation applied.
    map    { |word| word.
             # The #chars method turns a String like 
             # "pooba" into an Array like ['p', 'o', 'o', 'b', 'a'].
             chars.

             # Our #each_window method has its chance to shine! It will yield 
             # the Arrays ['p', 'o', 'o', 'b'] and ['o', 'o', 'b', 'a']. But
             # we still need to get those into a dictionary-like format.
             each_window(n).

             # Next, we *compose* our Enumerator with the #each_with_object
             # method. Basically we want to iterate over each of the Arrays
             # above and do something with the word corresponding to it.
             each_with_object(word).

             # Finally, we want to build an Array out of the result of each
             # execution of this block. Within this block, the 'slice' parameter 
             # is being provided by #each_window, and the 'word' parameter is 
             # being provided by #each_with_object.
             map { |slice, word|

               # And now we wave the black-magic wand. In the first execution
               # of this block, 'slice' is the Array ['p', 'o', 'o', 'b'], and 
               # 'word' is the String "pooba". We #join the former back into a 
               # String, and stuff it into its own little pair, along with the 
               # word it was sliced out of.
               [slice.join(""), word]
             }}.

    # Whew! At this point, we have an Array of Arrays which looks like this:
    # [
    #   [
    #     ['poob', 'pooba'], 
    #     ['ooba', 'pooba']
    #   ],
    #   [
    #     ['quux', 'quux']
    #   ]
    # ]
    #
    # We need to get all those two-element arrays of String back into one
    # 'unified' Array. Because this structure is self-similar in a way, we
    # can #flatten one layer of it and get what we want:
    # [
    #   ['poob', 'pooba'], 
    #   ['ooba', 'pooba']
    #   ['quux', 'quux']
    # ]
    flatten(1).

    # Finally, we take each of these Arrays and sort them, using their
    # first element as the key.
    sort_by do |(slice, word)| 

             # One final bugfix: apparently, upper-case characters are 
             # sorted before lower-case characters: "Zot" will be sorted
             # before "alpha". So we make sure to sort these strings based
             # on their lower-case versions.
             #
             # One learns something new every day.
             slice.downcase
            end
    # So now, we have the following array:
    # [
    #   ['ooba', 'pooba']
    #   ['poob', 'pooba'], 
    #   ['quux', 'quux']
    # ]
    #
    # Which is what we wanted all along.
end

# So now, on to the actual usage of all this! Let's pull the filename
# out of our script's arguments, with a fallback.
in_filename  = ARGV[1] || "hello_world.in"
out_filename = ARGV[2] || "hello_world.out"

# We use the begin..ensure block to make sure we're being good stewards
# and closing our files, even if an Exception gets thrown somewhere.
begin

  # Good practice: only open your files with the flags you actually need.
  # We open our input file with a "read-only" flag and our output file with
  # the "write-only" flag.
  infile = File.open(in_filename, "r")
  outfile = File.open(out_filename, "w+")

  # We get each line of the input file, and map the #chomp method over it
  # to remove newline characters.
  words = infile.each_line.map(&:chomp)

  # And finally, we simply use our implementation of the algorithm above...
  slices = sliceify(words, 4)

  # ...and dump each line into our output file.  (We could have been really
  # clever and made #sliceify into an enumerator-style ethod like #each_window,
  # but I figure that's pushing things a little bit.)
  slices.each do |(slice, word)|
    outfile.puts "#{slice} #{word}"
  end
ensure

  # And close our files.
  infile.close
  outfile.close
end

# fin.
/r/ruby Thread