1 -module(xml2).
  2 -export([main/1]).
  3 -include_lib("xmerl/include/xmerl.hrl").
  4 
  5 parseAll(D) ->
  6     % find all RSS files underneath D
  7     FL = filelib:fold_files(D, ".+\.rss$", true, fun(F, L) -> [F|L] end, []),
  8     [ parse(F) || F <- FL ].
  9 
 10 parse(FName) ->
 11     % parses a single RSS file
 12     {R,_} = xmerl_scan:file(FName),
 13     % extract episode titles, publication dates and MP3 URLs
 14     L = lists:reverse(extract(R, [])),
 15     % print channel title and data for first two episodes
 16     io:format("~n>> ~p~n", [element(1,lists:split(3,L))]),
 17     L.
 18 
 19 % handle 'xmlElement' tags
 20 extract(R, L) when is_record(R, xmlElement) ->
 21     case R#xmlElement.name of
 22         enclosure ->
 23             if element(1, hd(R#xmlElement.parents)) == item ->
 24                     FFunc = fun(X) -> X#xmlAttribute.name == url end,
 25                     U = hd(lists:filter(FFunc, R#xmlElement.attributes)),
 26                     [ {url, U#xmlAttribute.value} | L ];
 27                 true -> L
 28             end;
 29         channel ->
 30             lists:foldl(fun extract/2, L, R#xmlElement.content);
 31         item ->
 32             ItemData = lists:foldl(fun extract/2, [], R#xmlElement.content),
 33             [ ItemData | L ];
 34         _ -> % for any other XML elements, simply iterate over children
 35             lists:foldl(fun extract/2, L, R#xmlElement.content)
 36     end;
 37 
 38 extract(#xmlText{parents=[{title,_},{channel,2},_], value=V}, L) ->
 39     [{channel, V}|L]; % extract channel/audiocast title
 40 
 41 extract(#xmlText{parents=[{title,_},{item,_},_,_], value=V}, L) ->
 42     [{title, V}|L]; % extract episode title
 43 
 44 extract(#xmlText{parents=[{link,_},{item,_},_,_], value=V}, L) ->
 45     [{link, V}|L]; % extract episode link
 46 
 47 extract(#xmlText{parents=[{pubDate,_},{item,_},_,_], value=V}, L) ->
 48     [{pubDate, V}|L]; % extract episode publication date ('pubDate' tag)
 49 
 50 extract(#xmlText{parents=[{'dc:date',_},{item,_},_,_], value=V}, L) ->
 51     [{pubDate, V}|L]; % extract episode publication date ('dc:date' tag)
 52 
 53 extract(#xmlText{}, L) -> L.  % ignore any other text data
 54 
 55 % 'main' function (invoked from shell, receives command line arguments)
 56 main(A) ->
 57     D = atom_to_list(hd(A)),
 58     parseAll(D).