%% Extracts the channel title and per-episode data (title, link, publication
%% date, enclosure URL) from the RSS files found underneath a directory.
-module(xml2).
-export([main/1]).
-include_lib("xmerl/include/xmerl.hrl").

%% Finds all RSS files underneath directory D (recursively) and parses each
%% of them. Returns one result list per file, as produced by parse/1.
parseAll(D) ->
    %% The backslash must be doubled: in an Erlang string "\." is just ".",
    %% which would turn the pattern into ".+.rss$" and match e.g. "notrss".
    Files = filelib:fold_files(D, ".+\\.rss$", true,
                               fun(F, Acc) -> [F | Acc] end, []),
    [parse(F) || F <- Files].

%% Parses a single RSS file and returns the extracted entries in document
%% order: {channel, Title} tuples and one list of tagged tuples per item.
%% Crashes with badmatch if xmerl cannot scan the file.
parse(FName) ->
    %% Match on the record so that an {error, Reason} result fails right here
    %% instead of being passed on to extract/2.
    {#xmlElement{} = Root, _Rest} = xmerl_scan:file(FName),
    Entries = lists:reverse(extract(Root, [])),
    %% Print the first three entries (normally the channel title and the
    %% first two episodes). lists:sublist/2 tolerates shorter lists, whereas
    %% lists:split/2 raises badarg on them.
    io:format("~n>> ~p~n", [lists:sublist(Entries, 3)]),
    Entries.

%% extract(Node, Acc) folds one xmerl node into the accumulator Acc.
%% It is used as the fold function over element content, so it has to accept
%% every node type that can appear there.

%% 'enclosure' directly below an 'item': record its 'url' attribute.
%% An enclosure without a 'url' attribute is skipped.
extract(#xmlElement{name = enclosure, parents = [{item, _} | _],
                    attributes = Attrs}, Acc) ->
    case lists:keyfind(url, #xmlAttribute.name, Attrs) of
        #xmlAttribute{value = Url} -> [{url, Url} | Acc];
        false -> Acc
    end;

%% 'enclosure' anywhere else is ignored.
extract(#xmlElement{name = enclosure}, Acc) ->
    Acc;

%% 'item': collect the data of this episode into its own list. The inner
%% list is built by prepending and is not reversed, so its tuples appear in
%% reverse document order.
extract(#xmlElement{name = item, content = Content}, Acc) ->
    [lists:foldl(fun extract/2, [], Content) | Acc];

%% Any other element (including 'channel'): simply iterate over the children.
extract(#xmlElement{content = Content}, Acc) ->
    lists:foldl(fun extract/2, Acc, Content);

%% Channel/audiocast title. The three-element parents list restricts this to
%% a title directly below a channel that is a child of the root element; the
%% position of the channel among its siblings does not matter.
extract(#xmlText{parents = [{title, _}, {channel, _}, _], value = V}, Acc) ->
    [{channel, V} | Acc];

%% Episode title.
extract(#xmlText{parents = [{title, _}, {item, _}, _, _], value = V}, Acc) ->
    [{title, V} | Acc];

%% Episode link.
extract(#xmlText{parents = [{link, _}, {item, _}, _, _], value = V}, Acc) ->
    [{link, V} | Acc];

%% Episode publication date ('pubDate' tag).
extract(#xmlText{parents = [{pubDate, _}, {item, _}, _, _], value = V}, Acc) ->
    [{pubDate, V} | Acc];

%% Episode publication date ('dc:date' tag), stored under the same key.
extract(#xmlText{parents = [{'dc:date', _}, {item, _}, _, _], value = V}, Acc) ->
    [{pubDate, V} | Acc];

%% Ignore any other text data.
extract(#xmlText{}, Acc) ->
    Acc;

%% Comments and processing instructions can also appear in element content;
%% they carry no episode data.
extract(#xmlComment{}, Acc) ->
    Acc;
extract(#xmlPI{}, Acc) ->
    Acc.

%% 'main' function (invoked from the shell, receives command line arguments).
%% The first argument is the directory to scan; it may be an atom (as passed
%% by `erl -s`) or a string (as passed by `erl -run`).
-spec main([atom() | string(), ...]) -> [[term()]].
main([Arg | _]) ->
    parseAll(to_path(Arg)).

%% Converts a command line argument to a path string.
to_path(Arg) when is_atom(Arg) -> atom_to_list(Arg);
to_path(Arg) when is_list(Arg) -> Arg.