%% tbray15 -- another Erlang solution to Tim Bray's Wide Finder project
%% Author: Steve Vinoski (http://steve.vinoski.net/), 18 October 2007.
%% NOTE(review): the original header ended with a "See <link>." reference;
%% the link itself is missing from this copy of the file.
%%
%% The input file is read into one binary, cut into newline-terminated
%% blocks, and each block is handed to wfbm3:find/3 in its own process.
%% Per-block match counts are merged and the ten largest are printed.

-module(tbray15).
-export([start/1, start/2, main/1]).

%% The original file also carried `-import(wfbm3).`.  That single-argument
%% form is not a valid import declaration on current OTP releases, and every
%% call below is already module-qualified, so it has been dropped.

%% Kept from the original.  NOTE(review): `native` requests HiPE compilation,
%% which recent OTP releases no longer ship -- confirm whether it should stay.
-compile([native]).

%% receive_matches(Pid, Dict)
%% Counts every message received (the message itself is the dict key) until
%% the atom `done` arrives, then sends the accumulated dict to Pid.
receive_matches(Pid, Dict) ->
    receive
        done ->
            Pid ! Dict;
        Match ->
            receive_matches(Pid, dict:update_counter(Match, 1, Dict))
    end.

%% split_and_find(Bin, Blksize, Pid, Tbl) -> {Receiver, Tail}
%% Cuts one block off the front of Bin, spawns a counting process that will
%% report to Pid, spawns a wfbm3:find/3 worker for the block, and returns the
%% counting process together with the unprocessed remainder of Bin.
%% NOTE(review): wfbm3:find/3 is defined elsewhere; it presumably sends each
%% match and finally `done` to the receiver -- confirm against wfbm3.
split_and_find(Bin, Blksize, Pid, Tbl) ->
    {Front, Tail} = split_block(Bin, Blksize),
    Rcv = spawn(fun() -> receive_matches(Pid, dict:new()) end),
    spawn(fun() -> wfbm3:find(Front, Tbl, Rcv) end),
    {Rcv, Tail}.

%% split_block(Bin, Blksize) -> {Front, Tail}
%% Splits Bin at the last newline found within its first Blksize bytes.  The
%% newline itself belongs to neither part.
split_block(Bin, Blksize) ->
    %% A prefix longer than Bin can never match, so the backwards search
    %% starts no later than the last byte of Bin.
    split_block_at(Bin, min(Blksize, byte_size(Bin)) - 1).

%% split_block_at(Bin, Size) -> {Front, Tail}
%% Tries a newline at offset Size, then Size - 1, and so on.
split_block_at(Bin, Size) when Size < 0 ->
    %% No newline inside the requested block (an over-long line, a final line
    %% without a trailing newline, or a non-positive block size).  The
    %% original recursed forever here; instead split at the first newline in
    %% Bin, or take all of Bin when it contains none.
    case binary:split(Bin, <<"\n">>) of
        [Front, Tail] -> {Front, Tail};
        [_] -> {Bin, <<>>}
    end;
split_block_at(Bin, Size) ->
    case Bin of
        %% NOTE(review): this pattern was garbled to `<>` in the source and
        %% has been reconstructed from the variables the clause body uses
        %% (Front, Tail, Size) -- confirm against the original file.
        <<Front:Size/binary, $\n, Tail/binary>> ->
            {Front, Tail};
        _ ->
            split_block_at(Bin, Size - 1)
    end.

%% receive_tbls(L, Start_table) -> dict
%% Receives one message per element of L, treats each message as a dict, and
%% merges it into the running table by adding the values of equal keys.  The
%% elements of L are only counted; messages are taken in arrival order.
receive_tbls(L, Start_table) ->
    Sum = fun(_, V1, V2) -> V1 + V2 end,
    lists:foldl(
        fun(_, Table2) ->
            receive
                Table1 -> dict:merge(Sum, Table1, Table2)
            end
        end,
        Start_table,
        L).

%% receive_tbls(L) -> dict
%% Same as receive_tbls/2, starting from an empty dict.
receive_tbls(L) ->
    receive_tbls(L, dict:new()).

%% scan_file(Bin, Blksize, Tbl, Me, Receivers) -> Receivers
%% Repeatedly splits blocks off Bin until it is empty, collecting the
%% counting process created for each block (most recent first).
scan_file(<<>>, _, _, _, Receivers) ->
    Receivers;
scan_file(Bin, Blksize, Tbl, Me, Receivers) ->
    {Rcv, Next} = split_and_find(Bin, Blksize, Me, Tbl),
    scan_file(Next, Blksize, Tbl, Me, [Rcv | Receivers]).

%% scan_file(Bin, Blksize, Tbl) -> Receivers
%% Entry point: results are reported back to the calling process.
scan_file(Bin, Blksize, Tbl) ->
    scan_file(Bin, Blksize, Tbl, self(), []).

%% top_ten(D) -> [{Key, Count}]
%% Returns at most ten entries of dict D, ordered by descending count.
top_ten(D) ->
    Sorted = lists:sort(fun({_, V1}, {_, V2}) -> V1 > V2 end, dict:to_list(D)),
    lists:sublist(Sorted, 10).

%% start(File, Blksize)
%% Loads File through the bfile driver, scans it in blocks of roughly Blksize
%% bytes, and prints each of the top ten entries as "Count: Key".
%% Returns the list of io:format/2 results, as the original did.
start(File, Blksize) ->
    Tbl = wfbm3:init(),
    bfile:load_driver(),
    {ok, F} = bfile:fopen(File, "r"),
    {ok, Bin} = bfile:fread(F, filelib:file_size(File)),
    Receivers = scan_file(Bin, Blksize, Tbl),
    bfile:fclose(F),
    L = top_ten(receive_tbls(Receivers)),
    lists:map(fun({K, V}) -> io:format("~p: ~s~n", [V, K]) end, L).

%% start(File)
%% Same as start/2 with a block size of the file size divided by
%% (number of schedulers * 32).
start(File) ->
    Blksize = filelib:file_size(File) div (erlang:system_info(schedulers) * 32),
    start(File, Blksize).

%% main(Args)
%% Command-line entry point: Args is [File] or [File, Blksize], with Blksize
%% given as a string.  Halts the emulator when done.
main([F, Blksize]) ->
    start(F, list_to_integer(Blksize)),
    halt();
main([F]) ->
    start(F),
    halt().