% Entropy of a power-law token distribution as the vocabulary is widened.
%
% The script
%   1. defines a power-law distribution (exponent 1.6) over NTypes types and
%      computes its entropy HD (natural log, i.e. nats);
%   2. builds a shuffled corpus CC in which type n occurs round(NN*PD(n))
%      times;
%   3. for every n = 2..NTypes, computes the entropy HLL(n) of the corpus
%      tokens whose type is <= n, and the number of distinct types seen,
%      Sizes(n);
%   4. plots every 20th value of HLL.
%
% Workspace outputs: NTypes, nums, D, PD, HD, NN, ND, C, CC, CCm, HLL,
% Sizes, HLLn.

%% Theoretical distribution
NTypes = 400;                       % number of distinct types
nums   = 1:NTypes;                  % type labels 1..NTypes
D      = (1 ./ nums) .^ 1.6;        % unnormalised power-law weights
PD     = D / sum(D);                % normalised probabilities
HD     = -sum(PD .* log(PD));       % entropy of the theoretical distribution

%% Build the shuffled corpus
NN = 100000;                        % target corpus size
ND = round(NN * PD);                % integer token count for each type

% Rounding means sum(ND) is not guaranteed to equal NN. Size the corpus from
% the counts actually generated so that it contains neither unfilled (zero)
% entries nor tokens that the shuffle can never reach.
NTokens = sum(ND);
C   = zeros(NTokens, 1);
loc = 1;
for n = 1:NTypes
    C(loc:loc + ND(n) - 1) = n;     % one contiguous run per type
    loc = loc + ND(n);
end
CC  = C(randperm(NTokens));         % random token order
CCm = CC(1:(NTokens - 1));          % corpus without its final token

% NOTE(review): dropping the last token ("to avoid running off the end")
% only matters when indexing the *following* token, e.g. CC(find(...)+1).
% The analysis below uses the tokens themselves, as the original code did;
% confirm which of the two was intended.

%% Entropy restricted to the first n types
% Tokens with type <= n are exactly the first n entries of the global
% per-type count vector, so count once instead of re-histogramming the
% whole corpus on every iteration.
typeCounts = accumarray(CCm, 1, [NTypes 1]);

HLL   = zeros(NTypes, 1);
Sizes = zeros(NTypes, 1);
for n = 2:NTypes
    DD = typeCounts(1:n);
    DD = DD(DD > 0);                % skip empty types so log() stays finite
    Sizes(n) = length(DD);          % distinct types actually present
    PDn = DD / sum(DD);             % empirical probabilities (PD is left intact)
    HLL(n) = -sum(PDn .* log(PDn));
end
HLLn = HLL(20:20:NTypes);           % every 20th value, for plotting

%% Plot
plot(20:20:NTypes, HLLn, '-or')
ylim([0 6]);
ylabel('Conditional Entropy');
xlabel('Number of Tokens');