Module:PopulationFromWikidata

Documentation for this module may be created at Module:PopulationFromWikidata/doc

---------------- Wikidata identifiers used throughout the module --------------------

-- Properties found on the population statement and its qualifiers
local Pop_P     = "P1082"  -- population property
local Applies_P = "P518"   -- applies to part property
local Pointin_P = "P585"   -- point in time property
local DetMeth_P = "P459"   -- determination method property

-- Properties used when assembling a citation
local RefURL_P    = "P854"   -- reference URL
local RefTitle_P  = "P1476"  -- reference title
local RefPubIn_P  = "P1433"  -- reference published in property
local DatePub_P   = "P577"   -- date of publication property
local Publisher_P = "P123"   -- publisher property
local Retrieved_P = "P813"   -- retrieved property

-- Properties read from linked items
local Instof_P = "P31"    -- instance of property
local ShortN_P = "P1813"  -- short name property

-- Items
local CensusAU_I = "Q5058971"   -- Australian census item
local SAL_I      = "Q33112019"  -- state suburb item (includes SSC and SAL)
--local GCCSA_I  = "Q112762887" -- Greater Capital City Statistical Area item
local LGA_I      = "Q33127844"  -- Local Government Area item
local UCL_I      = "Q33127891"  -- Urban Centre or Locality
--local SA2_I    = "Q33128776"  -- SA2
local ILOC_I     = "Q112729549" -- Indigenous Location

-- Entity whose population is being rendered. It is assigned inside
-- p.ListForInfobox and read by GetRefsForClaim (for the edit-at-Wikidata link).
local item = nil

--------------- Function GetRefsForClaim to check, collate and format all the reference components----------------------

-- Return the value payload of the first snak filed under `pid` in a snak table
-- (reference snaks or claim qualifiers). Returns nil when the table, the
-- property or the datavalue is absent, so callers never index through nil.
local function refFirstSnakValue(snaks, pid)
	local list = snaks and snaks[pid]
	local snak = list and list[1]
	local datavalue = snak and snak.datavalue
	return datavalue and datavalue.value
end

-- Return the value payload of the first statement for `pid` on `entity`,
-- or nil when the entity, the statement or its datavalue is absent.
local function refFirstStatementValue(entity, pid)
	local statements = entity and entity.claims and entity.claims[pid]
	local mainsnak = statements and statements[1] and statements[1].mainsnak
	local datavalue = mainsnak and mainsnak.datavalue
	return datavalue and datavalue.value
end

-- Return the English label of the item referenced by an entity-id value, or ""
-- when the value or the label is missing. Only the label is fetched, so the
-- full entity does not have to be loaded.
local function refEnglishLabel(value)
	if value == nil or value.id == nil then
		return ""
	end
	return mw.wikibase.getLabelByLang(value.id, "en") or ""
end

-- Format a time value with the given language object, or return nil when the
-- value is missing.
local function refFormatDate(lang, format, value)
	if value == nil or value.time == nil then
		return nil
	end
	return lang:formatDate(format, value.time)
end

-- Build the <ref> tags for every reference attached to a population claim.
--
-- claim        : a population statement; its qualifiers supply the
--                determination method, applies-to-part and point in time.
-- defaulttitle : title used when a reference carries no title snak.
--
-- Returns the concatenated output of one 'ref' extension tag per reference
-- (each wrapping a {{cite web}} plus the {{EditAtWikidata}} pencil), or ""
-- when the claim has no references. Reads the file-level `item` for the
-- edit link.
local function GetRefsForClaim(claim, defaulttitle)
	local references = claim.references or {}
	if #references == 0 then
		return ""
	end

	local frame = mw.getCurrentFrame()
	local lang = mw.language.getContentLanguage()
	local qualifiers = claim.qualifiers

	-- Everything below depends only on the claim, not on the individual
	-- reference, so it is resolved once instead of once per reference.
	local appliespart = refEnglishLabel(refFirstSnakValue(qualifiers, Applies_P))			-- geography type label (used in title and ref name)
	local pointintime = refFormatDate(lang, 'Y', refFirstSnakValue(qualifiers, Pointin_P)) or ""	-- YYYY (used in ref name)

	local detmet = nil																			-- determination method item
	local detmetvalue = refFirstSnakValue(qualifiers, DetMeth_P)
	if detmetvalue ~= nil and detmetvalue.id ~= nil then
		detmet = mw.wikibase.getEntity(detmetvalue.id)
	end

	local instanceof = refFirstStatementValue(detmet, Instof_P)
	local iscensus = instanceof ~= nil and instanceof.id == CensusAU_I							-- census references take work/date from the determination method item

	local censuslabel = ""
	if detmet ~= nil and detmet.labels ~= nil and detmet.labels.en ~= nil then
		censuslabel = detmet.labels.en.value or ""
	end
	local censusdate = refFormatDate(lang, 'j F Y', refFirstStatementValue(detmet, DatePub_P))	-- nil when the item has no publication date

	-- The publisher is emitted as a wikilinked author (matching existing
	-- population references); it is omitted entirely when unknown so that no
	-- empty "[[]]" link is produced.
	local refauthor = nil
	local publisherlabel = refEnglishLabel(refFirstStatementValue(detmet, Publisher_P))
	if publisherlabel ~= "" then
		refauthor = "[["..publisherlabel.."]]"
	end

	-- Edit pencil appended to every reference of this claim
	local wdeditpencil = frame:expandTemplate{ title = 'EditAtWikidata', args = { qid = item.id, pid = claim.id, nbsp = 1 } }

	local refs = {}
	for _, refentry in ipairs(references) do													-- ipairs keeps the references in their stored order
		local snaks = refentry.snaks

		local refurl = refFirstSnakValue(snaks, RefURL_P) or ""								-- empty when the reference has no URL

		local titlevalue = refFirstSnakValue(snaks, RefTitle_P)
		local reftitle = (titlevalue and titlevalue.text) or defaulttitle or ""				-- fall back to the caller-supplied title

		local pubinlabel = refEnglishLabel(refFirstSnakValue(snaks, RefPubIn_P))
		local refwork = pubinlabel																-- non-census default
		local pubdate = refFormatDate(lang, 'j F Y', refFirstSnakValue(snaks, DatePub_P)) or ""

		if iscensus then
			-- work = determination method label + published-in label
			if censuslabel ~= "" and pubinlabel ~= "" then
				refwork = censuslabel.." "..pubinlabel
			else
				refwork = censuslabel..pubinlabel
			end
			pubdate = censusdate or pubdate
		end

		local refaccessdate = refFormatDate(lang, 'j F Y', refFirstSnakValue(snaks, Retrieved_P)) or ""

		local citetitle = reftitle
		if appliespart ~= "" then
			citetitle = reftitle.." ("..appliespart..")"
		end

		local citation = frame:expandTemplate{ title = 'cite web', args = {
			url = refurl,
			title = citetitle,
			date = pubdate,
			work = refwork,
			author = refauthor,
			accessdate = refaccessdate
		} }

		-- Tag attributes must be supplied through `args`; a positional table is
		-- ignored by frame:extensionTag and the reference would stay unnamed.
		refs[#refs + 1] = frame:extensionTag{
			name = 'ref',
			content = citation..wdeditpencil,
			args = { name = refwork.."_"..pointintime.."_"..appliespart.."_"..reftitle }
		}
	end
	return table.concat(refs)																	-- all references for this claim, ready to follow the population value
end

---------------This is a function for getting the population geography abbreviation and the Wikipedia article link for the population year  ---------------
-- Return the value payload of the first snak in `list` (a qualifier list or a
-- list holding one mainsnak), or nil when anything along the path is missing.
local function abbrFirstValue(list)
	local snak = list and list[1]
	if snak ~= nil and snak.mainsnak ~= nil then
		snak = snak.mainsnak																		-- statements wrap their snak in `mainsnak`
	end
	local datavalue = snak and snak.datavalue
	return datavalue and datavalue.value
end

-- Load the entity referenced by an entity-id value; nil when the value is
-- missing or the entity cannot be loaded.
local function abbrLoadEntity(value)
	if value == nil or value.id == nil then
		return nil
	end
	return mw.wikibase.getEntity(value.id)
end

-- Build the "<geography> <year>" fragment shown in brackets after a population.
--
-- returnclaim : a population statement with applies-to-part, point-in-time and
--               determination-method qualifiers.
--
-- The geography is the applies-to-part item's English label, or its short name
-- wrapped in {{Abbr}} (tooltip = full label) when a short name exists. The year
-- is linked to the determination method's enwiki article when there is one,
-- otherwise to the matching section of "Census in Australia" when the method is
-- an Australian census, otherwise left unlinked. Missing labels or items
-- degrade to a shorter string instead of raising a script error.
local function GetAbbrLabelYearLink(returnclaim)
	local qualifiers = returnclaim.qualifiers or {}

	local appliespartitem = abbrLoadEntity(abbrFirstValue(qualifiers[Applies_P]))
	local fulllabel = ""
	if appliespartitem ~= nil and appliespartitem.labels ~= nil and appliespartitem.labels.en ~= nil then
		fulllabel = appliespartitem.labels.en.value or ""
	end

	local abbrelabel = fulllabel																	-- fallback when no short name exists
	local shortname = appliespartitem and appliespartitem.claims and abbrFirstValue(appliespartitem.claims[ShortN_P])
	if shortname and shortname.text ~= nil then
		if fulllabel ~= "" then
			abbrelabel = mw.getCurrentFrame():expandTemplate{ title = 'Abbr', args = { shortname.text, fulllabel } }	-- abbreviation with the full label as tooltip
		else
			abbrelabel = shortname.text																-- nothing to show in a tooltip
		end
	end

	local timevalue = abbrFirstValue(qualifiers[Pointin_P])
	local year = ""
	if timevalue ~= nil and timevalue.time ~= nil then
		year = string.sub(timevalue.time, 2, 5)													-- characters 2-5 of the timestamp, i.e. the year after the sign
	end
	local yearreturn = year																			-- unlinked year unless a link target is found below

	local detmetitem = abbrLoadEntity(abbrFirstValue(qualifiers[DetMeth_P]))
	if detmetitem ~= nil then
		local instanceof = detmetitem.claims and abbrFirstValue(detmetitem.claims[Instof_P])
		if detmetitem.sitelinks ~= nil and detmetitem.sitelinks.enwiki ~= nil then
			yearreturn = "[["..detmetitem.sitelinks.enwiki.title.."|"..year.."]]"					-- link to the determination method's own article
		elseif instanceof and instanceof.id == CensusAU_I then
			yearreturn = "[[Census_in_Australia#"..year.."|"..year.."]]"							-- census without its own article: link to the year's section
		end
	end

	if abbrelabel == "" then
		return yearreturn																			-- avoid a leading space when no geography label is available
	end
	return abbrelabel.." "..yearreturn
end
	


---------------- This ListForInfobox function is being used to grab the correct population value ---------------

local p = {}

-- Return the value payload of the first qualifier snak for `pid` on a claim,
-- or nil when the qualifier or its datavalue is missing.
local function popQualifierValue(claim, pid)
	local list = claim.qualifiers and claim.qualifiers[pid]
	local datavalue = list and list[1] and list[1].datavalue
	return datavalue and datavalue.value
end

-- A population claim is usable when it has references, a numeric amount, and
-- point-in-time, applies-to-part and determination-method qualifiers that all
-- carry an actual value. Anything else is skipped so later code can index
-- into the claim without nil checks.
local function popIsUsable(claim)
	local geography = popQualifierValue(claim, Applies_P)
	local pointintime = popQualifierValue(claim, Pointin_P)
	local method = popQualifierValue(claim, DetMeth_P)
	local quantity = claim.mainsnak and claim.mainsnak.datavalue and claim.mainsnak.datavalue.value
	return claim.references ~= nil
		and geography ~= nil and geography.id ~= nil
		and pointintime ~= nil and pointintime.time ~= nil
		and method ~= nil and method.id ~= nil
		and quantity ~= nil and tonumber(quantity.amount) ~= nil
end

-- Format one output row: "population (geography year)" followed by its references.
local function popFormatRow(claim, defaulttitle)
	local refs = GetRefsForClaim(claim, defaulttitle)
	local amount = tonumber(claim.mainsnak.datavalue.value.amount)
	return mw.language.getContentLanguage():formatNum(amount).." ("..GetAbbrLabelYearLink(claim)..")"..refs
end

-- Entry point used by Infobox Australian place.
--
-- frame.args.type     : place type from the infobox ("town", "city", "suburb",
--                       "lga", "region"; case-insensitive). Required; without
--                       it the function returns "".
-- frame.args.wikidata : optional item ID; otherwise the item connected to the
--                       current page is used.
--
-- Returns wikitext with the most recent population per geography, each with
-- its references, followed by the tracking category. Returns "" when there is
-- nothing to show.
function p.ListForInfobox( frame )
	if frame.args.type == nil then
		return ""
	end
	local articleplacetype = string.lower(frame.args.type)

	-- Infobox place type -> equivalent ABS geography item.
	-- "region" is treated as LGA for now; the most appropriate geography is unclear and may be revised.
	local geographybytype = {
		town = UCL_I,
		city = UCL_I,
		suburb = SAL_I,
		lga = LGA_I,
		region = LGA_I,
	}
	local luaplacetype = geographybytype[articleplacetype] or ""

	if frame.args.wikidata ~= nil and frame.args.wikidata ~= "" then
		item = mw.wikibase.getEntity(frame.args.wikidata)						-- item named explicitly in the template
	else
		item = mw.wikibase.getEntity()											-- item connected to the current page, if any
	end

	--------------- CHECK: no item, or no population statements ----------------------
	if item == nil or item.claims == nil or item.claims[Pop_P] == nil then
		return ""
	end

	------------ PART 1: keep usable claims and, per geography, remember the one with the latest point in time -------------
	local latestbygeog = {}														-- geography item ID -> latest usable claim
	local geogorder = {}														-- geography IDs in first-seen order, so the output order is stable
	for _, claim in ipairs(item.claims[Pop_P]) do
		if popIsUsable(claim) then
			local geog = popQualifierValue(claim, Applies_P).id
			local claimdate = popQualifierValue(claim, Pointin_P).time
			local current = latestbygeog[geog]
			if current == nil then
				geogorder[#geogorder + 1] = geog
			end
			-- Timestamps are compared as strings; on a tie the later statement wins.
			if current == nil or claimdate >= popQualifierValue(current, Pointin_P).time then
				latestbygeog[geog] = claim
			end
		end
	end

	--------------- CHECK: nothing usable --------------
	if #geogorder == 0 then
		return ""
	end

	--------------- PART 2: choose which claims to output -------------------------
	local selected = {}
	if latestbygeog[luaplacetype] ~= nil then									-- Situation 1: a claim matches the infobox place type
		selected[1] = latestbygeog[luaplacetype]
	elseif articleplacetype == "town" then										-- Situation 2: town without a UCL population -> ILOC and SAL populations
		if latestbygeog[ILOC_I] ~= nil then
			selected[#selected + 1] = latestbygeog[ILOC_I]
		end
		if latestbygeog[SAL_I] ~= nil then
			selected[#selected + 1] = latestbygeog[SAL_I]
		end
	else																		-- Situation 3: no match and not a town -> every geography found
		for _, geog in ipairs(geogorder) do
			selected[#selected + 1] = latestbygeog[geog]
		end
	end

	-- Nothing selected (e.g. a town with only LGA figures): return an empty
	-- string rather than a stray bullet followed by the category.
	if #selected == 0 then
		return ""
	end

	------------------------------ PART 3: compile the module output --------------------------------
	-- Default reference title: the item's English label, or its ID when it has none.
	local defaulttitle = item.id
	if item.labels ~= nil and item.labels.en ~= nil and item.labels.en.value ~= nil then
		defaulttitle = item.labels.en.value
	end

	local returnlist = {}
	for index, claim in ipairs(selected) do
		returnlist[index] = popFormatRow(claim, defaulttitle)
	end

	local wikitext
	if #returnlist == 1 then
		wikitext = returnlist[1]												-- a single row is returned without a bullet point
	else
		wikitext = "\n*"..table.concat(returnlist, "\n*")						-- several rows become a bulleted list
	end
	return wikitext..'[[Category:Australian place articles using Wikidata population values]]'	-- category tracks articles using this module for population
end

return p