#!/usr/bin/lua

--- Replication of [[Template:Internet Archive author]] on en.Wikipedia.org
---
--- Note: Function names have been changed from the Wikipedia module:
---   p.name()       = name()
---   mwname.name.() = name.name()
---   text.split     = split
---
--- Arguments:
---   arg[1] = Temp directory (the result is written to arg[1] .. "tmpO")
---   arg[2] = Name
---   arg[3] = Birth-Death (years only), or "none"
---   arg[4] = Sopt (optional)
---
---   eg. lua script "/home/dir/" "John Smith" "1900-2000" "t"
---   eg. lua script "/home/dir/" "John Smith" "none" "t"

local USAGE = "usage: lua script <tempdir> <name> <birth-death|none> [sopt]"

-- Ordered { from, to } replacement rules used by ia_deaccent().
-- The rules are applied one after another, so order matters only for the two
-- multi-character sequences: "i<FE20>a<FE21>" must run BEFORE "a<FE21>",
-- otherwise the shorter rule eats the tail of the longer one and the left-half
-- mark is left behind in the output.
-- The combining marks are written as UTF-8 byte escapes so that they survive
-- editors that normalise or hide them:
--   \239\184\160 = U+FE20 COMBINING LIGATURE LEFT HALF
--   \239\184\161 = U+FE21 COMBINING LIGATURE RIGHT HALF
-- NOTE(review): a few mappings look odd ("ł" -> "i", "ĺ" -> "I", "Ø" -> "o",
-- "Þ" -> "p", "β" -> "B", "γ" -> "Y", "ð" -> "e"). They are kept exactly as
-- they were; confirm against the Wikipedia module before changing any of them.
local DEACCENT_RULES = {
    { "i\239\184\160a\239\184\161", "ia" },

    { "á", "a" }, { "a\239\184\161", "a" }, { "Á", "A" }, { "ă", "a" },
    { "â", "a" }, { "æ", "ae" }, { "Æ", "AE" }, { "à", "a" }, { "ā", "a" },
    { "Ā", "A" }, { "ą", "a" }, { "å", "a" }, { "Å", "A" }, { "ã", "a" },
    { "ä", "a" }, { "Ä", "A" },

    { "β", "B" },

    { "ć", "c" }, { "č", "c" }, { "Č", "C" }, { "ç", "c" }, { "Ç", "C" },
    { "ĉ", "c" },

    { "ď", "d" }, { "đ", "d" },

    { "é", "e" }, { "É", "E" }, { "ě", "e" }, { "ê", "e" }, { "è", "e" },
    { "È", "E" }, { "ε", "e" }, { "ē", "e" }, { "Ē", "E" }, { "ę", "e" },
    { "ð", "e" }, { "ë", "e" }, { "Ë", "E" },

    { "γ", "Y" },

    { "ħ", "h" },

    { "í", "i" }, { "Í", "I" }, { "ĭ", "i" }, { "î", "i" }, { "Î", "I" },
    { "ì", "i" }, { "ī", "i" }, { "ł", "i" }, { "ï", "i" }, { "Ï", "I" },

    { "ĺ", "I" }, { "Ĺ", "L" },

    { "μ", "u" }, { "µ", "u" },

    { "ń", "n" }, { "ň", "n" }, { "ņ", "n" }, { "ñ", "n" }, { "Ñ", "N" },

    { "ó", "o" }, { "Ó", "O" }, { "ô", "o" }, { "œ", "oe" }, { "ò", "o" },
    { "ō", "o" }, { "ø", "o" }, { "Ø", "o" }, { "õ", "o" }, { "ö", "o" },
    { "ő", "o" }, { "Ö", "O" }, { "φ", "o" },

    { "ŕ", "r" }, { "ř", "r" }, { "Ř", "R" },

    { "ß", "ss" }, { "ś", "s" }, { "Ś", "S" }, { "š", "s" }, { "ṣ", "s" },
    { "Š", "S" }, { "ş", "s" }, { "Ş", "S" }, { "ŝ", "s" }, { "σ", "s" },

    { "ť", "t" }, { "ţ", "t" }, { "τ", "t" },

    { "þ", "p" }, { "Þ", "p" },

    { "ú", "u" }, { "Ú", "U" }, { "û", "u" }, { "ù", "u" }, { "ū", "u" },
    { "ů", "u" }, { "ü", "u" }, { "Ü", "U" },

    { "ŵ", "w" },

    { "ý", "y" }, { "ŷ", "y" }, { "¥", "y" }, { "ÿ", "y" }, { "Ÿ", "Y" },

    { "ź", "z" }, { "Ž", "Z" }, { "ž", "z" }, { "ż", "z" }, { "Ż", "Z" },
}

--- Replace accented letters with non-accented equivalent letters.
-- @param str string to transliterate
-- @return the string with every rule in DEACCENT_RULES applied, in order
function ia_deaccent(str)
    local s = str
    for _, rule in ipairs(DEACCENT_RULES) do
        -- gsub returns (string, count); the assignment keeps only the string.
        s = string.gsub(s, rule[1], rule[2])
    end
    return s
end

--- Split a string on a delimiter.
-- @param s string to split
-- @param delimiter separator; it is inserted into a Lua pattern verbatim, so
--        pattern-magic characters keep their pattern meaning
-- @return array of the pieces (empty pieces are kept)
function split(s, delimiter)
    local result = {} -- was an accidental global
    for match in (s .. delimiter):gmatch("(.-)" .. delimiter) do
        result[#result + 1] = match
    end
    return result
end

--- Does str contain extended ascii?
-- An apostrophe (byte 39) also counts, as does any byte outside 32..126.
-- @return 1 = yes, 0 = no
function ia_extendedascii(str)
    for i = 1, #str do
        local b = str:byte(i)
        if b < 32 or b > 126 or b == 39 then -- 39 = '
            return 1
        end
    end
    return 0
end

--- URL-encode special characters
--- Note: this function was added later to deal with "&" characters instead of
--- using p.ia_url_encode since that may break existing instances of the
--- template.
-- @param str string or nil
-- @return str with every "&" replaced by "%26"; nil is passed through
function urlX(str)
    if str then
        str = string.gsub(str, "&", "%%26")
    end
    return str
end

--- URL-encode a string
--- http://lua-users.org/wiki/StringRecipes
-- Newlines become CR LF, every byte other than alphanumerics, space and
-- "-_.~" is percent-encoded, then spaces become "+".
-- @param str string or nil
-- @return the encoded string; nil is passed through
function ia_url_encode(str)
    if str then
        str = string.gsub(str, "\n", "\r\n")
        str = string.gsub(str, "([^%w %-%_%.%~])", function(c)
            return string.format("%%%02X", string.byte(c))
        end)
        str = string.gsub(str, " ", "+")
    end
    return str
end

--- Lucene returns too many false positives if first or last character in a
--- word is "*" wildcard ie. accented letter. Build special search in these
--- cases.
-- @param N array of the words of the name
-- @param count number of words in N
-- @return query fragment: ((Fïrst OR First) AND (Lást OR Last))
function wildcrazyfix(N, count)
    local first, last
    if count == 1 then
        -- Use the full name for 1-word names
        first, last = N[1], N[1]
    else
        -- Split along "-" and use only first word
        -- ie. John-Taylor-Smith becomes John
        first = split(N[1], "-")[1]
        last = split(N[count], "-")[1]
    end

    return "%28%28%22" .. first .. "%22%20OR%20%22" .. ia_deaccent(first)
        .. "%22%29%20AND%20%28%22" .. last .. "%22%20OR%20%22"
        .. ia_deaccent(last) .. "%22%29"
end

--- Return 1 if the first or last character in any word within name N is
--- extended ascii (here: a byte outside 32..126), otherwise 0.
-- @param N array of the words of the name
-- @param count number of words in N
function wildcrazycheck(N, count)
    for i = 1, count do
        local word = N[i]
        local firstl = word:byte(1)
        local lastl = word:byte(-1)
        -- byte() returns nothing for an empty word; skip it instead of
        -- raising on a nil comparison.
        if firstl ~= nil then
            if firstl < 32 or firstl > 126 then
                return 1
            end
            if lastl < 32 or lastl > 126 then
                return 1
            end
        end
    end
    return 0
end

--- Replace all extended ascii characters with wildcard '*'
-- Exactly one "*" is produced per character: it is emitted for the UTF-8 lead
-- byte (or a stray control byte) and the continuation bytes 128..191 are
-- dropped. The earlier implementation counted bytes in pairs, which only
-- worked for 2-byte characters.
-- @param str name to convert
-- @return str with non-ASCII characters and - " ' replaced by "*"
function ia_extendedascii2wildcard(str)
    local out = {}
    for i = 1, #str do
        local k = str:byte(i)
        if k >= 32 and k <= 126 then
            -- For list of Lucene special characters needing to be escaped:
            -- http://lucene.apache.org/core/4_10_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Escaping_Special_Characters
            -- We only worry about - (45) and " (34) since the others are
            -- unlikely to appear in a proper name.
            -- Also ' (39) since it is sometimes the extended character ’
            if k == 45 or k == 34 or k == 39 then
                out[#out + 1] = "*"
            else
                out[#out + 1] = str:sub(i, i)
            end
        elseif k < 128 or k >= 192 then
            out[#out + 1] = "*"
        end
    end
    return table.concat(out)
end

--- UTF-8 aware replacement for string.sub() which doesn't support UTF-8
-- Note: Using instead of mw.ustring.sub() which I suspect might be cause of
-- intermittent error, and faster here for first-letter job.
-- Source: prapin @ Stack Overflow
-- http://stackoverflow.com/questions/13235091/extract-the-first-letter-of-a-utf-8-string-with-lua
-- @return the first UTF-8 character of str, or nil when nothing matches
--         (e.g. the empty string)
function firstLetter(str)
    return str:match("[%z\1-\127\194-\244][\128-\191]*")
end

-- Assemble the field-restricted part of the query:
--   (field:"phrase" OR field:"phrase" OR ... field:"phrase"
-- `groups` is an ordered array of { field, phrases }. The opening "%28" is
-- emitted here; the matching "%29" is appended by the caller (see main()).
local function field_query(groups)
    local clauses = {}
    for _, group in ipairs(groups) do
        local field = group[1]
        for _, phrase in ipairs(group[2]) do
            clauses[#clauses + 1] = field .. "%3A%22" .. phrase .. "%22"
        end
    end
    return "%28" .. table.concat(clauses, "%20OR%20")
end

--- Query for a one-word name: the name is searched verbatim in the subject,
--- creator, description and title fields.
-- @param sname the name (URL-encoded here with ia_url_encode)
function oneWord(sname)
    local nameurl = ia_url_encode(sname)
    return field_query({
        { "subject", { nameurl } },
        { "creator", { nameurl } },
        { "description", { nameurl } },
        { "title", { nameurl } },
    })
end

--- Query for a two-word name.
-- @param N array { first, last }
-- @param sopt search option; unless it is "t" or "tx" the creator field is
--        additionally searched for "Last, F."
function twoWords(N, sopt)
    local FIRST = urlX(N[1])
    local LAST = urlX(N[2])

    local last_first = LAST .. "%2C%20" .. FIRST -- Last, First
    local first_last = FIRST .. "%20" .. LAST -- First Last

    local creator = { last_first, first_last }
    if sopt ~= "t" and sopt ~= "tx" then
        -- Last, F.
        local firstinitial = urlX(firstLetter(N[1]))
        creator[#creator + 1] = LAST .. "%2C%20" .. firstinitial .. "%2E"
    end

    return field_query({
        { "subject", { last_first, first_last } },
        { "creator", creator },
        { "title", { first_last } },
        { "description", { last_first, first_last } },
    })
end

--- Query for a three-word name.
-- CAUTION: The following is near the max 2000 character URL limit for most
-- browsers when using long names such as "René-Nicolas Dufriche Desgenettes".
-- @param N array { first, middle, last }
-- @param sopt search option; unless it is "t" or "tx" the two-word forms
--        "First Last" / "Last, First" are searched as well
function threeWords(N, sopt)
    local FIRST = urlX(N[1])
    local MIDDLE = urlX(N[2])
    local LAST = urlX(N[3])
    local firstinitial = urlX(firstLetter(N[1]))
    local middleinitial = urlX(firstLetter(N[2]))

    -- First Middle Last
    local first_middle_last = FIRST .. "%20" .. MIDDLE .. "%20" .. LAST
    -- First M. Last
    local first_m_last = FIRST .. "%20" .. middleinitial .. "%2E%20" .. LAST
    -- F. M. Last
    local f_m_last = firstinitial .. "%2E%20" .. middleinitial .. "%2E%20" .. LAST
    -- F. Middle Last
    local f_middle_last = firstinitial .. "%2E%20" .. MIDDLE .. "%20" .. LAST
    -- Last, First Middle
    local last_first_middle = LAST .. "%2C%20" .. FIRST .. "%20" .. MIDDLE
    -- Last, First M.
    local last_first_m = LAST .. "%2C%20" .. FIRST .. "%20" .. middleinitial .. "%2E"
    -- Last, F. M.
    local last_f_m = LAST .. "%2C%20" .. firstinitial .. "%2E%20" .. middleinitial .. "%2E"
    -- Last, F. Middle
    local last_f_middle = LAST .. "%2C%20" .. firstinitial .. "%2E%20" .. MIDDLE

    local subject = {
        last_first_middle, last_first_m, last_f_m,
        first_middle_last, first_m_last, f_m_last,
    }
    local creator = {
        first_middle_last, first_m_last, f_m_last, f_middle_last,
        last_first_middle, last_first_m, last_f_m, last_f_middle,
    }
    local title = { first_middle_last, first_m_last, f_m_last }
    local description = {
        first_middle_last, first_m_last, f_m_last,
        last_first_middle, last_first_m,
    }

    if sopt ~= "t" and sopt ~= "tx" then
        local first_last = FIRST .. "%20" .. LAST -- First Last
        local last_first = LAST .. "%2C%20" .. FIRST -- Last, First

        subject[#subject + 1] = last_first
        subject[#subject + 1] = first_last
        creator[#creator + 1] = first_last
        creator[#creator + 1] = last_first
        title[#title + 1] = first_last
        description[#description + 1] = first_last
        description[#description + 1] = last_first
    end

    return field_query({
        { "subject", subject },
        { "creator", creator },
        { "title", title },
        { "description", description },
    })
end

--- Query for a four-word name.
-- @param N array { first, second, third, last }
-- @param sopt search option; unless it is "t" or "tx" the creator field is
--        additionally searched for "Last, F. S. T."
function fourWords(N, sopt)
    local FIRST = urlX(N[1])
    local SECOND = urlX(N[2])
    local THIRD = urlX(N[3])
    local LAST = urlX(N[4])

    -- Last, First Second Third
    local last_first = LAST .. "%2C%20" .. FIRST .. "%20" .. SECOND .. "%20" .. THIRD
    -- First Second Third Last
    local first_last = FIRST .. "%20" .. SECOND .. "%20" .. THIRD .. "%20" .. LAST

    local creator = { last_first, first_last }
    if sopt ~= "t" and sopt ~= "tx" then
        -- Initials go through urlX like in twoWords()/threeWords().
        local firstinitial = urlX(firstLetter(N[1]))
        local secondinitial = urlX(firstLetter(N[2]))
        local thirdinitial = urlX(firstLetter(N[3]))
        -- Last, F. S. T.
        creator[#creator + 1] = LAST .. "%2C%20" .. firstinitial .. "%2E%20"
            .. secondinitial .. "%2E%20" .. thirdinitial .. "%2E"
    end

    return field_query({
        { "subject", { last_first, first_last } },
        { "creator", creator },
        { "title", { first_last } },
        { "description", { first_last } },
    })
end

--- Write the result to the output file and terminate the process.
-- The file is arg[1] .. "tmpO" (arg[1] is used as a raw prefix, so it is
-- expected to end in a path separator). Raises if the file cannot be opened.
-- @param str text to write
function done(str)
    local t = arg[1] .. "tmpO"
    local file = assert(io.open(t, "w"))
    file:write(str)
    file:close()
    os.exit()
end

-- Whitespace-separated words of s, in order.
local function words(s)
    local list = {}
    for w in s:gmatch("%S+") do
        list[#list + 1] = w
    end
    return list
end

-- Unfielded clauses for the de-accented name: ` OR "phrase"` for each phrase.
local function plain_clauses(phrases)
    local out = {}
    for i, phrase in ipairs(phrases) do
        out[i] = "%20OR%20%22" .. phrase .. "%22"
    end
    return table.concat(out)
end

--- Build the wiki link for the name given on the command line and hand it to
--- done(), which writes it out and exits.
function main()
    local tempdir = arg[1]
    local sname = arg[2]
    local birthdeath = arg[3] or "none" -- a missing date behaves like "none"
    local sopt = arg[4]

    if tempdir == nil or sname == nil then
        io.stderr:write(USAGE, "\n")
        os.exit(1)
    end

    local byabout = "Works by or about"
    local tagline = "at [[Internet Archive]]"
    local urlhead = "http://archive.org/search.php?query="
    -- local urlhead = "http://balbach.net/iacs.awk?query="
    local dname = sname
    local media = "%28-mediatype:software%29%20AND%20%28"
    local mediaclose = "%29"
    local mydate = ""

    --- Print to console for debugging
    print("")
    if sopt ~= nil then
        print("sname = " .. sname .. " sopt=" .. sopt)
    else
        print("sname = " .. sname)
    end

    --- Split sname into words and count words.
    --- Both come from the same %S+ scan so that N[count] is always the last
    --- word, even with repeated, leading or trailing whitespace.
    local N = words(sname)
    local count = #N
    if count == 0 then
        io.stderr:write("error: name is empty\n", USAGE, "\n")
        os.exit(1)
    end

    --- Date string
    if birthdeath ~= "none" then
        local surname = N[count]
        if ia_extendedascii(surname) == 1 then
            mydate = "%20OR%20%28%22" .. birthdeath .. "%22%20AND%20%28%22"
                .. urlX(surname) .. "%22%20OR%20%22"
                .. urlX(ia_deaccent(surname)) .. "%22%29%29"
        else
            mydate = "%20OR%20%28%22" .. birthdeath .. "%22%20AND%20%22"
                .. urlX(surname) .. "%22%29"
        end
    end

    -- Wrap the query in the external-link markup and finish.
    local function emit(query, wild)
        done("[" .. urlhead .. media .. query .. wild .. mydate .. mediaclose
            .. " " .. byabout .. " " .. dname .. "] " .. tagline)
    end

    --- wild string
    local wild = "%29"
    if sopt == "w" and ia_extendedascii(sname) == 1 then
        if wildcrazycheck(N, count) == 1 then
            emit(wildcrazyfix(N, count), wild)
            return -- done() exits; this only guards against a non-exiting done()
        end
        wild = "%20OR%20" .. ia_url_encode(ia_extendedascii2wildcard(sname)) .. "%29"
    end

    --[[ Format URL ]]
    -- Only sopt == "t" (not "tx") adds the de-accented variants of the name.
    local add_plain = (sopt == "t")
    local myurl

    if count == 1 then
        myurl = oneWord(sname)
        if add_plain then
            myurl = myurl .. plain_clauses({ urlX(ia_deaccent(sname)) })
        end
    elseif count == 2 then
        myurl = twoWords(N, sopt)
        if add_plain then
            local PN = words(ia_deaccent(sname))
            local FIRST = urlX(PN[1])
            local LAST = urlX(PN[2])
            myurl = myurl .. plain_clauses({
                LAST .. "%2C%20" .. FIRST, -- Last, First
                FIRST .. "%20" .. LAST, -- First Last
            })
        end
    elseif count == 3 then
        myurl = threeWords(N, sopt)
        if add_plain then
            local PN = words(ia_deaccent(sname))
            local FIRST = urlX(PN[1])
            local MIDDLE = urlX(PN[2])
            local LAST = urlX(PN[3])
            local firstinitialp = urlX(firstLetter(PN[1]))
            local middleinitialp = urlX(firstLetter(PN[2]))
            myurl = myurl .. plain_clauses({
                -- First Middle Last
                FIRST .. "%20" .. MIDDLE .. "%20" .. LAST,
                -- Last, First Middle
                LAST .. "%2C%20" .. FIRST .. "%20" .. MIDDLE,
                -- Last, First M.
                LAST .. "%2C%20" .. FIRST .. "%20" .. middleinitialp .. "%2E",
                -- Last, F. M.
                -- NOTE(review): the first "." is a literal dot rather than
                -- "%2E"; kept so the emitted URL stays byte-identical.
                LAST .. "%2C%20" .. firstinitialp .. ".%20" .. middleinitialp .. "%2E",
            })
        end
    elseif count == 4 then
        myurl = fourWords(N, sopt)
        if add_plain then
            local PN = words(ia_deaccent(sname))
            local FIRST = urlX(PN[1])
            local SECOND = urlX(PN[2])
            local THIRD = urlX(PN[3])
            local LAST = urlX(PN[4])
            local firstinitialp = urlX(firstLetter(PN[1]))
            local secondinitialp = urlX(firstLetter(PN[2]))
            local thirdinitialp = urlX(firstLetter(PN[3]))
            myurl = myurl .. plain_clauses({
                -- Last, First Second Third
                LAST .. "%2C%20" .. FIRST .. "%20" .. SECOND .. "%20" .. THIRD,
                -- First Second Third Last
                FIRST .. "%20" .. SECOND .. "%20" .. THIRD .. "%20" .. LAST,
                -- Last, F. S. T.
                LAST .. "%2C%20" .. firstinitialp .. "%2E%20" .. secondinitialp
                    .. "%2E%20" .. thirdinitialp .. "%2E",
            })
        end
    else
        -- More than four words: search the whole name as plain terms.
        myurl = "%28" .. ia_url_encode(sname)
        if add_plain then
            myurl = myurl .. "%29%20OR%20%28" .. ia_url_encode(ia_deaccent(sname))
        end
    end

    emit(myurl, wild)
end

-- Run only when command-line arguments were supplied, so that the file can
-- also be loaded (e.g. with dofile) to reuse or test the helper functions.
if type(arg) == "table" and arg[1] ~= nil then
    main()
else
    io.stderr:write(USAGE, "\n")
end