#!/usr/local/bin/gawk -E

# -E required for getopt so doesn't conflict with Gawk command line options

#
# Initialize a new project
#
# Manages project directories and project.cfg entries:
#   -c create, -x delete, -i build ia9out-index, -t emit tc.cfg, -r fetch a
#   "#### births" category list. See usage() for details.
#
# Relies on names provided by the included files (not visible here):
#   Exe[], Home, Config[], Project[], getopt(), verifypid(), verifyval(),
#   checkexists(), exists(), sys2var(), readfile(), stripfile(), strip(),
#   regesc(), shquote(), urlendecode(), setProject().
#

@include "init.awk"
@include "library.awk"
#@include "getopt.awk"

#
# Entry point: parse the command line, then dispatch on the requested action.
# Everything after the dispatch section implements "create" (-c).
#
BEGIN {

  Optind = Opterr = 1
  while ((C = getopt(ARGC, ARGV, "ticxhp:d:m:r:")) != -1) {
    opts++
    if (C == "p")                 # -p Use project name. No default.
      pid = verifypid(Optarg)
    if (C == "d")                 # -d Data directory. Defaults to default.data in project.cfg
      did = verifyval(Optarg)
    if (C == "m")                 # -m Meta directory. Defaults to default.meta in project.cfg
      mid = verifyval(Optarg)
    if (C == "r") {               # -r Retrieve list of people born in year ####
      type = "retrieve"
      year = verifyval(Optarg)
    }
    if (C == "t")                 # -t Generate tc.cfg. -p optional
      type = "tccfg"
    if (C == "i")                 # -i Generate ia9out-index from data directory. -p required
      type = "index"
    if (C == "x")                 # -x Delete project files. -p required
      type = "delete"
    if (C == "c")                 # -c Create project files. -p required
      type = "create"
    if (C == "h") {
      usage()
      exit
    }
  }

  # No options or an empty -p given
  if ( type == "" || (type ~ /delete|create|index/ && pid == "") || pid ~ /error/ ) {
    usage()
    exit
  }

  if (type ~ /tccfg/ ) {
    maketccfg(pid)
    exit
  }

  # Load default Data and Meta from project.cfg
  if (did == "")
    did = Config["default"]["data"]
  if (mid == "")
    mid = Config["default"]["meta"]
  if (did == "" || mid == "") {
    print "Unable to determine Data or Meta directories. Create project.cfg with default.data and default.meta"
    exit
  }

  if (type ~ /index/ ) {
    makeindex(pid,did,mid)
    exit
  }

  if (type ~ /retrieve/) {
    getcatbirths(year,mid)
    exit
  }

  if (type ~ /delete/) {
    deleteproject(pid,mid,did)
    exit
  }

  # Everything following is type = create

  # Check everything looks ok. Paths are later concatenated directly with the
  # project id, so both must already end in "/".
  if (substr(did,length(did),1) != "/" || substr(mid,length(mid),1) != "/") {
    # Report the values actually being checked (did/mid); the previous code
    # printed the unset variables "data" and "meta".
    print "data = " did
    print "meta = " mid
    print "data and meta should end in a trailing slash. Maybe check project.cfg for default.data/meta"
    exit
  }

  # Make Data and Meta directories
  if (checkexists(did pid))
    print "Data directory already exists: " did pid
  else {
    print "OK Creating " did pid
    sys2var(Exe["mkdir"] " " did pid)
    checkexists(did pid, "initialize.awk", "exit")
  }
  if (checkexists(mid pid))
    print "Meta directory already exists: " mid pid
  else {
    print "OK Creating " mid pid
    sys2var(Exe["mkdir"] " " mid pid)
    checkexists(mid pid, "initialize.awk", "exit")
  }
  if (checkexists(mid pid "/tc.in"))
    print "tc.in directory already exists: " mid pid "/tc.in"
  else {
    print "OK Creating " mid pid "/tc.in"
    sys2var(Exe["mkdir"] " " mid pid "/tc.in")
    checkexists(mid pid "/tc.in", "initialize.awk", "exit")
  }

  # Write new project.cfg
  # The current project.cfg is first saved (via stripfile()) as project.cfg.orig.
  # stripfile() replaces an older awk|sed pipeline that removed leading and
  # trailing blank lines:
  # https://stackoverflow.com/questions/7359527/removing-trailing-starting-newlines-with-sed-awk-tr-and-friends
  print stripfile(Home "project.cfg") > (Home "project.cfg.orig")
  close(Home "project.cfg.orig")
  print "OK Saving project.cfg to project.cfg.orig"

  c = split(readfile(Home "project.cfg.orig"),a,"\n")

  # Set new default. An explicit flag is used because the loop index alone
  # cannot distinguish "matched on the last line" from "no match at all".
  re = "^default[.]id"
  found = 0
  for (i = 1; i <= c; i++) {
    if (a[i] ~ re) {
      a[i] = "default.id = " pid
      found = 1
      break
    }
  }
  if (! found)
    print "Unable to set default.id"
  else
    print "OK Setting default.id = " pid

  # Create new .data and .meta
  a[c + 1] = pid ".data = " did pid "/"
  a[c + 2] = pid ".meta = " mid pid "/"

  print "OK Writing new project.cfg"
  if (checkexists(Home "project.cfg"))
    sys2var( Exe["rm"] " " Home "project.cfg")
  for (i = 1; i <= c + 2; i++)
    print a[i] >> (Home "project.cfg")
  close(Home "project.cfg")

  # Create .auth file
  # For a project id of the form Name.START-END, copy lines START..END of
  # <meta>/Name.auth into <meta>/<pid>/auth.
  split(pid,a,".")
  if (checkexists(mid a[1] ".auth")) {
    if (a[2] ~ /[0-9]{1,5}[-][0-9]{1,5}/) {
      c = split(a[2],b,"-")
      if (c == 2 && strip(b[1]) ~ /^[0-9]+$/ && strip(b[2]) ~ /^[0-9]+$/) {
        start = strip(b[1])
        end = strip(b[2])
        if (! checkexists(mid pid "/auth")) {
          # head -n 750 meta/births1870.auth | tail -n 250 > /home/adminuser/wi-awb/meta/births1870.501-750/auth
          command = Exe["head"] " -n " end " " mid a[1] ".auth | " Exe["tail"] " -n " int( int(end) - int( int(start) - 1) ) " > " mid pid "/auth"
          print "OK creating " mid pid "/auth"
          sys2var(command)
        }
        else
          print "Auth file " mid pid "/auth already exists. Not creating new one."
      }
      else
        print "(1) Project ID doesn't take the form Name.####-#### - Unable to create " mid pid "/auth"
    }
    else
      print "(2) Project ID doesn't take the form Name.####-#### - Unable to create " mid pid "/auth"
  }
  else {
    print "Unable to find " mid a[1] ".auth - Unable to create " mid pid "/auth"
  }

}

#
# Make tc.cfg (to stdout)
#
#   pid - project id handed to setProject() to populate Project[]
#
# For each non-empty name in Project["auth"] that is not already listed in
# Project["manual"], runs talgo and prints one line:
#     name|<2nd field of the "Final: tmpO..." line>|unknown|urlencoded-name
# or, when talgo yields no such line:
#     name|unknown|unknown|urlencoded-name
# The talgo command line is echoed to stderr.
#
function maketccfg(pid,   c,a,i,l,b,command,out,d,urlencoded,bi,k,z,e,Save) {

  setProject(pid)     # library.awk .. load Project[] paths via project.cfg

  # The optional "save to Project["tccfg"]" mode is disabled; output always
  # goes to stdout. Save must be non-empty, since redirecting to a null
  # filename is a fatal error in gawk.
  Save = "/dev/stdout"

  l = readfile(Project["auth"])
  if (length(l) == 0) {
    print "Unable to read " Project["auth"]
    exit
  }
  checkexists(Project["tcin"], "mkcfg.awk", "exit")
  checkexists(Project["manual"], "mkcfg.awk", "exit")

  c = split(l, a, "\n")

  while (i++ < c) {
    if (length(a[i])) {

      # Names handled manually are skipped
      if ( nameisinfile(a[i], Project["manual"] ) )
        continue

      command = Exe["talgo"] " -d1 -p " Project["id"] " -f \"" a[i] "\""
      print command > "/dev/stderr"
      out = sys2var( command )
      d = split(out, b, "\n")
      urlencoded = strip(urlendecode(strip(a[i]),"encode"))

      while (bi++ < d) {
        if (match(b[bi], /^Final: tmpO[^$]*$/,k) ) {
          z = 1
          split(k[0], e, " ")
          print strip(a[i]) "|" strip(e[2]) "|unknown|" urlencoded >> Save
        }
      }
      if (z == 0) {
        if (length(a[i]) > 1)
          print strip(a[i]) "|unknown|unknown|" urlencoded >> Save
      }
      bi = z = 0      # reset per-name scan state
    }
  }
}

#
# Return 1 if name is in file
#
#   name  - name to look for (stripped and regex-escaped before matching)
#   filen - "|"-delimited file; only the first field of each line is compared
#
# Exits (via checkexists) if filen does not exist.
#
function nameisinfile(name, filen,    s, a, re) {

  checkexists(filen, "project.awk nameisinfile()", "exit")
  re = "^" regesc(strip(name)) "$"
  while ((getline s < filen ) > 0) {
    split(s, a, "|")
    if (strip(a[1]) ~ re) {
      close(filen)
      return 1
    }
  }
  close(filen)
  return 0
}

#
# Make an index based on files in data directory
#
#   pid - project id
#   did - data directory (trailing slash)
#   mid - meta directory (trailing slash)
#
# Writes <mid><pid>/ia9out-index with one "name|directory" line for every
# <did><pid>/wi-awb*/ directory containing namewiki.txt. Refuses to overwrite
# an existing index.
#
function makeindex(pid,did,mid,    data,meta,a,c,i) {

  data = did pid "/"
  meta = mid pid "/"

  if ( ! checkexists(data) || ! checkexists(meta) ) {
    print "Unable to find " data " OR " meta
    exit
  }

  if (checkexists(meta "ia9out-index")) {
    print "File exists, aborting."
    print "To delete: rm " meta "ia9out-index"
    exit
  }

  # list directories only
  # https://stackoverflow.com/questions/14352290/listing-only-directories-using-ls-in-bash-an-examination
  c = split( sys2var(Exe["ls"] " -d1 " data "wi-awb*/"), a, "\n")

  while (i++ < c) {
    if ( ! exists(a[i] "namewiki.txt") )
      print "Unable to find " a[i] "namewiki.txt" > "/dev/stderr"
    else
      print strip(readfile(a[i] "namewiki.txt")) "|" a[i] >> (meta "ia9out-index")
  }
  close(meta "ia9out-index")
}

#
# Download full list of names in [[Category:#### births]] to births####.auth in the Meta directory
#
#   year - four-digit year
#   mid  - meta directory (trailing slash)
#
# After writing the file, prints a suggested sed command whose project name is
# births<year>.0001-<line count zero-padded to 4 digits> ("error" if the file
# is empty).
#
# An earlier implementation scraped
#   https://tools.wmflabs.org/ext-lnk-discover/sc/sc.php?category=<year>_births
# with wget | grep | awk | sort; it has been replaced by "wikiget -c".
#
function getcatbirths(year,mid,   a,command,outfile,n) {

  if (year ~ /^[0-9]{4}$/) {

    outfile = mid "births" year ".auth"

    command = Exe["wikiget"] " -c " shquote(year " births")
    print sys2var(command) > outfile
    # Flush and close before wc reads the file, otherwise the count may be
    # taken from a partially written (or still empty) file.
    close(outfile)
    print "OK Created " outfile

    command = Exe["wc"] " -l " outfile
    split(sys2var(command), a, " ")
    n = int(a[1])
    if (n > 0)
      a[1] = sprintf("%04d", n)     # 7 -> 0007, 250 -> 0250, 12345 -> 12345
    else
      a[1] = "error"
    print "sed 's/PNAME/births" year ".0001-" a[1] "/g' 0INSTRUCTIONS > 0INSTRUCTIONS." year
  }
  else {
    print "Unable to determine year: " year
    exit
  }
}

#
# Delete a project: remove its Data and Meta directories and drop its
# <pid>.data / <pid>.meta lines from project.cfg.
#
#   pid - project id
#   mid - meta directory (trailing slash)
#   did - data directory (trailing slash)
#
# project.cfg is moved to project.cfg.orig as a backup; default.* lines are
# left untouched. If the filtered file cannot be produced the backup is
# moved back into place.
#
function deleteproject(pid,mid,did,   i,c,re,a,command) {

  # Delete Data and Meta directories
  if ( ! checkexists(did pid))
    print "Data directory doesn't exist: " did pid
  else {
    print "OK Deleting " did pid
    sys2var(Exe["rm"] " -r " did pid)
  }
  if ( ! checkexists(mid pid))
    print "Meta directory doesn't exist: " mid pid
  else {
    print "OK Deleting " mid pid
    sys2var(Exe["rm"] " -r " mid pid)
  }

  # Remove .meta and .data lines from project.cfg but leave default.* lines untouched

  # Clear leftovers from a previous run. The files are tested under Home, so
  # they must be removed under Home too; a stale project.cfg.out would
  # otherwise be appended to below.
  if (checkexists(Home "project.cfg.out"))
    sys2var( Exe["rm"] " " Home "project.cfg.out")
  if (checkexists(Home "project.cfg.orig"))
    sys2var( Exe["rm"] " " Home "project.cfg.orig")

  if (checkexists(Home "project.cfg"))
    command = Exe["mv"] " " Home "project.cfg" " " Home "project.cfg.orig"
  else {
    print "Unable to find " Home "project.cfg"
    return
  }
  print "OK Making backup project.cfg -> project.cfg.orig"
  sys2var(command)
  system("")      # flush output

  c = split(readfile(Home "project.cfg.orig"),a,"\n")
  re = "^" regesc(pid) "[.](data|meta)"
  while (i++ < c) {
    if (a[i] !~ re)                     # lines matching re are dropped
      print a[i] >> (Home "project.cfg.out")
  }
  close(Home "project.cfg.out")

  if (checkexists(Home "project.cfg.out")) {
    print stripfile(Home "project.cfg.out") > (Home "project.cfg")
    close(Home "project.cfg")
    print "OK Removed data & meta lines from project.cfg (default.id untouched)"
  }
  else {
    print "Unable to modify project.cfg - restoring backup"
    sys2var(Exe["mv"] " " Home "project.cfg.orig" " " Home "project.cfg")
  }
}

#
# Print command-line help to stdout
#
function usage() {

  print ""
  print "Project - manage projects."
  print ""
  print "Usage:"
  print " -r Download list of names in [[Category:#### births]]"
  print " -t Generate tc.cfg. -p optional"
  print " -i Generate ia9out-index from ~/data files. -p required"
  print " -c Create project files. -p required"
  print " -x Delete project files. -p required"
  print " -p Project name."
  print " -d Data directory. Defaults to default.data in project.cfg"
  print " -m Meta directory. Defaults to default.meta in project.cfg"
  print " -h Help"
  print ""
  print "Examples: project -x -p births1870.0001-0250"
  print " project -c -d /mnt/data/ -m /mnt/meta/ -p births1870.0001-0250"
  print " project -r 1870"
  print ""
  print "Path names for -d and -m end with trailing slash."
  print ""
}