atat-mirror/src/sqs2tdb

1003 lines
47 KiB
Perl
Executable File

#!/usr/bin/perl
#use strict;
# Initial setup of global variables
# read config file to know where atat directory is
my $atatdir;
BEGIN {
open(my $cf,"<",join('/',$ENV{"HOME"},".atat.rc")) or die "Can't open ~/.atat.rc\n";
$atatdir = <$cf>;
chomp $atatdir;
$atatdir =~ s/.*atatdir\s*=\s*//g;
our $libdir = join('',$atatdir,"/src");
}
# load libraries from atat/src/
use lib $libdir;
use Permutor;
use PowerSet;
use PowerSet 'powerset';
#process command line
my %cmdline; #this will contained all command line options, hashed by the option name (e.g. -l, etc.)
for (my $i=0; $i<@ARGV; $i++) {
my @opt = split /=/, $ARGV[$i];
if ( @opt == 1 ) {$opt[1]=1}
$cmdline{$opt[0]}=$opt[1];
}
# end of global var setup
# go the "Main" section below
#subroutine to setup a directory of the SQS database (-mk option)
sub makemanysqs() {
if (! -e "sqsgen.in") {# if input file does not exist create a template for user to edit
print qq{
Please create a sqsgen.in file of the form (for example):
level=0 a=1 b=1 [...]
level=1 a=1 b=0.5,0.5 [...]
level=1 a=0.5,0.5 b=1 [...]
level=2 a=0.5,0.5 b=0.5,0.5 [...]
[...]
where a,b,... would the wyckoff positions for each site (append a number if two different sites have the same letter)
and where the concentration could have more than two values (e.g. 0.33333,0.33333,0.33333).
You will still need to run mcsqs in each subdirectory created that has fractional occupation.
};
print join("","See ",$atatdir,"/","data/sqsdb/"," for examples.");
exit;
}
open(my $fh,"<","sqsgen.in") or die "Unable to open gensqs.in"; # to read sqs compositions, one per line
while (<$fh>) {
chomp;
my $dirname=$_; # will contain sqs directory name
my @cols = split /\s+/, $_ ; # split by whitespace separated columns
my @levs = split /=/, $cols[0]; # sqs level is in 1st column
my %siteocc; #will contain concentrations for each site, hashed by site wyckoff name
for (my $i=1; $i<@cols; $i++) { # loop over sites (i.e. columns 2,3,...) to get concentrations
my @rawsite = split /=/, $cols[$i]; # split site=concentration_list
my $atom="A"; # atom labels start at A
my @allconc = split /,/, $rawsite[1]; # parse coma-separated concentrations
@allconc = reverse sort @allconc; # sort from highest to lowest concentration
$siteocc{$rawsite[0]}=""; # will contain a string of all occupations (concentrations) for current site
for (my $at=0; $at<@allconc; $at++) { #loop over concentrations for current site
my $com=""; # put comas only between items
if ($at > 0) {$com=",";} #
$siteocc{$rawsite[0]}=join('',$siteocc{$rawsite[0]},$com,$rawsite[0],"_",$atom,"=",$allconc[$at]); # append site_atom=conc
$atom++; # A,B,C,...
}
}
$dirname =~ s/^\s*level/lev/; #cleanup directory name level->lev
$dirname =~ s/\s+/_/g; # whitespaces to _
$dirname =~ s/^/sqsdb_/; # sqsdb_ prefix
print $dirname,"\n"; # report directory being made
mkdir $dirname;
{
# open unit cell structure
open(my $sf,"<","rndstr.skel") or die "Can't open rndstr.skel (should contain unit cell of structure in ATAT format with lowercase Wyckoff position as atom names)";
open(my $of,">",join('/',$dirname,"rndstr.in")); # will contain the input file for mcsqs
while (<$sf>) {
chomp;
my @cols = split /\s+/, $_ ; # split each line by whitespace-separated columns
if ( $cols[3] =~ /[a-z]/ ) { # if line has an atom label in colunm 4
print $of $cols[0]," ",$cols[1]," ",$cols[2]," ",$siteocc{$cols[3]},"\n"; # replace wykoff site name with list of all concentrations for site
}
else {
print $of $_,"\n" ; #otherwise copy line as is
}
}
}
}
system("cd sqsdb_lev=0_*; cellcvrt -ro < rndstr.in >| bestsqs.out"); # special case: for end members no need to generate sqs: just copy unit cell
}
#subroutine to copy sqs from database to current directory
sub copysqs() {
#find where to get sqs from (from -l command-line option )
my $sqsdbdir=join("",$atatdir,"/","data/sqsdb/",$cmdline{"-l"});
#if a master species.in file in give in current directory, read it
my $masterspecies="";
{
if (-e "species.in") {
open(my $sh,"<","species.in");
$masterspecies=<$sh>;
chomp $masterspecies;
}
}
if (exists $cmdline{"-sp"}) {
$masterspecies=$cmdline{"-sp"};
if (! -e "species.in") {
open(my $sh,">","species.in");
print $sh $masterspecies . "\n";
}
}
if (! $masterspecies eq "") {
print "Using species: ",$masterspecies,"\n";
}
#make a string with list of (Wyckoff) sites in sqs, each followed with "=" and master list of species (elements)
my $specskel;
{
open(my $fh,"<",join("",$sqsdbdir,"/sqsgen.in")) or die "can't open sqsgen.in \n";
$specskel=<$fh>;
$specskel =~ s/^level[^\s]*[\s]*//g;
$specskel =~ s/=[0-9.,+-]*/=$masterspecies/g;
}
#create directory and template species.in file if nonexistant and exit.
if (!chdir($cmdline{"-l"})) {
mkdir($cmdline{"-l"});
chdir($cmdline{"-l"});
open(my $sf,">","species.in");
print $sf $specskel;
print "Edit the file ",$cmdline{"-l"},"/species.in (if needed) and rerun the same command.\n";
exit;
}
#read the species.in file (were now are in the subdirectory given by -l option)
my @sites; #will contain array of all sites (a site is a sublattice in CALPHAD nomenclature)
my %spec; #each entry (hashed by site name from @sites) will contain a reference to an array of the species on each site
my %perm; #each entry (hashed by site name from @sites) will point to an object that generates all permutation on species on a site
{
my @rawsite ;
open(my $sf,"<","species.in") or die "can't open species.in";
@rawsite = split /\s+/, <$sf>; # the different sites are whitespace separated
for (my $s=0; $s<@rawsite; $s++) { # for each site, parse the species
my @asitespec = split /=/ , $rawsite[$s] ; #split site=species
my $site=$asitespec[0];
@sites=(@sites,$site); #grow array of sites
$spec{$site} = [ split /,/ , $asitespec[1] ]; #record the array of species for current site
$perm{$site} = new List::Permutor @{$spec{$site}}; #make permuter object for current site
}
}
#read the multiplicities from sqsdb of each site and place them in mult.in
{
open(my $mh,">","mult.in");
foreach my $s ( @sites ) { #loop over sites
open(my $sh,join('',$sqsdbdir,"/rndstr.skel")); #this file contains all sites (with equivalent sites repeated)
my $m=0;
while (<$sh>) {
my @cols=split /\s+/, $_;
if ($cols[3] eq $s) {$m++;} #count number of time site $s appears
}
print $mh $s,"=",$m,"\t";
}
print $mh "\n";
}
#read in the wyckoff sites of the sqs that are equivalent by symmetry (only happens in simple structures, e.g. B2)
my @equivsite; #this will contain an array of statements of equivalence between wyckoff positions, e.g. a=b,c=b
{
if (-e "$sqsdbdir/equiv.in") {
open(my $eh,"<","$sqsdbdir/equiv.in");
while (<$eh>) {
chomp;
push @equivsite,$_; #add new entry to array
}
}
}
#read the different sqs in the db
my @sqslist=(); #this will contain names of all unique sqs
# this file list all available sqs compositions
open(my $fh,"<",join("",$sqsdbdir,"/sqsgen.in")) or die "can't open sqsgen.in \n";
while (<$fh>) { #loop over sqs
chomp;
# each line contain sqs level followed by site=compo,compo,... (repeated for each site)
my $dirdbname=$_; # will contain the sqs directory name to copy form the database
my @cols = split /\s+/, $_ ; #level and sites are whitespace separated
my @levs = split /=/, @cols[0]; # split level=number
if ($levs[1] <= $cmdline{"-lv"}) { # if level is below what specified by -lv command line option
my %siteocc; # this will contain references to arrays of the occupations (compositions) for each site, hashed by site name
for (my $i=1; $i<@cols; $i++) { # loop over all sites
my @arawsite = split /=/, @cols[$i]; #split into site name and composition list
my @allconc = split /,/, @arawsite[1]; #parse composition list and put into array
@allconc = reverse sort @allconc; # by convention list higher composition first
$siteocc{@arawsite[0]}=[ @allconc ]; # record all occupations for this site
}
#cleanup and make directory name
$dirdbname =~ s/^\s*level/lev/;
$dirdbname =~ s/\s+/_/g;
$dirdbname =~ s/^/sqsdb_/;
#check if the current sqs does not have too many possible occupations for the given number of species, at each site
my $nbok=1;
foreach my $site ( @sites ) {
if ( @{$spec{$site}} < @{$siteocc{$site}}) {$nbok=0; last;}
}
#if the nb of occupation and species are ok
if ($nbok) {
while (1) { #loop over permutations of the species on each site
# this is needed if the number of species in the database sqs is smaller than the nb of species desired
# example: A,B,C alloy with binary sqs => need sqs for A,B and B,C and A,C
#make sqs name in the calculation directory
my $sqsname=""; # will contain the name of the sqs calc directory
foreach my $site (@sites) {
my @aspec=$perm{$site}->peek; # get all species for that site in the current permutation order
my @speclist=();
for (my $i=0; $i<@{$siteocc{$site}}; $i++) {
# add an entry of the form site_specie=occupation
@speclist = (@speclist,join('',$site,"_",$aspec[$i],"=",@{$siteocc{$site}}[$i]));
}
@speclist = sort @speclist; #put wyckoff sites in alphabetical order
$sqsname=join(',',$sqsname,join(',',@speclist));
}
$sqsname =~ s/^,//; # remove extra , in front
#sqsname now completed, except for prefix
my $onespeconly=1; # flag that will indicate if it is an end member that might reduce to a simpler parent lattice
my $onespecname=""; # will contain name of species on simpler parent lattice
if (@sites==1) {$onespeconly=0;} # if only one sublattice cannot be equivalent to higher symmetry phase
else {
foreach my $site (@sites) {
my @aspec=$perm{$site}->peek; # get all species for that site in the current permutation order
# this block checks if the end member reduces to a simpler known parent lattice
if (@{$siteocc{$site}}>1) { # only one specie one sublattice?
$onespeconly=0; #no: cannot be simple lat
}
else {
if ( $onespecname eq "" ) {
$onespecname=$aspec[0]; # first time around, store specie name on 1st sublattice
}
else { # next time around, check if the same specie is on the other sublattices
if ( $onespecname ne $aspec[0] ) {$onespeconly=0;} # if not, cannot reduce to simpler lattice
}
}
}
}
# now check if that sqs is not equivalent to another one under permutation of equivalent sites
my $alreadylisted=0; #flag to indicate the sqs is redundant
my $sqspermname=""; #will contain various sqs with sites permuted
my $equivpowset = new Data::PowerSet(@equivsite); #we will select arbitrary subsets of site equivalence relations
while (my $equivon= $equivpowset->next) { #loop over subset of site equiv relations
my %mapsite; # will contain how each wyckoff site is mapped onto another
for my $s (@sites) {$mapsite{$s}=$s;} #initialize identity mapping
for my $ss (@$equivon) { # loop over equivalences
my $a=$ss;
$a =~ s/=.*//g; # site a is before =
my $b=$ss;
$b =~ s/.*=//g; # site b is after =
$mapsite{$a}=$b; # record permutation of those sites
$mapsite{$b}=$a;
}
$sqspermname=""; # will contain sqs name with permuted sites
foreach my $oldsite (@sites) { #loop over sites before permutation
my $site=$mapsite{$oldsite}; #permuted site name
my @aspec=$perm{$site}->peek; #get species for that site
my @speclist=(); #construct sqs name as before
for (my $i=0; $i<@{$siteocc{$site}}; $i++) {
@speclist = (@speclist,join('',$oldsite,"_",$aspec[$i],"=",@{$siteocc{$site}}[$i])); #use old site label with occupations of permuted site
}
@speclist = sort @speclist; # wyckoff sites in alphabet order
$sqspermname=join(',',$sqspermname,join(',',@speclist));
}
$sqspermname =~ s/^,//;
#sqspermname now completed
#print $sqspermname,"\n";
if ( grep(/^$sqspermname$/, @sqslist) == 1 ) {$alreadylisted=1; last;} # if permuted site sqs was already in list
}
#print $alreadylisted,"\n";
#print "list=\n",join("\n",@sqslist),"\nend\n";
if ( grep(/^$sqsname$/, @sqslist) == 0 ) { #if (unpermuted) sqs is not in list
push @sqslist,$sqsname; # add to list
my %subatom; # will contain which to put in which site (hashed by wyckoff_x where x=A,B,C,... the generic atom names in sqs db)
foreach my $site (@sites) { #loop over sites
my @aspec=$perm{$site}->peek; # species for current permutation
for (my $atom="A", my $i=0; $i < @{$siteocc{$site}}; $atom++, $i++) {
$subatom{join("_",$site,$atom)}=$aspec[$i]; #set, e.g., subatom{"b_A"}="Cu"
}
}
# put prefix and create dir
$sqsname = "sqs_lev=" . $levs[1] . "_" . $sqsname;
mkdir($sqsname);
if ( $alreadylisted == 1 ) { #if equivalent to permuted sqs, write a link to it
open(my $lh,">","$sqsname/link");
print $lh "sqs_lev=" . $levs[1] . "_" . $sqspermname . "\n";
}
else { #otherwise copy all files needed for calc
if ( ! -e "$sqsname/str.out" ) { #only do if didn't exist before
print "Preparing ",$sqsname," directory\n";
{
open(my $sqsf,">",join("",$sqsname,"/str.in")) or die "Can't open str.in\n"; #destination file
open(my $dbf,"<",join("",$sqsdbdir,"/",$dirdbname,"/bestsqs.out")) or die "Can't open bestsqs.out file.\n"; #origin file from database
while (<$dbf>) { #loop over lines in origin file
chomp;
my @cols = split /\s/, $_ ; #split into columns
if ( $cols[3] =~ /[a-z]/ ) { #if 4th column contains a site, replace it by the correct atom from subatom
print $sqsf $cols[0]," ",$cols[1]," ",$cols[2]," ",$subatom{$cols[3]},"\n";
}
else { # otherwise copy as is
print $sqsf $_,"\n";
}
}
}
# check if structure has higher symmetry after sites are decorated
my $hisym=0;
{
open(my $osf,join("","cellcvrt -sym < ",$sqsdbdir,"/",$dirdbname,"/bestsqs.out | head -n 1 |"));
my $osym=<$osf>;
chomp $osym;
open(my $fsf,join("","cellcvrt -sym < ",$sqsname,"/str.in |& grep -v Warning | head -n 1 |"));
my $fsym=<$fsf>;
chomp $fsym;
if ( $osym < $fsym ) {
print "Equivalent to another high-symmetry structure: ",$sqsname,"\n";
$hisym=1;
}
#print "sym= ",$osym," ",$fsym," ",$hisym,"\n";
}
if ($hisym) {
open(my $bf,">",$sqsname ."/bump");
print $bf "";
}
# isotropically stretch str.in so that atoms just touch and put in str.out
my $cmd=join("","nntouch -l=",$sqsname,"/str.in -r=",$atatdir,"/data/radii.in >| ",$sqsname,"/str.out");
system($cmd);
if ($levs[1]==0) { # for end members also write "endmem" file as a flag
open (my $emf,">",join("",$sqsname,"/endmem"));
print $emf "";
}
if ($onespeconly == 1) { # if this end member can be reduced to a simpler lattice
if (open(my $plf,"<",$sqsdbdir . "/" . "parentlat.in")) { #check if this simpler lattice is known in the sqs db
my $linkto=<$plf>; # read lattice and partial structure name (e.g. FCC_A1/sqs_lev=0_a_)
chomp $linkto;
$linkto = $linkto . $onespecname . "=1"; # add specie name to partial structure name
open(my $lf,">",$sqsname ."/link"); # write a suitable link file
print $lf $linkto . "\n";
print "link to parent lattice: " . $linkto . "\n"; # notify user
}
}
else { # if not known, then needs to be computed
open (my $wf,">",join("",$sqsname,"/wait")); #write "wait" file as a flag to mark as "to be computed"
print $wf "";
}
}
}
}
# increment to the next permutation of species
my $doneperm=1;
foreach my $a ( @sites ) { #try to permute one site at the time
my @set = $perm{$a}->next ;
if ( $perm{$a}->peek ) {$doneperm=0; last;} #stop as soon as one permutation was possible
$perm{$a}->reset; #if reach end of permutation, restart at first permutation for that site
}
if ( $doneperm ) {last;} #if all sites reached their last permutation, we're at the end
}
}
}
}
}
#subroutine to fit ab initio data, combine with sgte and create tdb file for one phase
sub fittdb() {
# SECTION 1: read all input files
my @sites; # will contain names of each Wyckoff site (a site is a sublattice in CALPHAD nomenclature)
my %spec; # will contain reference to array of possible species sitting on site, for each site (hashed by Wyckoff site)
{
my @rawsite ;
open(my $sf,"<","species.in") or die "Can't open species.in"; #read from species.in for one lattice
@rawsite = split /\s+/, <$sf>; #sites are whitespace separated
for (my $s=0; $s<@rawsite; $s++) {
my @asitespec = split /=/ , $rawsite[$s] ; # format is site=specie,specie
my $site=$asitespec[0]; #first site name
@sites=(@sites,$site); #add to list of sites
$spec{$site} = [ split /,/ , $asitespec[1] ]; #parse comma separated list of species and record it for that site
}
}
my %mult; #will contain multiplicity of each site per unit cell (hashed by Wyckoff site)
my $totsite; #will contain total numer of site per unit cell
{
open (my $mh,"<","mult.in") or die "can't open mult.in";
my @cols = split /\s+/ , <$mh>; #sites are whitespace separated
foreach my $m (@cols) { #loop over sites
my @tok = split /=/, $m; #parse site=mult
$mult{$tok[0]}=$tok[1]; # record multiplicity for that sie
$totsite+=$tok[1]; # increment total number of site
}
}
my @terms; #will contain all types of interaction terms as a 3d array
# The first index is the type of interaction (i.e. which line of the terms.in file)
# The second index is the site (as an integer, not a wyckoff symbol)
# the last index is 0 or 1, with [0] giving the order (1: linear in composition, 2: binary interaction, etc.)
# and [1] giving the level (e.g. order of polynomial in a binary interaction)
{
if (! -e "terms.in") { # if terms.in file does not exist, make a default one and output some help
open(my $tf,">","terms.in");
print $tf join(':',map { "1,0" } @sites) . "\n";
print qq{Please edit the file terms.in
Each entry on one line has the form order,level:order,level:...
in which sublattices are separated by :
and where order has the following meaning: 1: linear in composition 2: binary interaction, 3: ternary interaction, etc.
and where level is the maximum value of the additional parameter in a tdb file specifying the order of the polynomial used.
In a binary interaction, level can be 0,1,2,3,...
In a ternary interaction, level can be 0,1
In higher order interaction level can only be 0
(These are limitations of the tdb format.)
};
exit;
}
open(my $tf,"<","terms.in") or die "Can't open terms.in";
my $l=0;
while (<$tf>) {
chomp;
my @atermraw = split /[:]+/, $_ ; # each site is separated by :
@atermraw = ( @atermraw , map { "0,0" } @sites ); # pad with 0,0 for all sites that the user did not specify
my @aterm = map { [ split /,+/ ] } @atermraw ; # split each order,level pair into two array entries
$terms[$l]=[ @aterm[0..@sites-1] ]; # cut extraneous 0,0 that were padded and record cleanup terms in $terms for line $l
$l++; # next line in file
}
}
my %stablephase; # will contain the stable room temperature phase of each element, hashed by element name
{
open(my $sh,"<",$atatdir . "/data/sgte_elements.tdb"); #fetch from atat data directory
while (<$sh>) {
chomp;
my @col=split /\s+/, $_;
$col[1] =~ s/ELEM_//g;
$stablephase{$col[1]}=$col[2];
}
}
# SECTION 2: create input file for least squares fit
my @tdbparam; # will contain all interaction terms of the tdb file, each in the form specie,specie,...:specie,specie,...:...;level
my $bumpval=5e-3; # energy shift to avoid degeneracies between low symmetry end members and high symmetry ones.
if ($cmdline{"-bv"} != 0) {
$bumpval=$cmdline{"-bv"};
}
my @propname=("energy","svib_ht"); # input files that we will fit
my @propsuf=("","*T"); #suffix they need in tdb file
my $idealgas=8.3144621;
my @propnum=(1.60217653e-19*6.02214078e23,-$idealgas); #conversion factor from ab initio units to tdb units: eV/atom -> J/mol and k_B -> J/(K mol)
for my $aprop (@propname) { # loop over properties
open(my $dh,"ls -d sqs_* |"); # list all sqs directories - we'll read them from handle dh
open(my $mh,">","x_" . $aprop . ".in"); # file will contain regressors for least-squared fit
open(my $eh,">","y_" . $aprop . ".in"); # file will contain dependent variable for least-squared fit
open(my $mn,">","strname_" . $aprop . ".out"); # file will contain structures included in fit of this property
open(my $ch,">","conc_" . $aprop . ".out"); # file will contain overall concentrations of structures included in fit of this property
my $founddata=0;
while (<$dh>) { # loop over sqs directories
chomp;
my $dirname=$_; # get directory name
my $olddirname=$dirname; # name before redirection
if ( -e "$dirname/link" ) { # if find a link to another directory, use that directory instead
open(my $lh,"<","$dirname/link");
print $dirname," mapped to ";
$dirname=<$lh>;
chomp $dirname;
if ( ! -e $dirname ) {
$dirname = "../" . $dirname;
}
print $dirname,"\n";
}
my $concname=$_; # start to extract concentrations from sqs name
$concname =~ s/sqs_//g; # trim prefixes
$concname =~ s/lev=[^_]+_//g;
my %conc; # this will contain all concentrations for this sqs (hashed by Wyckoff site and then hashed by species)
my %overconc; # this will contain overall concentration for this sqs (hashed by species)
foreach my $s (@sites) { #initialize all concentrations to 0
$conc{$s}={};
for my $sp ( @{$spec{$s}} ) {
${$conc{$s}}{$sp}=0;
$overconc{$sp}=0;
}
}
my $badconc=0;
my @rawcs=split /,/ , $concname ; # concentation are coma separated and have the form site_specie=number
foreach my $rawc (@rawcs) {
my @piece=split /=/ , $rawc; # parse so that piece[0]=site_specie and piece[1]=number
my @lab=split /_/, $piece[0]; # parse so that lab[0]=site and lab[1]=specie
if (! exists ${$conc{$lab[0]}}{$lab[1]}) {$badconc=1;} #can't find such a composition
${$conc{$lab[0]}}{$lab[1]}=$piece[1]; #record the site composition
$overconc{$lab[1]}+=$piece[1]*$mult{$lab[0]}/$totsite; # increment overall composition
}
if ( ! -e join('',$dirname,"/",$aprop)) { # check if property aprop is available for current sqs
$badconc=1;
}
if ($badconc) { # if anything went wrong, skip
print $dirname," skipped for property " . $aprop . "\n";
}
else { # otherwise process
$founddata=1; # we have found at least one data point
print $dirname,"\n"; # report structure used
print $mn $dirname,"\n";
{
open(my $es,"<",join('',$dirname,"/str.out")) or die "can't open " . $dirname . "/str.out\n"; #read structure file
my $totsitesup=0; # will contain total number of atoms (i.e. site)
while (<$es>) { # loop over lines
my @cols=split /\s+/, $_; # split in columns
if ( $cols[3] =~ /[a-zA-Z]/ ) { # if line contain atom label, count it
$totsitesup++;
}
}
open(my $ef,"<",join('',$dirname,"/",$aprop)); # read property (e.g. energy)
my $energy=<$ef>;
chomp $energy;
# if directory contains file bump, bump energies by bumpval per atom (to avoid degeneracies at end members);
if ( $aprop eq "energy" && -e join('',$olddirname,"/bump") ) { # check for bump in dir before redirection
$energy+=$bumpval*$totsitesup;
}
# add code here for bump
my $energypercell=($energy*$totsite/$totsitesup); # convert per unit cell
print $eh $energypercell,"\n"; # record property value in dependent variable file for fitting
for my $el ( sort keys %overconc ) {print $ch $overconc{$el},"\t";}
print $ch "\n";
open(my $eo,">",join('',$dirname,"/",$aprop,"_cell")); # record in a *_cell file too
print $eo $energypercell,"\n";
}
@tdbparam=(); # clear thermo database param (from previous interaction calculation )
my %doneterm; # to keep track of terms already processed
foreach my $rterm (@terms) { # loop over possible interaction terms
#print "rterm="; #
#foreach my $tt (@{$rterm}) {print @{$tt}[0],":";} #
#print "\n"; #
my $perm = new List::Permutor @{$rterm}; # will consider all assignements of terms to sublattices (i.e. sites)
while (my @term=$perm->next) { # loop over permutation of terms
my $hiord=0; # will contain highest level of interaction
# sanity checks
for (my $s=0; $s<@term; $s++) {
if ($term[$s][0]==3 && $term[$s][1]>0) { $term[$s][1]=2; } # higher order param for ternaries are indexed by 0,1,2 so 2 is higher limit
# check if only one higher order param
if ($term[$s][1]>0 && $hiord>0) {die "TDB format only allows one higher order parameter.\n";}
if ($term[$s][1]>0) {$hiord=$term[$s][1];}
}
my @nterm; # will contain the number of components for each term (1=one component, 2=binary, 3=ternary, etc.) indexed by site number
my %ordterm; # will contain the level for each interaction, indexed by site (ord=level in this section of the code)
my $ns=0; # to loop over site number
my $toohi=0; # flag if interaction involves more components than there are on the site
foreach my $s (@sites) { # loop over sites
$nterm[$ns]=@{$term[$ns]}[0]; #copy nb of component
if (@{$term[$ns]}[0]>@{$spec{$s}}) {$toohi=1;} # flag if too many components
$ordterm{$s}=@{$term[$ns]}[1]; # copy level of interaction
$ns++; # next site
}
my $kterm=join('_',@nterm); #create label to check if done
if (! $toohi==1 && ! exists $doneterm{$kterm}) { # if not flagged and not done
$doneterm{$kterm}=1; # mark as done
my %powset; # will contain objects to find all subset of a set of species on a site, hashed by site name
my $ns=0; # site number
foreach my $s (@sites) { # loop over sites
$powset{$s} = new Data::PowerSet ({min=>$nterm[$ns], max=>$nterm[$ns]}, @{$spec{$s}}); # find all subsets of $nterm[$ns] species
$ns++; # next site
}
while (1) { # loop over subsets of species
for (my $ord=0; $ord<=$hiord; $ord++) { # loop over levels of interactions
my $acc=1.; # to accumulate the product of compositions
my @atdbparam; # will contain the label of the interaction as specie,specie,...:specie,specie,...:...
foreach my $s ( @sites ) { # loop over sites
my $elem=$powset{$s}->peek; # which species (elements) take part in that interaction
$atdbparam[@atdbparam]=(@{$elem}==0 ? "*" : join(',',@{$elem})); # add elem,elem,... at the end of the label or * if no element
foreach my $e ( @{$elem} ) { # do the product of compositions of all elements $e involved on site $s
$acc*=${$conc{$s}}{$e};
}
# handle higher level interaction
if (@{$elem}==2 && $ordterm{$s}>0) { # for binary interactions include terms of the form (X_A-X_B)^$ord
$acc*=(${$conc{$s}}{@{$elem}[0]}-${$conc{$s}}{@{$elem}[1]})**$ord;
}
if (@{$elem}==3 && $ordterm{$s}>0) { # for ternary interactions, use CALPHAD convention
my $comp=0;
for (my $i=0; $i<3; $i++) { # sum all compositions involved (may be <1 for higher-order systems)
$comp+=${$conc{$s}}{@{$elem}[$i]}
}
my $v=${$conc{$s}}{@{$elem}[$ord]}+(1-$comp)/3.; # contribution is X_{A,B or C} + (1-sum composition)/3
$acc*=$v; #accumulate product
}
}
$tdbparam[@tdbparam]=join('',join(':',@atdbparam),';',$ord); # record interaction name as specie,specie,...:specie,specie,...:...;level
print $mh $acc,"\t"; # record regressor for this interaction
}
# find next subset
my $donepower=1;
foreach my $s ( @sites ) { # try for each site
$powset{$s}->next; # go to next subset for this site
if ( $powset{$s}->peek ) {$donepower=0; last;} # if possible, we have found next subset choice
$powset{$s}->reset; # if not possible restart at first subset for this site and try next site
}
if ( $donepower ) {last;} # if all sites are at their last subset, we are done with exploring interactions
}
}
}
}
print $mh "\n"; # put next interaction on the next line
}
}
if ($founddata==1) { # if we have some data, do the fit
system("lsfit -x=x_" . $aprop . ".in -y=y_" . $aprop . ".in -colin >| param_" . $aprop . ".out; sleep 1"); # least square fit of parameters in param_[property].out
# report some quality measures in fit_[property].out
system("lsfit -x=x_" . $aprop . ".in -y=y_" . $aprop . ".in -colin -p -ty -e | sed 's/ /\t/g' >| fit.tmp");
system("paste conc_" . $aprop . ".out fit.tmp strname_" . $aprop . ".out >| fit_" . $aprop . ".out; rm fit.tmp ; sleep 1");
# and cv_[property].out
system("lsfit -x=x_" . $aprop . ".in -y=y_" . $aprop . ".in -colin -cv >| cv_" . $aprop . ".out");
}
else { # if have no data
print "Found no data for " . $aprop . "\n";
# create a param_[propery].out file with as many 0 as there are parameters
open(my $ph,">","param_" . $aprop . ".out");
print $ph map {"0\n"} @tdbparam;
}
}
# concatenate (by columns) all the param_[property].out files into allparam.out
system("paste " . join(' ',map {"param_" . $_ . ".out"} @propname) . " >| allparam.out");
# SECTION 3: Create tdb file
{
my $Tmin="298.15"; # default lowest and highest temperature cutoffs
my $Tmax="10000";
if ( exists $cmdline{"-Tu"} ) {
$Tmax=$cmdline{"-Tu"};
}
if ( exists $cmdline{"-Tl"} ) {
$Tmin=$cmdline{"-Tl"};
}
my $phase=`pwd`; # create phase name from current directory name without upper directory name
chomp($phase);
$phase =~ s/.*\///g;
my @exphase; # will contain which phases should not be loaded from SGTE
if (-e "../exfromsgte.in") { # this optional file lists the phases to not load from SGTE
open(my $xh,"<","../exfromsgte.in");
@exphase=<$xh>;
chomp(@exphase);
print "SGTE Phases excluded: ",join(' ',@exphase),"\n";
}
my $volden=1; #to nomalize all properties per cell
my $issgtephase=0; #flag
if (system(join('',"grep -q \'^PHASE ",$phase," \' ",$atatdir,"/data/sgte_phases.tdb"))==0) { # check if phase is in SGTE database
$volden=$totsite; # for sgte phase there is always one atom per cell (even for hcp...)
$issgtephase=1;
}
my @endfunc; # will contain gibb free energy functions for end members in tdb format
my %elemfunc; # will contain free energy reference shifter (to make elemental free energy match sgte), hashed by REF_phase_element
my @xsparam; # will contain excess quantities of mixing terms
my @ratiocoef=(1,-2,1.5,-0.5);
my $sroterm="";
my $coord=0;
{
if (open(my $zh,"<",$atatdir . "/data/sqsdb/" . (uc $phase) . "/coord.in")) {
$coord=<$zh>;
}
else {
print "No SRO included.\n";
}
}
my %paramall;
{
open(my $pf,"<","allparam.out"); # read in fitted numerical parameters
foreach my $termord (@tdbparam) { # loop over terms
my $allp=<$pf>; #read in all parameters for that term
chomp $allp;
$paramall{$termord}=$allp;
}
}
foreach my $termord (@tdbparam) { # loop over terms
my $term=$termord; # get term
$term =~ s/;.*//g; # without the level
my $ord=$termord; # get level
$ord =~ s/.*;//g; # without the term
my $allp=$paramall{$termord}; #fetch all parameters for that term
my @p=split /\s+/, $allp; # put white space-separated parameters in array p
my $pdb=""; # will contain the function corresponding to that term
my $needtoaddord2=0;
for (my $i=0; $i<@propname; $i++) { # loop over properties (energy, entropy)
my $coef=$p[$i]*$propnum[$i]/$volden; # convert units and normalize per cell
$pdb=$pdb . ($coef !=0 || $i==0 ? ($coef >= 0 ? "+" : "") . ($coef) . $propsuf[$i] : ""); # make function expression +coef*suffix and append to pdb
if ( $cmdline{"-sro"}>0 && $propsuf[$i] eq "" && $coord != 0 && ($term =~ s/,/,/g)==1 ) {
if ($ord==0) {
my $Gab;
{
my @coef2=split /\s+/,$paramall{"$term;2"};
$coef2[0]*=$propnum[$i]/$volden;
$Gab=abs($coef-$coef2[0])*$cmdline{"-sro"};
}
my $c=$Gab/($coord*$idealgas);
my @tmpsroterm;
for (my $s=0; $s<=2; $s+=2) {
for (my $n=0; $n<4; $n++) {
$tmpsroterm[$s] = $tmpsroterm[$s] . "+" . (($s-1)*$ratiocoef[$n]*$Gab/2.0) . ($n==0 ? "" : "*EXP(" . (-$n*$c) . "*T**(-1))");
}
$tmpsroterm[$s] =~ s/\+-/-/g;
}
$pdb = $pdb . $tmpsroterm[0];
$sroterm=$tmpsroterm[2];
if ( ("$term;2" ~~ @tdbparam)==0 ) {
$needtoaddord2=1;
}
}
elsif ($ord==2) {
$pdb = $pdb . $sroterm;
}
}
}
if ($term =~ /,/ || $term =~ /\*/) { # if it is an excess term
push @xsparam,join('',"PARAMETER L(",uc $phase,",",uc $termord,") ",$Tmin," ",$pdb," ; ",$Tmax," N !"); #create parameter tdb command
if ($needtoaddord2) {
push @xsparam,join('',"PARAMETER L(",uc $phase,",",uc $term,";2) ",$Tmin," ",$sroterm," ; ",$Tmax," N !"); #create parameter tdb command
}
}
else { # if it is an end member term (no coupling between atoms)
my @elem=split /:/, $term; #parse species (elements) from : separated list
my %conc; # will contain overall concentration of each species (not site-specific)
for (my $n=0; $n<@sites; $n++) { # loop over sites
$conc{$elem[$n]}+=$mult{$sites[$n]}/$volden; # add to concentration (accounting for multiplicity of site and per cell normalization)
}
my $fname=join('',"ABIN_",$phase,"_",$term); # make function name for ab initio end member data: ABIN_phase_elementlist
$fname =~ s/://g; # remove : from list
push @endfunc,join('',"FUNCTION ",uc $fname," ",$Tmin," ",$pdb," ; ",$Tmax," N !"); #create function tdb command
my $line=join('',"PARAMETER G(",uc $phase,",",uc $termord,") ",$Tmin," ",uc $fname); # will contain parameter tdb command for current term
# now shift with reference states (so that elemental free energies match sgte)
foreach my $e (sort keys %conc) { # loop over species (elements) involved in this term
my $elemref=join('',"REF_",uc $phase,"_",uc $e); # create label of reference free energy shifter for element
my $curstabphase; # will contain the stable phase used as a shifter
my $elemsgte=join('',"SGTE_",uc $phase,"_",uc $e); # create label of sgte free energy for element
if (($issgtephase==1 || $cmdline{"-os"}) && ! grep {/^$elemsgte$/} @exphase) { # if (current phase in sgte or -os specified) and not excluded by user
$curstabphase=$phase; # use current phase as shifter
}
else { # otherwise
if ($issgtephase==1) {print "Exclude SGTE data for: ",$elemsgte,"\n";} # notify sgte not used for current phase
$curstabphase=$stablephase{uc $e}; # use instead the stable phase of this element as reference
}
my $elemabin=join('',"ABIN_",uc $curstabphase,"_",uc $e); # make label for ab initio free energy for reference phase
$elemsgte=join('',"SGTE_",uc $curstabphase,"_",uc $e); # make label for sgte free energy for reference phase
$line=join('',$line,"-",$conc{$e},"*",$elemref); # subtract reference free energy shifter for current element weighted by concentration
$elemfunc{$elemref}=join('',"FUNCTION ",$elemref, " ",$Tmin," ",$elemabin," - ",$elemsgte," ; ",$Tmax," N !"); # add this element reference free energy shifter to list
}
$line=join('',$line," ; ",$Tmax," N !"); # finish tdb command
push @xsparam,($line); # record current line to tdb file
}
}
open(my $tf,">","$phase.tdb"); # now write tdb file for current phase
print $tf "PHASE ",$phase," % ",scalar(keys %mult)," "; # write PHASE tdb command with number of sublattice
foreach my $s (@sites) {
print $tf $mult{$s}/$volden," " # output multiplicity per cell of each sublattice
}
print $tf " !\n"; # finish tdb command
print $tf "CONSTITUENT ",$phase," :"; # write CONSTITUENT tdb command for current phase
foreach my $s (@sites) { # look over sublattices
my $frst=1;
foreach my $at (@{$spec{$s}}) { # loop of species (elements)
if (!$frst) {print $tf ",";} # comma separated, except before first element
$frst=0;
print $tf uc $at; # list atom
}
print $tf ":"; # sublattice are : separated
}
print $tf "!\n"; # finish tdb command
print $tf join("\n",@endfunc),"\n"; # output all ab initio free energies for end members
print $tf join("\n",values %elemfunc),"\n"; # output all end member reference free energy shifters
print $tf join("\n",@xsparam),"\n"; # output all excess free energy terms
}
}
# subroutine to combine multiple phases into a single tdb file
sub comb_phases() {
my $mysed=" | grep -v -e '^#' -e '^\$' | sed -e 's/<NL>/\\n/g' -e 's/ELEM_//g' "; # filter to create valid tdb file (eliminate comments expand newlines remove ELEM_)
open(my $eh,"grep -H '^CONSTITUENT ' */*.tdb | sed -e 's/^[^:]*:[^:]*://g' -e 's/: *!//g' -e 's/[:,]/\\n/g' | sort -u |"); #list all unique elements found in files */*.tdb
my @allelem=<$eh>; # slurp all lines into array of element names
chomp @allelem;
@allelem = map uc,@allelem; # cleanup: no newline and uppercase
print "Found elements ",join(' ',@allelem),"\n"; # report elements found
my $togrepe="grep " . join('',(map {"-e \'ELEM_" . $_ . " \' "} @allelem)). $atatdir . "/data/sgte_elements.tdb"; # command to fetch elemental data from sgte database
system($togrepe . "$mysed >| tmp.tdb"); # copy to our tdb file after cleanup filter
open(my $ph,"cat */*.tdb | grep SGTE_ | sed 's/.*SGTE_\\([^ ]*_\\)\\([^ _]*\\) .*/\\1ELEM_\\2/g' | sort -u |"); # list all unique phase needing sgte data in */*.tdb
my @allsgtephase=<$ph>; # slurp all phases into array
chomp @allsgtephase;
@allsgtephase = map uc,@allsgtephase; # cleanup: no newline and uppercase
my $phaselist = join(' ',@allsgtephase); #make phase list to report to user
$phaselist =~ s/ELEM_//g;
print "Phases found in SGTE database: ",$phaselist,"\n";
my $togrepp="grep " . join('',(map {"-e \'" . $_ . " \' "} @allsgtephase)). $atatdir . "/data/sgte_freee.tdb"; # fetch sgte free energy functions for each phase
system($togrepp . $mysed . " >> tmp.tdb"); # append to tdb file
print "Using:\n";
system("ls -1 */*.tdb"); # report which files are being used
system("cat */*.tdb >> tmp.tdb"); # catenate all */*.tdb available files
$togrepp =~ s/_ELEM_([^ ]*) /,ELEM_\1/g;
system($togrepp . $mysed . " >> tmp.tdb"); # fetch additional sgte data if any (for magnetic and gas phases)
my $tdbfile=join('',join('_',@allelem),".tdb"); # make final tdb file name
if (-e "subref.sed") { # if user-defined substitution rules exist
system(join('',"sed -f subref.sed tmp.tdb >| ",$tdbfile)); # apply them to file
}
else {
system(join('',"mv tmp.tdb ",$tdbfile)); # otherwise just move
}
if ($cmdline{"-oc"}) { # make open calphad compliant if -oc specified
system("mv " . $tdbfile . " tmp.tdb");
system(q{grep FUNCTION tmp.tdb | awk 'BEGIN {n=0;} {printf "s/%s/FUNC%04i/g\n",$2,n; n++}' > func.sed});
system(q{cat tmp.tdb | awk '/^ELEMENT/ {printf " ELEMENT %-2s %-24s %f %f %f !\n",$2,$3,$4,$5,$6;} ! /^ELEMENT/ {print $0;}' | sed -e 's/^PHASE/ PHASE/g' -e 's/^FUNCTION/ FUNCTION/g' -e 's/^CONSTITUENT/ CONSTITUENT/g' -e 's/^PARAMETER\(.*\)N !/ PARAMETER\1N REFDUM !/g' | sed -f func.sed |
awk '{maxlen=77;
line =$0;
while (length(line)>maxlen) {
cut=maxlen;
while (1) {
while (index(" -+",substr(line,cut,1)) == 0) {cut--;}
if(substr(line,cut-1,1)!="E") break;
cut--;
}
print substr(line,1,cut-1);
line=" " substr(line,cut);
}
print line;
}' > } . $tdbfile);
system("rm tmp.tdb func.sed");
}
}
# help
sub print_help() {
my $version;
{
open(my $vh,"grep define.MAPS_VERSION $atatdir/src/version.hh |");
$version = <$vh>;
chomp $version;
$version =~ s/.*\"(.*)\".*/\1/g;
}
print "sqs2tdb version " . $version;
print qq{ by Axel van de Walle and with contributions to the SQS database
from Ruoshi Sun, Qijun Hong and Sara Kadkhodaei.
Usage: sqs2tdb [options] where the available options are:
-h : More help.
-cp : Copy SQS from the database to the current directory. Must specify:
-lv=[level]
level indicates how fine of composition mesh to use (0: only end members, 1: mid points, etc.)
-l=[lattice]
Available lattices
};
system("ls ${atatdir}/data/sqsdb/ | sed 's/^/ /g'");
print qq{
Optional: -sp=Element,Element,... to indicate which elements to consider
(otherwise read from species.in if it exists).
-fit : Fit solution model from SQS energies (to be run within the desired lattice directory).
-tdb : Combine solution models from one or multiple lattices into a single .tdb file.
add the -oc (Open Calphad) option to generate a more portable tdb file.
-mk : Generate input file for SQS generation with mcsqs. (Usually not needed - only for experts.)
To be run in the SQS database directory:
};
print " ",join("",$atatdir,"/","data/sqsdb/","[lattice name]\n");
print qq{Note that -h,-mk,-cp,-fit,-tdb are mutually exclusive.
};
print qq{
For more information see user guide published in:
A. van de Walle, R. Sun, Q.-J. Hong, and S. Kadkhodaei.
Software tools for high-throughput calphad from first-principles data.
Calphad, 58:70, 2017.
https://doi.org/10.1016/j.calphad.2017.05.005
(Also, this paper should be cited in any publication resulting from this code's use.)
};
if ($cmdline{"-h"}) {
print q{
EXAMPLES
-> A simple session of sqs2tdb could be:
cp ~/atat/example/vasp.wrap .
emacs vasp.wrap #edit as needed
echo Al,Ni > species.in
sqs2tdb -cp -l=FCC_A1 -lv=1
sqs2tdb -cp -l=FCC_A1 -lv=1
cd FCC_A1
foreachfile wait pwd \; runstruct_vasp
sqs2tdb -fit
Edit the file terms.in to read
1,0
2,0
sqs2tdb -fit
cd ..
(repeat the above for other lattices if necessary)
sqs2tdb -tdb
The file AL_NI.tdb would contain a valid .tdb file.
-> In a more advanced example, the runstruct_vasp command could be replaced
by a job script submission containing the runstruct_vasp command.
-> When there are lattice instabilities, the runstruct_vasp could be replaced by
robustrelax_vasp -mk #this is to be run only once (see robustrelax_vasp -h for instructions)
robustrelax_vasp -id
-> If phonons are to be included (for end members) one could do:
foreachfile endmem pwd \; fitfc -si=str_relax.out -ernn=4 -ns=1 -nrr
foreachfile -d 3 wait \; runstruct_vasp -lu -w vaspf.wrap
where vaspf.wrap contains parameters for a static run to calculate forces.
foreachfile endmem pwd \; fitfc -si=str_relax.out -f -frnn=2
foreachfile endmem pwd \; robustrelax_vasp -vib
sqs2tdb -fit
OPTIONAL input files (to placed in the topmost directory):
exfromsgte.in : specify which phases should NOT be copied from SGTE database
(by default all phases for each relevant element are included,
but you may want to use ab initio data for virtual phase)
subref.sed : substitution rule (sed command to be applied to final tdb file)
OUTPUT FILES
In addition to the *.tdb files, there is some diagnostic info available in the files fit_[property].out where property could be
energy, svib_ht, etc.
ADVANCED FEATURES
In each sqs subdirectory created, there may be a file called link that contains the path to
another directory where energy data can be found. This is useful if another structure is
known be equivalent by symmetry. This file is created by default for equivalence within a
given structure type can even find symmetries across structure types.
You can also create the file yourself and the code will follow your link.
There may also be a file called bump which indicates that a structure must be moved up in energy by the
amount specified by the -bv option (or 5e-3 by default) to avoid degeneracy with an equivalent higher symmetry structure.
This file is created by default (during the -cp step) when the code detects higher symmetry (e.g. a GAMMA_L12 phase with all sites occupied
with the same type of atom is just FCC). You can delete this file if the higher-symmetry structure is not in your
set of structures. This file is read during the -fit step.
In summary, if you see a bump file, ask yourself if you have that structure elsewhere and put an appropriate link file.
If you don't have that structure elsewhere, you can delete the bump file.
The code will still work and give the right answer if you don't do this, but it will spend more computer time.
During the -fit step, one can specify the -sro option to include a low-order CVM approximation to the short range order.
The default lower and upper temperature limits of 298.15K and 10000K can be changed with the -Tl=... and -Tu=... options, respectively.
};
}
}
#Main
#parse one of the 5 main modes in which the code is operating:
if (@ARGV == 0 || $cmdline{"-h"}) {
print_help();
}
elsif ($cmdline{"-mk"}) {
makemanysqs();
print "Directories setup\n";
}
elsif ($cmdline{"-cp"}) {
copysqs();
print "Copied SQSs\n";
}
elsif ($cmdline{"-fit"}) {
fittdb();
print "Fit completed\n";
}
elsif ($cmdline{"-tdb"}) {
comb_phases();
print "TDB file created\n";
}
else {
print "Please specify at least one of -mk -cp -fit -tdb or -h for help\n";
}