VCF
=====


VCF to PED-MAP
------------------------


.. code-block:: console

   # Convert a VCF file to PLINK text format (output_file.ped and output_file.map).
   # Include extension of file. For example, input_file.vcf.
   # --out takes the output prefix only, without an extension.
   ./plink --vcf input_file.vcf --recode --out output_file
   

VCF to RAW
------------------------

.. code-block:: console

   # Convert a VCF file to an additive-coded genotype table (output_file.raw).
   # Include extension of file. For example, input_file.vcf.
   # --recodeA is the PLINK 1.07 spelling; PLINK 1.9 documents the same option as "--recode A".
   ./plink --vcf input_file.vcf --recodeA --out output_file
 


VCF to BED-BIM-FAM
------------------------


.. code-block:: console

   # Convert a VCF file to a PLINK binary fileset
   # (output_file.bed, output_file.bim and output_file.fam).
   # Include extension of file. For example, input_file.vcf.
   ./plink --vcf input_file.vcf --make-bed --out output_file
 



VCF to GEN-SAMPLE
------------------------


.. code-block:: console

   # Convert a VCF file to Oxford format (output_file.gen and output_file.sample).
   # Include extension of file. For example, input_file.vcf.
   # NOTE: --export is PLINK 2.0 syntax. With a PLINK 1.9 binary use
   # "--recode oxford" instead of "--export oxford".
   ./plink --vcf input_file.vcf  --export oxford --out output_file
   


VCF to 23andme
------------------------


.. code-block:: python

   # Requires "import os" plus the plink and bcftools executables.
   # input_file  : path of the VCF file WITHOUT its ".vcf" extension.
   # output_file : prefix of the PLINK binary fileset created below.

   # Make a directory in which 23andme files will be saved.
   os.makedirs("23andme", exist_ok=True)

   # Convert VCF to BED_BIM_FAM --> VCFtoBED_BIM_FAM()
   # It will generate three files: output_file.bed, output_file.fam and output_file.bim
   os.system("./plink --vcf " + input_file + ".vcf --make-bed --out " + output_file)

   # Extract the id of each person (one sample id per line).
   # The extension is appended here as well, so both tools read the same file.
   os.system("bcftools query -l " + input_file + ".vcf > ./23andme/temp_samples.txt")

   # Convert every person separately: one 23andme file per sample id.
   with open("./23andme/temp_samples.txt", "r") as sample_ids:
       for line in sample_ids:
           sample = line.strip("\n")
           if not sample:
               # Ignore blank lines in the sample list.
               continue

           # PLINK's default VCF import splits a sample id "FID_IID" on "_";
           # an id without "_" is used as both family id and individual id.
           parts = sample.split("_")
           if len(parts) > 1:
               fid, iid = parts[0], parts[1]
           else:
               fid, iid = sample, sample

           # Write the person's family id and individual id to the --keep file.
           with open("./23andme/temp.txt", "w") as keep_file:
               keep_file.write(fid + "  " + iid)

           # Extract the person from the BED/BIM/FAM fileset written above
           # (same prefix as --out) and convert it to 23andme.
           os.system("./plink --bfile " + output_file + " --keep ./23andme/temp.txt --recode 23 --snps-only --out ./23andme/" + sample)
   

VCF to AncestryDNA
------------------------


.. code-block:: python

   # Requires "import os" and "import pandas as pd".
   # Input file should not include extension
   # Convert VCF to 23andme first --> VCFto23andme(input_file)

   # Convert 23andme to AncestryDNA --> 23andmetoAncestryDNA()
   # Make a directory in which AncestryDNA files will be saved.
   os.makedirs("AncestryDNA", exist_ok=True)

   # Read the 23andme files one-by-one
   for files in os.listdir("./23andme"):
       # 23andme files are in .txt file format; the temp*.txt helper files
       # written during the VCF to 23andme step are skipped.
       if ".txt" not in files or "temp" in files:
           continue

       source = os.path.join("./23andme", files)

       # Check size: an empty file has nothing to convert.
       if os.stat(source).st_size == 0:
           continue

       # Skip the first 8 lines so that the "# rsid ..." row becomes the header.
       data = pd.read_csv(source, sep="\t", skiprows=8)
       new = pd.DataFrame()

       new['Rsid'] = data['# rsid'].values
       new['Chromosome'] = data['chromosome'].values
       new['position'] = data['position'].values

       # Split genotype into allele1 and allele2
       new['allele1'] = data['genotype'].str[0]
       new['allele2'] = data['genotype'].str[1]

       # Change chromosome numbers
       new['Chromosome'] = new['Chromosome'].replace({23: 'X', 24: 'Y', 25: 'XY', 26: 'MT'})

       # index=False keeps the DataFrame row index out of the file; otherwise
       # it is written as an extra unnamed first column.
       new.to_csv(os.path.join("./AncestryDNA", files), sep="\t", index=False)


VCF to HAPS-LEGEND-SAMPLE
--------------------------


.. code-block:: console

   # Include extension of file. For example, input_file.vcf.
   # -h (--haplegendsample) writes the hap, legend and sample files,
   # using output_file as the common prefix.
   bcftools convert input_file.vcf  -h  output_file