RAW
===

RAW to PED-MAP
------------------------


.. code-block:: python

   def converter(x):
      """Recode one additive-coded SNP column into PED genotype strings.

      Parameters
      ----------
      x : pandas.Series
         One SNP column. Its name must end with the reference allele
         (format ``SNP_REFAllele``); its values are the codes 0, 1 and 2,
         with missing calls stored as NaN.

      Returns
      -------
      pandas.Series
         The same column with 0/1/2 replaced by allele pairs and missing
         calls replaced by ``'0 0'``. Columns whose name does not end in
         G, C, T or A only get the missing-value fill.
      """
      # Allele pairs for the codes 0, 1 and 2, keyed by the reference allele.
      # The other allele is always taken as the pair G<->C or A<->T.
      genotypes = {
         "G": ("G G", "G C", "C C"),
         "C": ("C C", "C G", "G G"),
         "T": ("T T", "T A", "A A"),
         "A": ("A A", "A T", "T T"),
      }

      # Fill "NA" with '0 0'
      x = x.fillna('0 0')

      # The last character of the column name is the reference allele.
      ref = x.name[-1]

      # Encoding of PED file: numeric replace also matches float codes
      # (0.0, 1.0, 2.0), which is how a column containing NaN is stored.
      for code, genotype in enumerate(genotypes.get(ref, ())):
         x = x.replace(code, genotype)

      return x
      
    
   # Modules used by the conversion steps below.
   import os
   import pandas as pd

   # `input_file` must hold the path of the RAW file to convert.

   # Extract SNPs names, which are in this format: SNP_REFAllele.
   # They are on the first line (header) of the RAW file; keep a copy in snps.txt.
   with open(input_file) as raw_handle:
      header_line = raw_handle.readline()
   with open("snps.txt", "w") as snps_handle:
      snps_handle.write(header_line)

   # Columns 6 onwards of the header are the SNP names; after the transpose
   # data[0] holds one SNP name per row.
   data = pd.read_csv("snps.txt", index_col=None, header=None, sep=r"\s+").loc[:, 6:].T
   snp_columns = list(data[0].values)

   # Make a directory to store chunks
   # Chunking is required because RAW file is usually large in size
   # NOTE: this snippet does not split the RAW file itself. The chunks must be
   # placed in "Chunks" before the encoding loop runs, and the first chunk in
   # sorted order must be the one that starts with the header line.
   if not os.path.isdir("Chunks"):
      os.mkdir("Chunks")

   # Make ".MAP" file
   # RAW file does not contain the position and chromosome number information so, all other columns except 2nd are 0.
   maps = pd.DataFrame()
   maps[0] = [0]*len(data)
   maps[1] = data[0].values
   maps[2] = [0]*len(data)
   maps[3] = [0]*len(data)
   maps.to_csv("final.map", sep="\t", header=False, index=False)

   # Raw chunks are the files without ".txt" in their name; encoded copies get ".txt" appended.
   raw_chunks = sorted(name for name in os.listdir("Chunks") if ".txt" not in name)

   # Encode each chunk which is same as that of ped file.
   column_names = None
   encoded_paths = []
   for name in raw_chunks:
      chunk_path = os.path.join("Chunks", name)
      if column_names is None:
         # Only the first chunk carries the header line.
         chunk = pd.read_csv(chunk_path, sep=r"\s+")
         column_names = list(chunk.columns.values)
      else:
         # Later chunks have no header, so reuse the column names of the first one.
         chunk = pd.read_csv(chunk_path, sep=r"\s+", names=column_names)
      chunk[snp_columns] = chunk[snp_columns].apply(converter)
      encoded_path = chunk_path + ".txt"
      chunk.to_csv(encoded_path, sep="\t", index=False, header=False)
      encoded_paths.append(encoded_path)
      del chunk

   # Merge all chunks, in the same order in which they were encoded.
   encoded_chunks = [
      pd.read_csv(path, sep="\t", header=None, index_col=None, low_memory=False)
      for path in encoded_paths
   ]
   if encoded_chunks:
      final = pd.concat(encoded_chunks, ignore_index=True)
   else:
      final = pd.DataFrame()
   final.to_csv("final.ped", sep="\t", index=False, header=False)

   # After this step you will have two files: final.ped and final.map
   
   

RAW to VCF
------------------------

.. code-block:: console

   # Step 1. Convert the RAW file to PED-MAP. See RAWtoPED-MAP.
   # It generates two files: final.ped and final.map
   # Step 2. Convert the PED-MAP files to VCF.
   ./plink --file final --recode vcf --out output_file
 



RAW to BED-BIM-FAM
------------------------


.. code-block:: console

   # Step 1. Convert the RAW file to PED-MAP. See RAWtoPED-MAP.
   # It generates two files: final.ped and final.map
   # Step 2. Convert the PED-MAP files to BED-BIM-FAM.
   ./plink --file final  --make-bed --out output_file
 



RAW to GEN-SAMPLE
------------------------


.. code-block:: console

   # Step 1. Convert the RAW file to PED-MAP. See RAWtoPED-MAP.
   # It generates two files: final.ped and final.map
   # Step 2. Convert the PED-MAP files to GEN-SAMPLE.
   ./plink --file final  --export oxford --out output_file
 
 



RAW to 23andme
------------------------


.. code-block:: console

   # Step 1. Convert the RAW file to PED-MAP. See RAWtoPED-MAP.
   # It generates two files: final.ped and final.map

   # Step 2. Convert PED-MAP to BED-BIM-FAM  --> PED-MAPtoBED-BIM-FAM()
   ./plink --file final --make-bed --out output_file

   # Step 3. Convert BED-BIM-FAM to 23andme --> BED-BIM-FAMto23andme()
   # See BED-BIM-FAMto23andme
   
 



RAW to AncestryDNA
------------------------


.. code-block:: console

   # Step 1. Convert the RAW file to PED-MAP. See RAWtoPED-MAP.
   # It generates two files: final.ped and final.map

   # Step 2. Convert PED-MAP to BED-BIM-FAM  --> PED-MAPtoBED-BIM-FAM()
   ./plink --file final --make-bed --out output_file

   # Step 3. Convert BED-BIM-FAM to 23andme --> BED-BIM-FAMto23andme()
   # See BED-BIM-FAMto23andme

   # Step 4. Convert 23andme to AncestryDNA --> 23andmetoAncestryDNA()
   # See 23andmetoAncestryDNA
 



RAW to HAPS-LEGEND-SAMPLE
--------------------------


.. code-block:: console

   #Step 1. Convert Raw file to PED-MAP. 
   # It generates two files: final.ped and final.map
   #Step 2. Convert PED-MAP file to VCF.  
   ./plink --file  final --recode vcf --out output_file
   #Step 3. Convert VCF file to HAPS-LEGEND-SAMPLE.  
   bcftools convert output_file.vcf  -h  output_file2