from __future__ import print_function
------------------------------------------------
How to reverse a string in Python?
------------------------------------------------
# Reverse a string with an extended slice: a step of -1 walks it backwards.
data = "aha"
print(data)
revdata = data[::-1]
print(revdata)
------------------------------------------------
Regex to parse the value (starting with 'hdfs')
for the keyword 'Location'
------------------------------------------------
def Get_location(data):
    """Return the 'hdfs:...' value that follows the keyword 'Location'.

    The text is scanned for the first line that contains 'Location'
    followed, on that same line, by 'hdfs:'. Everything from that first
    'hdfs:' up to the end of the line is returned.

    Args:
        data: Text to scan; may span several lines.

    Returns:
        The matched string starting with 'hdfs:', or None when no line
        contains 'Location' followed by 'hdfs:'.
    """
    import re  # local import keeps the snippet self-contained

    # re.search (not re.match) lets the 'Location' line sit anywhere in a
    # multi-line input. The lazy '.*?' stops at the FIRST 'hdfs:' after the
    # keyword, so a path that itself contains 'hdfs:' is not truncated.
    found = re.search(r'Location.*?(hdfs:.*)', data)
    if found is None:
        return None
    return found.group(1)
# Demo: print the value Get_location extracts from a sample 'Location' line.
print(Get_location("Location hdfs://someserver/home/someuser/"))
------------------------------------------------
Split using Regex
------------------------------------------------
import re
# Split the text wherever a space or a '#' character occurs.
data = "This is a#data"
splitter = re.compile("[ #]")
result = splitter.split(data)
print(result)
------------------------------------------------
How to print Directory Content?
------------------------------------------------
from __future__ import print_function
def Print_dir(path):
    """Recursively print every non-directory entry found under *path*.

    Each entry is printed as ``absPath ->  <joined path>``, where the joined
    path is ``os.path.join(path, name)`` (so it is relative whenever *path*
    is relative). Directories are descended into rather than printed.

    Args:
        path: Directory whose tree should be listed.

    Raises:
        OSError: Propagated from ``os.listdir`` when a directory cannot be
            read.
    """
    import os  # local import keeps the snippet self-contained

    for child in os.listdir(path):
        entry_path = os.path.join(path, child)
        # os.path.isdir follows symlinks, so a link pointing back at a parent
        # directory would be walked over and over. Symlinked directories are
        # therefore printed as leaf entries instead of being descended into.
        if os.path.isdir(entry_path) and not os.path.islink(entry_path):
            Print_dir(entry_path)
        else:
            print("absPath -> ", entry_path)
# Demo: list the contents of the current working directory.
Print_dir("./")
------------------------------------------------
List Operations in Python
(There is no Array in Python or...)
------------------------------------------------
# Start from an empty list.
data = []

# Assign the values 1, 2, 3 and 4. list() materialises the range so that a
# real list is stored (and printed) on Python 3 as well as on Python 2.
data = list(range(1, 5))
print(data)

# A list may hold heterogeneous types.
data = [1, "hello", 2.3]
print(data)

# Sort a list of numbers; sorted() returns a new list and leaves data as is.
data = [5, 3, 2, 9]
result = sorted(data)
print(result)

# Sort a list of strings.
data = ['d', 'a', 'x', 'w', 'z']
result = sorted(data)
print(result)
------------------------------------------------
'List' comprehension example
------------------------------------------------
# Even numbers from 1 through 10.
even = [n for n in range(1, 11) if not n % 2]
print(even)

# A little more complex: two nested loops. The outer value is emitted once
# for every pass of the inner loop.
data = [outer for outer in range(1, 3) for _ in range(3, 5)]
print(data)  # [1, 1, 2, 2]

# Product of every (outer, inner) pair, in loop order.
data = [outer * inner for outer in range(1, 3) for inner in range(3, 5)]
print(data)  # [3, 4, 6, 8]
------------------------------------------------
'Dictionary' example
------------------------------------------------
# Build the dictionary one key at a time, starting from an empty dict.
data = dict()
data.update({'a': "val1"})
data.update({'b': "val2"})
print(data)
------------------------------------------------
Load yaml file
------------------------------------------------
import yaml
import os
from __future__ import print_function
def Load_yaml(yamlfile):
    """Parse the YAML file at *yamlfile* and print its content.

    When the path does not exist, a "not available" message naming the path
    is printed instead. Nothing is returned in either case.

    Args:
        yamlfile: Path of the YAML file to load.
    """
    if not os.path.exists(yamlfile):
        print("Logging config file not available : ", yamlfile)
        return

    with open(yamlfile, 'rt') as fin:
        # NOTE: yaml.safe_load is used on purpose. yaml.load() without an
        # explicit Loader can construct arbitrary Python objects from the
        # input and is rejected by PyYAML 6+, where Loader is a required
        # argument. safe_load only builds plain data (dict, list, str, ...).
        content = yaml.safe_load(fin)
    print("content -> ", content)
# Demo: run Load_yaml on test.yaml, resolved against the current working directory.
Load_yaml("test.yaml")
content -> key1:val1 key2:val2
Python Interview questions
Counters in Hadoop...
- Task Counters #248
- MAP_INPUT_RECORDS
- No of records read by all Map Tasks
- MAP_OUTPUT_RECORDS
- No of records produced by all Map Tasks
- PHYSICAL_MEMORY_BYTES
- Amount of Physical memory used on a Particular Task attempt
- Filesystem counters
- BYTES_READ
- No of bytes read by the filesystem by map & reduce tasks
- BYTES_WRITTEN
- No of bytes written to filesystem by Map & Reduce tasks
- FileInputFormat Counters
- BYTES_READ
- No of bytes read by Map tasks via FileInputFormat
- FileOutputFormat Counters
- BYTES_WRITTEN
- No of bytes written by Map or Reduce task via the FileOutputFormat
- Job Counters
- Are maintained by an Application Master #250
- Maintains Job statistics
- TOTAL_LAUNCHED_MAPS
- No of Map tasks launched (including failed ones and those started speculatively)
Subscribe to:
Posts (Atom)